{ "best_metric": null, "best_model_checkpoint": null, "epoch": 2.0, "eval_steps": 1000, "global_step": 30114, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 6.641429235571495e-05, "grad_norm": 640.6401977539062, "learning_rate": 2.2123893805309732e-09, "loss": 116.0, "step": 1 }, { "epoch": 0.0001328285847114299, "grad_norm": 356.0709533691406, "learning_rate": 4.4247787610619464e-09, "loss": 65.0625, "step": 2 }, { "epoch": 0.00019924287706714485, "grad_norm": 596.5497436523438, "learning_rate": 6.63716814159292e-09, "loss": 99.125, "step": 3 }, { "epoch": 0.0002656571694228598, "grad_norm": 580.257080078125, "learning_rate": 8.849557522123893e-09, "loss": 85.5, "step": 4 }, { "epoch": 0.00033207146177857477, "grad_norm": 615.1389770507812, "learning_rate": 1.1061946902654867e-08, "loss": 78.25, "step": 5 }, { "epoch": 0.0003984857541342897, "grad_norm": 584.73828125, "learning_rate": 1.327433628318584e-08, "loss": 92.75, "step": 6 }, { "epoch": 0.00046490004649000463, "grad_norm": 699.5057983398438, "learning_rate": 1.5486725663716813e-08, "loss": 91.125, "step": 7 }, { "epoch": 0.0005313143388457196, "grad_norm": 1006.1519165039062, "learning_rate": 1.7699115044247786e-08, "loss": 89.625, "step": 8 }, { "epoch": 0.0005977286312014345, "grad_norm": 793.4696655273438, "learning_rate": 1.991150442477876e-08, "loss": 65.5, "step": 9 }, { "epoch": 0.0006641429235571495, "grad_norm": 673.3154296875, "learning_rate": 2.2123893805309735e-08, "loss": 110.0, "step": 10 }, { "epoch": 0.0007305572159128645, "grad_norm": 551.7874145507812, "learning_rate": 2.4336283185840707e-08, "loss": 87.75, "step": 11 }, { "epoch": 0.0007969715082685794, "grad_norm": 575.9374389648438, "learning_rate": 2.654867256637168e-08, "loss": 103.0, "step": 12 }, { "epoch": 0.0008633858006242943, "grad_norm": 596.6921997070312, "learning_rate": 2.8761061946902653e-08, "loss": 78.5, "step": 13 }, { "epoch": 0.0009298000929800093, "grad_norm": 607.3186645507812, "learning_rate": 3.0973451327433626e-08, "loss": 99.875, "step": 14 }, { "epoch": 0.0009962143853357243, "grad_norm": 613.3646240234375, "learning_rate": 3.31858407079646e-08, "loss": 92.875, "step": 15 }, { "epoch": 0.0010626286776914391, "grad_norm": 671.8380737304688, "learning_rate": 3.539823008849557e-08, "loss": 83.9375, "step": 16 }, { "epoch": 0.0011290429700471542, "grad_norm": 669.3338012695312, "learning_rate": 3.761061946902655e-08, "loss": 92.125, "step": 17 }, { "epoch": 0.001195457262402869, "grad_norm": 720.828857421875, "learning_rate": 3.982300884955752e-08, "loss": 79.25, "step": 18 }, { "epoch": 0.001261871554758584, "grad_norm": 570.262451171875, "learning_rate": 4.20353982300885e-08, "loss": 78.0625, "step": 19 }, { "epoch": 0.001328285847114299, "grad_norm": 516.40380859375, "learning_rate": 4.424778761061947e-08, "loss": 84.8125, "step": 20 }, { "epoch": 0.001394700139470014, "grad_norm": 651.96728515625, "learning_rate": 4.6460176991150445e-08, "loss": 84.75, "step": 21 }, { "epoch": 0.001461114431825729, "grad_norm": 633.5459594726562, "learning_rate": 4.8672566371681415e-08, "loss": 89.5, "step": 22 }, { "epoch": 0.0015275287241814438, "grad_norm": 477.36834716796875, "learning_rate": 5.088495575221239e-08, "loss": 87.5625, "step": 23 }, { "epoch": 0.0015939430165371588, "grad_norm": 684.845703125, "learning_rate": 5.309734513274336e-08, "loss": 96.875, "step": 24 }, { "epoch": 0.0016603573088928738, "grad_norm": 547.5321044921875, "learning_rate": 5.5309734513274336e-08, "loss": 75.3125, "step": 25 }, { "epoch": 0.0017267716012485887, "grad_norm": 562.6327514648438, "learning_rate": 5.7522123893805306e-08, "loss": 100.5, "step": 26 }, { "epoch": 0.0017931858936043037, "grad_norm": 805.2745361328125, "learning_rate": 5.973451327433628e-08, "loss": 81.875, "step": 27 }, { "epoch": 0.0018596001859600185, "grad_norm": 560.2885131835938, "learning_rate": 6.194690265486725e-08, "loss": 70.4375, "step": 28 }, { "epoch": 0.0019260144783157336, "grad_norm": 779.9680786132812, "learning_rate": 6.415929203539823e-08, "loss": 93.625, "step": 29 }, { "epoch": 0.0019924287706714486, "grad_norm": 759.3457641601562, "learning_rate": 6.63716814159292e-08, "loss": 79.375, "step": 30 }, { "epoch": 0.0020588430630271637, "grad_norm": 788.9654541015625, "learning_rate": 6.858407079646017e-08, "loss": 99.0, "step": 31 }, { "epoch": 0.0021252573553828783, "grad_norm": 530.9995727539062, "learning_rate": 7.079646017699114e-08, "loss": 89.0625, "step": 32 }, { "epoch": 0.0021916716477385933, "grad_norm": 844.7169799804688, "learning_rate": 7.300884955752213e-08, "loss": 95.625, "step": 33 }, { "epoch": 0.0022580859400943083, "grad_norm": 753.751708984375, "learning_rate": 7.52212389380531e-08, "loss": 97.125, "step": 34 }, { "epoch": 0.0023245002324500234, "grad_norm": 496.194091796875, "learning_rate": 7.743362831858406e-08, "loss": 80.0625, "step": 35 }, { "epoch": 0.002390914524805738, "grad_norm": 653.2459106445312, "learning_rate": 7.964601769911503e-08, "loss": 81.4375, "step": 36 }, { "epoch": 0.002457328817161453, "grad_norm": 740.3855590820312, "learning_rate": 8.185840707964602e-08, "loss": 92.625, "step": 37 }, { "epoch": 0.002523743109517168, "grad_norm": 632.206787109375, "learning_rate": 8.4070796460177e-08, "loss": 92.875, "step": 38 }, { "epoch": 0.002590157401872883, "grad_norm": 611.5645141601562, "learning_rate": 8.628318584070796e-08, "loss": 94.9375, "step": 39 }, { "epoch": 0.002656571694228598, "grad_norm": 861.438720703125, "learning_rate": 8.849557522123894e-08, "loss": 92.4375, "step": 40 }, { "epoch": 0.0027229859865843128, "grad_norm": 605.6342163085938, "learning_rate": 9.070796460176991e-08, "loss": 95.0625, "step": 41 }, { "epoch": 0.002789400278940028, "grad_norm": 593.606201171875, "learning_rate": 9.292035398230089e-08, "loss": 91.5, "step": 42 }, { "epoch": 0.002855814571295743, "grad_norm": 752.308837890625, "learning_rate": 9.513274336283185e-08, "loss": 88.3125, "step": 43 }, { "epoch": 0.002922228863651458, "grad_norm": 807.1900634765625, "learning_rate": 9.734513274336283e-08, "loss": 72.4688, "step": 44 }, { "epoch": 0.002988643156007173, "grad_norm": 442.05120849609375, "learning_rate": 9.95575221238938e-08, "loss": 84.3125, "step": 45 }, { "epoch": 0.0030550574483628875, "grad_norm": 824.4887084960938, "learning_rate": 1.0176991150442478e-07, "loss": 119.75, "step": 46 }, { "epoch": 0.0031214717407186026, "grad_norm": 568.417724609375, "learning_rate": 1.0398230088495574e-07, "loss": 73.2812, "step": 47 }, { "epoch": 0.0031878860330743176, "grad_norm": 565.6078491210938, "learning_rate": 1.0619469026548672e-07, "loss": 78.1875, "step": 48 }, { "epoch": 0.0032543003254300326, "grad_norm": 584.2362060546875, "learning_rate": 1.0840707964601769e-07, "loss": 86.125, "step": 49 }, { "epoch": 0.0033207146177857477, "grad_norm": 712.2047729492188, "learning_rate": 1.1061946902654867e-07, "loss": 119.5, "step": 50 }, { "epoch": 0.0033871289101414623, "grad_norm": 938.6514892578125, "learning_rate": 1.1283185840707964e-07, "loss": 77.25, "step": 51 }, { "epoch": 0.0034535432024971773, "grad_norm": 681.6726684570312, "learning_rate": 1.1504424778761061e-07, "loss": 105.625, "step": 52 }, { "epoch": 0.0035199574948528924, "grad_norm": 582.8438110351562, "learning_rate": 1.172566371681416e-07, "loss": 80.9375, "step": 53 }, { "epoch": 0.0035863717872086074, "grad_norm": 738.5426025390625, "learning_rate": 1.1946902654867256e-07, "loss": 102.125, "step": 54 }, { "epoch": 0.0036527860795643225, "grad_norm": 1864.2054443359375, "learning_rate": 1.2168141592920352e-07, "loss": 91.375, "step": 55 }, { "epoch": 0.003719200371920037, "grad_norm": 754.727783203125, "learning_rate": 1.238938053097345e-07, "loss": 92.5, "step": 56 }, { "epoch": 0.003785614664275752, "grad_norm": 960.1768188476562, "learning_rate": 1.2610619469026549e-07, "loss": 79.0625, "step": 57 }, { "epoch": 0.003852028956631467, "grad_norm": 818.2954711914062, "learning_rate": 1.2831858407079647e-07, "loss": 87.875, "step": 58 }, { "epoch": 0.003918443248987182, "grad_norm": 947.1857299804688, "learning_rate": 1.3053097345132742e-07, "loss": 110.625, "step": 59 }, { "epoch": 0.003984857541342897, "grad_norm": 555.9225463867188, "learning_rate": 1.327433628318584e-07, "loss": 90.0, "step": 60 }, { "epoch": 0.004051271833698612, "grad_norm": 926.6248779296875, "learning_rate": 1.3495575221238936e-07, "loss": 76.4375, "step": 61 }, { "epoch": 0.004117686126054327, "grad_norm": 636.7302856445312, "learning_rate": 1.3716814159292035e-07, "loss": 82.8125, "step": 62 }, { "epoch": 0.0041841004184100415, "grad_norm": 766.4182739257812, "learning_rate": 1.393805309734513e-07, "loss": 98.75, "step": 63 }, { "epoch": 0.0042505147107657565, "grad_norm": 582.6116943359375, "learning_rate": 1.4159292035398229e-07, "loss": 79.75, "step": 64 }, { "epoch": 0.0043169290031214716, "grad_norm": 1391.355224609375, "learning_rate": 1.4380530973451327e-07, "loss": 100.75, "step": 65 }, { "epoch": 0.004383343295477187, "grad_norm": 617.5366821289062, "learning_rate": 1.4601769911504425e-07, "loss": 89.0, "step": 66 }, { "epoch": 0.004449757587832902, "grad_norm": 1036.2388916015625, "learning_rate": 1.4823008849557523e-07, "loss": 94.875, "step": 67 }, { "epoch": 0.004516171880188617, "grad_norm": 533.9644775390625, "learning_rate": 1.504424778761062e-07, "loss": 99.625, "step": 68 }, { "epoch": 0.004582586172544332, "grad_norm": 896.219482421875, "learning_rate": 1.5265486725663715e-07, "loss": 97.25, "step": 69 }, { "epoch": 0.004649000464900047, "grad_norm": 655.6805419921875, "learning_rate": 1.5486725663716813e-07, "loss": 76.25, "step": 70 }, { "epoch": 0.004715414757255762, "grad_norm": 682.7789306640625, "learning_rate": 1.570796460176991e-07, "loss": 83.75, "step": 71 }, { "epoch": 0.004781829049611476, "grad_norm": 2622.671875, "learning_rate": 1.5929203539823007e-07, "loss": 105.875, "step": 72 }, { "epoch": 0.004848243341967191, "grad_norm": 609.609619140625, "learning_rate": 1.6150442477876105e-07, "loss": 77.875, "step": 73 }, { "epoch": 0.004914657634322906, "grad_norm": 578.1624755859375, "learning_rate": 1.6371681415929203e-07, "loss": 84.375, "step": 74 }, { "epoch": 0.004981071926678621, "grad_norm": 569.22021484375, "learning_rate": 1.6592920353982302e-07, "loss": 84.1875, "step": 75 }, { "epoch": 0.005047486219034336, "grad_norm": 505.4549560546875, "learning_rate": 1.68141592920354e-07, "loss": 78.1875, "step": 76 }, { "epoch": 0.005113900511390051, "grad_norm": 680.3079833984375, "learning_rate": 1.7035398230088493e-07, "loss": 80.375, "step": 77 }, { "epoch": 0.005180314803745766, "grad_norm": 409.9525146484375, "learning_rate": 1.725663716814159e-07, "loss": 68.6875, "step": 78 }, { "epoch": 0.005246729096101481, "grad_norm": 855.506103515625, "learning_rate": 1.747787610619469e-07, "loss": 102.125, "step": 79 }, { "epoch": 0.005313143388457196, "grad_norm": 606.393310546875, "learning_rate": 1.7699115044247788e-07, "loss": 101.25, "step": 80 }, { "epoch": 0.005379557680812911, "grad_norm": 612.3898315429688, "learning_rate": 1.7920353982300883e-07, "loss": 67.125, "step": 81 }, { "epoch": 0.0054459719731686255, "grad_norm": 789.7510986328125, "learning_rate": 1.8141592920353982e-07, "loss": 94.875, "step": 82 }, { "epoch": 0.0055123862655243406, "grad_norm": 712.2133178710938, "learning_rate": 1.836283185840708e-07, "loss": 111.625, "step": 83 }, { "epoch": 0.005578800557880056, "grad_norm": 744.3705444335938, "learning_rate": 1.8584070796460178e-07, "loss": 85.125, "step": 84 }, { "epoch": 0.005645214850235771, "grad_norm": 602.2304077148438, "learning_rate": 1.880530973451327e-07, "loss": 110.25, "step": 85 }, { "epoch": 0.005711629142591486, "grad_norm": 507.156005859375, "learning_rate": 1.902654867256637e-07, "loss": 86.125, "step": 86 }, { "epoch": 0.005778043434947201, "grad_norm": 955.841796875, "learning_rate": 1.9247787610619468e-07, "loss": 99.6875, "step": 87 }, { "epoch": 0.005844457727302916, "grad_norm": 423.1438293457031, "learning_rate": 1.9469026548672566e-07, "loss": 88.3125, "step": 88 }, { "epoch": 0.005910872019658631, "grad_norm": 799.7440185546875, "learning_rate": 1.9690265486725664e-07, "loss": 91.25, "step": 89 }, { "epoch": 0.005977286312014346, "grad_norm": 566.26416015625, "learning_rate": 1.991150442477876e-07, "loss": 77.4375, "step": 90 }, { "epoch": 0.00604370060437006, "grad_norm": 674.4170532226562, "learning_rate": 2.0132743362831858e-07, "loss": 97.375, "step": 91 }, { "epoch": 0.006110114896725775, "grad_norm": 622.0730590820312, "learning_rate": 2.0353982300884956e-07, "loss": 103.75, "step": 92 }, { "epoch": 0.00617652918908149, "grad_norm": 700.599853515625, "learning_rate": 2.0575221238938052e-07, "loss": 100.5, "step": 93 }, { "epoch": 0.006242943481437205, "grad_norm": 575.8194580078125, "learning_rate": 2.0796460176991148e-07, "loss": 75.875, "step": 94 }, { "epoch": 0.00630935777379292, "grad_norm": 866.3402709960938, "learning_rate": 2.1017699115044246e-07, "loss": 94.625, "step": 95 }, { "epoch": 0.006375772066148635, "grad_norm": 689.7462158203125, "learning_rate": 2.1238938053097344e-07, "loss": 98.375, "step": 96 }, { "epoch": 0.00644218635850435, "grad_norm": 836.433349609375, "learning_rate": 2.1460176991150442e-07, "loss": 95.0, "step": 97 }, { "epoch": 0.006508600650860065, "grad_norm": 576.3231201171875, "learning_rate": 2.1681415929203538e-07, "loss": 111.375, "step": 98 }, { "epoch": 0.00657501494321578, "grad_norm": 727.649658203125, "learning_rate": 2.1902654867256636e-07, "loss": 85.5, "step": 99 }, { "epoch": 0.006641429235571495, "grad_norm": 622.8093872070312, "learning_rate": 2.2123893805309735e-07, "loss": 87.875, "step": 100 }, { "epoch": 0.0067078435279272096, "grad_norm": 779.7064208984375, "learning_rate": 2.234513274336283e-07, "loss": 98.75, "step": 101 }, { "epoch": 0.006774257820282925, "grad_norm": 677.5739135742188, "learning_rate": 2.2566371681415928e-07, "loss": 70.1875, "step": 102 }, { "epoch": 0.00684067211263864, "grad_norm": 479.6736755371094, "learning_rate": 2.2787610619469024e-07, "loss": 87.5, "step": 103 }, { "epoch": 0.006907086404994355, "grad_norm": 611.23779296875, "learning_rate": 2.3008849557522122e-07, "loss": 114.5, "step": 104 }, { "epoch": 0.00697350069735007, "grad_norm": 643.9347534179688, "learning_rate": 2.323008849557522e-07, "loss": 86.375, "step": 105 }, { "epoch": 0.007039914989705785, "grad_norm": 656.306396484375, "learning_rate": 2.345132743362832e-07, "loss": 99.75, "step": 106 }, { "epoch": 0.0071063292820615, "grad_norm": 671.3726196289062, "learning_rate": 2.3672566371681415e-07, "loss": 68.4375, "step": 107 }, { "epoch": 0.007172743574417215, "grad_norm": 483.28472900390625, "learning_rate": 2.3893805309734513e-07, "loss": 75.75, "step": 108 }, { "epoch": 0.00723915786677293, "grad_norm": 564.9719848632812, "learning_rate": 2.4115044247787606e-07, "loss": 92.5, "step": 109 }, { "epoch": 0.007305572159128645, "grad_norm": 739.8251342773438, "learning_rate": 2.4336283185840704e-07, "loss": 80.8125, "step": 110 }, { "epoch": 0.007371986451484359, "grad_norm": 591.480224609375, "learning_rate": 2.45575221238938e-07, "loss": 78.1875, "step": 111 }, { "epoch": 0.007438400743840074, "grad_norm": 596.2813110351562, "learning_rate": 2.47787610619469e-07, "loss": 82.25, "step": 112 }, { "epoch": 0.007504815036195789, "grad_norm": 848.8912353515625, "learning_rate": 2.5e-07, "loss": 103.1875, "step": 113 }, { "epoch": 0.007571229328551504, "grad_norm": 1162.75830078125, "learning_rate": 2.5221238938053097e-07, "loss": 86.4375, "step": 114 }, { "epoch": 0.007637643620907219, "grad_norm": 533.9456176757812, "learning_rate": 2.5442477876106195e-07, "loss": 80.0625, "step": 115 }, { "epoch": 0.007704057913262934, "grad_norm": 671.5496215820312, "learning_rate": 2.5663716814159294e-07, "loss": 94.5, "step": 116 }, { "epoch": 0.007770472205618649, "grad_norm": 774.971923828125, "learning_rate": 2.588495575221239e-07, "loss": 86.0, "step": 117 }, { "epoch": 0.007836886497974364, "grad_norm": 536.7562255859375, "learning_rate": 2.6106194690265485e-07, "loss": 74.9375, "step": 118 }, { "epoch": 0.00790330079033008, "grad_norm": 581.3994140625, "learning_rate": 2.6327433628318583e-07, "loss": 87.5625, "step": 119 }, { "epoch": 0.007969715082685794, "grad_norm": 472.7863464355469, "learning_rate": 2.654867256637168e-07, "loss": 78.4375, "step": 120 }, { "epoch": 0.00803612937504151, "grad_norm": 546.1000366210938, "learning_rate": 2.676991150442478e-07, "loss": 84.875, "step": 121 }, { "epoch": 0.008102543667397225, "grad_norm": 675.1554565429688, "learning_rate": 2.6991150442477873e-07, "loss": 91.25, "step": 122 }, { "epoch": 0.00816895795975294, "grad_norm": 952.3250732421875, "learning_rate": 2.721238938053097e-07, "loss": 89.125, "step": 123 }, { "epoch": 0.008235372252108655, "grad_norm": 526.0009765625, "learning_rate": 2.743362831858407e-07, "loss": 97.125, "step": 124 }, { "epoch": 0.008301786544464368, "grad_norm": 749.5096435546875, "learning_rate": 2.765486725663717e-07, "loss": 88.0, "step": 125 }, { "epoch": 0.008368200836820083, "grad_norm": 766.8788452148438, "learning_rate": 2.787610619469026e-07, "loss": 70.3125, "step": 126 }, { "epoch": 0.008434615129175798, "grad_norm": 706.9832763671875, "learning_rate": 2.809734513274336e-07, "loss": 87.1875, "step": 127 }, { "epoch": 0.008501029421531513, "grad_norm": 927.012451171875, "learning_rate": 2.8318584070796457e-07, "loss": 101.625, "step": 128 }, { "epoch": 0.008567443713887228, "grad_norm": 522.3720703125, "learning_rate": 2.8539823008849555e-07, "loss": 98.375, "step": 129 }, { "epoch": 0.008633858006242943, "grad_norm": 703.4334106445312, "learning_rate": 2.8761061946902654e-07, "loss": 108.25, "step": 130 }, { "epoch": 0.008700272298598658, "grad_norm": 645.8463134765625, "learning_rate": 2.898230088495575e-07, "loss": 83.875, "step": 131 }, { "epoch": 0.008766686590954373, "grad_norm": 747.4113159179688, "learning_rate": 2.920353982300885e-07, "loss": 101.25, "step": 132 }, { "epoch": 0.008833100883310088, "grad_norm": 506.3396911621094, "learning_rate": 2.942477876106195e-07, "loss": 72.4375, "step": 133 }, { "epoch": 0.008899515175665803, "grad_norm": 717.0906982421875, "learning_rate": 2.9646017699115047e-07, "loss": 94.625, "step": 134 }, { "epoch": 0.008965929468021518, "grad_norm": 769.8269653320312, "learning_rate": 2.986725663716814e-07, "loss": 90.25, "step": 135 }, { "epoch": 0.009032343760377233, "grad_norm": 603.2570190429688, "learning_rate": 3.008849557522124e-07, "loss": 94.875, "step": 136 }, { "epoch": 0.009098758052732948, "grad_norm": 873.4959716796875, "learning_rate": 3.0309734513274336e-07, "loss": 70.8125, "step": 137 }, { "epoch": 0.009165172345088663, "grad_norm": 657.3741455078125, "learning_rate": 3.053097345132743e-07, "loss": 87.4375, "step": 138 }, { "epoch": 0.009231586637444378, "grad_norm": 674.6549682617188, "learning_rate": 3.075221238938053e-07, "loss": 83.75, "step": 139 }, { "epoch": 0.009298000929800094, "grad_norm": 759.401123046875, "learning_rate": 3.0973451327433626e-07, "loss": 88.25, "step": 140 }, { "epoch": 0.009364415222155809, "grad_norm": 679.4528198242188, "learning_rate": 3.1194690265486724e-07, "loss": 85.5, "step": 141 }, { "epoch": 0.009430829514511524, "grad_norm": 665.3855590820312, "learning_rate": 3.141592920353982e-07, "loss": 95.875, "step": 142 }, { "epoch": 0.009497243806867239, "grad_norm": 706.2380981445312, "learning_rate": 3.163716814159292e-07, "loss": 88.75, "step": 143 }, { "epoch": 0.009563658099222952, "grad_norm": 686.3504028320312, "learning_rate": 3.1858407079646014e-07, "loss": 77.1875, "step": 144 }, { "epoch": 0.009630072391578667, "grad_norm": 788.5548706054688, "learning_rate": 3.207964601769911e-07, "loss": 91.125, "step": 145 }, { "epoch": 0.009696486683934382, "grad_norm": 710.8297729492188, "learning_rate": 3.230088495575221e-07, "loss": 75.375, "step": 146 }, { "epoch": 0.009762900976290097, "grad_norm": 740.53564453125, "learning_rate": 3.252212389380531e-07, "loss": 88.25, "step": 147 }, { "epoch": 0.009829315268645812, "grad_norm": 576.7034301757812, "learning_rate": 3.2743362831858407e-07, "loss": 74.375, "step": 148 }, { "epoch": 0.009895729561001527, "grad_norm": 709.434326171875, "learning_rate": 3.2964601769911505e-07, "loss": 80.875, "step": 149 }, { "epoch": 0.009962143853357242, "grad_norm": 579.1680908203125, "learning_rate": 3.3185840707964603e-07, "loss": 85.5, "step": 150 }, { "epoch": 0.010028558145712957, "grad_norm": 354.0645446777344, "learning_rate": 3.34070796460177e-07, "loss": 71.375, "step": 151 }, { "epoch": 0.010094972438068672, "grad_norm": 637.50634765625, "learning_rate": 3.36283185840708e-07, "loss": 98.75, "step": 152 }, { "epoch": 0.010161386730424387, "grad_norm": 1310.5230712890625, "learning_rate": 3.3849557522123893e-07, "loss": 60.8125, "step": 153 }, { "epoch": 0.010227801022780102, "grad_norm": 550.610595703125, "learning_rate": 3.4070796460176986e-07, "loss": 90.875, "step": 154 }, { "epoch": 0.010294215315135817, "grad_norm": 423.6199951171875, "learning_rate": 3.4292035398230084e-07, "loss": 69.875, "step": 155 }, { "epoch": 0.010360629607491532, "grad_norm": 558.76171875, "learning_rate": 3.451327433628318e-07, "loss": 81.875, "step": 156 }, { "epoch": 0.010427043899847247, "grad_norm": 524.2839965820312, "learning_rate": 3.473451327433628e-07, "loss": 75.75, "step": 157 }, { "epoch": 0.010493458192202963, "grad_norm": 798.0537719726562, "learning_rate": 3.495575221238938e-07, "loss": 91.375, "step": 158 }, { "epoch": 0.010559872484558678, "grad_norm": 601.5868530273438, "learning_rate": 3.5176991150442477e-07, "loss": 78.0, "step": 159 }, { "epoch": 0.010626286776914393, "grad_norm": 736.8407592773438, "learning_rate": 3.5398230088495575e-07, "loss": 89.0, "step": 160 }, { "epoch": 0.010692701069270108, "grad_norm": 503.21368408203125, "learning_rate": 3.561946902654867e-07, "loss": 77.0, "step": 161 }, { "epoch": 0.010759115361625823, "grad_norm": 529.09765625, "learning_rate": 3.5840707964601767e-07, "loss": 82.125, "step": 162 }, { "epoch": 0.010825529653981536, "grad_norm": 478.8519287109375, "learning_rate": 3.6061946902654865e-07, "loss": 77.1875, "step": 163 }, { "epoch": 0.010891943946337251, "grad_norm": 2300.594970703125, "learning_rate": 3.6283185840707963e-07, "loss": 75.25, "step": 164 }, { "epoch": 0.010958358238692966, "grad_norm": 638.9989624023438, "learning_rate": 3.650442477876106e-07, "loss": 68.0625, "step": 165 }, { "epoch": 0.011024772531048681, "grad_norm": 638.7012939453125, "learning_rate": 3.672566371681416e-07, "loss": 80.75, "step": 166 }, { "epoch": 0.011091186823404396, "grad_norm": 414.54931640625, "learning_rate": 3.694690265486726e-07, "loss": 69.9375, "step": 167 }, { "epoch": 0.011157601115760111, "grad_norm": 612.8683471679688, "learning_rate": 3.7168141592920356e-07, "loss": 77.625, "step": 168 }, { "epoch": 0.011224015408115826, "grad_norm": 718.232177734375, "learning_rate": 3.7389380530973454e-07, "loss": 78.5, "step": 169 }, { "epoch": 0.011290429700471541, "grad_norm": 512.90380859375, "learning_rate": 3.761061946902654e-07, "loss": 68.25, "step": 170 }, { "epoch": 0.011356843992827256, "grad_norm": 718.4387817382812, "learning_rate": 3.783185840707964e-07, "loss": 79.625, "step": 171 }, { "epoch": 0.011423258285182971, "grad_norm": 474.680908203125, "learning_rate": 3.805309734513274e-07, "loss": 65.9375, "step": 172 }, { "epoch": 0.011489672577538686, "grad_norm": 541.1656494140625, "learning_rate": 3.8274336283185837e-07, "loss": 74.875, "step": 173 }, { "epoch": 0.011556086869894401, "grad_norm": 563.3125, "learning_rate": 3.8495575221238935e-07, "loss": 60.75, "step": 174 }, { "epoch": 0.011622501162250116, "grad_norm": 469.4345703125, "learning_rate": 3.8716814159292034e-07, "loss": 57.4375, "step": 175 }, { "epoch": 0.011688915454605832, "grad_norm": 613.8688354492188, "learning_rate": 3.893805309734513e-07, "loss": 67.375, "step": 176 }, { "epoch": 0.011755329746961547, "grad_norm": 644.13037109375, "learning_rate": 3.915929203539823e-07, "loss": 54.25, "step": 177 }, { "epoch": 0.011821744039317262, "grad_norm": 739.6556396484375, "learning_rate": 3.938053097345133e-07, "loss": 67.0, "step": 178 }, { "epoch": 0.011888158331672977, "grad_norm": 519.6035766601562, "learning_rate": 3.960176991150442e-07, "loss": 66.4375, "step": 179 }, { "epoch": 0.011954572624028692, "grad_norm": 1550.623046875, "learning_rate": 3.982300884955752e-07, "loss": 54.6875, "step": 180 }, { "epoch": 0.012020986916384407, "grad_norm": 549.6812133789062, "learning_rate": 4.004424778761062e-07, "loss": 62.6875, "step": 181 }, { "epoch": 0.01208740120874012, "grad_norm": 688.2095336914062, "learning_rate": 4.0265486725663716e-07, "loss": 62.25, "step": 182 }, { "epoch": 0.012153815501095835, "grad_norm": 454.6360168457031, "learning_rate": 4.0486725663716814e-07, "loss": 53.125, "step": 183 }, { "epoch": 0.01222022979345155, "grad_norm": 509.9248962402344, "learning_rate": 4.0707964601769913e-07, "loss": 58.9375, "step": 184 }, { "epoch": 0.012286644085807265, "grad_norm": 456.27435302734375, "learning_rate": 4.092920353982301e-07, "loss": 51.75, "step": 185 }, { "epoch": 0.01235305837816298, "grad_norm": 421.9388427734375, "learning_rate": 4.1150442477876104e-07, "loss": 51.875, "step": 186 }, { "epoch": 0.012419472670518695, "grad_norm": 498.2381896972656, "learning_rate": 4.1371681415929197e-07, "loss": 61.5625, "step": 187 }, { "epoch": 0.01248588696287441, "grad_norm": 472.46722412109375, "learning_rate": 4.1592920353982295e-07, "loss": 46.6875, "step": 188 }, { "epoch": 0.012552301255230125, "grad_norm": 556.36083984375, "learning_rate": 4.1814159292035393e-07, "loss": 42.9062, "step": 189 }, { "epoch": 0.01261871554758584, "grad_norm": 623.2911376953125, "learning_rate": 4.203539823008849e-07, "loss": 50.9375, "step": 190 }, { "epoch": 0.012685129839941555, "grad_norm": 575.8719482421875, "learning_rate": 4.225663716814159e-07, "loss": 49.6875, "step": 191 }, { "epoch": 0.01275154413229727, "grad_norm": 641.346923828125, "learning_rate": 4.247787610619469e-07, "loss": 45.0625, "step": 192 }, { "epoch": 0.012817958424652985, "grad_norm": 374.0616455078125, "learning_rate": 4.2699115044247787e-07, "loss": 50.125, "step": 193 }, { "epoch": 0.0128843727170087, "grad_norm": 449.8600769042969, "learning_rate": 4.2920353982300885e-07, "loss": 51.75, "step": 194 }, { "epoch": 0.012950787009364416, "grad_norm": 427.46240234375, "learning_rate": 4.3141592920353983e-07, "loss": 45.75, "step": 195 }, { "epoch": 0.01301720130172013, "grad_norm": 412.7389831542969, "learning_rate": 4.3362831858407076e-07, "loss": 45.0, "step": 196 }, { "epoch": 0.013083615594075846, "grad_norm": 618.090576171875, "learning_rate": 4.3584070796460174e-07, "loss": 48.0, "step": 197 }, { "epoch": 0.01315002988643156, "grad_norm": 585.6238403320312, "learning_rate": 4.380530973451327e-07, "loss": 48.9375, "step": 198 }, { "epoch": 0.013216444178787276, "grad_norm": 401.9342956542969, "learning_rate": 4.402654867256637e-07, "loss": 38.6875, "step": 199 }, { "epoch": 0.01328285847114299, "grad_norm": 405.5523681640625, "learning_rate": 4.424778761061947e-07, "loss": 40.5, "step": 200 }, { "epoch": 0.013349272763498704, "grad_norm": 467.21441650390625, "learning_rate": 4.446902654867257e-07, "loss": 44.125, "step": 201 }, { "epoch": 0.013415687055854419, "grad_norm": 405.9369812011719, "learning_rate": 4.469026548672566e-07, "loss": 38.5312, "step": 202 }, { "epoch": 0.013482101348210134, "grad_norm": 454.36212158203125, "learning_rate": 4.491150442477876e-07, "loss": 36.5312, "step": 203 }, { "epoch": 0.01354851564056585, "grad_norm": 1471.0814208984375, "learning_rate": 4.5132743362831857e-07, "loss": 35.9375, "step": 204 }, { "epoch": 0.013614929932921564, "grad_norm": 374.5555114746094, "learning_rate": 4.535398230088495e-07, "loss": 43.625, "step": 205 }, { "epoch": 0.01368134422527728, "grad_norm": 340.40960693359375, "learning_rate": 4.557522123893805e-07, "loss": 40.4375, "step": 206 }, { "epoch": 0.013747758517632994, "grad_norm": 639.0816040039062, "learning_rate": 4.5796460176991146e-07, "loss": 34.7188, "step": 207 }, { "epoch": 0.01381417280998871, "grad_norm": 347.9823913574219, "learning_rate": 4.6017699115044245e-07, "loss": 33.6562, "step": 208 }, { "epoch": 0.013880587102344424, "grad_norm": 412.93157958984375, "learning_rate": 4.6238938053097343e-07, "loss": 44.0625, "step": 209 }, { "epoch": 0.01394700139470014, "grad_norm": 414.9576721191406, "learning_rate": 4.646017699115044e-07, "loss": 35.1562, "step": 210 }, { "epoch": 0.014013415687055854, "grad_norm": 483.4190673828125, "learning_rate": 4.668141592920354e-07, "loss": 43.8125, "step": 211 }, { "epoch": 0.01407982997941157, "grad_norm": 332.4232177734375, "learning_rate": 4.690265486725664e-07, "loss": 36.0625, "step": 212 }, { "epoch": 0.014146244271767285, "grad_norm": 371.9752197265625, "learning_rate": 4.7123893805309736e-07, "loss": 37.875, "step": 213 }, { "epoch": 0.014212658564123, "grad_norm": 432.70648193359375, "learning_rate": 4.734513274336283e-07, "loss": 40.3125, "step": 214 }, { "epoch": 0.014279072856478715, "grad_norm": 393.66748046875, "learning_rate": 4.7566371681415927e-07, "loss": 32.9375, "step": 215 }, { "epoch": 0.01434548714883443, "grad_norm": 479.83880615234375, "learning_rate": 4.778761061946903e-07, "loss": 40.625, "step": 216 }, { "epoch": 0.014411901441190145, "grad_norm": 436.4576110839844, "learning_rate": 4.800884955752213e-07, "loss": 36.5938, "step": 217 }, { "epoch": 0.01447831573354586, "grad_norm": 299.87353515625, "learning_rate": 4.823008849557521e-07, "loss": 36.1562, "step": 218 }, { "epoch": 0.014544730025901575, "grad_norm": 334.7084655761719, "learning_rate": 4.845132743362832e-07, "loss": 39.6875, "step": 219 }, { "epoch": 0.01461114431825729, "grad_norm": 633.1138916015625, "learning_rate": 4.867256637168141e-07, "loss": 47.8125, "step": 220 }, { "epoch": 0.014677558610613003, "grad_norm": 1213.0623779296875, "learning_rate": 4.889380530973451e-07, "loss": 38.6562, "step": 221 }, { "epoch": 0.014743972902968718, "grad_norm": 473.9811096191406, "learning_rate": 4.91150442477876e-07, "loss": 34.5312, "step": 222 }, { "epoch": 0.014810387195324433, "grad_norm": 314.3461608886719, "learning_rate": 4.933628318584071e-07, "loss": 35.8125, "step": 223 }, { "epoch": 0.014876801487680148, "grad_norm": 358.38531494140625, "learning_rate": 4.95575221238938e-07, "loss": 33.0312, "step": 224 }, { "epoch": 0.014943215780035863, "grad_norm": 298.2361145019531, "learning_rate": 4.97787610619469e-07, "loss": 36.5, "step": 225 }, { "epoch": 0.015009630072391578, "grad_norm": 404.67169189453125, "learning_rate": 5e-07, "loss": 36.0938, "step": 226 }, { "epoch": 0.015076044364747293, "grad_norm": 286.0328674316406, "learning_rate": 5.022123893805309e-07, "loss": 35.8125, "step": 227 }, { "epoch": 0.015142458657103008, "grad_norm": 479.1570739746094, "learning_rate": 5.044247787610619e-07, "loss": 36.1875, "step": 228 }, { "epoch": 0.015208872949458723, "grad_norm": 277.19317626953125, "learning_rate": 5.066371681415929e-07, "loss": 38.875, "step": 229 }, { "epoch": 0.015275287241814439, "grad_norm": 372.1421203613281, "learning_rate": 5.088495575221239e-07, "loss": 34.5312, "step": 230 }, { "epoch": 0.015341701534170154, "grad_norm": 560.7784423828125, "learning_rate": 5.110619469026548e-07, "loss": 39.0625, "step": 231 }, { "epoch": 0.015408115826525869, "grad_norm": 400.81671142578125, "learning_rate": 5.132743362831859e-07, "loss": 45.0625, "step": 232 }, { "epoch": 0.015474530118881584, "grad_norm": 441.2385559082031, "learning_rate": 5.154867256637168e-07, "loss": 36.9688, "step": 233 }, { "epoch": 0.015540944411237299, "grad_norm": 294.4197082519531, "learning_rate": 5.176991150442478e-07, "loss": 35.2812, "step": 234 }, { "epoch": 0.015607358703593014, "grad_norm": 333.3111572265625, "learning_rate": 5.199115044247787e-07, "loss": 36.6562, "step": 235 }, { "epoch": 0.01567377299594873, "grad_norm": 310.1619567871094, "learning_rate": 5.221238938053097e-07, "loss": 31.4688, "step": 236 }, { "epoch": 0.015740187288304442, "grad_norm": 368.3042297363281, "learning_rate": 5.243362831858406e-07, "loss": 33.3438, "step": 237 }, { "epoch": 0.01580660158066016, "grad_norm": 359.4574279785156, "learning_rate": 5.265486725663717e-07, "loss": 34.5625, "step": 238 }, { "epoch": 0.015873015873015872, "grad_norm": 324.6553039550781, "learning_rate": 5.287610619469026e-07, "loss": 31.1875, "step": 239 }, { "epoch": 0.01593943016537159, "grad_norm": 394.7154541015625, "learning_rate": 5.309734513274336e-07, "loss": 31.375, "step": 240 }, { "epoch": 0.016005844457727302, "grad_norm": 301.9300537109375, "learning_rate": 5.331858407079646e-07, "loss": 30.2812, "step": 241 }, { "epoch": 0.01607225875008302, "grad_norm": 365.1582336425781, "learning_rate": 5.353982300884956e-07, "loss": 33.5, "step": 242 }, { "epoch": 0.016138673042438732, "grad_norm": 361.3687438964844, "learning_rate": 5.376106194690265e-07, "loss": 39.1875, "step": 243 }, { "epoch": 0.01620508733479445, "grad_norm": 310.88775634765625, "learning_rate": 5.398230088495575e-07, "loss": 28.5625, "step": 244 }, { "epoch": 0.016271501627150162, "grad_norm": 387.8221435546875, "learning_rate": 5.420353982300885e-07, "loss": 32.9062, "step": 245 }, { "epoch": 0.01633791591950588, "grad_norm": 506.06683349609375, "learning_rate": 5.442477876106194e-07, "loss": 43.125, "step": 246 }, { "epoch": 0.016404330211861592, "grad_norm": 361.6125793457031, "learning_rate": 5.464601769911505e-07, "loss": 35.75, "step": 247 }, { "epoch": 0.01647074450421731, "grad_norm": 414.8364562988281, "learning_rate": 5.486725663716814e-07, "loss": 39.5625, "step": 248 }, { "epoch": 0.016537158796573023, "grad_norm": 442.00823974609375, "learning_rate": 5.508849557522124e-07, "loss": 42.75, "step": 249 }, { "epoch": 0.016603573088928736, "grad_norm": 270.0237121582031, "learning_rate": 5.530973451327434e-07, "loss": 30.75, "step": 250 }, { "epoch": 0.016669987381284453, "grad_norm": 334.240966796875, "learning_rate": 5.553097345132744e-07, "loss": 35.25, "step": 251 }, { "epoch": 0.016736401673640166, "grad_norm": 382.0826110839844, "learning_rate": 5.575221238938052e-07, "loss": 33.625, "step": 252 }, { "epoch": 0.016802815965995883, "grad_norm": 366.2176513671875, "learning_rate": 5.597345132743362e-07, "loss": 32.8125, "step": 253 }, { "epoch": 0.016869230258351596, "grad_norm": 356.5561828613281, "learning_rate": 5.619469026548672e-07, "loss": 38.5, "step": 254 }, { "epoch": 0.016935644550707313, "grad_norm": 326.6780090332031, "learning_rate": 5.641592920353982e-07, "loss": 34.0312, "step": 255 }, { "epoch": 0.017002058843063026, "grad_norm": 245.58351135253906, "learning_rate": 5.663716814159291e-07, "loss": 28.125, "step": 256 }, { "epoch": 0.017068473135418743, "grad_norm": 874.86279296875, "learning_rate": 5.685840707964602e-07, "loss": 35.6875, "step": 257 }, { "epoch": 0.017134887427774456, "grad_norm": 342.34033203125, "learning_rate": 5.707964601769911e-07, "loss": 37.75, "step": 258 }, { "epoch": 0.017201301720130173, "grad_norm": 518.0966796875, "learning_rate": 5.73008849557522e-07, "loss": 33.1875, "step": 259 }, { "epoch": 0.017267716012485886, "grad_norm": 326.62933349609375, "learning_rate": 5.752212389380531e-07, "loss": 33.3438, "step": 260 }, { "epoch": 0.017334130304841603, "grad_norm": 302.9090576171875, "learning_rate": 5.77433628318584e-07, "loss": 34.2188, "step": 261 }, { "epoch": 0.017400544597197316, "grad_norm": 373.5575866699219, "learning_rate": 5.79646017699115e-07, "loss": 38.5625, "step": 262 }, { "epoch": 0.017466958889553033, "grad_norm": 343.4082946777344, "learning_rate": 5.81858407079646e-07, "loss": 36.0, "step": 263 }, { "epoch": 0.017533373181908746, "grad_norm": 351.612548828125, "learning_rate": 5.84070796460177e-07, "loss": 26.1875, "step": 264 }, { "epoch": 0.017599787474264463, "grad_norm": 528.5353393554688, "learning_rate": 5.862831858407079e-07, "loss": 39.25, "step": 265 }, { "epoch": 0.017666201766620176, "grad_norm": 353.9274597167969, "learning_rate": 5.88495575221239e-07, "loss": 30.0, "step": 266 }, { "epoch": 0.017732616058975893, "grad_norm": 355.3372802734375, "learning_rate": 5.907079646017699e-07, "loss": 28.7812, "step": 267 }, { "epoch": 0.017799030351331607, "grad_norm": 260.38690185546875, "learning_rate": 5.929203539823009e-07, "loss": 32.4062, "step": 268 }, { "epoch": 0.01786544464368732, "grad_norm": 419.2821960449219, "learning_rate": 5.951327433628319e-07, "loss": 32.6562, "step": 269 }, { "epoch": 0.017931858936043037, "grad_norm": 342.531494140625, "learning_rate": 5.973451327433628e-07, "loss": 33.8125, "step": 270 }, { "epoch": 0.01799827322839875, "grad_norm": 210.9619598388672, "learning_rate": 5.995575221238937e-07, "loss": 29.6875, "step": 271 }, { "epoch": 0.018064687520754467, "grad_norm": 543.686767578125, "learning_rate": 6.017699115044248e-07, "loss": 34.5312, "step": 272 }, { "epoch": 0.01813110181311018, "grad_norm": 328.7593078613281, "learning_rate": 6.039823008849557e-07, "loss": 36.1562, "step": 273 }, { "epoch": 0.018197516105465897, "grad_norm": 355.12799072265625, "learning_rate": 6.061946902654867e-07, "loss": 32.7812, "step": 274 }, { "epoch": 0.01826393039782161, "grad_norm": 2705.663330078125, "learning_rate": 6.084070796460177e-07, "loss": 31.75, "step": 275 }, { "epoch": 0.018330344690177327, "grad_norm": 298.0554504394531, "learning_rate": 6.106194690265486e-07, "loss": 35.125, "step": 276 }, { "epoch": 0.01839675898253304, "grad_norm": 309.7917785644531, "learning_rate": 6.128318584070796e-07, "loss": 29.875, "step": 277 }, { "epoch": 0.018463173274888757, "grad_norm": 303.73773193359375, "learning_rate": 6.150442477876105e-07, "loss": 30.75, "step": 278 }, { "epoch": 0.01852958756724447, "grad_norm": 247.09716796875, "learning_rate": 6.172566371681416e-07, "loss": 34.1562, "step": 279 }, { "epoch": 0.018596001859600187, "grad_norm": 350.9267883300781, "learning_rate": 6.194690265486725e-07, "loss": 34.5312, "step": 280 }, { "epoch": 0.0186624161519559, "grad_norm": 263.85162353515625, "learning_rate": 6.216814159292036e-07, "loss": 36.0, "step": 281 }, { "epoch": 0.018728830444311617, "grad_norm": 366.4549560546875, "learning_rate": 6.238938053097345e-07, "loss": 33.9688, "step": 282 }, { "epoch": 0.01879524473666733, "grad_norm": 464.90911865234375, "learning_rate": 6.261061946902655e-07, "loss": 40.375, "step": 283 }, { "epoch": 0.018861659029023047, "grad_norm": 513.763427734375, "learning_rate": 6.283185840707964e-07, "loss": 38.9062, "step": 284 }, { "epoch": 0.01892807332137876, "grad_norm": 565.6420288085938, "learning_rate": 6.305309734513275e-07, "loss": 37.1875, "step": 285 }, { "epoch": 0.018994487613734477, "grad_norm": 275.089111328125, "learning_rate": 6.327433628318584e-07, "loss": 33.125, "step": 286 }, { "epoch": 0.01906090190609019, "grad_norm": 298.3101501464844, "learning_rate": 6.349557522123894e-07, "loss": 36.6562, "step": 287 }, { "epoch": 0.019127316198445904, "grad_norm": 251.8985137939453, "learning_rate": 6.371681415929203e-07, "loss": 30.5, "step": 288 }, { "epoch": 0.01919373049080162, "grad_norm": 243.9000244140625, "learning_rate": 6.393805309734513e-07, "loss": 30.75, "step": 289 }, { "epoch": 0.019260144783157334, "grad_norm": 265.717529296875, "learning_rate": 6.415929203539822e-07, "loss": 31.5625, "step": 290 }, { "epoch": 0.01932655907551305, "grad_norm": 423.2583312988281, "learning_rate": 6.438053097345132e-07, "loss": 39.1562, "step": 291 }, { "epoch": 0.019392973367868764, "grad_norm": 300.86578369140625, "learning_rate": 6.460176991150442e-07, "loss": 31.8125, "step": 292 }, { "epoch": 0.01945938766022448, "grad_norm": 275.69793701171875, "learning_rate": 6.482300884955751e-07, "loss": 27.75, "step": 293 }, { "epoch": 0.019525801952580194, "grad_norm": 284.71234130859375, "learning_rate": 6.504424778761062e-07, "loss": 33.125, "step": 294 }, { "epoch": 0.01959221624493591, "grad_norm": 423.4798583984375, "learning_rate": 6.526548672566371e-07, "loss": 35.4375, "step": 295 }, { "epoch": 0.019658630537291624, "grad_norm": 366.07855224609375, "learning_rate": 6.548672566371681e-07, "loss": 27.5625, "step": 296 }, { "epoch": 0.01972504482964734, "grad_norm": 226.2543182373047, "learning_rate": 6.570796460176991e-07, "loss": 31.0625, "step": 297 }, { "epoch": 0.019791459122003054, "grad_norm": 663.63916015625, "learning_rate": 6.592920353982301e-07, "loss": 31.1562, "step": 298 }, { "epoch": 0.01985787341435877, "grad_norm": 300.907958984375, "learning_rate": 6.61504424778761e-07, "loss": 34.5625, "step": 299 }, { "epoch": 0.019924287706714484, "grad_norm": 391.88330078125, "learning_rate": 6.637168141592921e-07, "loss": 34.5, "step": 300 }, { "epoch": 0.0199907019990702, "grad_norm": 537.9354248046875, "learning_rate": 6.65929203539823e-07, "loss": 36.0, "step": 301 }, { "epoch": 0.020057116291425914, "grad_norm": 523.4166259765625, "learning_rate": 6.68141592920354e-07, "loss": 45.8125, "step": 302 }, { "epoch": 0.02012353058378163, "grad_norm": 610.998779296875, "learning_rate": 6.70353982300885e-07, "loss": 33.1562, "step": 303 }, { "epoch": 0.020189944876137345, "grad_norm": 351.3565368652344, "learning_rate": 6.72566371681416e-07, "loss": 33.4375, "step": 304 }, { "epoch": 0.02025635916849306, "grad_norm": 621.0404052734375, "learning_rate": 6.747787610619468e-07, "loss": 32.9688, "step": 305 }, { "epoch": 0.020322773460848775, "grad_norm": 243.45652770996094, "learning_rate": 6.769911504424779e-07, "loss": 30.9062, "step": 306 }, { "epoch": 0.020389187753204488, "grad_norm": 366.33624267578125, "learning_rate": 6.792035398230088e-07, "loss": 27.0, "step": 307 }, { "epoch": 0.020455602045560205, "grad_norm": 344.9841003417969, "learning_rate": 6.814159292035397e-07, "loss": 30.5938, "step": 308 }, { "epoch": 0.020522016337915918, "grad_norm": 256.10870361328125, "learning_rate": 6.836283185840707e-07, "loss": 27.0938, "step": 309 }, { "epoch": 0.020588430630271635, "grad_norm": 296.6623229980469, "learning_rate": 6.858407079646017e-07, "loss": 29.8438, "step": 310 }, { "epoch": 0.020654844922627348, "grad_norm": 231.20018005371094, "learning_rate": 6.880530973451327e-07, "loss": 34.0, "step": 311 }, { "epoch": 0.020721259214983065, "grad_norm": 2603.9541015625, "learning_rate": 6.902654867256636e-07, "loss": 28.2812, "step": 312 }, { "epoch": 0.020787673507338778, "grad_norm": 226.758544921875, "learning_rate": 6.924778761061947e-07, "loss": 29.3125, "step": 313 }, { "epoch": 0.020854087799694495, "grad_norm": 212.4892120361328, "learning_rate": 6.946902654867256e-07, "loss": 31.0625, "step": 314 }, { "epoch": 0.02092050209205021, "grad_norm": 274.27117919921875, "learning_rate": 6.969026548672566e-07, "loss": 29.8125, "step": 315 }, { "epoch": 0.020986916384405925, "grad_norm": 3173.78173828125, "learning_rate": 6.991150442477876e-07, "loss": 30.6562, "step": 316 }, { "epoch": 0.02105333067676164, "grad_norm": 366.580810546875, "learning_rate": 7.013274336283186e-07, "loss": 31.0, "step": 317 }, { "epoch": 0.021119744969117355, "grad_norm": 310.28326416015625, "learning_rate": 7.035398230088495e-07, "loss": 28.5625, "step": 318 }, { "epoch": 0.02118615926147307, "grad_norm": 234.53973388671875, "learning_rate": 7.057522123893806e-07, "loss": 33.6875, "step": 319 }, { "epoch": 0.021252573553828785, "grad_norm": 252.43658447265625, "learning_rate": 7.079646017699115e-07, "loss": 28.0938, "step": 320 }, { "epoch": 0.0213189878461845, "grad_norm": 235.93991088867188, "learning_rate": 7.101769911504425e-07, "loss": 30.4375, "step": 321 }, { "epoch": 0.021385402138540215, "grad_norm": 845.3995971679688, "learning_rate": 7.123893805309734e-07, "loss": 31.9688, "step": 322 }, { "epoch": 0.02145181643089593, "grad_norm": 327.10565185546875, "learning_rate": 7.146017699115043e-07, "loss": 32.0312, "step": 323 }, { "epoch": 0.021518230723251645, "grad_norm": 217.4642333984375, "learning_rate": 7.168141592920353e-07, "loss": 29.5625, "step": 324 }, { "epoch": 0.02158464501560736, "grad_norm": 402.5422668457031, "learning_rate": 7.190265486725663e-07, "loss": 29.5, "step": 325 }, { "epoch": 0.021651059307963072, "grad_norm": 255.02708435058594, "learning_rate": 7.212389380530973e-07, "loss": 30.2188, "step": 326 }, { "epoch": 0.02171747360031879, "grad_norm": 302.2115783691406, "learning_rate": 7.234513274336282e-07, "loss": 28.7812, "step": 327 }, { "epoch": 0.021783887892674502, "grad_norm": 314.4698791503906, "learning_rate": 7.256637168141593e-07, "loss": 33.5, "step": 328 }, { "epoch": 0.02185030218503022, "grad_norm": 426.9784240722656, "learning_rate": 7.278761061946902e-07, "loss": 37.4375, "step": 329 }, { "epoch": 0.021916716477385932, "grad_norm": 284.21820068359375, "learning_rate": 7.300884955752212e-07, "loss": 28.75, "step": 330 }, { "epoch": 0.02198313076974165, "grad_norm": 259.9896240234375, "learning_rate": 7.323008849557522e-07, "loss": 29.1562, "step": 331 }, { "epoch": 0.022049545062097362, "grad_norm": 203.56398010253906, "learning_rate": 7.345132743362832e-07, "loss": 29.8438, "step": 332 }, { "epoch": 0.02211595935445308, "grad_norm": 376.3184814453125, "learning_rate": 7.367256637168141e-07, "loss": 35.9375, "step": 333 }, { "epoch": 0.022182373646808792, "grad_norm": 322.6485900878906, "learning_rate": 7.389380530973452e-07, "loss": 32.3125, "step": 334 }, { "epoch": 0.02224878793916451, "grad_norm": 332.6505126953125, "learning_rate": 7.411504424778761e-07, "loss": 27.5, "step": 335 }, { "epoch": 0.022315202231520222, "grad_norm": 226.72348022460938, "learning_rate": 7.433628318584071e-07, "loss": 24.6562, "step": 336 }, { "epoch": 0.02238161652387594, "grad_norm": 244.8278045654297, "learning_rate": 7.455752212389381e-07, "loss": 32.5, "step": 337 }, { "epoch": 0.022448030816231652, "grad_norm": 255.4455108642578, "learning_rate": 7.477876106194691e-07, "loss": 28.0, "step": 338 }, { "epoch": 0.02251444510858737, "grad_norm": 272.1263732910156, "learning_rate": 7.5e-07, "loss": 31.25, "step": 339 }, { "epoch": 0.022580859400943083, "grad_norm": 289.4168701171875, "learning_rate": 7.522123893805308e-07, "loss": 29.625, "step": 340 }, { "epoch": 0.0226472736932988, "grad_norm": 252.88845825195312, "learning_rate": 7.544247787610619e-07, "loss": 28.1875, "step": 341 }, { "epoch": 0.022713687985654513, "grad_norm": 228.9846954345703, "learning_rate": 7.566371681415928e-07, "loss": 27.4062, "step": 342 }, { "epoch": 0.02278010227801023, "grad_norm": 189.99655151367188, "learning_rate": 7.588495575221238e-07, "loss": 27.75, "step": 343 }, { "epoch": 0.022846516570365943, "grad_norm": 244.15919494628906, "learning_rate": 7.610619469026548e-07, "loss": 28.4375, "step": 344 }, { "epoch": 0.022912930862721656, "grad_norm": 565.2012329101562, "learning_rate": 7.632743362831858e-07, "loss": 29.0312, "step": 345 }, { "epoch": 0.022979345155077373, "grad_norm": 303.8019714355469, "learning_rate": 7.654867256637167e-07, "loss": 24.4688, "step": 346 }, { "epoch": 0.023045759447433086, "grad_norm": 298.8661193847656, "learning_rate": 7.676991150442478e-07, "loss": 29.5, "step": 347 }, { "epoch": 0.023112173739788803, "grad_norm": 239.49607849121094, "learning_rate": 7.699115044247787e-07, "loss": 28.4062, "step": 348 }, { "epoch": 0.023178588032144516, "grad_norm": 498.46832275390625, "learning_rate": 7.721238938053097e-07, "loss": 35.4375, "step": 349 }, { "epoch": 0.023245002324500233, "grad_norm": 212.68544006347656, "learning_rate": 7.743362831858407e-07, "loss": 26.9375, "step": 350 }, { "epoch": 0.023311416616855946, "grad_norm": 219.95799255371094, "learning_rate": 7.765486725663717e-07, "loss": 29.0312, "step": 351 }, { "epoch": 0.023377830909211663, "grad_norm": 217.2903289794922, "learning_rate": 7.787610619469026e-07, "loss": 26.5312, "step": 352 }, { "epoch": 0.023444245201567376, "grad_norm": 404.6930847167969, "learning_rate": 7.809734513274337e-07, "loss": 29.75, "step": 353 }, { "epoch": 0.023510659493923093, "grad_norm": 354.14263916015625, "learning_rate": 7.831858407079646e-07, "loss": 29.8438, "step": 354 }, { "epoch": 0.023577073786278806, "grad_norm": 534.2947998046875, "learning_rate": 7.853982300884956e-07, "loss": 32.6875, "step": 355 }, { "epoch": 0.023643488078634523, "grad_norm": 338.5124816894531, "learning_rate": 7.876106194690266e-07, "loss": 30.7188, "step": 356 }, { "epoch": 0.023709902370990237, "grad_norm": 234.79837036132812, "learning_rate": 7.898230088495574e-07, "loss": 31.4375, "step": 357 }, { "epoch": 0.023776316663345953, "grad_norm": 332.58984375, "learning_rate": 7.920353982300884e-07, "loss": 35.9375, "step": 358 }, { "epoch": 0.023842730955701667, "grad_norm": 218.22743225097656, "learning_rate": 7.942477876106194e-07, "loss": 26.875, "step": 359 }, { "epoch": 0.023909145248057383, "grad_norm": 264.3971252441406, "learning_rate": 7.964601769911504e-07, "loss": 29.2812, "step": 360 }, { "epoch": 0.023975559540413097, "grad_norm": 338.9344177246094, "learning_rate": 7.986725663716813e-07, "loss": 45.9062, "step": 361 }, { "epoch": 0.024041973832768813, "grad_norm": 411.9454040527344, "learning_rate": 8.008849557522124e-07, "loss": 35.9062, "step": 362 }, { "epoch": 0.024108388125124527, "grad_norm": 182.00311279296875, "learning_rate": 8.030973451327433e-07, "loss": 29.2188, "step": 363 }, { "epoch": 0.02417480241748024, "grad_norm": 331.3401184082031, "learning_rate": 8.053097345132743e-07, "loss": 31.4375, "step": 364 }, { "epoch": 0.024241216709835957, "grad_norm": 177.0878143310547, "learning_rate": 8.075221238938053e-07, "loss": 24.7812, "step": 365 }, { "epoch": 0.02430763100219167, "grad_norm": 314.965576171875, "learning_rate": 8.097345132743363e-07, "loss": 27.9062, "step": 366 }, { "epoch": 0.024374045294547387, "grad_norm": 180.74667358398438, "learning_rate": 8.119469026548672e-07, "loss": 26.4688, "step": 367 }, { "epoch": 0.0244404595869031, "grad_norm": 686.9805297851562, "learning_rate": 8.141592920353983e-07, "loss": 33.3438, "step": 368 }, { "epoch": 0.024506873879258817, "grad_norm": 220.9214630126953, "learning_rate": 8.163716814159292e-07, "loss": 31.0625, "step": 369 }, { "epoch": 0.02457328817161453, "grad_norm": 193.54872131347656, "learning_rate": 8.185840707964602e-07, "loss": 32.375, "step": 370 }, { "epoch": 0.024639702463970247, "grad_norm": 245.88392639160156, "learning_rate": 8.207964601769911e-07, "loss": 31.5312, "step": 371 }, { "epoch": 0.02470611675632596, "grad_norm": 294.548583984375, "learning_rate": 8.230088495575221e-07, "loss": 28.2188, "step": 372 }, { "epoch": 0.024772531048681677, "grad_norm": 258.857177734375, "learning_rate": 8.252212389380531e-07, "loss": 28.375, "step": 373 }, { "epoch": 0.02483894534103739, "grad_norm": 892.920654296875, "learning_rate": 8.274336283185839e-07, "loss": 23.1875, "step": 374 }, { "epoch": 0.024905359633393107, "grad_norm": 175.46142578125, "learning_rate": 8.29646017699115e-07, "loss": 25.7812, "step": 375 }, { "epoch": 0.02497177392574882, "grad_norm": 762.1475219726562, "learning_rate": 8.318584070796459e-07, "loss": 29.3125, "step": 376 }, { "epoch": 0.025038188218104537, "grad_norm": 280.13360595703125, "learning_rate": 8.340707964601769e-07, "loss": 32.1562, "step": 377 }, { "epoch": 0.02510460251046025, "grad_norm": 496.422119140625, "learning_rate": 8.362831858407079e-07, "loss": 31.8438, "step": 378 }, { "epoch": 0.025171016802815967, "grad_norm": 333.1136169433594, "learning_rate": 8.384955752212389e-07, "loss": 35.5625, "step": 379 }, { "epoch": 0.02523743109517168, "grad_norm": 170.52536010742188, "learning_rate": 8.407079646017698e-07, "loss": 28.0312, "step": 380 }, { "epoch": 0.025303845387527397, "grad_norm": 384.62249755859375, "learning_rate": 8.429203539823009e-07, "loss": 29.25, "step": 381 }, { "epoch": 0.02537025967988311, "grad_norm": 425.60760498046875, "learning_rate": 8.451327433628318e-07, "loss": 28.25, "step": 382 }, { "epoch": 0.025436673972238824, "grad_norm": 361.1803894042969, "learning_rate": 8.473451327433628e-07, "loss": 28.75, "step": 383 }, { "epoch": 0.02550308826459454, "grad_norm": 310.10491943359375, "learning_rate": 8.495575221238938e-07, "loss": 31.9688, "step": 384 }, { "epoch": 0.025569502556950254, "grad_norm": 162.7232666015625, "learning_rate": 8.517699115044248e-07, "loss": 24.3438, "step": 385 }, { "epoch": 0.02563591684930597, "grad_norm": 340.5950927734375, "learning_rate": 8.539823008849557e-07, "loss": 29.9688, "step": 386 }, { "epoch": 0.025702331141661684, "grad_norm": 569.2677001953125, "learning_rate": 8.561946902654868e-07, "loss": 31.4688, "step": 387 }, { "epoch": 0.0257687454340174, "grad_norm": 163.24635314941406, "learning_rate": 8.584070796460177e-07, "loss": 24.7812, "step": 388 }, { "epoch": 0.025835159726373114, "grad_norm": 265.4236145019531, "learning_rate": 8.606194690265486e-07, "loss": 33.0312, "step": 389 }, { "epoch": 0.02590157401872883, "grad_norm": 367.77532958984375, "learning_rate": 8.628318584070797e-07, "loss": 35.2188, "step": 390 }, { "epoch": 0.025967988311084544, "grad_norm": 215.87010192871094, "learning_rate": 8.650442477876105e-07, "loss": 27.4688, "step": 391 }, { "epoch": 0.02603440260344026, "grad_norm": 348.8505554199219, "learning_rate": 8.672566371681415e-07, "loss": 26.4688, "step": 392 }, { "epoch": 0.026100816895795974, "grad_norm": 248.89198303222656, "learning_rate": 8.694690265486725e-07, "loss": 29.75, "step": 393 }, { "epoch": 0.02616723118815169, "grad_norm": 439.79052734375, "learning_rate": 8.716814159292035e-07, "loss": 30.4375, "step": 394 }, { "epoch": 0.026233645480507405, "grad_norm": 226.25946044921875, "learning_rate": 8.738938053097344e-07, "loss": 25.5938, "step": 395 }, { "epoch": 0.02630005977286312, "grad_norm": 455.4720153808594, "learning_rate": 8.761061946902655e-07, "loss": 30.5312, "step": 396 }, { "epoch": 0.026366474065218835, "grad_norm": 231.4905242919922, "learning_rate": 8.783185840707964e-07, "loss": 26.0625, "step": 397 }, { "epoch": 0.02643288835757455, "grad_norm": 164.28564453125, "learning_rate": 8.805309734513274e-07, "loss": 22.3906, "step": 398 }, { "epoch": 0.026499302649930265, "grad_norm": 819.7192993164062, "learning_rate": 8.827433628318583e-07, "loss": 26.4375, "step": 399 }, { "epoch": 0.02656571694228598, "grad_norm": 281.3686828613281, "learning_rate": 8.849557522123894e-07, "loss": 34.875, "step": 400 }, { "epoch": 0.026632131234641695, "grad_norm": 402.36419677734375, "learning_rate": 8.871681415929203e-07, "loss": 29.7188, "step": 401 }, { "epoch": 0.026698545526997408, "grad_norm": 170.57334899902344, "learning_rate": 8.893805309734513e-07, "loss": 28.9062, "step": 402 }, { "epoch": 0.026764959819353125, "grad_norm": 395.6105651855469, "learning_rate": 8.915929203539823e-07, "loss": 27.0625, "step": 403 }, { "epoch": 0.026831374111708838, "grad_norm": 253.69273376464844, "learning_rate": 8.938053097345132e-07, "loss": 29.5312, "step": 404 }, { "epoch": 0.026897788404064555, "grad_norm": 328.8283386230469, "learning_rate": 8.960176991150442e-07, "loss": 29.375, "step": 405 }, { "epoch": 0.02696420269642027, "grad_norm": 2943.135498046875, "learning_rate": 8.982300884955752e-07, "loss": 35.3438, "step": 406 }, { "epoch": 0.027030616988775985, "grad_norm": 190.63351440429688, "learning_rate": 9.004424778761062e-07, "loss": 22.5625, "step": 407 }, { "epoch": 0.0270970312811317, "grad_norm": 249.48245239257812, "learning_rate": 9.026548672566371e-07, "loss": 31.3125, "step": 408 }, { "epoch": 0.027163445573487415, "grad_norm": 240.78921508789062, "learning_rate": 9.048672566371681e-07, "loss": 28.375, "step": 409 }, { "epoch": 0.02722985986584313, "grad_norm": 234.486083984375, "learning_rate": 9.07079646017699e-07, "loss": 28.5312, "step": 410 }, { "epoch": 0.027296274158198845, "grad_norm": 399.7752380371094, "learning_rate": 9.0929203539823e-07, "loss": 25.9375, "step": 411 }, { "epoch": 0.02736268845055456, "grad_norm": 176.03128051757812, "learning_rate": 9.11504424778761e-07, "loss": 23.0625, "step": 412 }, { "epoch": 0.027429102742910275, "grad_norm": 204.08091735839844, "learning_rate": 9.13716814159292e-07, "loss": 27.6875, "step": 413 }, { "epoch": 0.02749551703526599, "grad_norm": 255.67440795898438, "learning_rate": 9.159292035398229e-07, "loss": 26.0, "step": 414 }, { "epoch": 0.027561931327621705, "grad_norm": 470.9114074707031, "learning_rate": 9.18141592920354e-07, "loss": 33.8125, "step": 415 }, { "epoch": 0.02762834561997742, "grad_norm": 261.67529296875, "learning_rate": 9.203539823008849e-07, "loss": 32.6562, "step": 416 }, { "epoch": 0.027694759912333135, "grad_norm": 319.4299011230469, "learning_rate": 9.225663716814159e-07, "loss": 34.7812, "step": 417 }, { "epoch": 0.02776117420468885, "grad_norm": 195.89808654785156, "learning_rate": 9.247787610619469e-07, "loss": 26.9688, "step": 418 }, { "epoch": 0.027827588497044566, "grad_norm": 288.44952392578125, "learning_rate": 9.269911504424779e-07, "loss": 28.5312, "step": 419 }, { "epoch": 0.02789400278940028, "grad_norm": 488.1466979980469, "learning_rate": 9.292035398230088e-07, "loss": 34.875, "step": 420 }, { "epoch": 0.027960417081755992, "grad_norm": 378.8598327636719, "learning_rate": 9.314159292035398e-07, "loss": 29.7812, "step": 421 }, { "epoch": 0.02802683137411171, "grad_norm": 282.6359558105469, "learning_rate": 9.336283185840708e-07, "loss": 26.6562, "step": 422 }, { "epoch": 0.028093245666467422, "grad_norm": 951.7051391601562, "learning_rate": 9.358407079646017e-07, "loss": 31.7812, "step": 423 }, { "epoch": 0.02815965995882314, "grad_norm": 333.70367431640625, "learning_rate": 9.380530973451328e-07, "loss": 29.9688, "step": 424 }, { "epoch": 0.028226074251178852, "grad_norm": 357.5912170410156, "learning_rate": 9.402654867256637e-07, "loss": 32.375, "step": 425 }, { "epoch": 0.02829248854353457, "grad_norm": 323.7542419433594, "learning_rate": 9.424778761061947e-07, "loss": 28.0625, "step": 426 }, { "epoch": 0.028358902835890282, "grad_norm": 226.78318786621094, "learning_rate": 9.446902654867255e-07, "loss": 27.5312, "step": 427 }, { "epoch": 0.028425317128246, "grad_norm": 246.41213989257812, "learning_rate": 9.469026548672566e-07, "loss": 22.6875, "step": 428 }, { "epoch": 0.028491731420601712, "grad_norm": 198.07728576660156, "learning_rate": 9.491150442477875e-07, "loss": 27.9688, "step": 429 }, { "epoch": 0.02855814571295743, "grad_norm": 209.91769409179688, "learning_rate": 9.513274336283185e-07, "loss": 34.625, "step": 430 }, { "epoch": 0.028624560005313143, "grad_norm": 321.9237976074219, "learning_rate": 9.535398230088495e-07, "loss": 38.5312, "step": 431 }, { "epoch": 0.02869097429766886, "grad_norm": 309.42437744140625, "learning_rate": 9.557522123893805e-07, "loss": 27.2188, "step": 432 }, { "epoch": 0.028757388590024573, "grad_norm": 262.63629150390625, "learning_rate": 9.579646017699114e-07, "loss": 28.6562, "step": 433 }, { "epoch": 0.02882380288238029, "grad_norm": 288.3605041503906, "learning_rate": 9.601769911504426e-07, "loss": 28.7188, "step": 434 }, { "epoch": 0.028890217174736003, "grad_norm": 313.8514404296875, "learning_rate": 9.623893805309733e-07, "loss": 27.9688, "step": 435 }, { "epoch": 0.02895663146709172, "grad_norm": 204.51914978027344, "learning_rate": 9.646017699115042e-07, "loss": 26.2188, "step": 436 }, { "epoch": 0.029023045759447433, "grad_norm": 129.25486755371094, "learning_rate": 9.668141592920354e-07, "loss": 20.375, "step": 437 }, { "epoch": 0.02908946005180315, "grad_norm": 238.27590942382812, "learning_rate": 9.690265486725663e-07, "loss": 24.625, "step": 438 }, { "epoch": 0.029155874344158863, "grad_norm": 235.4149169921875, "learning_rate": 9.712389380530972e-07, "loss": 30.9688, "step": 439 }, { "epoch": 0.02922228863651458, "grad_norm": 212.78921508789062, "learning_rate": 9.734513274336282e-07, "loss": 26.9375, "step": 440 }, { "epoch": 0.029288702928870293, "grad_norm": 423.27435302734375, "learning_rate": 9.756637168141593e-07, "loss": 26.7188, "step": 441 }, { "epoch": 0.029355117221226006, "grad_norm": 235.09300231933594, "learning_rate": 9.778761061946902e-07, "loss": 24.5938, "step": 442 }, { "epoch": 0.029421531513581723, "grad_norm": 341.330322265625, "learning_rate": 9.800884955752212e-07, "loss": 33.4688, "step": 443 }, { "epoch": 0.029487945805937436, "grad_norm": 303.7009582519531, "learning_rate": 9.82300884955752e-07, "loss": 31.0, "step": 444 }, { "epoch": 0.029554360098293153, "grad_norm": 277.9767761230469, "learning_rate": 9.845132743362832e-07, "loss": 28.4688, "step": 445 }, { "epoch": 0.029620774390648866, "grad_norm": 186.15933227539062, "learning_rate": 9.867256637168142e-07, "loss": 24.625, "step": 446 }, { "epoch": 0.029687188683004583, "grad_norm": 330.72540283203125, "learning_rate": 9.88938053097345e-07, "loss": 25.1562, "step": 447 }, { "epoch": 0.029753602975360297, "grad_norm": 292.70306396484375, "learning_rate": 9.91150442477876e-07, "loss": 21.5938, "step": 448 }, { "epoch": 0.029820017267716013, "grad_norm": 272.9013671875, "learning_rate": 9.933628318584072e-07, "loss": 28.375, "step": 449 }, { "epoch": 0.029886431560071727, "grad_norm": 225.18434143066406, "learning_rate": 9.95575221238938e-07, "loss": 26.4062, "step": 450 }, { "epoch": 0.029952845852427443, "grad_norm": 283.734375, "learning_rate": 9.97787610619469e-07, "loss": 22.1875, "step": 451 }, { "epoch": 0.030019260144783157, "grad_norm": 249.75868225097656, "learning_rate": 1e-06, "loss": 25.6875, "step": 452 }, { "epoch": 0.030085674437138873, "grad_norm": 374.28741455078125, "learning_rate": 1.0022123893805309e-06, "loss": 27.0, "step": 453 }, { "epoch": 0.030152088729494587, "grad_norm": 322.92138671875, "learning_rate": 1.0044247787610618e-06, "loss": 32.7188, "step": 454 }, { "epoch": 0.030218503021850304, "grad_norm": 355.7425842285156, "learning_rate": 1.0066371681415927e-06, "loss": 33.0938, "step": 455 }, { "epoch": 0.030284917314206017, "grad_norm": 239.9808349609375, "learning_rate": 1.0088495575221239e-06, "loss": 31.875, "step": 456 }, { "epoch": 0.030351331606561734, "grad_norm": 223.4071502685547, "learning_rate": 1.0110619469026548e-06, "loss": 26.875, "step": 457 }, { "epoch": 0.030417745898917447, "grad_norm": 162.98428344726562, "learning_rate": 1.0132743362831857e-06, "loss": 25.9062, "step": 458 }, { "epoch": 0.030484160191273164, "grad_norm": 250.9767608642578, "learning_rate": 1.0154867256637167e-06, "loss": 29.375, "step": 459 }, { "epoch": 0.030550574483628877, "grad_norm": 201.77845764160156, "learning_rate": 1.0176991150442478e-06, "loss": 23.7188, "step": 460 }, { "epoch": 0.03061698877598459, "grad_norm": 609.7275390625, "learning_rate": 1.0199115044247787e-06, "loss": 29.5938, "step": 461 }, { "epoch": 0.030683403068340307, "grad_norm": 533.0616455078125, "learning_rate": 1.0221238938053097e-06, "loss": 28.6562, "step": 462 }, { "epoch": 0.03074981736069602, "grad_norm": 402.7500915527344, "learning_rate": 1.0243362831858406e-06, "loss": 26.1875, "step": 463 }, { "epoch": 0.030816231653051737, "grad_norm": 404.0836181640625, "learning_rate": 1.0265486725663717e-06, "loss": 34.6562, "step": 464 }, { "epoch": 0.03088264594540745, "grad_norm": 252.7110595703125, "learning_rate": 1.0287610619469027e-06, "loss": 27.0938, "step": 465 }, { "epoch": 0.030949060237763167, "grad_norm": 338.299072265625, "learning_rate": 1.0309734513274336e-06, "loss": 36.6562, "step": 466 }, { "epoch": 0.03101547453011888, "grad_norm": 301.9615173339844, "learning_rate": 1.0331858407079645e-06, "loss": 41.9688, "step": 467 }, { "epoch": 0.031081888822474597, "grad_norm": 694.36474609375, "learning_rate": 1.0353982300884957e-06, "loss": 27.5625, "step": 468 }, { "epoch": 0.03114830311483031, "grad_norm": 210.47862243652344, "learning_rate": 1.0376106194690264e-06, "loss": 24.3438, "step": 469 }, { "epoch": 0.031214717407186027, "grad_norm": 264.4151306152344, "learning_rate": 1.0398230088495573e-06, "loss": 25.5, "step": 470 }, { "epoch": 0.03128113169954174, "grad_norm": 193.2547149658203, "learning_rate": 1.0420353982300885e-06, "loss": 26.8125, "step": 471 }, { "epoch": 0.03134754599189746, "grad_norm": 341.4839782714844, "learning_rate": 1.0442477876106194e-06, "loss": 29.4375, "step": 472 }, { "epoch": 0.031413960284253174, "grad_norm": 304.40985107421875, "learning_rate": 1.0464601769911503e-06, "loss": 26.3438, "step": 473 }, { "epoch": 0.031480374576608884, "grad_norm": 184.92080688476562, "learning_rate": 1.0486725663716813e-06, "loss": 28.7812, "step": 474 }, { "epoch": 0.0315467888689646, "grad_norm": 209.33541870117188, "learning_rate": 1.0508849557522124e-06, "loss": 25.6562, "step": 475 }, { "epoch": 0.03161320316132032, "grad_norm": 279.7400207519531, "learning_rate": 1.0530973451327433e-06, "loss": 34.4062, "step": 476 }, { "epoch": 0.031679617453676034, "grad_norm": 453.50006103515625, "learning_rate": 1.0553097345132743e-06, "loss": 32.8438, "step": 477 }, { "epoch": 0.031746031746031744, "grad_norm": 210.9303436279297, "learning_rate": 1.0575221238938052e-06, "loss": 26.375, "step": 478 }, { "epoch": 0.03181244603838746, "grad_norm": 184.8925323486328, "learning_rate": 1.0597345132743363e-06, "loss": 29.4062, "step": 479 }, { "epoch": 0.03187886033074318, "grad_norm": 283.05145263671875, "learning_rate": 1.0619469026548673e-06, "loss": 30.8125, "step": 480 }, { "epoch": 0.03194527462309889, "grad_norm": 484.59906005859375, "learning_rate": 1.0641592920353982e-06, "loss": 33.0938, "step": 481 }, { "epoch": 0.032011688915454604, "grad_norm": 186.5665740966797, "learning_rate": 1.0663716814159291e-06, "loss": 25.4375, "step": 482 }, { "epoch": 0.03207810320781032, "grad_norm": 260.53094482421875, "learning_rate": 1.0685840707964603e-06, "loss": 23.8125, "step": 483 }, { "epoch": 0.03214451750016604, "grad_norm": 2045.74755859375, "learning_rate": 1.0707964601769912e-06, "loss": 24.5, "step": 484 }, { "epoch": 0.03221093179252175, "grad_norm": 453.6766662597656, "learning_rate": 1.0730088495575221e-06, "loss": 28.75, "step": 485 }, { "epoch": 0.032277346084877465, "grad_norm": 240.1822509765625, "learning_rate": 1.075221238938053e-06, "loss": 33.25, "step": 486 }, { "epoch": 0.03234376037723318, "grad_norm": 205.0232696533203, "learning_rate": 1.0774336283185842e-06, "loss": 30.1875, "step": 487 }, { "epoch": 0.0324101746695889, "grad_norm": 230.56137084960938, "learning_rate": 1.079646017699115e-06, "loss": 27.125, "step": 488 }, { "epoch": 0.03247658896194461, "grad_norm": 151.33929443359375, "learning_rate": 1.0818584070796458e-06, "loss": 25.2188, "step": 489 }, { "epoch": 0.032543003254300325, "grad_norm": 165.7460174560547, "learning_rate": 1.084070796460177e-06, "loss": 28.125, "step": 490 }, { "epoch": 0.03260941754665604, "grad_norm": 189.6488800048828, "learning_rate": 1.086283185840708e-06, "loss": 28.9062, "step": 491 }, { "epoch": 0.03267583183901176, "grad_norm": 183.42039489746094, "learning_rate": 1.0884955752212388e-06, "loss": 26.625, "step": 492 }, { "epoch": 0.03274224613136747, "grad_norm": 392.4314880371094, "learning_rate": 1.0907079646017698e-06, "loss": 30.0625, "step": 493 }, { "epoch": 0.032808660423723185, "grad_norm": 280.51763916015625, "learning_rate": 1.092920353982301e-06, "loss": 31.125, "step": 494 }, { "epoch": 0.0328750747160789, "grad_norm": 1049.3399658203125, "learning_rate": 1.0951327433628318e-06, "loss": 27.9375, "step": 495 }, { "epoch": 0.03294148900843462, "grad_norm": 220.5833282470703, "learning_rate": 1.0973451327433628e-06, "loss": 26.0625, "step": 496 }, { "epoch": 0.03300790330079033, "grad_norm": 208.6725311279297, "learning_rate": 1.0995575221238937e-06, "loss": 26.4688, "step": 497 }, { "epoch": 0.033074317593146045, "grad_norm": 358.1100769042969, "learning_rate": 1.1017699115044248e-06, "loss": 28.3438, "step": 498 }, { "epoch": 0.03314073188550176, "grad_norm": 282.24090576171875, "learning_rate": 1.1039823008849558e-06, "loss": 30.3125, "step": 499 }, { "epoch": 0.03320714617785747, "grad_norm": 192.83538818359375, "learning_rate": 1.1061946902654867e-06, "loss": 25.0312, "step": 500 }, { "epoch": 0.03327356047021319, "grad_norm": 356.2369079589844, "learning_rate": 1.1084070796460176e-06, "loss": 44.0, "step": 501 }, { "epoch": 0.033339974762568905, "grad_norm": 242.562744140625, "learning_rate": 1.1106194690265488e-06, "loss": 29.75, "step": 502 }, { "epoch": 0.03340638905492462, "grad_norm": 252.31558227539062, "learning_rate": 1.1128318584070795e-06, "loss": 26.0625, "step": 503 }, { "epoch": 0.03347280334728033, "grad_norm": 246.76943969726562, "learning_rate": 1.1150442477876104e-06, "loss": 28.4375, "step": 504 }, { "epoch": 0.03353921763963605, "grad_norm": 246.18505859375, "learning_rate": 1.1172566371681416e-06, "loss": 24.875, "step": 505 }, { "epoch": 0.033605631931991765, "grad_norm": 1063.328857421875, "learning_rate": 1.1194690265486725e-06, "loss": 27.1562, "step": 506 }, { "epoch": 0.03367204622434748, "grad_norm": 216.98577880859375, "learning_rate": 1.1216814159292034e-06, "loss": 28.375, "step": 507 }, { "epoch": 0.03373846051670319, "grad_norm": 346.47564697265625, "learning_rate": 1.1238938053097344e-06, "loss": 29.5625, "step": 508 }, { "epoch": 0.03380487480905891, "grad_norm": 493.966796875, "learning_rate": 1.1261061946902655e-06, "loss": 39.75, "step": 509 }, { "epoch": 0.033871289101414626, "grad_norm": 294.18182373046875, "learning_rate": 1.1283185840707964e-06, "loss": 27.75, "step": 510 }, { "epoch": 0.03393770339377034, "grad_norm": 341.27813720703125, "learning_rate": 1.1305309734513274e-06, "loss": 34.0, "step": 511 }, { "epoch": 0.03400411768612605, "grad_norm": 257.76287841796875, "learning_rate": 1.1327433628318583e-06, "loss": 25.875, "step": 512 }, { "epoch": 0.03407053197848177, "grad_norm": 280.0215148925781, "learning_rate": 1.1349557522123894e-06, "loss": 35.9375, "step": 513 }, { "epoch": 0.034136946270837486, "grad_norm": 281.995849609375, "learning_rate": 1.1371681415929204e-06, "loss": 26.7188, "step": 514 }, { "epoch": 0.0342033605631932, "grad_norm": 303.7110290527344, "learning_rate": 1.1393805309734513e-06, "loss": 27.0625, "step": 515 }, { "epoch": 0.03426977485554891, "grad_norm": 249.9722900390625, "learning_rate": 1.1415929203539822e-06, "loss": 32.5312, "step": 516 }, { "epoch": 0.03433618914790463, "grad_norm": 238.3840789794922, "learning_rate": 1.1438053097345134e-06, "loss": 25.4688, "step": 517 }, { "epoch": 0.034402603440260346, "grad_norm": 280.704345703125, "learning_rate": 1.146017699115044e-06, "loss": 24.125, "step": 518 }, { "epoch": 0.034469017732616056, "grad_norm": 310.0551452636719, "learning_rate": 1.1482300884955752e-06, "loss": 27.0625, "step": 519 }, { "epoch": 0.03453543202497177, "grad_norm": 236.31552124023438, "learning_rate": 1.1504424778761061e-06, "loss": 25.2188, "step": 520 }, { "epoch": 0.03460184631732749, "grad_norm": 173.7613067626953, "learning_rate": 1.1526548672566373e-06, "loss": 24.8438, "step": 521 }, { "epoch": 0.034668260609683206, "grad_norm": 252.8490753173828, "learning_rate": 1.154867256637168e-06, "loss": 34.375, "step": 522 }, { "epoch": 0.034734674902038916, "grad_norm": 162.5184783935547, "learning_rate": 1.157079646017699e-06, "loss": 24.8438, "step": 523 }, { "epoch": 0.03480108919439463, "grad_norm": 315.240966796875, "learning_rate": 1.15929203539823e-06, "loss": 27.5, "step": 524 }, { "epoch": 0.03486750348675035, "grad_norm": 246.8081512451172, "learning_rate": 1.161504424778761e-06, "loss": 30.75, "step": 525 }, { "epoch": 0.034933917779106066, "grad_norm": 186.2504425048828, "learning_rate": 1.163716814159292e-06, "loss": 31.0312, "step": 526 }, { "epoch": 0.035000332071461776, "grad_norm": 766.2584228515625, "learning_rate": 1.1659292035398229e-06, "loss": 25.1875, "step": 527 }, { "epoch": 0.03506674636381749, "grad_norm": 292.1177978515625, "learning_rate": 1.168141592920354e-06, "loss": 27.7812, "step": 528 }, { "epoch": 0.03513316065617321, "grad_norm": 1298.121826171875, "learning_rate": 1.170353982300885e-06, "loss": 31.3438, "step": 529 }, { "epoch": 0.035199574948528926, "grad_norm": 237.9285888671875, "learning_rate": 1.1725663716814159e-06, "loss": 31.5625, "step": 530 }, { "epoch": 0.035265989240884636, "grad_norm": 340.7438049316406, "learning_rate": 1.1747787610619468e-06, "loss": 26.6562, "step": 531 }, { "epoch": 0.03533240353324035, "grad_norm": 191.04786682128906, "learning_rate": 1.176991150442478e-06, "loss": 23.1562, "step": 532 }, { "epoch": 0.03539881782559607, "grad_norm": 266.7891845703125, "learning_rate": 1.1792035398230087e-06, "loss": 24.5, "step": 533 }, { "epoch": 0.03546523211795179, "grad_norm": 201.1075897216797, "learning_rate": 1.1814159292035398e-06, "loss": 29.3125, "step": 534 }, { "epoch": 0.035531646410307496, "grad_norm": 283.3518981933594, "learning_rate": 1.1836283185840707e-06, "loss": 34.1875, "step": 535 }, { "epoch": 0.03559806070266321, "grad_norm": 370.14874267578125, "learning_rate": 1.1858407079646019e-06, "loss": 27.4375, "step": 536 }, { "epoch": 0.03566447499501893, "grad_norm": 215.98379516601562, "learning_rate": 1.1880530973451326e-06, "loss": 30.0938, "step": 537 }, { "epoch": 0.03573088928737464, "grad_norm": 363.71575927734375, "learning_rate": 1.1902654867256637e-06, "loss": 27.4375, "step": 538 }, { "epoch": 0.03579730357973036, "grad_norm": 308.18731689453125, "learning_rate": 1.1924778761061947e-06, "loss": 30.125, "step": 539 }, { "epoch": 0.03586371787208607, "grad_norm": 149.59365844726562, "learning_rate": 1.1946902654867256e-06, "loss": 22.7812, "step": 540 }, { "epoch": 0.03593013216444179, "grad_norm": 270.19781494140625, "learning_rate": 1.1969026548672565e-06, "loss": 31.3125, "step": 541 }, { "epoch": 0.0359965464567975, "grad_norm": 259.0935974121094, "learning_rate": 1.1991150442477874e-06, "loss": 31.75, "step": 542 }, { "epoch": 0.03606296074915322, "grad_norm": 291.7544250488281, "learning_rate": 1.2013274336283186e-06, "loss": 26.5, "step": 543 }, { "epoch": 0.036129375041508933, "grad_norm": 164.5298614501953, "learning_rate": 1.2035398230088495e-06, "loss": 23.5938, "step": 544 }, { "epoch": 0.03619578933386465, "grad_norm": 211.42845153808594, "learning_rate": 1.2057522123893804e-06, "loss": 28.6875, "step": 545 }, { "epoch": 0.03626220362622036, "grad_norm": 224.9961700439453, "learning_rate": 1.2079646017699114e-06, "loss": 24.0625, "step": 546 }, { "epoch": 0.03632861791857608, "grad_norm": 230.24928283691406, "learning_rate": 1.2101769911504425e-06, "loss": 26.25, "step": 547 }, { "epoch": 0.036395032210931794, "grad_norm": 230.3402557373047, "learning_rate": 1.2123893805309734e-06, "loss": 29.0312, "step": 548 }, { "epoch": 0.03646144650328751, "grad_norm": 423.4350280761719, "learning_rate": 1.2146017699115044e-06, "loss": 33.4062, "step": 549 }, { "epoch": 0.03652786079564322, "grad_norm": 269.96380615234375, "learning_rate": 1.2168141592920353e-06, "loss": 32.375, "step": 550 }, { "epoch": 0.03659427508799894, "grad_norm": 228.4596710205078, "learning_rate": 1.2190265486725665e-06, "loss": 25.8438, "step": 551 }, { "epoch": 0.036660689380354654, "grad_norm": 307.6494140625, "learning_rate": 1.2212389380530972e-06, "loss": 24.0, "step": 552 }, { "epoch": 0.03672710367271037, "grad_norm": 252.01048278808594, "learning_rate": 1.2234513274336283e-06, "loss": 29.6562, "step": 553 }, { "epoch": 0.03679351796506608, "grad_norm": 243.47601318359375, "learning_rate": 1.2256637168141592e-06, "loss": 20.875, "step": 554 }, { "epoch": 0.0368599322574218, "grad_norm": 156.9990692138672, "learning_rate": 1.2278761061946904e-06, "loss": 25.4688, "step": 555 }, { "epoch": 0.036926346549777514, "grad_norm": 267.4819030761719, "learning_rate": 1.230088495575221e-06, "loss": 30.8438, "step": 556 }, { "epoch": 0.036992760842133224, "grad_norm": 260.0784912109375, "learning_rate": 1.232300884955752e-06, "loss": 26.25, "step": 557 }, { "epoch": 0.03705917513448894, "grad_norm": 653.445068359375, "learning_rate": 1.2345132743362832e-06, "loss": 23.6562, "step": 558 }, { "epoch": 0.03712558942684466, "grad_norm": 182.01979064941406, "learning_rate": 1.236725663716814e-06, "loss": 23.7188, "step": 559 }, { "epoch": 0.037192003719200374, "grad_norm": 181.2103729248047, "learning_rate": 1.238938053097345e-06, "loss": 27.8125, "step": 560 }, { "epoch": 0.037258418011556084, "grad_norm": 224.741943359375, "learning_rate": 1.241150442477876e-06, "loss": 29.5312, "step": 561 }, { "epoch": 0.0373248323039118, "grad_norm": 156.29588317871094, "learning_rate": 1.243362831858407e-06, "loss": 26.0312, "step": 562 }, { "epoch": 0.03739124659626752, "grad_norm": 149.44688415527344, "learning_rate": 1.245575221238938e-06, "loss": 22.9375, "step": 563 }, { "epoch": 0.037457660888623234, "grad_norm": 132.99562072753906, "learning_rate": 1.247787610619469e-06, "loss": 26.0938, "step": 564 }, { "epoch": 0.037524075180978944, "grad_norm": 287.3142395019531, "learning_rate": 1.2499999999999999e-06, "loss": 24.375, "step": 565 }, { "epoch": 0.03759048947333466, "grad_norm": 488.141845703125, "learning_rate": 1.252212389380531e-06, "loss": 29.2812, "step": 566 }, { "epoch": 0.03765690376569038, "grad_norm": 255.42282104492188, "learning_rate": 1.2544247787610618e-06, "loss": 29.4062, "step": 567 }, { "epoch": 0.037723318058046094, "grad_norm": 160.28843688964844, "learning_rate": 1.2566371681415929e-06, "loss": 22.2812, "step": 568 }, { "epoch": 0.037789732350401804, "grad_norm": 252.50729370117188, "learning_rate": 1.2588495575221238e-06, "loss": 28.4844, "step": 569 }, { "epoch": 0.03785614664275752, "grad_norm": 243.66268920898438, "learning_rate": 1.261061946902655e-06, "loss": 21.9531, "step": 570 }, { "epoch": 0.03792256093511324, "grad_norm": 231.9391326904297, "learning_rate": 1.2632743362831857e-06, "loss": 26.375, "step": 571 }, { "epoch": 0.037988975227468955, "grad_norm": 199.92254638671875, "learning_rate": 1.2654867256637168e-06, "loss": 26.0, "step": 572 }, { "epoch": 0.038055389519824664, "grad_norm": 518.0091552734375, "learning_rate": 1.2676991150442478e-06, "loss": 35.9375, "step": 573 }, { "epoch": 0.03812180381218038, "grad_norm": 214.73477172851562, "learning_rate": 1.2699115044247789e-06, "loss": 25.6875, "step": 574 }, { "epoch": 0.0381882181045361, "grad_norm": 135.12347412109375, "learning_rate": 1.2721238938053096e-06, "loss": 23.8125, "step": 575 }, { "epoch": 0.03825463239689181, "grad_norm": 146.60107421875, "learning_rate": 1.2743362831858405e-06, "loss": 27.8125, "step": 576 }, { "epoch": 0.038321046689247525, "grad_norm": 183.24896240234375, "learning_rate": 1.2765486725663717e-06, "loss": 26.0938, "step": 577 }, { "epoch": 0.03838746098160324, "grad_norm": 236.34739685058594, "learning_rate": 1.2787610619469026e-06, "loss": 21.625, "step": 578 }, { "epoch": 0.03845387527395896, "grad_norm": 183.78860473632812, "learning_rate": 1.2809734513274335e-06, "loss": 28.4375, "step": 579 }, { "epoch": 0.03852028956631467, "grad_norm": 215.0352020263672, "learning_rate": 1.2831858407079645e-06, "loss": 28.4062, "step": 580 }, { "epoch": 0.038586703858670385, "grad_norm": 221.2220916748047, "learning_rate": 1.2853982300884956e-06, "loss": 24.5312, "step": 581 }, { "epoch": 0.0386531181510261, "grad_norm": 188.29600524902344, "learning_rate": 1.2876106194690263e-06, "loss": 19.6094, "step": 582 }, { "epoch": 0.03871953244338182, "grad_norm": 251.93948364257812, "learning_rate": 1.2898230088495575e-06, "loss": 27.3438, "step": 583 }, { "epoch": 0.03878594673573753, "grad_norm": 216.4022979736328, "learning_rate": 1.2920353982300884e-06, "loss": 25.5938, "step": 584 }, { "epoch": 0.038852361028093245, "grad_norm": 151.68634033203125, "learning_rate": 1.2942477876106195e-06, "loss": 22.5938, "step": 585 }, { "epoch": 0.03891877532044896, "grad_norm": 149.9869842529297, "learning_rate": 1.2964601769911503e-06, "loss": 25.9375, "step": 586 }, { "epoch": 0.03898518961280468, "grad_norm": 304.7462463378906, "learning_rate": 1.2986725663716814e-06, "loss": 29.6562, "step": 587 }, { "epoch": 0.03905160390516039, "grad_norm": 314.9557800292969, "learning_rate": 1.3008849557522123e-06, "loss": 25.1562, "step": 588 }, { "epoch": 0.039118018197516105, "grad_norm": 313.9792785644531, "learning_rate": 1.3030973451327435e-06, "loss": 25.2188, "step": 589 }, { "epoch": 0.03918443248987182, "grad_norm": 242.36419677734375, "learning_rate": 1.3053097345132742e-06, "loss": 26.9375, "step": 590 }, { "epoch": 0.03925084678222754, "grad_norm": 150.2750244140625, "learning_rate": 1.3075221238938051e-06, "loss": 25.5, "step": 591 }, { "epoch": 0.03931726107458325, "grad_norm": 231.15699768066406, "learning_rate": 1.3097345132743363e-06, "loss": 26.5312, "step": 592 }, { "epoch": 0.039383675366938965, "grad_norm": 171.90621948242188, "learning_rate": 1.3119469026548672e-06, "loss": 23.9375, "step": 593 }, { "epoch": 0.03945008965929468, "grad_norm": 205.8200225830078, "learning_rate": 1.3141592920353981e-06, "loss": 25.2188, "step": 594 }, { "epoch": 0.03951650395165039, "grad_norm": 146.29136657714844, "learning_rate": 1.316371681415929e-06, "loss": 25.4062, "step": 595 }, { "epoch": 0.03958291824400611, "grad_norm": 153.70950317382812, "learning_rate": 1.3185840707964602e-06, "loss": 26.1562, "step": 596 }, { "epoch": 0.039649332536361825, "grad_norm": 193.98208618164062, "learning_rate": 1.3207964601769911e-06, "loss": 30.1875, "step": 597 }, { "epoch": 0.03971574682871754, "grad_norm": 220.9350128173828, "learning_rate": 1.323008849557522e-06, "loss": 27.7812, "step": 598 }, { "epoch": 0.03978216112107325, "grad_norm": 172.76553344726562, "learning_rate": 1.325221238938053e-06, "loss": 22.9688, "step": 599 }, { "epoch": 0.03984857541342897, "grad_norm": 167.8828582763672, "learning_rate": 1.3274336283185841e-06, "loss": 26.5625, "step": 600 }, { "epoch": 0.039914989705784686, "grad_norm": 361.8505859375, "learning_rate": 1.3296460176991148e-06, "loss": 24.5938, "step": 601 }, { "epoch": 0.0399814039981404, "grad_norm": 293.7651672363281, "learning_rate": 1.331858407079646e-06, "loss": 24.9375, "step": 602 }, { "epoch": 0.04004781829049611, "grad_norm": 357.95367431640625, "learning_rate": 1.334070796460177e-06, "loss": 31.2812, "step": 603 }, { "epoch": 0.04011423258285183, "grad_norm": 279.86669921875, "learning_rate": 1.336283185840708e-06, "loss": 23.4688, "step": 604 }, { "epoch": 0.040180646875207546, "grad_norm": 241.3472442626953, "learning_rate": 1.3384955752212388e-06, "loss": 28.0, "step": 605 }, { "epoch": 0.04024706116756326, "grad_norm": 379.6533203125, "learning_rate": 1.34070796460177e-06, "loss": 28.4062, "step": 606 }, { "epoch": 0.04031347545991897, "grad_norm": 514.8516845703125, "learning_rate": 1.3429203539823008e-06, "loss": 21.9062, "step": 607 }, { "epoch": 0.04037988975227469, "grad_norm": 360.0697021484375, "learning_rate": 1.345132743362832e-06, "loss": 26.9219, "step": 608 }, { "epoch": 0.040446304044630406, "grad_norm": 672.1234741210938, "learning_rate": 1.3473451327433627e-06, "loss": 30.8438, "step": 609 }, { "epoch": 0.04051271833698612, "grad_norm": 315.45697021484375, "learning_rate": 1.3495575221238936e-06, "loss": 25.0938, "step": 610 }, { "epoch": 0.04057913262934183, "grad_norm": 222.94833374023438, "learning_rate": 1.3517699115044248e-06, "loss": 27.4688, "step": 611 }, { "epoch": 0.04064554692169755, "grad_norm": 400.40093994140625, "learning_rate": 1.3539823008849557e-06, "loss": 36.5312, "step": 612 }, { "epoch": 0.040711961214053266, "grad_norm": 244.0144500732422, "learning_rate": 1.3561946902654866e-06, "loss": 39.125, "step": 613 }, { "epoch": 0.040778375506408976, "grad_norm": 132.4122772216797, "learning_rate": 1.3584070796460176e-06, "loss": 21.0, "step": 614 }, { "epoch": 0.04084478979876469, "grad_norm": 353.4618225097656, "learning_rate": 1.3606194690265487e-06, "loss": 35.5312, "step": 615 }, { "epoch": 0.04091120409112041, "grad_norm": 211.92344665527344, "learning_rate": 1.3628318584070794e-06, "loss": 34.2188, "step": 616 }, { "epoch": 0.040977618383476126, "grad_norm": 257.62591552734375, "learning_rate": 1.3650442477876106e-06, "loss": 27.5625, "step": 617 }, { "epoch": 0.041044032675831836, "grad_norm": 150.86224365234375, "learning_rate": 1.3672566371681415e-06, "loss": 28.125, "step": 618 }, { "epoch": 0.04111044696818755, "grad_norm": 399.2211608886719, "learning_rate": 1.3694690265486726e-06, "loss": 34.0938, "step": 619 }, { "epoch": 0.04117686126054327, "grad_norm": 286.5841064453125, "learning_rate": 1.3716814159292034e-06, "loss": 28.375, "step": 620 }, { "epoch": 0.041243275552898986, "grad_norm": 220.9322509765625, "learning_rate": 1.3738938053097345e-06, "loss": 27.8125, "step": 621 }, { "epoch": 0.041309689845254696, "grad_norm": 276.2059020996094, "learning_rate": 1.3761061946902654e-06, "loss": 29.875, "step": 622 }, { "epoch": 0.04137610413761041, "grad_norm": 248.45639038085938, "learning_rate": 1.3783185840707966e-06, "loss": 31.9688, "step": 623 }, { "epoch": 0.04144251842996613, "grad_norm": 204.9469757080078, "learning_rate": 1.3805309734513273e-06, "loss": 24.5312, "step": 624 }, { "epoch": 0.04150893272232185, "grad_norm": 220.64529418945312, "learning_rate": 1.3827433628318584e-06, "loss": 27.9062, "step": 625 }, { "epoch": 0.041575347014677556, "grad_norm": 220.99671936035156, "learning_rate": 1.3849557522123894e-06, "loss": 23.3125, "step": 626 }, { "epoch": 0.04164176130703327, "grad_norm": 289.400146484375, "learning_rate": 1.3871681415929203e-06, "loss": 28.5, "step": 627 }, { "epoch": 0.04170817559938899, "grad_norm": 437.8466491699219, "learning_rate": 1.3893805309734512e-06, "loss": 29.7188, "step": 628 }, { "epoch": 0.04177458989174471, "grad_norm": 207.79566955566406, "learning_rate": 1.3915929203539821e-06, "loss": 25.7812, "step": 629 }, { "epoch": 0.04184100418410042, "grad_norm": 366.2226867675781, "learning_rate": 1.3938053097345133e-06, "loss": 24.8125, "step": 630 }, { "epoch": 0.04190741847645613, "grad_norm": 259.078369140625, "learning_rate": 1.396017699115044e-06, "loss": 32.1562, "step": 631 }, { "epoch": 0.04197383276881185, "grad_norm": 263.93719482421875, "learning_rate": 1.3982300884955752e-06, "loss": 22.625, "step": 632 }, { "epoch": 0.04204024706116756, "grad_norm": 222.4141387939453, "learning_rate": 1.400442477876106e-06, "loss": 27.4062, "step": 633 }, { "epoch": 0.04210666135352328, "grad_norm": 239.16864013671875, "learning_rate": 1.4026548672566372e-06, "loss": 20.875, "step": 634 }, { "epoch": 0.042173075645878993, "grad_norm": 305.57183837890625, "learning_rate": 1.404867256637168e-06, "loss": 33.5, "step": 635 }, { "epoch": 0.04223948993823471, "grad_norm": 737.751708984375, "learning_rate": 1.407079646017699e-06, "loss": 27.4688, "step": 636 }, { "epoch": 0.04230590423059042, "grad_norm": 195.9906768798828, "learning_rate": 1.40929203539823e-06, "loss": 25.0938, "step": 637 }, { "epoch": 0.04237231852294614, "grad_norm": 182.673828125, "learning_rate": 1.4115044247787612e-06, "loss": 24.375, "step": 638 }, { "epoch": 0.042438732815301854, "grad_norm": 135.03265380859375, "learning_rate": 1.4137168141592919e-06, "loss": 25.9375, "step": 639 }, { "epoch": 0.04250514710765757, "grad_norm": 247.98574829101562, "learning_rate": 1.415929203539823e-06, "loss": 28.6562, "step": 640 }, { "epoch": 0.04257156140001328, "grad_norm": 250.7220916748047, "learning_rate": 1.418141592920354e-06, "loss": 27.8438, "step": 641 }, { "epoch": 0.042637975692369, "grad_norm": 184.0060577392578, "learning_rate": 1.420353982300885e-06, "loss": 28.7812, "step": 642 }, { "epoch": 0.042704389984724714, "grad_norm": 389.25653076171875, "learning_rate": 1.4225663716814158e-06, "loss": 25.5625, "step": 643 }, { "epoch": 0.04277080427708043, "grad_norm": 135.6185760498047, "learning_rate": 1.4247787610619467e-06, "loss": 24.6562, "step": 644 }, { "epoch": 0.04283721856943614, "grad_norm": 139.87147521972656, "learning_rate": 1.4269911504424779e-06, "loss": 24.6562, "step": 645 }, { "epoch": 0.04290363286179186, "grad_norm": 222.6914520263672, "learning_rate": 1.4292035398230086e-06, "loss": 25.0312, "step": 646 }, { "epoch": 0.042970047154147574, "grad_norm": 397.3922119140625, "learning_rate": 1.4314159292035397e-06, "loss": 26.3438, "step": 647 }, { "epoch": 0.04303646144650329, "grad_norm": 192.03440856933594, "learning_rate": 1.4336283185840707e-06, "loss": 26.5938, "step": 648 }, { "epoch": 0.043102875738859, "grad_norm": 136.53466796875, "learning_rate": 1.4358407079646018e-06, "loss": 20.6719, "step": 649 }, { "epoch": 0.04316929003121472, "grad_norm": 161.11607360839844, "learning_rate": 1.4380530973451325e-06, "loss": 25.0312, "step": 650 }, { "epoch": 0.043235704323570434, "grad_norm": 153.11395263671875, "learning_rate": 1.4402654867256637e-06, "loss": 21.375, "step": 651 }, { "epoch": 0.043302118615926144, "grad_norm": 175.22149658203125, "learning_rate": 1.4424778761061946e-06, "loss": 27.5, "step": 652 }, { "epoch": 0.04336853290828186, "grad_norm": 371.8228759765625, "learning_rate": 1.4446902654867257e-06, "loss": 29.0938, "step": 653 }, { "epoch": 0.04343494720063758, "grad_norm": 171.0619354248047, "learning_rate": 1.4469026548672565e-06, "loss": 25.1562, "step": 654 }, { "epoch": 0.043501361492993294, "grad_norm": 156.43283081054688, "learning_rate": 1.4491150442477876e-06, "loss": 25.5312, "step": 655 }, { "epoch": 0.043567775785349004, "grad_norm": 395.2561340332031, "learning_rate": 1.4513274336283185e-06, "loss": 29.9688, "step": 656 }, { "epoch": 0.04363419007770472, "grad_norm": 205.2784881591797, "learning_rate": 1.4535398230088497e-06, "loss": 26.75, "step": 657 }, { "epoch": 0.04370060437006044, "grad_norm": 209.42388916015625, "learning_rate": 1.4557522123893804e-06, "loss": 25.2812, "step": 658 }, { "epoch": 0.043767018662416154, "grad_norm": 170.70664978027344, "learning_rate": 1.4579646017699115e-06, "loss": 22.4688, "step": 659 }, { "epoch": 0.043833432954771864, "grad_norm": 156.5286102294922, "learning_rate": 1.4601769911504425e-06, "loss": 26.125, "step": 660 }, { "epoch": 0.04389984724712758, "grad_norm": 255.79078674316406, "learning_rate": 1.4623893805309736e-06, "loss": 25.4375, "step": 661 }, { "epoch": 0.0439662615394833, "grad_norm": 266.6308898925781, "learning_rate": 1.4646017699115043e-06, "loss": 21.5938, "step": 662 }, { "epoch": 0.044032675831839015, "grad_norm": 338.9183654785156, "learning_rate": 1.4668141592920352e-06, "loss": 25.6875, "step": 663 }, { "epoch": 0.044099090124194724, "grad_norm": 176.023681640625, "learning_rate": 1.4690265486725664e-06, "loss": 24.5625, "step": 664 }, { "epoch": 0.04416550441655044, "grad_norm": 443.2438659667969, "learning_rate": 1.471238938053097e-06, "loss": 28.25, "step": 665 }, { "epoch": 0.04423191870890616, "grad_norm": 271.4664001464844, "learning_rate": 1.4734513274336282e-06, "loss": 23.5625, "step": 666 }, { "epoch": 0.044298333001261875, "grad_norm": 320.8485412597656, "learning_rate": 1.4756637168141592e-06, "loss": 28.9688, "step": 667 }, { "epoch": 0.044364747293617585, "grad_norm": 912.989501953125, "learning_rate": 1.4778761061946903e-06, "loss": 27.6875, "step": 668 }, { "epoch": 0.0444311615859733, "grad_norm": 243.12860107421875, "learning_rate": 1.480088495575221e-06, "loss": 24.2812, "step": 669 }, { "epoch": 0.04449757587832902, "grad_norm": 157.48477172851562, "learning_rate": 1.4823008849557522e-06, "loss": 27.0312, "step": 670 }, { "epoch": 0.04456399017068473, "grad_norm": 332.1856384277344, "learning_rate": 1.4845132743362831e-06, "loss": 31.1875, "step": 671 }, { "epoch": 0.044630404463040445, "grad_norm": 245.91712951660156, "learning_rate": 1.4867256637168142e-06, "loss": 28.9375, "step": 672 }, { "epoch": 0.04469681875539616, "grad_norm": 121.6054458618164, "learning_rate": 1.488938053097345e-06, "loss": 23.9375, "step": 673 }, { "epoch": 0.04476323304775188, "grad_norm": 155.35939025878906, "learning_rate": 1.4911504424778761e-06, "loss": 25.2812, "step": 674 }, { "epoch": 0.04482964734010759, "grad_norm": 332.5707092285156, "learning_rate": 1.493362831858407e-06, "loss": 28.1875, "step": 675 }, { "epoch": 0.044896061632463305, "grad_norm": 182.33718872070312, "learning_rate": 1.4955752212389382e-06, "loss": 32.0938, "step": 676 }, { "epoch": 0.04496247592481902, "grad_norm": 347.8371887207031, "learning_rate": 1.497787610619469e-06, "loss": 24.625, "step": 677 }, { "epoch": 0.04502889021717474, "grad_norm": 157.27781677246094, "learning_rate": 1.5e-06, "loss": 20.5, "step": 678 }, { "epoch": 0.04509530450953045, "grad_norm": 142.02969360351562, "learning_rate": 1.502212389380531e-06, "loss": 21.25, "step": 679 }, { "epoch": 0.045161718801886165, "grad_norm": 334.0956726074219, "learning_rate": 1.5044247787610617e-06, "loss": 26.2812, "step": 680 }, { "epoch": 0.04522813309424188, "grad_norm": 151.70968627929688, "learning_rate": 1.5066371681415928e-06, "loss": 25.5312, "step": 681 }, { "epoch": 0.0452945473865976, "grad_norm": 281.9298400878906, "learning_rate": 1.5088495575221238e-06, "loss": 32.7812, "step": 682 }, { "epoch": 0.04536096167895331, "grad_norm": 394.71539306640625, "learning_rate": 1.511061946902655e-06, "loss": 27.9688, "step": 683 }, { "epoch": 0.045427375971309025, "grad_norm": 149.4479217529297, "learning_rate": 1.5132743362831856e-06, "loss": 21.6875, "step": 684 }, { "epoch": 0.04549379026366474, "grad_norm": 274.9649963378906, "learning_rate": 1.5154867256637168e-06, "loss": 27.625, "step": 685 }, { "epoch": 0.04556020455602046, "grad_norm": 270.4120178222656, "learning_rate": 1.5176991150442477e-06, "loss": 23.0625, "step": 686 }, { "epoch": 0.04562661884837617, "grad_norm": 201.01467895507812, "learning_rate": 1.5199115044247788e-06, "loss": 28.2188, "step": 687 }, { "epoch": 0.045693033140731885, "grad_norm": 253.71607971191406, "learning_rate": 1.5221238938053095e-06, "loss": 23.7188, "step": 688 }, { "epoch": 0.0457594474330876, "grad_norm": 149.77362060546875, "learning_rate": 1.5243362831858407e-06, "loss": 25.1875, "step": 689 }, { "epoch": 0.04582586172544331, "grad_norm": 351.1092529296875, "learning_rate": 1.5265486725663716e-06, "loss": 26.6875, "step": 690 }, { "epoch": 0.04589227601779903, "grad_norm": 293.6138610839844, "learning_rate": 1.5287610619469028e-06, "loss": 23.5625, "step": 691 }, { "epoch": 0.045958690310154746, "grad_norm": 433.9178771972656, "learning_rate": 1.5309734513274335e-06, "loss": 24.375, "step": 692 }, { "epoch": 0.04602510460251046, "grad_norm": 182.3809051513672, "learning_rate": 1.5331858407079646e-06, "loss": 24.0, "step": 693 }, { "epoch": 0.04609151889486617, "grad_norm": 250.31976318359375, "learning_rate": 1.5353982300884955e-06, "loss": 29.5625, "step": 694 }, { "epoch": 0.04615793318722189, "grad_norm": 135.30001831054688, "learning_rate": 1.5376106194690263e-06, "loss": 24.4688, "step": 695 }, { "epoch": 0.046224347479577606, "grad_norm": 778.3662109375, "learning_rate": 1.5398230088495574e-06, "loss": 27.4062, "step": 696 }, { "epoch": 0.04629076177193332, "grad_norm": 271.09735107421875, "learning_rate": 1.5420353982300883e-06, "loss": 22.8125, "step": 697 }, { "epoch": 0.04635717606428903, "grad_norm": 769.6979370117188, "learning_rate": 1.5442477876106195e-06, "loss": 26.0, "step": 698 }, { "epoch": 0.04642359035664475, "grad_norm": 353.24908447265625, "learning_rate": 1.5464601769911502e-06, "loss": 30.6875, "step": 699 }, { "epoch": 0.046490004649000466, "grad_norm": 221.12217712402344, "learning_rate": 1.5486725663716813e-06, "loss": 27.7812, "step": 700 }, { "epoch": 0.04655641894135618, "grad_norm": 433.5491638183594, "learning_rate": 1.5508849557522123e-06, "loss": 36.875, "step": 701 }, { "epoch": 0.04662283323371189, "grad_norm": 177.83897399902344, "learning_rate": 1.5530973451327434e-06, "loss": 21.2188, "step": 702 }, { "epoch": 0.04668924752606761, "grad_norm": 447.67681884765625, "learning_rate": 1.5553097345132741e-06, "loss": 25.2188, "step": 703 }, { "epoch": 0.046755661818423326, "grad_norm": 161.10293579101562, "learning_rate": 1.5575221238938053e-06, "loss": 23.0, "step": 704 }, { "epoch": 0.04682207611077904, "grad_norm": 338.5676574707031, "learning_rate": 1.5597345132743362e-06, "loss": 28.875, "step": 705 }, { "epoch": 0.04688849040313475, "grad_norm": 166.50042724609375, "learning_rate": 1.5619469026548673e-06, "loss": 27.2188, "step": 706 }, { "epoch": 0.04695490469549047, "grad_norm": 975.39599609375, "learning_rate": 1.564159292035398e-06, "loss": 22.0938, "step": 707 }, { "epoch": 0.047021318987846186, "grad_norm": 255.6847381591797, "learning_rate": 1.5663716814159292e-06, "loss": 26.0312, "step": 708 }, { "epoch": 0.047087733280201896, "grad_norm": 218.67730712890625, "learning_rate": 1.5685840707964601e-06, "loss": 27.25, "step": 709 }, { "epoch": 0.04715414757255761, "grad_norm": 321.67376708984375, "learning_rate": 1.5707964601769913e-06, "loss": 27.75, "step": 710 }, { "epoch": 0.04722056186491333, "grad_norm": 322.9001159667969, "learning_rate": 1.573008849557522e-06, "loss": 26.6875, "step": 711 }, { "epoch": 0.047286976157269046, "grad_norm": 365.61932373046875, "learning_rate": 1.5752212389380531e-06, "loss": 27.1562, "step": 712 }, { "epoch": 0.047353390449624756, "grad_norm": 149.2021942138672, "learning_rate": 1.577433628318584e-06, "loss": 22.0625, "step": 713 }, { "epoch": 0.04741980474198047, "grad_norm": 162.8117218017578, "learning_rate": 1.5796460176991148e-06, "loss": 27.0312, "step": 714 }, { "epoch": 0.04748621903433619, "grad_norm": 331.802490234375, "learning_rate": 1.581858407079646e-06, "loss": 27.5938, "step": 715 }, { "epoch": 0.04755263332669191, "grad_norm": 259.2983703613281, "learning_rate": 1.5840707964601769e-06, "loss": 27.6562, "step": 716 }, { "epoch": 0.047619047619047616, "grad_norm": 253.53363037109375, "learning_rate": 1.586283185840708e-06, "loss": 25.3438, "step": 717 }, { "epoch": 0.04768546191140333, "grad_norm": 270.07781982421875, "learning_rate": 1.5884955752212387e-06, "loss": 29.1562, "step": 718 }, { "epoch": 0.04775187620375905, "grad_norm": 150.27032470703125, "learning_rate": 1.5907079646017699e-06, "loss": 23.7188, "step": 719 }, { "epoch": 0.04781829049611477, "grad_norm": 282.3329162597656, "learning_rate": 1.5929203539823008e-06, "loss": 28.125, "step": 720 }, { "epoch": 0.04788470478847048, "grad_norm": 196.75311279296875, "learning_rate": 1.595132743362832e-06, "loss": 25.1875, "step": 721 }, { "epoch": 0.04795111908082619, "grad_norm": 289.69439697265625, "learning_rate": 1.5973451327433626e-06, "loss": 28.8125, "step": 722 }, { "epoch": 0.04801753337318191, "grad_norm": 274.3666687011719, "learning_rate": 1.5995575221238938e-06, "loss": 30.375, "step": 723 }, { "epoch": 0.04808394766553763, "grad_norm": 248.80455017089844, "learning_rate": 1.6017699115044247e-06, "loss": 24.625, "step": 724 }, { "epoch": 0.04815036195789334, "grad_norm": 443.9928283691406, "learning_rate": 1.6039823008849559e-06, "loss": 28.5938, "step": 725 }, { "epoch": 0.048216776250249054, "grad_norm": 301.6865234375, "learning_rate": 1.6061946902654866e-06, "loss": 27.375, "step": 726 }, { "epoch": 0.04828319054260477, "grad_norm": 446.5880126953125, "learning_rate": 1.6084070796460177e-06, "loss": 26.3438, "step": 727 }, { "epoch": 0.04834960483496048, "grad_norm": 204.13755798339844, "learning_rate": 1.6106194690265486e-06, "loss": 25.0, "step": 728 }, { "epoch": 0.0484160191273162, "grad_norm": 226.7001953125, "learning_rate": 1.6128318584070796e-06, "loss": 24.875, "step": 729 }, { "epoch": 0.048482433419671914, "grad_norm": 240.1326446533203, "learning_rate": 1.6150442477876105e-06, "loss": 32.1562, "step": 730 }, { "epoch": 0.04854884771202763, "grad_norm": 131.40773010253906, "learning_rate": 1.6172566371681414e-06, "loss": 25.2812, "step": 731 }, { "epoch": 0.04861526200438334, "grad_norm": 193.91868591308594, "learning_rate": 1.6194690265486726e-06, "loss": 24.6562, "step": 732 }, { "epoch": 0.04868167629673906, "grad_norm": 177.82888793945312, "learning_rate": 1.6216814159292033e-06, "loss": 24.875, "step": 733 }, { "epoch": 0.048748090589094774, "grad_norm": 229.8272247314453, "learning_rate": 1.6238938053097344e-06, "loss": 27.0312, "step": 734 }, { "epoch": 0.04881450488145049, "grad_norm": 671.7279663085938, "learning_rate": 1.6261061946902654e-06, "loss": 31.4062, "step": 735 }, { "epoch": 0.0488809191738062, "grad_norm": 187.01666259765625, "learning_rate": 1.6283185840707965e-06, "loss": 19.7188, "step": 736 }, { "epoch": 0.04894733346616192, "grad_norm": 155.19677734375, "learning_rate": 1.6305309734513272e-06, "loss": 23.6562, "step": 737 }, { "epoch": 0.049013747758517634, "grad_norm": 233.68186950683594, "learning_rate": 1.6327433628318584e-06, "loss": 31.0, "step": 738 }, { "epoch": 0.04908016205087335, "grad_norm": 170.2172088623047, "learning_rate": 1.6349557522123893e-06, "loss": 25.375, "step": 739 }, { "epoch": 0.04914657634322906, "grad_norm": 276.3296813964844, "learning_rate": 1.6371681415929204e-06, "loss": 21.4375, "step": 740 }, { "epoch": 0.04921299063558478, "grad_norm": 239.47628784179688, "learning_rate": 1.6393805309734512e-06, "loss": 23.6562, "step": 741 }, { "epoch": 0.049279404927940494, "grad_norm": 215.43800354003906, "learning_rate": 1.6415929203539823e-06, "loss": 26.4375, "step": 742 }, { "epoch": 0.04934581922029621, "grad_norm": 165.36318969726562, "learning_rate": 1.6438053097345132e-06, "loss": 27.1875, "step": 743 }, { "epoch": 0.04941223351265192, "grad_norm": 191.92849731445312, "learning_rate": 1.6460176991150442e-06, "loss": 25.0312, "step": 744 }, { "epoch": 0.04947864780500764, "grad_norm": 186.0350341796875, "learning_rate": 1.648230088495575e-06, "loss": 23.7188, "step": 745 }, { "epoch": 0.049545062097363354, "grad_norm": 299.5389404296875, "learning_rate": 1.6504424778761062e-06, "loss": 33.9062, "step": 746 }, { "epoch": 0.049611476389719064, "grad_norm": 272.9243469238281, "learning_rate": 1.6526548672566372e-06, "loss": 26.7188, "step": 747 }, { "epoch": 0.04967789068207478, "grad_norm": 185.80613708496094, "learning_rate": 1.6548672566371679e-06, "loss": 26.3125, "step": 748 }, { "epoch": 0.0497443049744305, "grad_norm": 160.53675842285156, "learning_rate": 1.657079646017699e-06, "loss": 24.0312, "step": 749 }, { "epoch": 0.049810719266786214, "grad_norm": 219.1264190673828, "learning_rate": 1.65929203539823e-06, "loss": 31.4688, "step": 750 }, { "epoch": 0.049877133559141924, "grad_norm": 315.0263977050781, "learning_rate": 1.661504424778761e-06, "loss": 30.75, "step": 751 }, { "epoch": 0.04994354785149764, "grad_norm": 143.69114685058594, "learning_rate": 1.6637168141592918e-06, "loss": 25.375, "step": 752 }, { "epoch": 0.05000996214385336, "grad_norm": 247.151611328125, "learning_rate": 1.665929203539823e-06, "loss": 27.4688, "step": 753 }, { "epoch": 0.050076376436209075, "grad_norm": 167.9521026611328, "learning_rate": 1.6681415929203539e-06, "loss": 23.9688, "step": 754 }, { "epoch": 0.050142790728564784, "grad_norm": 341.2510681152344, "learning_rate": 1.670353982300885e-06, "loss": 30.25, "step": 755 }, { "epoch": 0.0502092050209205, "grad_norm": 242.85093688964844, "learning_rate": 1.6725663716814157e-06, "loss": 27.2812, "step": 756 }, { "epoch": 0.05027561931327622, "grad_norm": 556.0595703125, "learning_rate": 1.6747787610619469e-06, "loss": 25.375, "step": 757 }, { "epoch": 0.050342033605631935, "grad_norm": 757.5227661132812, "learning_rate": 1.6769911504424778e-06, "loss": 29.7812, "step": 758 }, { "epoch": 0.050408447897987645, "grad_norm": 227.69654846191406, "learning_rate": 1.6792035398230087e-06, "loss": 21.0938, "step": 759 }, { "epoch": 0.05047486219034336, "grad_norm": 142.9094696044922, "learning_rate": 1.6814159292035397e-06, "loss": 24.875, "step": 760 }, { "epoch": 0.05054127648269908, "grad_norm": 461.81787109375, "learning_rate": 1.6836283185840708e-06, "loss": 31.0, "step": 761 }, { "epoch": 0.050607690775054795, "grad_norm": 2182.847900390625, "learning_rate": 1.6858407079646017e-06, "loss": 27.3125, "step": 762 }, { "epoch": 0.050674105067410505, "grad_norm": 218.1992950439453, "learning_rate": 1.6880530973451327e-06, "loss": 30.0938, "step": 763 }, { "epoch": 0.05074051935976622, "grad_norm": 234.20469665527344, "learning_rate": 1.6902654867256636e-06, "loss": 27.625, "step": 764 }, { "epoch": 0.05080693365212194, "grad_norm": 308.17242431640625, "learning_rate": 1.6924778761061947e-06, "loss": 30.75, "step": 765 }, { "epoch": 0.05087334794447765, "grad_norm": 242.2608642578125, "learning_rate": 1.6946902654867257e-06, "loss": 29.1562, "step": 766 }, { "epoch": 0.050939762236833365, "grad_norm": 219.70204162597656, "learning_rate": 1.6969026548672564e-06, "loss": 22.875, "step": 767 }, { "epoch": 0.05100617652918908, "grad_norm": 186.17567443847656, "learning_rate": 1.6991150442477875e-06, "loss": 27.875, "step": 768 }, { "epoch": 0.0510725908215448, "grad_norm": 222.642578125, "learning_rate": 1.7013274336283185e-06, "loss": 25.0625, "step": 769 }, { "epoch": 0.05113900511390051, "grad_norm": 232.60418701171875, "learning_rate": 1.7035398230088496e-06, "loss": 25.9062, "step": 770 }, { "epoch": 0.051205419406256225, "grad_norm": 211.5519561767578, "learning_rate": 1.7057522123893803e-06, "loss": 25.0938, "step": 771 }, { "epoch": 0.05127183369861194, "grad_norm": 221.34996032714844, "learning_rate": 1.7079646017699115e-06, "loss": 31.125, "step": 772 }, { "epoch": 0.05133824799096766, "grad_norm": 381.5605163574219, "learning_rate": 1.7101769911504424e-06, "loss": 29.0625, "step": 773 }, { "epoch": 0.05140466228332337, "grad_norm": 248.40228271484375, "learning_rate": 1.7123893805309735e-06, "loss": 31.0, "step": 774 }, { "epoch": 0.051471076575679085, "grad_norm": 157.54197692871094, "learning_rate": 1.7146017699115043e-06, "loss": 25.75, "step": 775 }, { "epoch": 0.0515374908680348, "grad_norm": 280.5276184082031, "learning_rate": 1.7168141592920354e-06, "loss": 25.1875, "step": 776 }, { "epoch": 0.05160390516039052, "grad_norm": 155.9620361328125, "learning_rate": 1.7190265486725663e-06, "loss": 22.9688, "step": 777 }, { "epoch": 0.05167031945274623, "grad_norm": 273.1925354003906, "learning_rate": 1.7212389380530973e-06, "loss": 27.2812, "step": 778 }, { "epoch": 0.051736733745101945, "grad_norm": 223.06198120117188, "learning_rate": 1.7234513274336282e-06, "loss": 23.7969, "step": 779 }, { "epoch": 0.05180314803745766, "grad_norm": 262.6780700683594, "learning_rate": 1.7256637168141593e-06, "loss": 25.4375, "step": 780 }, { "epoch": 0.05186956232981338, "grad_norm": 160.8428192138672, "learning_rate": 1.7278761061946903e-06, "loss": 21.625, "step": 781 }, { "epoch": 0.05193597662216909, "grad_norm": 228.22848510742188, "learning_rate": 1.730088495575221e-06, "loss": 27.0625, "step": 782 }, { "epoch": 0.052002390914524806, "grad_norm": 154.31427001953125, "learning_rate": 1.7323008849557521e-06, "loss": 21.3906, "step": 783 }, { "epoch": 0.05206880520688052, "grad_norm": 705.2180786132812, "learning_rate": 1.734513274336283e-06, "loss": 23.1562, "step": 784 }, { "epoch": 0.05213521949923623, "grad_norm": 232.9654083251953, "learning_rate": 1.7367256637168142e-06, "loss": 22.5, "step": 785 }, { "epoch": 0.05220163379159195, "grad_norm": 178.5063934326172, "learning_rate": 1.738938053097345e-06, "loss": 23.8125, "step": 786 }, { "epoch": 0.052268048083947666, "grad_norm": 197.6211700439453, "learning_rate": 1.741150442477876e-06, "loss": 24.25, "step": 787 }, { "epoch": 0.05233446237630338, "grad_norm": 141.38990783691406, "learning_rate": 1.743362831858407e-06, "loss": 23.5312, "step": 788 }, { "epoch": 0.05240087666865909, "grad_norm": 151.798828125, "learning_rate": 1.7455752212389381e-06, "loss": 23.4688, "step": 789 }, { "epoch": 0.05246729096101481, "grad_norm": 224.6299591064453, "learning_rate": 1.7477876106194688e-06, "loss": 27.6562, "step": 790 }, { "epoch": 0.052533705253370526, "grad_norm": 186.30384826660156, "learning_rate": 1.75e-06, "loss": 28.4375, "step": 791 }, { "epoch": 0.05260011954572624, "grad_norm": 303.2155456542969, "learning_rate": 1.752212389380531e-06, "loss": 22.5938, "step": 792 }, { "epoch": 0.05266653383808195, "grad_norm": 160.4236297607422, "learning_rate": 1.7544247787610618e-06, "loss": 26.4688, "step": 793 }, { "epoch": 0.05273294813043767, "grad_norm": 233.85565185546875, "learning_rate": 1.7566371681415928e-06, "loss": 26.0938, "step": 794 }, { "epoch": 0.052799362422793386, "grad_norm": 175.4025421142578, "learning_rate": 1.758849557522124e-06, "loss": 21.8125, "step": 795 }, { "epoch": 0.0528657767151491, "grad_norm": 147.4490509033203, "learning_rate": 1.7610619469026548e-06, "loss": 22.375, "step": 796 }, { "epoch": 0.05293219100750481, "grad_norm": 198.03672790527344, "learning_rate": 1.7632743362831858e-06, "loss": 27.0938, "step": 797 }, { "epoch": 0.05299860529986053, "grad_norm": 211.34803771972656, "learning_rate": 1.7654867256637167e-06, "loss": 22.4375, "step": 798 }, { "epoch": 0.053065019592216246, "grad_norm": 442.2536315917969, "learning_rate": 1.7676991150442478e-06, "loss": 30.5312, "step": 799 }, { "epoch": 0.05313143388457196, "grad_norm": 224.85867309570312, "learning_rate": 1.7699115044247788e-06, "loss": 28.1406, "step": 800 }, { "epoch": 0.05319784817692767, "grad_norm": 395.8821105957031, "learning_rate": 1.7721238938053095e-06, "loss": 24.1562, "step": 801 }, { "epoch": 0.05326426246928339, "grad_norm": 244.1392059326172, "learning_rate": 1.7743362831858406e-06, "loss": 23.9375, "step": 802 }, { "epoch": 0.053330676761639106, "grad_norm": 301.3325500488281, "learning_rate": 1.7765486725663716e-06, "loss": 22.2812, "step": 803 }, { "epoch": 0.053397091053994816, "grad_norm": 262.3141174316406, "learning_rate": 1.7787610619469027e-06, "loss": 27.0, "step": 804 }, { "epoch": 0.05346350534635053, "grad_norm": 208.48773193359375, "learning_rate": 1.7809734513274334e-06, "loss": 25.7188, "step": 805 }, { "epoch": 0.05352991963870625, "grad_norm": 200.399658203125, "learning_rate": 1.7831858407079646e-06, "loss": 25.6875, "step": 806 }, { "epoch": 0.05359633393106197, "grad_norm": 241.86972045898438, "learning_rate": 1.7853982300884955e-06, "loss": 25.4688, "step": 807 }, { "epoch": 0.053662748223417676, "grad_norm": 221.17457580566406, "learning_rate": 1.7876106194690264e-06, "loss": 26.6562, "step": 808 }, { "epoch": 0.05372916251577339, "grad_norm": 232.3868408203125, "learning_rate": 1.7898230088495573e-06, "loss": 20.0938, "step": 809 }, { "epoch": 0.05379557680812911, "grad_norm": 523.4752197265625, "learning_rate": 1.7920353982300885e-06, "loss": 26.4688, "step": 810 }, { "epoch": 0.05386199110048483, "grad_norm": 348.3585205078125, "learning_rate": 1.7942477876106194e-06, "loss": 23.0312, "step": 811 }, { "epoch": 0.05392840539284054, "grad_norm": 238.4254608154297, "learning_rate": 1.7964601769911503e-06, "loss": 27.125, "step": 812 }, { "epoch": 0.05399481968519625, "grad_norm": 243.2105255126953, "learning_rate": 1.7986725663716813e-06, "loss": 24.5, "step": 813 }, { "epoch": 0.05406123397755197, "grad_norm": 231.60092163085938, "learning_rate": 1.8008849557522124e-06, "loss": 29.0938, "step": 814 }, { "epoch": 0.05412764826990769, "grad_norm": 159.7774200439453, "learning_rate": 1.8030973451327433e-06, "loss": 21.375, "step": 815 }, { "epoch": 0.0541940625622634, "grad_norm": 275.1500244140625, "learning_rate": 1.8053097345132743e-06, "loss": 23.875, "step": 816 }, { "epoch": 0.054260476854619114, "grad_norm": 204.38229370117188, "learning_rate": 1.8075221238938052e-06, "loss": 22.5938, "step": 817 }, { "epoch": 0.05432689114697483, "grad_norm": 208.84912109375, "learning_rate": 1.8097345132743361e-06, "loss": 20.6562, "step": 818 }, { "epoch": 0.05439330543933055, "grad_norm": 194.58477783203125, "learning_rate": 1.8119469026548673e-06, "loss": 31.4062, "step": 819 }, { "epoch": 0.05445971973168626, "grad_norm": 312.9147644042969, "learning_rate": 1.814159292035398e-06, "loss": 31.3125, "step": 820 }, { "epoch": 0.054526134024041974, "grad_norm": 152.26922607421875, "learning_rate": 1.8163716814159291e-06, "loss": 25.0312, "step": 821 }, { "epoch": 0.05459254831639769, "grad_norm": 231.60702514648438, "learning_rate": 1.81858407079646e-06, "loss": 24.2812, "step": 822 }, { "epoch": 0.0546589626087534, "grad_norm": 315.6712951660156, "learning_rate": 1.8207964601769912e-06, "loss": 32.125, "step": 823 }, { "epoch": 0.05472537690110912, "grad_norm": 204.7447509765625, "learning_rate": 1.823008849557522e-06, "loss": 20.6094, "step": 824 }, { "epoch": 0.054791791193464834, "grad_norm": 147.68104553222656, "learning_rate": 1.825221238938053e-06, "loss": 23.125, "step": 825 }, { "epoch": 0.05485820548582055, "grad_norm": 690.5040893554688, "learning_rate": 1.827433628318584e-06, "loss": 27.125, "step": 826 }, { "epoch": 0.05492461977817626, "grad_norm": 306.7604675292969, "learning_rate": 1.829646017699115e-06, "loss": 26.25, "step": 827 }, { "epoch": 0.05499103407053198, "grad_norm": 264.3233947753906, "learning_rate": 1.8318584070796459e-06, "loss": 26.125, "step": 828 }, { "epoch": 0.055057448362887694, "grad_norm": 315.13800048828125, "learning_rate": 1.834070796460177e-06, "loss": 20.4062, "step": 829 }, { "epoch": 0.05512386265524341, "grad_norm": 409.96014404296875, "learning_rate": 1.836283185840708e-06, "loss": 25.6562, "step": 830 }, { "epoch": 0.05519027694759912, "grad_norm": 174.1163330078125, "learning_rate": 1.8384955752212389e-06, "loss": 22.8125, "step": 831 }, { "epoch": 0.05525669123995484, "grad_norm": 191.396728515625, "learning_rate": 1.8407079646017698e-06, "loss": 27.0, "step": 832 }, { "epoch": 0.055323105532310554, "grad_norm": 213.12210083007812, "learning_rate": 1.842920353982301e-06, "loss": 25.7188, "step": 833 }, { "epoch": 0.05538951982466627, "grad_norm": 598.7440185546875, "learning_rate": 1.8451327433628319e-06, "loss": 25.2969, "step": 834 }, { "epoch": 0.05545593411702198, "grad_norm": 182.99365234375, "learning_rate": 1.8473451327433626e-06, "loss": 24.8438, "step": 835 }, { "epoch": 0.0555223484093777, "grad_norm": 883.6388549804688, "learning_rate": 1.8495575221238937e-06, "loss": 29.5, "step": 836 }, { "epoch": 0.055588762701733414, "grad_norm": 305.9509582519531, "learning_rate": 1.8517699115044246e-06, "loss": 25.4688, "step": 837 }, { "epoch": 0.05565517699408913, "grad_norm": 162.08859252929688, "learning_rate": 1.8539823008849558e-06, "loss": 20.0938, "step": 838 }, { "epoch": 0.05572159128644484, "grad_norm": 296.11029052734375, "learning_rate": 1.8561946902654865e-06, "loss": 27.6875, "step": 839 }, { "epoch": 0.05578800557880056, "grad_norm": 219.4546661376953, "learning_rate": 1.8584070796460177e-06, "loss": 25.125, "step": 840 }, { "epoch": 0.055854419871156274, "grad_norm": 376.8454895019531, "learning_rate": 1.8606194690265486e-06, "loss": 31.25, "step": 841 }, { "epoch": 0.055920834163511984, "grad_norm": 341.198974609375, "learning_rate": 1.8628318584070795e-06, "loss": 20.9062, "step": 842 }, { "epoch": 0.0559872484558677, "grad_norm": 378.5665283203125, "learning_rate": 1.8650442477876104e-06, "loss": 26.75, "step": 843 }, { "epoch": 0.05605366274822342, "grad_norm": 269.4254455566406, "learning_rate": 1.8672566371681416e-06, "loss": 24.3438, "step": 844 }, { "epoch": 0.056120077040579135, "grad_norm": 348.79669189453125, "learning_rate": 1.8694690265486725e-06, "loss": 23.3438, "step": 845 }, { "epoch": 0.056186491332934844, "grad_norm": 206.70924377441406, "learning_rate": 1.8716814159292034e-06, "loss": 23.0938, "step": 846 }, { "epoch": 0.05625290562529056, "grad_norm": 277.7427062988281, "learning_rate": 1.8738938053097344e-06, "loss": 24.9375, "step": 847 }, { "epoch": 0.05631931991764628, "grad_norm": 179.05062866210938, "learning_rate": 1.8761061946902655e-06, "loss": 23.2969, "step": 848 }, { "epoch": 0.056385734210001995, "grad_norm": 277.86102294921875, "learning_rate": 1.8783185840707964e-06, "loss": 25.3438, "step": 849 }, { "epoch": 0.056452148502357705, "grad_norm": 227.84877014160156, "learning_rate": 1.8805309734513274e-06, "loss": 28.8125, "step": 850 }, { "epoch": 0.05651856279471342, "grad_norm": 232.22105407714844, "learning_rate": 1.8827433628318583e-06, "loss": 28.4375, "step": 851 }, { "epoch": 0.05658497708706914, "grad_norm": 235.35235595703125, "learning_rate": 1.8849557522123894e-06, "loss": 19.7812, "step": 852 }, { "epoch": 0.056651391379424855, "grad_norm": 189.05650329589844, "learning_rate": 1.8871681415929204e-06, "loss": 26.3125, "step": 853 }, { "epoch": 0.056717805671780565, "grad_norm": 382.8556823730469, "learning_rate": 1.889380530973451e-06, "loss": 30.3125, "step": 854 }, { "epoch": 0.05678421996413628, "grad_norm": 372.1766662597656, "learning_rate": 1.8915929203539822e-06, "loss": 25.9688, "step": 855 }, { "epoch": 0.056850634256492, "grad_norm": 227.62680053710938, "learning_rate": 1.8938053097345132e-06, "loss": 22.2188, "step": 856 }, { "epoch": 0.056917048548847715, "grad_norm": 235.82479858398438, "learning_rate": 1.896017699115044e-06, "loss": 26.6875, "step": 857 }, { "epoch": 0.056983462841203425, "grad_norm": 190.92666625976562, "learning_rate": 1.898230088495575e-06, "loss": 23.7812, "step": 858 }, { "epoch": 0.05704987713355914, "grad_norm": 637.05810546875, "learning_rate": 1.9004424778761062e-06, "loss": 21.1562, "step": 859 }, { "epoch": 0.05711629142591486, "grad_norm": 422.857177734375, "learning_rate": 1.902654867256637e-06, "loss": 29.5625, "step": 860 }, { "epoch": 0.05718270571827057, "grad_norm": 224.5232696533203, "learning_rate": 1.904867256637168e-06, "loss": 29.75, "step": 861 }, { "epoch": 0.057249120010626285, "grad_norm": 215.78384399414062, "learning_rate": 1.907079646017699e-06, "loss": 24.25, "step": 862 }, { "epoch": 0.057315534302982, "grad_norm": 226.4017791748047, "learning_rate": 1.90929203539823e-06, "loss": 27.4062, "step": 863 }, { "epoch": 0.05738194859533772, "grad_norm": 200.9221649169922, "learning_rate": 1.911504424778761e-06, "loss": 25.2812, "step": 864 }, { "epoch": 0.05744836288769343, "grad_norm": 207.15655517578125, "learning_rate": 1.913716814159292e-06, "loss": 22.9062, "step": 865 }, { "epoch": 0.057514777180049145, "grad_norm": 306.2936706542969, "learning_rate": 1.915929203539823e-06, "loss": 31.0, "step": 866 }, { "epoch": 0.05758119147240486, "grad_norm": 246.1280059814453, "learning_rate": 1.918141592920354e-06, "loss": 24.3125, "step": 867 }, { "epoch": 0.05764760576476058, "grad_norm": 218.16294860839844, "learning_rate": 1.920353982300885e-06, "loss": 28.0938, "step": 868 }, { "epoch": 0.05771402005711629, "grad_norm": 168.377197265625, "learning_rate": 1.9225663716814157e-06, "loss": 23.375, "step": 869 }, { "epoch": 0.057780434349472005, "grad_norm": 172.72201538085938, "learning_rate": 1.9247787610619466e-06, "loss": 21.5, "step": 870 }, { "epoch": 0.05784684864182772, "grad_norm": 202.9408416748047, "learning_rate": 1.926991150442478e-06, "loss": 23.375, "step": 871 }, { "epoch": 0.05791326293418344, "grad_norm": 586.6368408203125, "learning_rate": 1.9292035398230085e-06, "loss": 31.3125, "step": 872 }, { "epoch": 0.05797967722653915, "grad_norm": 278.7891540527344, "learning_rate": 1.93141592920354e-06, "loss": 24.4375, "step": 873 }, { "epoch": 0.058046091518894866, "grad_norm": 271.5343933105469, "learning_rate": 1.9336283185840707e-06, "loss": 24.125, "step": 874 }, { "epoch": 0.05811250581125058, "grad_norm": 479.7917785644531, "learning_rate": 1.9358407079646017e-06, "loss": 33.6875, "step": 875 }, { "epoch": 0.0581789201036063, "grad_norm": 220.23855590820312, "learning_rate": 1.9380530973451326e-06, "loss": 24.5312, "step": 876 }, { "epoch": 0.05824533439596201, "grad_norm": 244.37277221679688, "learning_rate": 1.9402654867256635e-06, "loss": 26.5625, "step": 877 }, { "epoch": 0.058311748688317726, "grad_norm": 203.12310791015625, "learning_rate": 1.9424778761061945e-06, "loss": 25.0312, "step": 878 }, { "epoch": 0.05837816298067344, "grad_norm": 359.44989013671875, "learning_rate": 1.944690265486726e-06, "loss": 22.5312, "step": 879 }, { "epoch": 0.05844457727302916, "grad_norm": 403.85882568359375, "learning_rate": 1.9469026548672563e-06, "loss": 33.5, "step": 880 }, { "epoch": 0.05851099156538487, "grad_norm": 250.6345977783203, "learning_rate": 1.9491150442477877e-06, "loss": 24.375, "step": 881 }, { "epoch": 0.058577405857740586, "grad_norm": 209.34793090820312, "learning_rate": 1.9513274336283186e-06, "loss": 23.75, "step": 882 }, { "epoch": 0.0586438201500963, "grad_norm": 159.3431396484375, "learning_rate": 1.9535398230088495e-06, "loss": 23.25, "step": 883 }, { "epoch": 0.05871023444245201, "grad_norm": 195.09341430664062, "learning_rate": 1.9557522123893805e-06, "loss": 26.625, "step": 884 }, { "epoch": 0.05877664873480773, "grad_norm": 219.61436462402344, "learning_rate": 1.9579646017699114e-06, "loss": 28.3438, "step": 885 }, { "epoch": 0.058843063027163446, "grad_norm": 289.50128173828125, "learning_rate": 1.9601769911504423e-06, "loss": 25.0312, "step": 886 }, { "epoch": 0.05890947731951916, "grad_norm": 203.7050323486328, "learning_rate": 1.9623893805309737e-06, "loss": 27.3125, "step": 887 }, { "epoch": 0.05897589161187487, "grad_norm": 390.40850830078125, "learning_rate": 1.964601769911504e-06, "loss": 17.9531, "step": 888 }, { "epoch": 0.05904230590423059, "grad_norm": 435.1428527832031, "learning_rate": 1.966814159292035e-06, "loss": 26.5312, "step": 889 }, { "epoch": 0.059108720196586306, "grad_norm": 1212.1019287109375, "learning_rate": 1.9690265486725665e-06, "loss": 23.3438, "step": 890 }, { "epoch": 0.05917513448894202, "grad_norm": 222.27371215820312, "learning_rate": 1.971238938053097e-06, "loss": 23.3281, "step": 891 }, { "epoch": 0.05924154878129773, "grad_norm": 223.2935791015625, "learning_rate": 1.9734513274336283e-06, "loss": 23.0625, "step": 892 }, { "epoch": 0.05930796307365345, "grad_norm": 262.25823974609375, "learning_rate": 1.9756637168141593e-06, "loss": 26.2812, "step": 893 }, { "epoch": 0.059374377366009166, "grad_norm": 220.24609375, "learning_rate": 1.97787610619469e-06, "loss": 20.6562, "step": 894 }, { "epoch": 0.05944079165836488, "grad_norm": 175.95289611816406, "learning_rate": 1.980088495575221e-06, "loss": 19.5, "step": 895 }, { "epoch": 0.05950720595072059, "grad_norm": 405.1151428222656, "learning_rate": 1.982300884955752e-06, "loss": 20.1094, "step": 896 }, { "epoch": 0.05957362024307631, "grad_norm": 153.73583984375, "learning_rate": 1.984513274336283e-06, "loss": 17.9219, "step": 897 }, { "epoch": 0.05964003453543203, "grad_norm": 200.37930297851562, "learning_rate": 1.9867256637168143e-06, "loss": 21.5312, "step": 898 }, { "epoch": 0.05970644882778774, "grad_norm": 323.64447021484375, "learning_rate": 1.988938053097345e-06, "loss": 36.125, "step": 899 }, { "epoch": 0.05977286312014345, "grad_norm": 313.1463928222656, "learning_rate": 1.991150442477876e-06, "loss": 29.0938, "step": 900 }, { "epoch": 0.05983927741249917, "grad_norm": 1388.061279296875, "learning_rate": 1.993362831858407e-06, "loss": 22.0, "step": 901 }, { "epoch": 0.05990569170485489, "grad_norm": 194.32766723632812, "learning_rate": 1.995575221238938e-06, "loss": 22.6562, "step": 902 }, { "epoch": 0.0599721059972106, "grad_norm": 184.36233520507812, "learning_rate": 1.997787610619469e-06, "loss": 21.875, "step": 903 }, { "epoch": 0.06003852028956631, "grad_norm": 225.63743591308594, "learning_rate": 2e-06, "loss": 29.5938, "step": 904 }, { "epoch": 0.06010493458192203, "grad_norm": 482.8502197265625, "learning_rate": 1.9999999942162883e-06, "loss": 32.625, "step": 905 }, { "epoch": 0.06017134887427775, "grad_norm": 247.51268005371094, "learning_rate": 1.9999999768651533e-06, "loss": 25.0938, "step": 906 }, { "epoch": 0.06023776316663346, "grad_norm": 231.9724578857422, "learning_rate": 1.999999947946595e-06, "loss": 24.9688, "step": 907 }, { "epoch": 0.060304177458989174, "grad_norm": 267.8528137207031, "learning_rate": 1.9999999074606145e-06, "loss": 19.9688, "step": 908 }, { "epoch": 0.06037059175134489, "grad_norm": 387.41778564453125, "learning_rate": 1.999999855407211e-06, "loss": 28.0312, "step": 909 }, { "epoch": 0.06043700604370061, "grad_norm": 208.7961883544922, "learning_rate": 1.9999997917863863e-06, "loss": 26.7188, "step": 910 }, { "epoch": 0.06050342033605632, "grad_norm": 231.2948760986328, "learning_rate": 1.9999997165981405e-06, "loss": 26.6875, "step": 911 }, { "epoch": 0.060569834628412034, "grad_norm": 320.6202087402344, "learning_rate": 1.999999629842475e-06, "loss": 24.4375, "step": 912 }, { "epoch": 0.06063624892076775, "grad_norm": 449.8643798828125, "learning_rate": 1.9999995315193896e-06, "loss": 27.25, "step": 913 }, { "epoch": 0.06070266321312347, "grad_norm": 357.2931823730469, "learning_rate": 1.9999994216288866e-06, "loss": 27.4688, "step": 914 }, { "epoch": 0.06076907750547918, "grad_norm": 259.0432434082031, "learning_rate": 1.999999300170967e-06, "loss": 21.4375, "step": 915 }, { "epoch": 0.060835491797834894, "grad_norm": 213.43826293945312, "learning_rate": 1.999999167145632e-06, "loss": 24.4688, "step": 916 }, { "epoch": 0.06090190609019061, "grad_norm": 154.20166015625, "learning_rate": 1.9999990225528838e-06, "loss": 22.8438, "step": 917 }, { "epoch": 0.06096832038254633, "grad_norm": 243.9883270263672, "learning_rate": 1.999998866392723e-06, "loss": 24.0938, "step": 918 }, { "epoch": 0.06103473467490204, "grad_norm": 163.21304321289062, "learning_rate": 1.999998698665152e-06, "loss": 21.3125, "step": 919 }, { "epoch": 0.061101148967257754, "grad_norm": 170.6994171142578, "learning_rate": 1.999998519370173e-06, "loss": 24.0, "step": 920 }, { "epoch": 0.06116756325961347, "grad_norm": 151.1453857421875, "learning_rate": 1.999998328507787e-06, "loss": 20.8438, "step": 921 }, { "epoch": 0.06123397755196918, "grad_norm": 237.11863708496094, "learning_rate": 1.999998126077998e-06, "loss": 24.375, "step": 922 }, { "epoch": 0.0613003918443249, "grad_norm": 218.61940002441406, "learning_rate": 1.999997912080807e-06, "loss": 21.4375, "step": 923 }, { "epoch": 0.061366806136680614, "grad_norm": 194.69529724121094, "learning_rate": 1.9999976865162164e-06, "loss": 24.9062, "step": 924 }, { "epoch": 0.06143322042903633, "grad_norm": 224.4569854736328, "learning_rate": 1.9999974493842293e-06, "loss": 25.4062, "step": 925 }, { "epoch": 0.06149963472139204, "grad_norm": 163.44854736328125, "learning_rate": 1.9999972006848485e-06, "loss": 21.625, "step": 926 }, { "epoch": 0.06156604901374776, "grad_norm": 270.9807434082031, "learning_rate": 1.9999969404180766e-06, "loss": 25.1875, "step": 927 }, { "epoch": 0.061632463306103474, "grad_norm": 249.57846069335938, "learning_rate": 1.9999966685839167e-06, "loss": 25.5938, "step": 928 }, { "epoch": 0.06169887759845919, "grad_norm": 308.58917236328125, "learning_rate": 1.999996385182372e-06, "loss": 32.5625, "step": 929 }, { "epoch": 0.0617652918908149, "grad_norm": 303.9928283691406, "learning_rate": 1.999996090213446e-06, "loss": 27.3125, "step": 930 }, { "epoch": 0.06183170618317062, "grad_norm": 799.2744140625, "learning_rate": 1.9999957836771414e-06, "loss": 31.3125, "step": 931 }, { "epoch": 0.061898120475526335, "grad_norm": 262.2683410644531, "learning_rate": 1.9999954655734624e-06, "loss": 21.1875, "step": 932 }, { "epoch": 0.06196453476788205, "grad_norm": 510.39300537109375, "learning_rate": 1.9999951359024125e-06, "loss": 44.6562, "step": 933 }, { "epoch": 0.06203094906023776, "grad_norm": 215.47523498535156, "learning_rate": 1.9999947946639958e-06, "loss": 21.5312, "step": 934 }, { "epoch": 0.06209736335259348, "grad_norm": 243.6783447265625, "learning_rate": 1.9999944418582154e-06, "loss": 28.1562, "step": 935 }, { "epoch": 0.062163777644949195, "grad_norm": 209.66421508789062, "learning_rate": 1.999994077485076e-06, "loss": 19.9531, "step": 936 }, { "epoch": 0.06223019193730491, "grad_norm": 206.37962341308594, "learning_rate": 1.9999937015445823e-06, "loss": 24.1562, "step": 937 }, { "epoch": 0.06229660622966062, "grad_norm": 197.73573303222656, "learning_rate": 1.999993314036738e-06, "loss": 24.5938, "step": 938 }, { "epoch": 0.06236302052201634, "grad_norm": 439.9418640136719, "learning_rate": 1.999992914961547e-06, "loss": 26.4062, "step": 939 }, { "epoch": 0.062429434814372055, "grad_norm": 276.47882080078125, "learning_rate": 1.9999925043190153e-06, "loss": 23.5938, "step": 940 }, { "epoch": 0.062495849106727765, "grad_norm": 863.5718383789062, "learning_rate": 1.9999920821091463e-06, "loss": 20.2656, "step": 941 }, { "epoch": 0.06256226339908348, "grad_norm": 280.6564025878906, "learning_rate": 1.999991648331946e-06, "loss": 30.5312, "step": 942 }, { "epoch": 0.06262867769143919, "grad_norm": 241.96351623535156, "learning_rate": 1.999991202987419e-06, "loss": 27.875, "step": 943 }, { "epoch": 0.06269509198379491, "grad_norm": 184.66928100585938, "learning_rate": 1.99999074607557e-06, "loss": 23.9375, "step": 944 }, { "epoch": 0.06276150627615062, "grad_norm": 193.207763671875, "learning_rate": 1.999990277596405e-06, "loss": 26.5938, "step": 945 }, { "epoch": 0.06282792056850635, "grad_norm": 240.68028259277344, "learning_rate": 1.999989797549929e-06, "loss": 24.4062, "step": 946 }, { "epoch": 0.06289433486086206, "grad_norm": 373.06170654296875, "learning_rate": 1.9999893059361476e-06, "loss": 28.2656, "step": 947 }, { "epoch": 0.06296074915321777, "grad_norm": 914.6382446289062, "learning_rate": 1.9999888027550665e-06, "loss": 28.7188, "step": 948 }, { "epoch": 0.06302716344557349, "grad_norm": 226.9456787109375, "learning_rate": 1.9999882880066913e-06, "loss": 24.9375, "step": 949 }, { "epoch": 0.0630935777379292, "grad_norm": 223.12881469726562, "learning_rate": 1.999987761691029e-06, "loss": 23.5625, "step": 950 }, { "epoch": 0.06315999203028491, "grad_norm": 204.62696838378906, "learning_rate": 1.999987223808084e-06, "loss": 23.25, "step": 951 }, { "epoch": 0.06322640632264064, "grad_norm": 351.3700256347656, "learning_rate": 1.999986674357864e-06, "loss": 24.4375, "step": 952 }, { "epoch": 0.06329282061499635, "grad_norm": 320.1885681152344, "learning_rate": 1.9999861133403745e-06, "loss": 25.8203, "step": 953 }, { "epoch": 0.06335923490735207, "grad_norm": 227.41358947753906, "learning_rate": 1.9999855407556224e-06, "loss": 20.7812, "step": 954 }, { "epoch": 0.06342564919970778, "grad_norm": 364.9652404785156, "learning_rate": 1.999984956603614e-06, "loss": 24.6875, "step": 955 }, { "epoch": 0.06349206349206349, "grad_norm": 244.15049743652344, "learning_rate": 1.9999843608843565e-06, "loss": 28.875, "step": 956 }, { "epoch": 0.06355847778441921, "grad_norm": 234.6276092529297, "learning_rate": 1.9999837535978563e-06, "loss": 24.625, "step": 957 }, { "epoch": 0.06362489207677492, "grad_norm": 266.0535888671875, "learning_rate": 1.999983134744121e-06, "loss": 31.4688, "step": 958 }, { "epoch": 0.06369130636913063, "grad_norm": 258.861328125, "learning_rate": 1.9999825043231574e-06, "loss": 23.5938, "step": 959 }, { "epoch": 0.06375772066148636, "grad_norm": 2440.510986328125, "learning_rate": 1.9999818623349723e-06, "loss": 20.5938, "step": 960 }, { "epoch": 0.06382413495384207, "grad_norm": 150.6211395263672, "learning_rate": 1.9999812087795743e-06, "loss": 21.7812, "step": 961 }, { "epoch": 0.06389054924619778, "grad_norm": 165.16395568847656, "learning_rate": 1.99998054365697e-06, "loss": 19.0156, "step": 962 }, { "epoch": 0.0639569635385535, "grad_norm": 199.8099365234375, "learning_rate": 1.999979866967167e-06, "loss": 23.9375, "step": 963 }, { "epoch": 0.06402337783090921, "grad_norm": 250.69781494140625, "learning_rate": 1.999979178710174e-06, "loss": 24.9062, "step": 964 }, { "epoch": 0.06408979212326493, "grad_norm": 180.86996459960938, "learning_rate": 1.9999784788859986e-06, "loss": 19.7812, "step": 965 }, { "epoch": 0.06415620641562064, "grad_norm": 226.49008178710938, "learning_rate": 1.9999777674946484e-06, "loss": 19.9219, "step": 966 }, { "epoch": 0.06422262070797635, "grad_norm": 267.0340270996094, "learning_rate": 1.999977044536132e-06, "loss": 17.1406, "step": 967 }, { "epoch": 0.06428903500033208, "grad_norm": 150.95481872558594, "learning_rate": 1.999976310010458e-06, "loss": 21.9062, "step": 968 }, { "epoch": 0.06435544929268779, "grad_norm": 191.77818298339844, "learning_rate": 1.9999755639176347e-06, "loss": 24.3438, "step": 969 }, { "epoch": 0.0644218635850435, "grad_norm": 264.7092590332031, "learning_rate": 1.9999748062576706e-06, "loss": 20.4219, "step": 970 }, { "epoch": 0.06448827787739922, "grad_norm": 234.33628845214844, "learning_rate": 1.9999740370305746e-06, "loss": 28.5938, "step": 971 }, { "epoch": 0.06455469216975493, "grad_norm": 131.19281005859375, "learning_rate": 1.9999732562363555e-06, "loss": 18.5781, "step": 972 }, { "epoch": 0.06462110646211065, "grad_norm": 373.9825439453125, "learning_rate": 1.9999724638750224e-06, "loss": 26.2188, "step": 973 }, { "epoch": 0.06468752075446636, "grad_norm": 248.9016571044922, "learning_rate": 1.9999716599465844e-06, "loss": 24.5312, "step": 974 }, { "epoch": 0.06475393504682207, "grad_norm": 169.84677124023438, "learning_rate": 1.999970844451051e-06, "loss": 23.2812, "step": 975 }, { "epoch": 0.0648203493391778, "grad_norm": 211.3028564453125, "learning_rate": 1.9999700173884314e-06, "loss": 26.375, "step": 976 }, { "epoch": 0.0648867636315335, "grad_norm": 188.98316955566406, "learning_rate": 1.999969178758735e-06, "loss": 23.1875, "step": 977 }, { "epoch": 0.06495317792388922, "grad_norm": 299.64263916015625, "learning_rate": 1.9999683285619723e-06, "loss": 24.2812, "step": 978 }, { "epoch": 0.06501959221624494, "grad_norm": 146.4781494140625, "learning_rate": 1.9999674667981522e-06, "loss": 22.4219, "step": 979 }, { "epoch": 0.06508600650860065, "grad_norm": 186.93893432617188, "learning_rate": 1.999966593467285e-06, "loss": 21.9375, "step": 980 }, { "epoch": 0.06515242080095636, "grad_norm": 280.94354248046875, "learning_rate": 1.999965708569381e-06, "loss": 25.5, "step": 981 }, { "epoch": 0.06521883509331208, "grad_norm": 217.19821166992188, "learning_rate": 1.9999648121044503e-06, "loss": 22.75, "step": 982 }, { "epoch": 0.06528524938566779, "grad_norm": 322.17645263671875, "learning_rate": 1.9999639040725033e-06, "loss": 32.9375, "step": 983 }, { "epoch": 0.06535166367802352, "grad_norm": 256.3409118652344, "learning_rate": 1.9999629844735506e-06, "loss": 25.9375, "step": 984 }, { "epoch": 0.06541807797037923, "grad_norm": 268.9239196777344, "learning_rate": 1.9999620533076023e-06, "loss": 26.75, "step": 985 }, { "epoch": 0.06548449226273494, "grad_norm": 336.5542297363281, "learning_rate": 1.99996111057467e-06, "loss": 29.3125, "step": 986 }, { "epoch": 0.06555090655509066, "grad_norm": 226.96945190429688, "learning_rate": 1.9999601562747637e-06, "loss": 24.875, "step": 987 }, { "epoch": 0.06561732084744637, "grad_norm": 227.7809295654297, "learning_rate": 1.999959190407896e-06, "loss": 23.0625, "step": 988 }, { "epoch": 0.06568373513980208, "grad_norm": 181.5550079345703, "learning_rate": 1.9999582129740756e-06, "loss": 22.2188, "step": 989 }, { "epoch": 0.0657501494321578, "grad_norm": 244.40585327148438, "learning_rate": 1.999957223973316e-06, "loss": 28.7188, "step": 990 }, { "epoch": 0.06581656372451351, "grad_norm": 203.80679321289062, "learning_rate": 1.9999562234056276e-06, "loss": 21.7188, "step": 991 }, { "epoch": 0.06588297801686924, "grad_norm": 131.47341918945312, "learning_rate": 1.9999552112710223e-06, "loss": 18.7031, "step": 992 }, { "epoch": 0.06594939230922495, "grad_norm": 286.6537780761719, "learning_rate": 1.9999541875695118e-06, "loss": 35.2812, "step": 993 }, { "epoch": 0.06601580660158066, "grad_norm": 203.13662719726562, "learning_rate": 1.9999531523011077e-06, "loss": 24.2812, "step": 994 }, { "epoch": 0.06608222089393638, "grad_norm": 855.6458740234375, "learning_rate": 1.999952105465822e-06, "loss": 23.25, "step": 995 }, { "epoch": 0.06614863518629209, "grad_norm": 225.58258056640625, "learning_rate": 1.9999510470636675e-06, "loss": 20.5469, "step": 996 }, { "epoch": 0.0662150494786478, "grad_norm": 181.9052734375, "learning_rate": 1.999949977094655e-06, "loss": 20.625, "step": 997 }, { "epoch": 0.06628146377100352, "grad_norm": 272.7892761230469, "learning_rate": 1.9999488955587988e-06, "loss": 22.2656, "step": 998 }, { "epoch": 0.06634787806335923, "grad_norm": 157.1732177734375, "learning_rate": 1.99994780245611e-06, "loss": 24.0938, "step": 999 }, { "epoch": 0.06641429235571494, "grad_norm": 285.56988525390625, "learning_rate": 1.9999466977866014e-06, "loss": 25.3125, "step": 1000 }, { "epoch": 0.06648070664807067, "grad_norm": 482.026123046875, "learning_rate": 1.999945581550286e-06, "loss": 20.625, "step": 1001 }, { "epoch": 0.06654712094042638, "grad_norm": 232.8124542236328, "learning_rate": 1.999944453747177e-06, "loss": 25.1875, "step": 1002 }, { "epoch": 0.0666135352327821, "grad_norm": 342.911376953125, "learning_rate": 1.999943314377287e-06, "loss": 22.1875, "step": 1003 }, { "epoch": 0.06667994952513781, "grad_norm": 331.7461242675781, "learning_rate": 1.9999421634406294e-06, "loss": 38.875, "step": 1004 }, { "epoch": 0.06674636381749352, "grad_norm": 208.37442016601562, "learning_rate": 1.999941000937217e-06, "loss": 22.4062, "step": 1005 }, { "epoch": 0.06681277810984924, "grad_norm": 695.0036010742188, "learning_rate": 1.9999398268670644e-06, "loss": 22.5625, "step": 1006 }, { "epoch": 0.06687919240220495, "grad_norm": 288.0826110839844, "learning_rate": 1.999938641230184e-06, "loss": 22.2188, "step": 1007 }, { "epoch": 0.06694560669456066, "grad_norm": 263.1529235839844, "learning_rate": 1.9999374440265905e-06, "loss": 26.1562, "step": 1008 }, { "epoch": 0.06701202098691639, "grad_norm": 114.71102905273438, "learning_rate": 1.999936235256297e-06, "loss": 18.2812, "step": 1009 }, { "epoch": 0.0670784352792721, "grad_norm": 196.56808471679688, "learning_rate": 1.9999350149193173e-06, "loss": 23.7344, "step": 1010 }, { "epoch": 0.06714484957162782, "grad_norm": 763.0376586914062, "learning_rate": 1.9999337830156662e-06, "loss": 37.25, "step": 1011 }, { "epoch": 0.06721126386398353, "grad_norm": 294.26934814453125, "learning_rate": 1.999932539545358e-06, "loss": 26.625, "step": 1012 }, { "epoch": 0.06727767815633924, "grad_norm": 380.59820556640625, "learning_rate": 1.9999312845084066e-06, "loss": 21.125, "step": 1013 }, { "epoch": 0.06734409244869496, "grad_norm": 198.20761108398438, "learning_rate": 1.9999300179048265e-06, "loss": 22.5312, "step": 1014 }, { "epoch": 0.06741050674105067, "grad_norm": 144.22079467773438, "learning_rate": 1.999928739734633e-06, "loss": 22.7188, "step": 1015 }, { "epoch": 0.06747692103340638, "grad_norm": 132.8133544921875, "learning_rate": 1.99992744999784e-06, "loss": 19.5, "step": 1016 }, { "epoch": 0.06754333532576211, "grad_norm": 136.63966369628906, "learning_rate": 1.999926148694463e-06, "loss": 20.4062, "step": 1017 }, { "epoch": 0.06760974961811782, "grad_norm": 273.3392028808594, "learning_rate": 1.999924835824517e-06, "loss": 23.0, "step": 1018 }, { "epoch": 0.06767616391047353, "grad_norm": 148.04592895507812, "learning_rate": 1.999923511388017e-06, "loss": 18.875, "step": 1019 }, { "epoch": 0.06774257820282925, "grad_norm": 281.1832275390625, "learning_rate": 1.9999221753849784e-06, "loss": 27.3125, "step": 1020 }, { "epoch": 0.06780899249518496, "grad_norm": 239.92529296875, "learning_rate": 1.9999208278154166e-06, "loss": 28.3125, "step": 1021 }, { "epoch": 0.06787540678754068, "grad_norm": 305.3265686035156, "learning_rate": 1.999919468679347e-06, "loss": 27.8125, "step": 1022 }, { "epoch": 0.0679418210798964, "grad_norm": 278.7178039550781, "learning_rate": 1.999918097976786e-06, "loss": 21.5625, "step": 1023 }, { "epoch": 0.0680082353722521, "grad_norm": 236.98667907714844, "learning_rate": 1.999916715707749e-06, "loss": 21.9375, "step": 1024 }, { "epoch": 0.06807464966460783, "grad_norm": 245.99749755859375, "learning_rate": 1.9999153218722518e-06, "loss": 22.3125, "step": 1025 }, { "epoch": 0.06814106395696354, "grad_norm": 357.75958251953125, "learning_rate": 1.999913916470311e-06, "loss": 23.125, "step": 1026 }, { "epoch": 0.06820747824931925, "grad_norm": 185.88055419921875, "learning_rate": 1.9999124995019422e-06, "loss": 21.4062, "step": 1027 }, { "epoch": 0.06827389254167497, "grad_norm": 289.2301025390625, "learning_rate": 1.999911070967163e-06, "loss": 32.5938, "step": 1028 }, { "epoch": 0.06834030683403068, "grad_norm": 122.80204010009766, "learning_rate": 1.999909630865988e-06, "loss": 21.3438, "step": 1029 }, { "epoch": 0.0684067211263864, "grad_norm": 248.61363220214844, "learning_rate": 1.9999081791984355e-06, "loss": 21.6094, "step": 1030 }, { "epoch": 0.06847313541874211, "grad_norm": 290.8533020019531, "learning_rate": 1.999906715964522e-06, "loss": 26.25, "step": 1031 }, { "epoch": 0.06853954971109782, "grad_norm": 268.3686218261719, "learning_rate": 1.9999052411642642e-06, "loss": 23.7812, "step": 1032 }, { "epoch": 0.06860596400345355, "grad_norm": 233.38230895996094, "learning_rate": 1.999903754797679e-06, "loss": 25.7188, "step": 1033 }, { "epoch": 0.06867237829580926, "grad_norm": 295.8428649902344, "learning_rate": 1.999902256864783e-06, "loss": 26.6875, "step": 1034 }, { "epoch": 0.06873879258816497, "grad_norm": 197.02696228027344, "learning_rate": 1.9999007473655955e-06, "loss": 28.125, "step": 1035 }, { "epoch": 0.06880520688052069, "grad_norm": 346.53717041015625, "learning_rate": 1.9998992263001318e-06, "loss": 22.6562, "step": 1036 }, { "epoch": 0.0688716211728764, "grad_norm": 239.14634704589844, "learning_rate": 1.9998976936684107e-06, "loss": 27.0469, "step": 1037 }, { "epoch": 0.06893803546523211, "grad_norm": 161.44696044921875, "learning_rate": 1.9998961494704497e-06, "loss": 23.6875, "step": 1038 }, { "epoch": 0.06900444975758784, "grad_norm": 351.9098205566406, "learning_rate": 1.999894593706267e-06, "loss": 28.3906, "step": 1039 }, { "epoch": 0.06907086404994355, "grad_norm": 268.44219970703125, "learning_rate": 1.9998930263758798e-06, "loss": 20.4062, "step": 1040 }, { "epoch": 0.06913727834229927, "grad_norm": 276.3031005859375, "learning_rate": 1.999891447479307e-06, "loss": 28.0156, "step": 1041 }, { "epoch": 0.06920369263465498, "grad_norm": 339.614990234375, "learning_rate": 1.999889857016566e-06, "loss": 25.125, "step": 1042 }, { "epoch": 0.06927010692701069, "grad_norm": 215.5698699951172, "learning_rate": 1.999888254987676e-06, "loss": 18.4531, "step": 1043 }, { "epoch": 0.06933652121936641, "grad_norm": 190.92367553710938, "learning_rate": 1.999886641392655e-06, "loss": 24.1562, "step": 1044 }, { "epoch": 0.06940293551172212, "grad_norm": 230.86669921875, "learning_rate": 1.999885016231522e-06, "loss": 23.9062, "step": 1045 }, { "epoch": 0.06946934980407783, "grad_norm": 199.95358276367188, "learning_rate": 1.999883379504296e-06, "loss": 21.875, "step": 1046 }, { "epoch": 0.06953576409643356, "grad_norm": 524.6242065429688, "learning_rate": 1.999881731210995e-06, "loss": 28.25, "step": 1047 }, { "epoch": 0.06960217838878927, "grad_norm": 396.94610595703125, "learning_rate": 1.9998800713516393e-06, "loss": 24.8438, "step": 1048 }, { "epoch": 0.06966859268114499, "grad_norm": 259.8545837402344, "learning_rate": 1.9998783999262473e-06, "loss": 21.9062, "step": 1049 }, { "epoch": 0.0697350069735007, "grad_norm": 155.07212829589844, "learning_rate": 1.9998767169348384e-06, "loss": 23.5, "step": 1050 }, { "epoch": 0.06980142126585641, "grad_norm": 234.823974609375, "learning_rate": 1.9998750223774327e-06, "loss": 16.1406, "step": 1051 }, { "epoch": 0.06986783555821213, "grad_norm": 247.3891143798828, "learning_rate": 1.9998733162540488e-06, "loss": 22.2969, "step": 1052 }, { "epoch": 0.06993424985056784, "grad_norm": 344.95654296875, "learning_rate": 1.999871598564707e-06, "loss": 22.4688, "step": 1053 }, { "epoch": 0.07000066414292355, "grad_norm": 2558.134765625, "learning_rate": 1.999869869309427e-06, "loss": 24.7188, "step": 1054 }, { "epoch": 0.07006707843527928, "grad_norm": 223.188232421875, "learning_rate": 1.999868128488229e-06, "loss": 23.0312, "step": 1055 }, { "epoch": 0.07013349272763499, "grad_norm": 229.8405303955078, "learning_rate": 1.9998663761011333e-06, "loss": 23.5938, "step": 1056 }, { "epoch": 0.0701999070199907, "grad_norm": 295.8621520996094, "learning_rate": 1.9998646121481597e-06, "loss": 19.5, "step": 1057 }, { "epoch": 0.07026632131234642, "grad_norm": 299.36834716796875, "learning_rate": 1.999862836629329e-06, "loss": 23.375, "step": 1058 }, { "epoch": 0.07033273560470213, "grad_norm": 194.62562561035156, "learning_rate": 1.9998610495446614e-06, "loss": 20.2188, "step": 1059 }, { "epoch": 0.07039914989705785, "grad_norm": 615.19189453125, "learning_rate": 1.9998592508941774e-06, "loss": 23.6562, "step": 1060 }, { "epoch": 0.07046556418941356, "grad_norm": 309.0528259277344, "learning_rate": 1.999857440677899e-06, "loss": 29.7188, "step": 1061 }, { "epoch": 0.07053197848176927, "grad_norm": 238.3802947998047, "learning_rate": 1.9998556188958456e-06, "loss": 20.5, "step": 1062 }, { "epoch": 0.070598392774125, "grad_norm": 418.48175048828125, "learning_rate": 1.999853785548039e-06, "loss": 36.5938, "step": 1063 }, { "epoch": 0.0706648070664807, "grad_norm": 228.69430541992188, "learning_rate": 1.9998519406345004e-06, "loss": 16.4688, "step": 1064 }, { "epoch": 0.07073122135883642, "grad_norm": 203.5328826904297, "learning_rate": 1.9998500841552513e-06, "loss": 27.9688, "step": 1065 }, { "epoch": 0.07079763565119214, "grad_norm": 380.8016662597656, "learning_rate": 1.999848216110313e-06, "loss": 30.1875, "step": 1066 }, { "epoch": 0.07086404994354785, "grad_norm": 360.2862243652344, "learning_rate": 1.999846336499707e-06, "loss": 31.6875, "step": 1067 }, { "epoch": 0.07093046423590357, "grad_norm": 273.9457702636719, "learning_rate": 1.999844445323455e-06, "loss": 30.375, "step": 1068 }, { "epoch": 0.07099687852825928, "grad_norm": 280.2629699707031, "learning_rate": 1.999842542581579e-06, "loss": 27.75, "step": 1069 }, { "epoch": 0.07106329282061499, "grad_norm": 247.28631591796875, "learning_rate": 1.999840628274101e-06, "loss": 22.75, "step": 1070 }, { "epoch": 0.07112970711297072, "grad_norm": 249.10415649414062, "learning_rate": 1.9998387024010433e-06, "loss": 32.25, "step": 1071 }, { "epoch": 0.07119612140532643, "grad_norm": 204.2041015625, "learning_rate": 1.999836764962428e-06, "loss": 20.4688, "step": 1072 }, { "epoch": 0.07126253569768214, "grad_norm": 228.3826904296875, "learning_rate": 1.9998348159582773e-06, "loss": 21.4688, "step": 1073 }, { "epoch": 0.07132894999003786, "grad_norm": 253.75425720214844, "learning_rate": 1.9998328553886142e-06, "loss": 30.1875, "step": 1074 }, { "epoch": 0.07139536428239357, "grad_norm": 232.83811950683594, "learning_rate": 1.9998308832534613e-06, "loss": 20.9062, "step": 1075 }, { "epoch": 0.07146177857474928, "grad_norm": 365.50958251953125, "learning_rate": 1.999828899552841e-06, "loss": 24.8125, "step": 1076 }, { "epoch": 0.071528192867105, "grad_norm": 312.08367919921875, "learning_rate": 1.9998269042867766e-06, "loss": 24.7812, "step": 1077 }, { "epoch": 0.07159460715946071, "grad_norm": 150.97589111328125, "learning_rate": 1.999824897455291e-06, "loss": 23.875, "step": 1078 }, { "epoch": 0.07166102145181644, "grad_norm": 174.26222229003906, "learning_rate": 1.9998228790584076e-06, "loss": 24.5938, "step": 1079 }, { "epoch": 0.07172743574417215, "grad_norm": 257.17828369140625, "learning_rate": 1.9998208490961497e-06, "loss": 19.4375, "step": 1080 }, { "epoch": 0.07179385003652786, "grad_norm": 146.17129516601562, "learning_rate": 1.9998188075685413e-06, "loss": 17.375, "step": 1081 }, { "epoch": 0.07186026432888358, "grad_norm": 194.84954833984375, "learning_rate": 1.9998167544756047e-06, "loss": 26.4062, "step": 1082 }, { "epoch": 0.07192667862123929, "grad_norm": 207.3955078125, "learning_rate": 1.9998146898173645e-06, "loss": 24.2188, "step": 1083 }, { "epoch": 0.071993092913595, "grad_norm": 344.197021484375, "learning_rate": 1.999812613593845e-06, "loss": 24.6875, "step": 1084 }, { "epoch": 0.07205950720595072, "grad_norm": 226.03794860839844, "learning_rate": 1.999810525805069e-06, "loss": 23.9062, "step": 1085 }, { "epoch": 0.07212592149830643, "grad_norm": 233.78347778320312, "learning_rate": 1.999808426451062e-06, "loss": 26.125, "step": 1086 }, { "epoch": 0.07219233579066216, "grad_norm": 245.0970916748047, "learning_rate": 1.9998063155318474e-06, "loss": 21.5469, "step": 1087 }, { "epoch": 0.07225875008301787, "grad_norm": 436.62042236328125, "learning_rate": 1.9998041930474497e-06, "loss": 23.8125, "step": 1088 }, { "epoch": 0.07232516437537358, "grad_norm": 247.787353515625, "learning_rate": 1.9998020589978937e-06, "loss": 25.25, "step": 1089 }, { "epoch": 0.0723915786677293, "grad_norm": 188.42921447753906, "learning_rate": 1.999799913383204e-06, "loss": 22.4688, "step": 1090 }, { "epoch": 0.07245799296008501, "grad_norm": 207.30970764160156, "learning_rate": 1.9997977562034054e-06, "loss": 30.25, "step": 1091 }, { "epoch": 0.07252440725244072, "grad_norm": 198.9735870361328, "learning_rate": 1.999795587458523e-06, "loss": 24.375, "step": 1092 }, { "epoch": 0.07259082154479644, "grad_norm": 197.65182495117188, "learning_rate": 1.9997934071485817e-06, "loss": 20.2031, "step": 1093 }, { "epoch": 0.07265723583715215, "grad_norm": 212.7982177734375, "learning_rate": 1.9997912152736068e-06, "loss": 26.5938, "step": 1094 }, { "epoch": 0.07272365012950786, "grad_norm": 298.12884521484375, "learning_rate": 1.9997890118336235e-06, "loss": 26.25, "step": 1095 }, { "epoch": 0.07279006442186359, "grad_norm": 225.4922332763672, "learning_rate": 1.999786796828658e-06, "loss": 24.9062, "step": 1096 }, { "epoch": 0.0728564787142193, "grad_norm": 252.7838134765625, "learning_rate": 1.9997845702587347e-06, "loss": 28.3281, "step": 1097 }, { "epoch": 0.07292289300657502, "grad_norm": 181.49696350097656, "learning_rate": 1.99978233212388e-06, "loss": 21.9688, "step": 1098 }, { "epoch": 0.07298930729893073, "grad_norm": 344.4598388671875, "learning_rate": 1.9997800824241203e-06, "loss": 25.5625, "step": 1099 }, { "epoch": 0.07305572159128644, "grad_norm": 185.0198516845703, "learning_rate": 1.999777821159481e-06, "loss": 19.7031, "step": 1100 }, { "epoch": 0.07312213588364216, "grad_norm": 174.691162109375, "learning_rate": 1.9997755483299877e-06, "loss": 21.75, "step": 1101 }, { "epoch": 0.07318855017599787, "grad_norm": 229.26498413085938, "learning_rate": 1.9997732639356677e-06, "loss": 21.3125, "step": 1102 }, { "epoch": 0.07325496446835358, "grad_norm": 173.23719787597656, "learning_rate": 1.9997709679765474e-06, "loss": 22.25, "step": 1103 }, { "epoch": 0.07332137876070931, "grad_norm": 246.22451782226562, "learning_rate": 1.999768660452653e-06, "loss": 23.5469, "step": 1104 }, { "epoch": 0.07338779305306502, "grad_norm": 209.11248779296875, "learning_rate": 1.999766341364011e-06, "loss": 18.6875, "step": 1105 }, { "epoch": 0.07345420734542074, "grad_norm": 274.1006774902344, "learning_rate": 1.9997640107106484e-06, "loss": 24.125, "step": 1106 }, { "epoch": 0.07352062163777645, "grad_norm": 196.7348175048828, "learning_rate": 1.999761668492592e-06, "loss": 27.1875, "step": 1107 }, { "epoch": 0.07358703593013216, "grad_norm": 209.84814453125, "learning_rate": 1.99975931470987e-06, "loss": 24.2812, "step": 1108 }, { "epoch": 0.07365345022248788, "grad_norm": 433.1179504394531, "learning_rate": 1.999756949362508e-06, "loss": 27.9688, "step": 1109 }, { "epoch": 0.0737198645148436, "grad_norm": 277.3152770996094, "learning_rate": 1.999754572450534e-06, "loss": 19.6719, "step": 1110 }, { "epoch": 0.0737862788071993, "grad_norm": 466.2574462890625, "learning_rate": 1.9997521839739763e-06, "loss": 29.0938, "step": 1111 }, { "epoch": 0.07385269309955503, "grad_norm": 271.8940734863281, "learning_rate": 1.9997497839328615e-06, "loss": 24.3438, "step": 1112 }, { "epoch": 0.07391910739191074, "grad_norm": 348.17974853515625, "learning_rate": 1.9997473723272177e-06, "loss": 22.5312, "step": 1113 }, { "epoch": 0.07398552168426645, "grad_norm": 263.34521484375, "learning_rate": 1.9997449491570725e-06, "loss": 29.8438, "step": 1114 }, { "epoch": 0.07405193597662217, "grad_norm": 293.68621826171875, "learning_rate": 1.9997425144224542e-06, "loss": 22.1875, "step": 1115 }, { "epoch": 0.07411835026897788, "grad_norm": 263.8956298828125, "learning_rate": 1.9997400681233913e-06, "loss": 26.5625, "step": 1116 }, { "epoch": 0.0741847645613336, "grad_norm": 626.7108154296875, "learning_rate": 1.999737610259912e-06, "loss": 27.5938, "step": 1117 }, { "epoch": 0.07425117885368931, "grad_norm": 240.6473388671875, "learning_rate": 1.999735140832044e-06, "loss": 32.125, "step": 1118 }, { "epoch": 0.07431759314604502, "grad_norm": 329.7398681640625, "learning_rate": 1.9997326598398167e-06, "loss": 22.2188, "step": 1119 }, { "epoch": 0.07438400743840075, "grad_norm": 469.4482116699219, "learning_rate": 1.9997301672832583e-06, "loss": 22.0625, "step": 1120 }, { "epoch": 0.07445042173075646, "grad_norm": 210.93405151367188, "learning_rate": 1.999727663162398e-06, "loss": 22.5312, "step": 1121 }, { "epoch": 0.07451683602311217, "grad_norm": 162.57290649414062, "learning_rate": 1.9997251474772645e-06, "loss": 21.4062, "step": 1122 }, { "epoch": 0.07458325031546789, "grad_norm": 144.6345977783203, "learning_rate": 1.9997226202278866e-06, "loss": 19.625, "step": 1123 }, { "epoch": 0.0746496646078236, "grad_norm": 286.41021728515625, "learning_rate": 1.9997200814142944e-06, "loss": 26.4375, "step": 1124 }, { "epoch": 0.07471607890017933, "grad_norm": 239.3023223876953, "learning_rate": 1.9997175310365163e-06, "loss": 23.6875, "step": 1125 }, { "epoch": 0.07478249319253504, "grad_norm": 408.6880798339844, "learning_rate": 1.9997149690945823e-06, "loss": 26.9375, "step": 1126 }, { "epoch": 0.07484890748489074, "grad_norm": 692.75830078125, "learning_rate": 1.9997123955885226e-06, "loss": 27.5469, "step": 1127 }, { "epoch": 0.07491532177724647, "grad_norm": 235.82127380371094, "learning_rate": 1.999709810518366e-06, "loss": 23.8125, "step": 1128 }, { "epoch": 0.07498173606960218, "grad_norm": 260.13909912109375, "learning_rate": 1.9997072138841426e-06, "loss": 24.0312, "step": 1129 }, { "epoch": 0.07504815036195789, "grad_norm": 250.41476440429688, "learning_rate": 1.9997046056858826e-06, "loss": 22.625, "step": 1130 }, { "epoch": 0.07511456465431361, "grad_norm": 251.28758239746094, "learning_rate": 1.9997019859236166e-06, "loss": 23.875, "step": 1131 }, { "epoch": 0.07518097894666932, "grad_norm": 284.7143859863281, "learning_rate": 1.9996993545973743e-06, "loss": 21.3906, "step": 1132 }, { "epoch": 0.07524739323902503, "grad_norm": 139.1249237060547, "learning_rate": 1.9996967117071862e-06, "loss": 18.0625, "step": 1133 }, { "epoch": 0.07531380753138076, "grad_norm": 194.51515197753906, "learning_rate": 1.999694057253083e-06, "loss": 26.0625, "step": 1134 }, { "epoch": 0.07538022182373647, "grad_norm": 763.24658203125, "learning_rate": 1.9996913912350956e-06, "loss": 24.3906, "step": 1135 }, { "epoch": 0.07544663611609219, "grad_norm": 127.85782623291016, "learning_rate": 1.9996887136532544e-06, "loss": 17.2031, "step": 1136 }, { "epoch": 0.0755130504084479, "grad_norm": 449.010498046875, "learning_rate": 1.9996860245075906e-06, "loss": 29.6875, "step": 1137 }, { "epoch": 0.07557946470080361, "grad_norm": 639.6605224609375, "learning_rate": 1.9996833237981357e-06, "loss": 37.875, "step": 1138 }, { "epoch": 0.07564587899315933, "grad_norm": 223.05397033691406, "learning_rate": 1.9996806115249205e-06, "loss": 19.7188, "step": 1139 }, { "epoch": 0.07571229328551504, "grad_norm": 185.31163024902344, "learning_rate": 1.999677887687976e-06, "loss": 23.5938, "step": 1140 }, { "epoch": 0.07577870757787075, "grad_norm": 320.5321960449219, "learning_rate": 1.9996751522873345e-06, "loss": 29.7188, "step": 1141 }, { "epoch": 0.07584512187022648, "grad_norm": 151.8074951171875, "learning_rate": 1.999672405323027e-06, "loss": 20.8125, "step": 1142 }, { "epoch": 0.07591153616258219, "grad_norm": 144.1839599609375, "learning_rate": 1.9996696467950856e-06, "loss": 18.4219, "step": 1143 }, { "epoch": 0.07597795045493791, "grad_norm": 169.34747314453125, "learning_rate": 1.9996668767035424e-06, "loss": 18.125, "step": 1144 }, { "epoch": 0.07604436474729362, "grad_norm": 227.02696228027344, "learning_rate": 1.999664095048429e-06, "loss": 21.5938, "step": 1145 }, { "epoch": 0.07611077903964933, "grad_norm": 202.20811462402344, "learning_rate": 1.9996613018297783e-06, "loss": 19.2656, "step": 1146 }, { "epoch": 0.07617719333200505, "grad_norm": 217.33717346191406, "learning_rate": 1.9996584970476217e-06, "loss": 26.2812, "step": 1147 }, { "epoch": 0.07624360762436076, "grad_norm": 1843.8626708984375, "learning_rate": 1.999655680701992e-06, "loss": 17.6406, "step": 1148 }, { "epoch": 0.07631002191671647, "grad_norm": 208.04583740234375, "learning_rate": 1.9996528527929217e-06, "loss": 18.6094, "step": 1149 }, { "epoch": 0.0763764362090722, "grad_norm": 184.43765258789062, "learning_rate": 1.999650013320444e-06, "loss": 18.3594, "step": 1150 }, { "epoch": 0.0764428505014279, "grad_norm": 198.10887145996094, "learning_rate": 1.999647162284591e-06, "loss": 20.0469, "step": 1151 }, { "epoch": 0.07650926479378362, "grad_norm": 182.62164306640625, "learning_rate": 1.9996442996853964e-06, "loss": 27.625, "step": 1152 }, { "epoch": 0.07657567908613934, "grad_norm": 200.44403076171875, "learning_rate": 1.999641425522893e-06, "loss": 22.7188, "step": 1153 }, { "epoch": 0.07664209337849505, "grad_norm": 404.54510498046875, "learning_rate": 1.9996385397971135e-06, "loss": 19.9375, "step": 1154 }, { "epoch": 0.07670850767085077, "grad_norm": 451.394775390625, "learning_rate": 1.999635642508092e-06, "loss": 29.4062, "step": 1155 }, { "epoch": 0.07677492196320648, "grad_norm": 287.5335693359375, "learning_rate": 1.999632733655862e-06, "loss": 26.4688, "step": 1156 }, { "epoch": 0.07684133625556219, "grad_norm": 188.063232421875, "learning_rate": 1.999629813240457e-06, "loss": 18.3906, "step": 1157 }, { "epoch": 0.07690775054791792, "grad_norm": 173.7041015625, "learning_rate": 1.9996268812619105e-06, "loss": 21.8594, "step": 1158 }, { "epoch": 0.07697416484027363, "grad_norm": 445.2503356933594, "learning_rate": 1.999623937720257e-06, "loss": 26.2188, "step": 1159 }, { "epoch": 0.07704057913262934, "grad_norm": 187.1010284423828, "learning_rate": 1.99962098261553e-06, "loss": 21.3125, "step": 1160 }, { "epoch": 0.07710699342498506, "grad_norm": 224.2029266357422, "learning_rate": 1.999618015947764e-06, "loss": 22.75, "step": 1161 }, { "epoch": 0.07717340771734077, "grad_norm": 224.8708953857422, "learning_rate": 1.9996150377169927e-06, "loss": 22.4688, "step": 1162 }, { "epoch": 0.0772398220096965, "grad_norm": 273.9388732910156, "learning_rate": 1.9996120479232516e-06, "loss": 28.6875, "step": 1163 }, { "epoch": 0.0773062363020522, "grad_norm": 273.1171875, "learning_rate": 1.9996090465665743e-06, "loss": 21.2812, "step": 1164 }, { "epoch": 0.07737265059440791, "grad_norm": 390.1731872558594, "learning_rate": 1.9996060336469964e-06, "loss": 30.0, "step": 1165 }, { "epoch": 0.07743906488676364, "grad_norm": 347.0338134765625, "learning_rate": 1.9996030091645523e-06, "loss": 25.3594, "step": 1166 }, { "epoch": 0.07750547917911935, "grad_norm": 158.94271850585938, "learning_rate": 1.9995999731192767e-06, "loss": 18.9062, "step": 1167 }, { "epoch": 0.07757189347147506, "grad_norm": 352.4714660644531, "learning_rate": 1.999596925511205e-06, "loss": 23.1875, "step": 1168 }, { "epoch": 0.07763830776383078, "grad_norm": 193.8984375, "learning_rate": 1.9995938663403724e-06, "loss": 20.0, "step": 1169 }, { "epoch": 0.07770472205618649, "grad_norm": 331.9603576660156, "learning_rate": 1.999590795606815e-06, "loss": 27.3125, "step": 1170 }, { "epoch": 0.0777711363485422, "grad_norm": 218.09744262695312, "learning_rate": 1.999587713310567e-06, "loss": 22.0, "step": 1171 }, { "epoch": 0.07783755064089792, "grad_norm": 283.43560791015625, "learning_rate": 1.999584619451665e-06, "loss": 24.0938, "step": 1172 }, { "epoch": 0.07790396493325363, "grad_norm": 235.439453125, "learning_rate": 1.9995815140301446e-06, "loss": 25.4375, "step": 1173 }, { "epoch": 0.07797037922560936, "grad_norm": 275.43731689453125, "learning_rate": 1.9995783970460415e-06, "loss": 23.5, "step": 1174 }, { "epoch": 0.07803679351796507, "grad_norm": 658.24658203125, "learning_rate": 1.999575268499392e-06, "loss": 22.9375, "step": 1175 }, { "epoch": 0.07810320781032078, "grad_norm": 505.4327697753906, "learning_rate": 1.999572128390232e-06, "loss": 24.8281, "step": 1176 }, { "epoch": 0.0781696221026765, "grad_norm": 281.399169921875, "learning_rate": 1.9995689767185978e-06, "loss": 19.375, "step": 1177 }, { "epoch": 0.07823603639503221, "grad_norm": 378.5130920410156, "learning_rate": 1.9995658134845264e-06, "loss": 40.25, "step": 1178 }, { "epoch": 0.07830245068738792, "grad_norm": 246.81301879882812, "learning_rate": 1.9995626386880543e-06, "loss": 24.9062, "step": 1179 }, { "epoch": 0.07836886497974364, "grad_norm": 305.2641906738281, "learning_rate": 1.9995594523292177e-06, "loss": 25.3125, "step": 1180 }, { "epoch": 0.07843527927209935, "grad_norm": 148.26138305664062, "learning_rate": 1.9995562544080537e-06, "loss": 21.7812, "step": 1181 }, { "epoch": 0.07850169356445508, "grad_norm": 236.23631286621094, "learning_rate": 1.9995530449245994e-06, "loss": 23.6875, "step": 1182 }, { "epoch": 0.07856810785681079, "grad_norm": 178.64805603027344, "learning_rate": 1.9995498238788916e-06, "loss": 19.3438, "step": 1183 }, { "epoch": 0.0786345221491665, "grad_norm": 202.95994567871094, "learning_rate": 1.999546591270968e-06, "loss": 24.5156, "step": 1184 }, { "epoch": 0.07870093644152222, "grad_norm": 228.4683837890625, "learning_rate": 1.999543347100866e-06, "loss": 20.0938, "step": 1185 }, { "epoch": 0.07876735073387793, "grad_norm": 268.62530517578125, "learning_rate": 1.9995400913686227e-06, "loss": 33.4688, "step": 1186 }, { "epoch": 0.07883376502623364, "grad_norm": 277.3011169433594, "learning_rate": 1.9995368240742757e-06, "loss": 23.1875, "step": 1187 }, { "epoch": 0.07890017931858936, "grad_norm": 372.6509704589844, "learning_rate": 1.9995335452178638e-06, "loss": 21.5312, "step": 1188 }, { "epoch": 0.07896659361094507, "grad_norm": 211.54405212402344, "learning_rate": 1.9995302547994237e-06, "loss": 24.0, "step": 1189 }, { "epoch": 0.07903300790330078, "grad_norm": 235.6300048828125, "learning_rate": 1.999526952818994e-06, "loss": 20.5938, "step": 1190 }, { "epoch": 0.07909942219565651, "grad_norm": 246.2345733642578, "learning_rate": 1.999523639276613e-06, "loss": 21.7812, "step": 1191 }, { "epoch": 0.07916583648801222, "grad_norm": 252.68980407714844, "learning_rate": 1.999520314172319e-06, "loss": 24.0625, "step": 1192 }, { "epoch": 0.07923225078036794, "grad_norm": 198.75732421875, "learning_rate": 1.99951697750615e-06, "loss": 23.0938, "step": 1193 }, { "epoch": 0.07929866507272365, "grad_norm": 228.7831573486328, "learning_rate": 1.9995136292781456e-06, "loss": 20.7188, "step": 1194 }, { "epoch": 0.07936507936507936, "grad_norm": 263.7424011230469, "learning_rate": 1.9995102694883433e-06, "loss": 24.6875, "step": 1195 }, { "epoch": 0.07943149365743508, "grad_norm": 287.84320068359375, "learning_rate": 1.999506898136783e-06, "loss": 20.7188, "step": 1196 }, { "epoch": 0.0794979079497908, "grad_norm": 196.9571990966797, "learning_rate": 1.9995035152235026e-06, "loss": 22.0625, "step": 1197 }, { "epoch": 0.0795643222421465, "grad_norm": 161.72018432617188, "learning_rate": 1.9995001207485426e-06, "loss": 21.3906, "step": 1198 }, { "epoch": 0.07963073653450223, "grad_norm": 177.36021423339844, "learning_rate": 1.999496714711941e-06, "loss": 21.7812, "step": 1199 }, { "epoch": 0.07969715082685794, "grad_norm": 345.3435363769531, "learning_rate": 1.999493297113738e-06, "loss": 26.4375, "step": 1200 }, { "epoch": 0.07976356511921366, "grad_norm": 261.69805908203125, "learning_rate": 1.999489867953973e-06, "loss": 21.4688, "step": 1201 }, { "epoch": 0.07982997941156937, "grad_norm": 182.001708984375, "learning_rate": 1.999486427232685e-06, "loss": 24.8906, "step": 1202 }, { "epoch": 0.07989639370392508, "grad_norm": 323.35980224609375, "learning_rate": 1.9994829749499148e-06, "loss": 26.625, "step": 1203 }, { "epoch": 0.0799628079962808, "grad_norm": 255.13119506835938, "learning_rate": 1.999479511105702e-06, "loss": 28.7188, "step": 1204 }, { "epoch": 0.08002922228863651, "grad_norm": 140.3678741455078, "learning_rate": 1.9994760357000864e-06, "loss": 22.6875, "step": 1205 }, { "epoch": 0.08009563658099222, "grad_norm": 489.580078125, "learning_rate": 1.9994725487331083e-06, "loss": 32.4688, "step": 1206 }, { "epoch": 0.08016205087334795, "grad_norm": 258.9342346191406, "learning_rate": 1.999469050204808e-06, "loss": 15.8594, "step": 1207 }, { "epoch": 0.08022846516570366, "grad_norm": 169.2599334716797, "learning_rate": 1.999465540115226e-06, "loss": 16.8438, "step": 1208 }, { "epoch": 0.08029487945805937, "grad_norm": 328.79931640625, "learning_rate": 1.9994620184644033e-06, "loss": 26.5, "step": 1209 }, { "epoch": 0.08036129375041509, "grad_norm": 187.28842163085938, "learning_rate": 1.99945848525238e-06, "loss": 25.4375, "step": 1210 }, { "epoch": 0.0804277080427708, "grad_norm": 324.7859802246094, "learning_rate": 1.9994549404791976e-06, "loss": 31.2188, "step": 1211 }, { "epoch": 0.08049412233512653, "grad_norm": 212.5389862060547, "learning_rate": 1.9994513841448963e-06, "loss": 19.9375, "step": 1212 }, { "epoch": 0.08056053662748223, "grad_norm": 237.32720947265625, "learning_rate": 1.9994478162495182e-06, "loss": 22.5625, "step": 1213 }, { "epoch": 0.08062695091983794, "grad_norm": 134.19631958007812, "learning_rate": 1.999444236793104e-06, "loss": 14.4219, "step": 1214 }, { "epoch": 0.08069336521219367, "grad_norm": 195.38507080078125, "learning_rate": 1.999440645775695e-06, "loss": 28.2188, "step": 1215 }, { "epoch": 0.08075977950454938, "grad_norm": 483.5729675292969, "learning_rate": 1.9994370431973325e-06, "loss": 28.3438, "step": 1216 }, { "epoch": 0.08082619379690509, "grad_norm": 323.36126708984375, "learning_rate": 1.9994334290580593e-06, "loss": 21.0938, "step": 1217 }, { "epoch": 0.08089260808926081, "grad_norm": 241.59169006347656, "learning_rate": 1.9994298033579164e-06, "loss": 24.4375, "step": 1218 }, { "epoch": 0.08095902238161652, "grad_norm": 179.33663940429688, "learning_rate": 1.999426166096945e-06, "loss": 21.5156, "step": 1219 }, { "epoch": 0.08102543667397225, "grad_norm": 188.68435668945312, "learning_rate": 1.9994225172751893e-06, "loss": 19.7188, "step": 1220 }, { "epoch": 0.08109185096632796, "grad_norm": 495.35308837890625, "learning_rate": 1.9994188568926894e-06, "loss": 36.125, "step": 1221 }, { "epoch": 0.08115826525868367, "grad_norm": 169.85189819335938, "learning_rate": 1.9994151849494887e-06, "loss": 25.4375, "step": 1222 }, { "epoch": 0.08122467955103939, "grad_norm": 293.97509765625, "learning_rate": 1.999411501445629e-06, "loss": 24.0156, "step": 1223 }, { "epoch": 0.0812910938433951, "grad_norm": 693.291015625, "learning_rate": 1.999407806381154e-06, "loss": 24.4062, "step": 1224 }, { "epoch": 0.08135750813575081, "grad_norm": 477.21881103515625, "learning_rate": 1.9994040997561057e-06, "loss": 22.0, "step": 1225 }, { "epoch": 0.08142392242810653, "grad_norm": 346.7300720214844, "learning_rate": 1.9994003815705265e-06, "loss": 26.0469, "step": 1226 }, { "epoch": 0.08149033672046224, "grad_norm": 243.1171417236328, "learning_rate": 1.9993966518244605e-06, "loss": 20.9062, "step": 1227 }, { "epoch": 0.08155675101281795, "grad_norm": 227.07199096679688, "learning_rate": 1.9993929105179503e-06, "loss": 25.875, "step": 1228 }, { "epoch": 0.08162316530517368, "grad_norm": 185.94891357421875, "learning_rate": 1.999389157651039e-06, "loss": 25.5312, "step": 1229 }, { "epoch": 0.08168957959752939, "grad_norm": 238.96878051757812, "learning_rate": 1.99938539322377e-06, "loss": 23.0, "step": 1230 }, { "epoch": 0.08175599388988511, "grad_norm": 170.3710174560547, "learning_rate": 1.9993816172361875e-06, "loss": 20.9062, "step": 1231 }, { "epoch": 0.08182240818224082, "grad_norm": 492.2698059082031, "learning_rate": 1.9993778296883348e-06, "loss": 30.9062, "step": 1232 }, { "epoch": 0.08188882247459653, "grad_norm": 311.239990234375, "learning_rate": 1.999374030580255e-06, "loss": 23.625, "step": 1233 }, { "epoch": 0.08195523676695225, "grad_norm": 546.3229370117188, "learning_rate": 1.999370219911993e-06, "loss": 19.8906, "step": 1234 }, { "epoch": 0.08202165105930796, "grad_norm": 258.6357116699219, "learning_rate": 1.9993663976835927e-06, "loss": 21.6406, "step": 1235 }, { "epoch": 0.08208806535166367, "grad_norm": 201.71890258789062, "learning_rate": 1.9993625638950984e-06, "loss": 24.0938, "step": 1236 }, { "epoch": 0.0821544796440194, "grad_norm": 298.8157653808594, "learning_rate": 1.9993587185465538e-06, "loss": 24.75, "step": 1237 }, { "epoch": 0.0822208939363751, "grad_norm": 325.7315979003906, "learning_rate": 1.999354861638004e-06, "loss": 22.4688, "step": 1238 }, { "epoch": 0.08228730822873083, "grad_norm": 355.42120361328125, "learning_rate": 1.9993509931694933e-06, "loss": 32.9688, "step": 1239 }, { "epoch": 0.08235372252108654, "grad_norm": 190.38597106933594, "learning_rate": 1.9993471131410667e-06, "loss": 21.125, "step": 1240 }, { "epoch": 0.08242013681344225, "grad_norm": 462.5823974609375, "learning_rate": 1.999343221552769e-06, "loss": 28.1562, "step": 1241 }, { "epoch": 0.08248655110579797, "grad_norm": 167.66201782226562, "learning_rate": 1.999339318404645e-06, "loss": 20.0, "step": 1242 }, { "epoch": 0.08255296539815368, "grad_norm": 199.1075439453125, "learning_rate": 1.9993354036967404e-06, "loss": 22.3906, "step": 1243 }, { "epoch": 0.08261937969050939, "grad_norm": 146.07598876953125, "learning_rate": 1.9993314774290995e-06, "loss": 18.8125, "step": 1244 }, { "epoch": 0.08268579398286512, "grad_norm": 369.9111022949219, "learning_rate": 1.9993275396017688e-06, "loss": 25.9531, "step": 1245 }, { "epoch": 0.08275220827522083, "grad_norm": 152.13966369628906, "learning_rate": 1.999323590214793e-06, "loss": 22.2031, "step": 1246 }, { "epoch": 0.08281862256757654, "grad_norm": 188.96743774414062, "learning_rate": 1.999319629268218e-06, "loss": 24.1875, "step": 1247 }, { "epoch": 0.08288503685993226, "grad_norm": 322.99810791015625, "learning_rate": 1.9993156567620905e-06, "loss": 23.1875, "step": 1248 }, { "epoch": 0.08295145115228797, "grad_norm": 329.71014404296875, "learning_rate": 1.999311672696455e-06, "loss": 21.0625, "step": 1249 }, { "epoch": 0.0830178654446437, "grad_norm": 891.2066040039062, "learning_rate": 1.999307677071359e-06, "loss": 31.0625, "step": 1250 }, { "epoch": 0.0830842797369994, "grad_norm": 240.31655883789062, "learning_rate": 1.9993036698868475e-06, "loss": 23.0, "step": 1251 }, { "epoch": 0.08315069402935511, "grad_norm": 168.891357421875, "learning_rate": 1.9992996511429675e-06, "loss": 21.1562, "step": 1252 }, { "epoch": 0.08321710832171084, "grad_norm": 310.6842346191406, "learning_rate": 1.9992956208397653e-06, "loss": 30.9688, "step": 1253 }, { "epoch": 0.08328352261406655, "grad_norm": 232.85369873046875, "learning_rate": 1.9992915789772878e-06, "loss": 28.6875, "step": 1254 }, { "epoch": 0.08334993690642226, "grad_norm": 244.46836853027344, "learning_rate": 1.9992875255555816e-06, "loss": 24.8438, "step": 1255 }, { "epoch": 0.08341635119877798, "grad_norm": 198.446044921875, "learning_rate": 1.999283460574693e-06, "loss": 22.5312, "step": 1256 }, { "epoch": 0.08348276549113369, "grad_norm": 196.13726806640625, "learning_rate": 1.9992793840346697e-06, "loss": 25.0938, "step": 1257 }, { "epoch": 0.08354917978348941, "grad_norm": 222.33721923828125, "learning_rate": 1.999275295935559e-06, "loss": 26.9688, "step": 1258 }, { "epoch": 0.08361559407584512, "grad_norm": 200.14295959472656, "learning_rate": 1.9992711962774077e-06, "loss": 24.375, "step": 1259 }, { "epoch": 0.08368200836820083, "grad_norm": 220.28390502929688, "learning_rate": 1.999267085060264e-06, "loss": 24.9531, "step": 1260 }, { "epoch": 0.08374842266055656, "grad_norm": 170.24951171875, "learning_rate": 1.999262962284174e-06, "loss": 22.9062, "step": 1261 }, { "epoch": 0.08381483695291227, "grad_norm": 559.5327758789062, "learning_rate": 1.9992588279491867e-06, "loss": 22.7656, "step": 1262 }, { "epoch": 0.08388125124526798, "grad_norm": 181.83595275878906, "learning_rate": 1.999254682055349e-06, "loss": 27.0938, "step": 1263 }, { "epoch": 0.0839476655376237, "grad_norm": 228.44216918945312, "learning_rate": 1.99925052460271e-06, "loss": 25.0469, "step": 1264 }, { "epoch": 0.08401407982997941, "grad_norm": 306.6783447265625, "learning_rate": 1.999246355591317e-06, "loss": 20.0625, "step": 1265 }, { "epoch": 0.08408049412233512, "grad_norm": 271.4317932128906, "learning_rate": 1.9992421750212182e-06, "loss": 25.4375, "step": 1266 }, { "epoch": 0.08414690841469084, "grad_norm": 189.92861938476562, "learning_rate": 1.999237982892462e-06, "loss": 20.75, "step": 1267 }, { "epoch": 0.08421332270704655, "grad_norm": 180.0780792236328, "learning_rate": 1.9992337792050973e-06, "loss": 16.7969, "step": 1268 }, { "epoch": 0.08427973699940228, "grad_norm": 165.2290496826172, "learning_rate": 1.999229563959172e-06, "loss": 20.5312, "step": 1269 }, { "epoch": 0.08434615129175799, "grad_norm": 239.00772094726562, "learning_rate": 1.9992253371547357e-06, "loss": 24.5, "step": 1270 }, { "epoch": 0.0844125655841137, "grad_norm": 164.1334991455078, "learning_rate": 1.999221098791837e-06, "loss": 21.5625, "step": 1271 }, { "epoch": 0.08447897987646942, "grad_norm": 315.799560546875, "learning_rate": 1.9992168488705245e-06, "loss": 22.4531, "step": 1272 }, { "epoch": 0.08454539416882513, "grad_norm": 453.5128173828125, "learning_rate": 1.999212587390848e-06, "loss": 24.2812, "step": 1273 }, { "epoch": 0.08461180846118084, "grad_norm": 260.122314453125, "learning_rate": 1.999208314352856e-06, "loss": 20.5625, "step": 1274 }, { "epoch": 0.08467822275353656, "grad_norm": 332.8451232910156, "learning_rate": 1.9992040297565986e-06, "loss": 32.875, "step": 1275 }, { "epoch": 0.08474463704589227, "grad_norm": 239.61961364746094, "learning_rate": 1.9991997336021253e-06, "loss": 27.8125, "step": 1276 }, { "epoch": 0.084811051338248, "grad_norm": 466.39202880859375, "learning_rate": 1.9991954258894855e-06, "loss": 23.0312, "step": 1277 }, { "epoch": 0.08487746563060371, "grad_norm": 300.3207092285156, "learning_rate": 1.999191106618729e-06, "loss": 27.625, "step": 1278 }, { "epoch": 0.08494387992295942, "grad_norm": 176.12582397460938, "learning_rate": 1.9991867757899063e-06, "loss": 16.8125, "step": 1279 }, { "epoch": 0.08501029421531514, "grad_norm": 177.29019165039062, "learning_rate": 1.999182433403067e-06, "loss": 21.7188, "step": 1280 }, { "epoch": 0.08507670850767085, "grad_norm": 313.21099853515625, "learning_rate": 1.9991780794582614e-06, "loss": 22.5938, "step": 1281 }, { "epoch": 0.08514312280002656, "grad_norm": 269.20172119140625, "learning_rate": 1.99917371395554e-06, "loss": 24.6562, "step": 1282 }, { "epoch": 0.08520953709238228, "grad_norm": 380.7796325683594, "learning_rate": 1.9991693368949534e-06, "loss": 26.0312, "step": 1283 }, { "epoch": 0.085275951384738, "grad_norm": 225.9879913330078, "learning_rate": 1.9991649482765518e-06, "loss": 16.4375, "step": 1284 }, { "epoch": 0.0853423656770937, "grad_norm": 254.04745483398438, "learning_rate": 1.9991605481003864e-06, "loss": 20.4375, "step": 1285 }, { "epoch": 0.08540877996944943, "grad_norm": 819.3475952148438, "learning_rate": 1.9991561363665077e-06, "loss": 21.4844, "step": 1286 }, { "epoch": 0.08547519426180514, "grad_norm": 196.6028594970703, "learning_rate": 1.9991517130749674e-06, "loss": 25.0781, "step": 1287 }, { "epoch": 0.08554160855416086, "grad_norm": 259.78009033203125, "learning_rate": 1.999147278225816e-06, "loss": 27.875, "step": 1288 }, { "epoch": 0.08560802284651657, "grad_norm": 180.69285583496094, "learning_rate": 1.9991428318191047e-06, "loss": 21.7188, "step": 1289 }, { "epoch": 0.08567443713887228, "grad_norm": 184.7758331298828, "learning_rate": 1.9991383738548856e-06, "loss": 22.875, "step": 1290 }, { "epoch": 0.085740851431228, "grad_norm": 231.59262084960938, "learning_rate": 1.99913390433321e-06, "loss": 21.6562, "step": 1291 }, { "epoch": 0.08580726572358371, "grad_norm": 303.88525390625, "learning_rate": 1.9991294232541295e-06, "loss": 24.2188, "step": 1292 }, { "epoch": 0.08587368001593942, "grad_norm": 248.3999786376953, "learning_rate": 1.999124930617696e-06, "loss": 22.7812, "step": 1293 }, { "epoch": 0.08594009430829515, "grad_norm": 176.15869140625, "learning_rate": 1.999120426423961e-06, "loss": 23.0469, "step": 1294 }, { "epoch": 0.08600650860065086, "grad_norm": 246.6315460205078, "learning_rate": 1.9991159106729774e-06, "loss": 24.9531, "step": 1295 }, { "epoch": 0.08607292289300658, "grad_norm": 311.9407653808594, "learning_rate": 1.9991113833647967e-06, "loss": 20.3281, "step": 1296 }, { "epoch": 0.08613933718536229, "grad_norm": 143.15371704101562, "learning_rate": 1.9991068444994724e-06, "loss": 18.625, "step": 1297 }, { "epoch": 0.086205751477718, "grad_norm": 206.75543212890625, "learning_rate": 1.9991022940770556e-06, "loss": 18.0, "step": 1298 }, { "epoch": 0.08627216577007372, "grad_norm": 209.6553955078125, "learning_rate": 1.9990977320975997e-06, "loss": 21.8281, "step": 1299 }, { "epoch": 0.08633858006242943, "grad_norm": 333.76312255859375, "learning_rate": 1.9990931585611572e-06, "loss": 22.1562, "step": 1300 }, { "epoch": 0.08640499435478514, "grad_norm": 202.79510498046875, "learning_rate": 1.9990885734677816e-06, "loss": 23.0625, "step": 1301 }, { "epoch": 0.08647140864714087, "grad_norm": 819.8528442382812, "learning_rate": 1.9990839768175252e-06, "loss": 18.25, "step": 1302 }, { "epoch": 0.08653782293949658, "grad_norm": 394.017578125, "learning_rate": 1.9990793686104416e-06, "loss": 25.125, "step": 1303 }, { "epoch": 0.08660423723185229, "grad_norm": 431.758056640625, "learning_rate": 1.999074748846584e-06, "loss": 31.0625, "step": 1304 }, { "epoch": 0.08667065152420801, "grad_norm": 208.56222534179688, "learning_rate": 1.9990701175260053e-06, "loss": 21.1562, "step": 1305 }, { "epoch": 0.08673706581656372, "grad_norm": 253.78005981445312, "learning_rate": 1.9990654746487603e-06, "loss": 24.4062, "step": 1306 }, { "epoch": 0.08680348010891945, "grad_norm": 316.1116943359375, "learning_rate": 1.9990608202149014e-06, "loss": 21.5938, "step": 1307 }, { "epoch": 0.08686989440127516, "grad_norm": 172.87876892089844, "learning_rate": 1.9990561542244832e-06, "loss": 16.6094, "step": 1308 }, { "epoch": 0.08693630869363086, "grad_norm": 168.95008850097656, "learning_rate": 1.9990514766775597e-06, "loss": 23.4688, "step": 1309 }, { "epoch": 0.08700272298598659, "grad_norm": 692.3076782226562, "learning_rate": 1.9990467875741844e-06, "loss": 21.4062, "step": 1310 }, { "epoch": 0.0870691372783423, "grad_norm": 144.54299926757812, "learning_rate": 1.999042086914412e-06, "loss": 21.8438, "step": 1311 }, { "epoch": 0.08713555157069801, "grad_norm": 302.8819274902344, "learning_rate": 1.9990373746982974e-06, "loss": 23.5, "step": 1312 }, { "epoch": 0.08720196586305373, "grad_norm": 691.3168334960938, "learning_rate": 1.999032650925894e-06, "loss": 22.3594, "step": 1313 }, { "epoch": 0.08726838015540944, "grad_norm": 322.595703125, "learning_rate": 1.9990279155972573e-06, "loss": 22.0, "step": 1314 }, { "epoch": 0.08733479444776517, "grad_norm": 152.2213134765625, "learning_rate": 1.9990231687124415e-06, "loss": 16.6094, "step": 1315 }, { "epoch": 0.08740120874012088, "grad_norm": 400.6410827636719, "learning_rate": 1.999018410271502e-06, "loss": 16.9375, "step": 1316 }, { "epoch": 0.08746762303247659, "grad_norm": 246.50999450683594, "learning_rate": 1.999013640274493e-06, "loss": 22.9062, "step": 1317 }, { "epoch": 0.08753403732483231, "grad_norm": 169.48504638671875, "learning_rate": 1.999008858721471e-06, "loss": 21.7812, "step": 1318 }, { "epoch": 0.08760045161718802, "grad_norm": 255.61614990234375, "learning_rate": 1.9990040656124904e-06, "loss": 29.0, "step": 1319 }, { "epoch": 0.08766686590954373, "grad_norm": 253.92921447753906, "learning_rate": 1.9989992609476064e-06, "loss": 20.4375, "step": 1320 }, { "epoch": 0.08773328020189945, "grad_norm": 276.85113525390625, "learning_rate": 1.9989944447268754e-06, "loss": 21.7812, "step": 1321 }, { "epoch": 0.08779969449425516, "grad_norm": 316.03387451171875, "learning_rate": 1.9989896169503526e-06, "loss": 21.9062, "step": 1322 }, { "epoch": 0.08786610878661087, "grad_norm": 375.0991516113281, "learning_rate": 1.998984777618094e-06, "loss": 19.7969, "step": 1323 }, { "epoch": 0.0879325230789666, "grad_norm": 178.28253173828125, "learning_rate": 1.9989799267301556e-06, "loss": 18.9062, "step": 1324 }, { "epoch": 0.0879989373713223, "grad_norm": 142.70530700683594, "learning_rate": 1.998975064286593e-06, "loss": 18.8594, "step": 1325 }, { "epoch": 0.08806535166367803, "grad_norm": 242.3474578857422, "learning_rate": 1.9989701902874634e-06, "loss": 20.6406, "step": 1326 }, { "epoch": 0.08813176595603374, "grad_norm": 265.9046630859375, "learning_rate": 1.9989653047328226e-06, "loss": 22.75, "step": 1327 }, { "epoch": 0.08819818024838945, "grad_norm": 243.81546020507812, "learning_rate": 1.9989604076227275e-06, "loss": 26.2188, "step": 1328 }, { "epoch": 0.08826459454074517, "grad_norm": 362.4500732421875, "learning_rate": 1.9989554989572336e-06, "loss": 23.9062, "step": 1329 }, { "epoch": 0.08833100883310088, "grad_norm": 235.7602081298828, "learning_rate": 1.9989505787363993e-06, "loss": 21.5625, "step": 1330 }, { "epoch": 0.08839742312545659, "grad_norm": 469.67022705078125, "learning_rate": 1.9989456469602802e-06, "loss": 18.9375, "step": 1331 }, { "epoch": 0.08846383741781232, "grad_norm": 204.1079559326172, "learning_rate": 1.9989407036289342e-06, "loss": 31.4219, "step": 1332 }, { "epoch": 0.08853025171016803, "grad_norm": 420.9501953125, "learning_rate": 1.998935748742418e-06, "loss": 23.8438, "step": 1333 }, { "epoch": 0.08859666600252375, "grad_norm": 144.2254180908203, "learning_rate": 1.998930782300789e-06, "loss": 26.8125, "step": 1334 }, { "epoch": 0.08866308029487946, "grad_norm": 209.8211669921875, "learning_rate": 1.998925804304105e-06, "loss": 25.5625, "step": 1335 }, { "epoch": 0.08872949458723517, "grad_norm": 143.0855712890625, "learning_rate": 1.998920814752423e-06, "loss": 17.1406, "step": 1336 }, { "epoch": 0.08879590887959089, "grad_norm": 289.0574951171875, "learning_rate": 1.9989158136458013e-06, "loss": 26.1562, "step": 1337 }, { "epoch": 0.0888623231719466, "grad_norm": 137.35060119628906, "learning_rate": 1.998910800984298e-06, "loss": 20.7031, "step": 1338 }, { "epoch": 0.08892873746430231, "grad_norm": 319.46405029296875, "learning_rate": 1.9989057767679694e-06, "loss": 23.0625, "step": 1339 }, { "epoch": 0.08899515175665804, "grad_norm": 308.5805969238281, "learning_rate": 1.998900740996876e-06, "loss": 22.7344, "step": 1340 }, { "epoch": 0.08906156604901375, "grad_norm": 255.10366821289062, "learning_rate": 1.9988956936710737e-06, "loss": 20.7031, "step": 1341 }, { "epoch": 0.08912798034136946, "grad_norm": 484.96063232421875, "learning_rate": 1.9988906347906224e-06, "loss": 29.1875, "step": 1342 }, { "epoch": 0.08919439463372518, "grad_norm": 560.1943969726562, "learning_rate": 1.9988855643555804e-06, "loss": 27.9375, "step": 1343 }, { "epoch": 0.08926080892608089, "grad_norm": 371.8629150390625, "learning_rate": 1.998880482366006e-06, "loss": 18.5625, "step": 1344 }, { "epoch": 0.08932722321843661, "grad_norm": 186.50253295898438, "learning_rate": 1.9988753888219584e-06, "loss": 21.9375, "step": 1345 }, { "epoch": 0.08939363751079232, "grad_norm": 166.70680236816406, "learning_rate": 1.998870283723496e-06, "loss": 23.3125, "step": 1346 }, { "epoch": 0.08946005180314803, "grad_norm": 163.6896209716797, "learning_rate": 1.9988651670706785e-06, "loss": 20.8594, "step": 1347 }, { "epoch": 0.08952646609550376, "grad_norm": 443.32891845703125, "learning_rate": 1.998860038863564e-06, "loss": 26.3125, "step": 1348 }, { "epoch": 0.08959288038785947, "grad_norm": 395.9346008300781, "learning_rate": 1.998854899102213e-06, "loss": 26.25, "step": 1349 }, { "epoch": 0.08965929468021518, "grad_norm": 487.0075988769531, "learning_rate": 1.9988497477866848e-06, "loss": 24.7188, "step": 1350 }, { "epoch": 0.0897257089725709, "grad_norm": 823.1617431640625, "learning_rate": 1.9988445849170387e-06, "loss": 31.2969, "step": 1351 }, { "epoch": 0.08979212326492661, "grad_norm": 127.06448364257812, "learning_rate": 1.998839410493334e-06, "loss": 17.0, "step": 1352 }, { "epoch": 0.08985853755728233, "grad_norm": 411.4551086425781, "learning_rate": 1.9988342245156308e-06, "loss": 26.3125, "step": 1353 }, { "epoch": 0.08992495184963804, "grad_norm": 279.84918212890625, "learning_rate": 1.99882902698399e-06, "loss": 33.0, "step": 1354 }, { "epoch": 0.08999136614199375, "grad_norm": 255.93212890625, "learning_rate": 1.9988238178984704e-06, "loss": 20.4375, "step": 1355 }, { "epoch": 0.09005778043434948, "grad_norm": 287.7039794921875, "learning_rate": 1.998818597259133e-06, "loss": 25.7812, "step": 1356 }, { "epoch": 0.09012419472670519, "grad_norm": 151.73052978515625, "learning_rate": 1.998813365066038e-06, "loss": 19.1406, "step": 1357 }, { "epoch": 0.0901906090190609, "grad_norm": 452.79595947265625, "learning_rate": 1.998808121319246e-06, "loss": 24.625, "step": 1358 }, { "epoch": 0.09025702331141662, "grad_norm": 275.3985900878906, "learning_rate": 1.9988028660188175e-06, "loss": 21.2812, "step": 1359 }, { "epoch": 0.09032343760377233, "grad_norm": 259.6611022949219, "learning_rate": 1.9987975991648132e-06, "loss": 25.5156, "step": 1360 }, { "epoch": 0.09038985189612804, "grad_norm": 290.16778564453125, "learning_rate": 1.9987923207572944e-06, "loss": 27.4062, "step": 1361 }, { "epoch": 0.09045626618848376, "grad_norm": 230.75221252441406, "learning_rate": 1.998787030796322e-06, "loss": 22.1094, "step": 1362 }, { "epoch": 0.09052268048083947, "grad_norm": 313.7645263671875, "learning_rate": 1.9987817292819573e-06, "loss": 24.2812, "step": 1363 }, { "epoch": 0.0905890947731952, "grad_norm": 235.61367797851562, "learning_rate": 1.998776416214261e-06, "loss": 22.3125, "step": 1364 }, { "epoch": 0.09065550906555091, "grad_norm": 334.7099914550781, "learning_rate": 1.9987710915932954e-06, "loss": 27.6094, "step": 1365 }, { "epoch": 0.09072192335790662, "grad_norm": 381.0361633300781, "learning_rate": 1.998765755419122e-06, "loss": 28.5312, "step": 1366 }, { "epoch": 0.09078833765026234, "grad_norm": 303.62689208984375, "learning_rate": 1.9987604076918018e-06, "loss": 21.5, "step": 1367 }, { "epoch": 0.09085475194261805, "grad_norm": 298.58026123046875, "learning_rate": 1.9987550484113974e-06, "loss": 22.6562, "step": 1368 }, { "epoch": 0.09092116623497376, "grad_norm": 118.7188949584961, "learning_rate": 1.9987496775779705e-06, "loss": 16.75, "step": 1369 }, { "epoch": 0.09098758052732948, "grad_norm": 210.16957092285156, "learning_rate": 1.998744295191583e-06, "loss": 22.9844, "step": 1370 }, { "epoch": 0.0910539948196852, "grad_norm": 170.91712951660156, "learning_rate": 1.9987389012522977e-06, "loss": 20.5, "step": 1371 }, { "epoch": 0.09112040911204092, "grad_norm": 200.55638122558594, "learning_rate": 1.9987334957601766e-06, "loss": 20.2344, "step": 1372 }, { "epoch": 0.09118682340439663, "grad_norm": 247.40859985351562, "learning_rate": 1.998728078715282e-06, "loss": 23.1875, "step": 1373 }, { "epoch": 0.09125323769675234, "grad_norm": 167.6441650390625, "learning_rate": 1.9987226501176773e-06, "loss": 23.0938, "step": 1374 }, { "epoch": 0.09131965198910806, "grad_norm": 257.5350036621094, "learning_rate": 1.998717209967425e-06, "loss": 17.625, "step": 1375 }, { "epoch": 0.09138606628146377, "grad_norm": 221.6208953857422, "learning_rate": 1.9987117582645874e-06, "loss": 25.0312, "step": 1376 }, { "epoch": 0.09145248057381948, "grad_norm": 181.2992401123047, "learning_rate": 1.9987062950092285e-06, "loss": 24.9375, "step": 1377 }, { "epoch": 0.0915188948661752, "grad_norm": 185.0813751220703, "learning_rate": 1.9987008202014105e-06, "loss": 21.2031, "step": 1378 }, { "epoch": 0.09158530915853091, "grad_norm": 180.05059814453125, "learning_rate": 1.9986953338411977e-06, "loss": 23.3438, "step": 1379 }, { "epoch": 0.09165172345088662, "grad_norm": 534.8882446289062, "learning_rate": 1.9986898359286536e-06, "loss": 20.0938, "step": 1380 }, { "epoch": 0.09171813774324235, "grad_norm": 272.38092041015625, "learning_rate": 1.9986843264638405e-06, "loss": 24.0938, "step": 1381 }, { "epoch": 0.09178455203559806, "grad_norm": 279.03167724609375, "learning_rate": 1.9986788054468235e-06, "loss": 18.875, "step": 1382 }, { "epoch": 0.09185096632795378, "grad_norm": 2956.0048828125, "learning_rate": 1.998673272877666e-06, "loss": 18.5625, "step": 1383 }, { "epoch": 0.09191738062030949, "grad_norm": 332.7354736328125, "learning_rate": 1.9986677287564316e-06, "loss": 31.0312, "step": 1384 }, { "epoch": 0.0919837949126652, "grad_norm": 180.7508544921875, "learning_rate": 1.998662173083185e-06, "loss": 18.4375, "step": 1385 }, { "epoch": 0.09205020920502092, "grad_norm": 396.1321105957031, "learning_rate": 1.9986566058579902e-06, "loss": 19.1562, "step": 1386 }, { "epoch": 0.09211662349737663, "grad_norm": 180.02928161621094, "learning_rate": 1.998651027080912e-06, "loss": 20.6094, "step": 1387 }, { "epoch": 0.09218303778973234, "grad_norm": 396.9158020019531, "learning_rate": 1.998645436752014e-06, "loss": 22.5625, "step": 1388 }, { "epoch": 0.09224945208208807, "grad_norm": 163.17379760742188, "learning_rate": 1.9986398348713615e-06, "loss": 21.4375, "step": 1389 }, { "epoch": 0.09231586637444378, "grad_norm": 270.8669738769531, "learning_rate": 1.9986342214390195e-06, "loss": 27.8125, "step": 1390 }, { "epoch": 0.0923822806667995, "grad_norm": 170.43450927734375, "learning_rate": 1.9986285964550526e-06, "loss": 19.5, "step": 1391 }, { "epoch": 0.09244869495915521, "grad_norm": 127.63409423828125, "learning_rate": 1.998622959919526e-06, "loss": 15.2969, "step": 1392 }, { "epoch": 0.09251510925151092, "grad_norm": 191.5062713623047, "learning_rate": 1.9986173118325047e-06, "loss": 24.375, "step": 1393 }, { "epoch": 0.09258152354386665, "grad_norm": 486.7026672363281, "learning_rate": 1.998611652194054e-06, "loss": 28.9375, "step": 1394 }, { "epoch": 0.09264793783622235, "grad_norm": 178.8219451904297, "learning_rate": 1.99860598100424e-06, "loss": 19.5625, "step": 1395 }, { "epoch": 0.09271435212857806, "grad_norm": 200.42283630371094, "learning_rate": 1.9986002982631276e-06, "loss": 25.0, "step": 1396 }, { "epoch": 0.09278076642093379, "grad_norm": 163.21578979492188, "learning_rate": 1.998594603970783e-06, "loss": 23.0938, "step": 1397 }, { "epoch": 0.0928471807132895, "grad_norm": 181.5293731689453, "learning_rate": 1.9985888981272713e-06, "loss": 17.9688, "step": 1398 }, { "epoch": 0.09291359500564521, "grad_norm": 178.55747985839844, "learning_rate": 1.9985831807326596e-06, "loss": 21.1875, "step": 1399 }, { "epoch": 0.09298000929800093, "grad_norm": 153.35569763183594, "learning_rate": 1.998577451787013e-06, "loss": 18.2188, "step": 1400 }, { "epoch": 0.09304642359035664, "grad_norm": 179.20152282714844, "learning_rate": 1.9985717112903987e-06, "loss": 21.5469, "step": 1401 }, { "epoch": 0.09311283788271237, "grad_norm": 158.3407440185547, "learning_rate": 1.998565959242882e-06, "loss": 20.2188, "step": 1402 }, { "epoch": 0.09317925217506808, "grad_norm": 209.19546508789062, "learning_rate": 1.998560195644531e-06, "loss": 20.4375, "step": 1403 }, { "epoch": 0.09324566646742379, "grad_norm": 214.98085021972656, "learning_rate": 1.998554420495411e-06, "loss": 19.4375, "step": 1404 }, { "epoch": 0.09331208075977951, "grad_norm": 147.28778076171875, "learning_rate": 1.9985486337955894e-06, "loss": 20.1875, "step": 1405 }, { "epoch": 0.09337849505213522, "grad_norm": 277.07366943359375, "learning_rate": 1.9985428355451327e-06, "loss": 24.25, "step": 1406 }, { "epoch": 0.09344490934449093, "grad_norm": 181.976318359375, "learning_rate": 1.9985370257441082e-06, "loss": 17.9531, "step": 1407 }, { "epoch": 0.09351132363684665, "grad_norm": 318.3641052246094, "learning_rate": 1.9985312043925834e-06, "loss": 22.125, "step": 1408 }, { "epoch": 0.09357773792920236, "grad_norm": 206.36671447753906, "learning_rate": 1.9985253714906254e-06, "loss": 25.6562, "step": 1409 }, { "epoch": 0.09364415222155809, "grad_norm": 279.6327819824219, "learning_rate": 1.9985195270383018e-06, "loss": 20.2812, "step": 1410 }, { "epoch": 0.0937105665139138, "grad_norm": 200.282470703125, "learning_rate": 1.99851367103568e-06, "loss": 18.5156, "step": 1411 }, { "epoch": 0.0937769808062695, "grad_norm": 243.85498046875, "learning_rate": 1.998507803482828e-06, "loss": 18.6719, "step": 1412 }, { "epoch": 0.09384339509862523, "grad_norm": 203.98228454589844, "learning_rate": 1.998501924379813e-06, "loss": 20.875, "step": 1413 }, { "epoch": 0.09390980939098094, "grad_norm": 195.4991455078125, "learning_rate": 1.998496033726704e-06, "loss": 18.7969, "step": 1414 }, { "epoch": 0.09397622368333665, "grad_norm": 372.5924987792969, "learning_rate": 1.9984901315235687e-06, "loss": 25.0938, "step": 1415 }, { "epoch": 0.09404263797569237, "grad_norm": 189.2462158203125, "learning_rate": 1.998484217770475e-06, "loss": 20.7188, "step": 1416 }, { "epoch": 0.09410905226804808, "grad_norm": 488.7311096191406, "learning_rate": 1.9984782924674915e-06, "loss": 22.5, "step": 1417 }, { "epoch": 0.09417546656040379, "grad_norm": 158.38589477539062, "learning_rate": 1.9984723556146874e-06, "loss": 15.0781, "step": 1418 }, { "epoch": 0.09424188085275952, "grad_norm": 221.21311950683594, "learning_rate": 1.9984664072121303e-06, "loss": 25.4688, "step": 1419 }, { "epoch": 0.09430829514511523, "grad_norm": 225.77853393554688, "learning_rate": 1.9984604472598902e-06, "loss": 20.9688, "step": 1420 }, { "epoch": 0.09437470943747095, "grad_norm": 288.1778259277344, "learning_rate": 1.998454475758035e-06, "loss": 22.4531, "step": 1421 }, { "epoch": 0.09444112372982666, "grad_norm": 354.5042419433594, "learning_rate": 1.9984484927066338e-06, "loss": 28.6875, "step": 1422 }, { "epoch": 0.09450753802218237, "grad_norm": 152.18417358398438, "learning_rate": 1.9984424981057567e-06, "loss": 17.4375, "step": 1423 }, { "epoch": 0.09457395231453809, "grad_norm": 246.65426635742188, "learning_rate": 1.9984364919554724e-06, "loss": 22.25, "step": 1424 }, { "epoch": 0.0946403666068938, "grad_norm": 169.3465576171875, "learning_rate": 1.9984304742558505e-06, "loss": 23.8438, "step": 1425 }, { "epoch": 0.09470678089924951, "grad_norm": 176.6103515625, "learning_rate": 1.9984244450069603e-06, "loss": 19.3125, "step": 1426 }, { "epoch": 0.09477319519160524, "grad_norm": 154.64356994628906, "learning_rate": 1.998418404208872e-06, "loss": 16.9844, "step": 1427 }, { "epoch": 0.09483960948396095, "grad_norm": 242.24803161621094, "learning_rate": 1.9984123518616555e-06, "loss": 26.2188, "step": 1428 }, { "epoch": 0.09490602377631667, "grad_norm": 793.4747924804688, "learning_rate": 1.9984062879653806e-06, "loss": 28.9375, "step": 1429 }, { "epoch": 0.09497243806867238, "grad_norm": 235.2990264892578, "learning_rate": 1.9984002125201178e-06, "loss": 25.4688, "step": 1430 }, { "epoch": 0.09503885236102809, "grad_norm": 221.1936492919922, "learning_rate": 1.9983941255259363e-06, "loss": 21.0938, "step": 1431 }, { "epoch": 0.09510526665338381, "grad_norm": 328.0105285644531, "learning_rate": 1.998388026982908e-06, "loss": 22.3438, "step": 1432 }, { "epoch": 0.09517168094573952, "grad_norm": 341.1011047363281, "learning_rate": 1.9983819168911022e-06, "loss": 21.3906, "step": 1433 }, { "epoch": 0.09523809523809523, "grad_norm": 239.20603942871094, "learning_rate": 1.99837579525059e-06, "loss": 23.2344, "step": 1434 }, { "epoch": 0.09530450953045096, "grad_norm": 185.59291076660156, "learning_rate": 1.9983696620614426e-06, "loss": 21.75, "step": 1435 }, { "epoch": 0.09537092382280667, "grad_norm": 131.9634552001953, "learning_rate": 1.9983635173237304e-06, "loss": 18.9219, "step": 1436 }, { "epoch": 0.09543733811516238, "grad_norm": 551.6941528320312, "learning_rate": 1.998357361037525e-06, "loss": 20.9062, "step": 1437 }, { "epoch": 0.0955037524075181, "grad_norm": 338.671142578125, "learning_rate": 1.998351193202897e-06, "loss": 19.25, "step": 1438 }, { "epoch": 0.09557016669987381, "grad_norm": 505.2725830078125, "learning_rate": 1.9983450138199187e-06, "loss": 32.0312, "step": 1439 }, { "epoch": 0.09563658099222953, "grad_norm": 378.9117126464844, "learning_rate": 1.9983388228886604e-06, "loss": 18.875, "step": 1440 }, { "epoch": 0.09570299528458524, "grad_norm": 311.07562255859375, "learning_rate": 1.9983326204091946e-06, "loss": 23.625, "step": 1441 }, { "epoch": 0.09576940957694095, "grad_norm": 473.44073486328125, "learning_rate": 1.9983264063815924e-06, "loss": 23.0625, "step": 1442 }, { "epoch": 0.09583582386929668, "grad_norm": 169.01925659179688, "learning_rate": 1.998320180805926e-06, "loss": 20.5625, "step": 1443 }, { "epoch": 0.09590223816165239, "grad_norm": 251.49444580078125, "learning_rate": 1.9983139436822675e-06, "loss": 27.9375, "step": 1444 }, { "epoch": 0.0959686524540081, "grad_norm": 165.47927856445312, "learning_rate": 1.998307695010689e-06, "loss": 17.7969, "step": 1445 }, { "epoch": 0.09603506674636382, "grad_norm": 177.63412475585938, "learning_rate": 1.998301434791263e-06, "loss": 22.9375, "step": 1446 }, { "epoch": 0.09610148103871953, "grad_norm": 210.12335205078125, "learning_rate": 1.9982951630240616e-06, "loss": 28.2812, "step": 1447 }, { "epoch": 0.09616789533107525, "grad_norm": 367.68121337890625, "learning_rate": 1.998288879709157e-06, "loss": 22.3438, "step": 1448 }, { "epoch": 0.09623430962343096, "grad_norm": 258.6820983886719, "learning_rate": 1.9982825848466225e-06, "loss": 20.8594, "step": 1449 }, { "epoch": 0.09630072391578667, "grad_norm": 206.99502563476562, "learning_rate": 1.998276278436531e-06, "loss": 23.2188, "step": 1450 }, { "epoch": 0.0963671382081424, "grad_norm": 206.59500122070312, "learning_rate": 1.9982699604789546e-06, "loss": 22.9219, "step": 1451 }, { "epoch": 0.09643355250049811, "grad_norm": 972.0762329101562, "learning_rate": 1.998263630973967e-06, "loss": 25.1875, "step": 1452 }, { "epoch": 0.09649996679285382, "grad_norm": 248.5134735107422, "learning_rate": 1.9982572899216417e-06, "loss": 27.6562, "step": 1453 }, { "epoch": 0.09656638108520954, "grad_norm": 280.0013427734375, "learning_rate": 1.9982509373220515e-06, "loss": 17.875, "step": 1454 }, { "epoch": 0.09663279537756525, "grad_norm": 248.4324188232422, "learning_rate": 1.9982445731752697e-06, "loss": 20.2812, "step": 1455 }, { "epoch": 0.09669920966992096, "grad_norm": 392.3816833496094, "learning_rate": 1.998238197481371e-06, "loss": 20.375, "step": 1456 }, { "epoch": 0.09676562396227668, "grad_norm": 261.87542724609375, "learning_rate": 1.9982318102404284e-06, "loss": 20.1562, "step": 1457 }, { "epoch": 0.0968320382546324, "grad_norm": 183.2694549560547, "learning_rate": 1.9982254114525156e-06, "loss": 27.0625, "step": 1458 }, { "epoch": 0.09689845254698812, "grad_norm": 300.371826171875, "learning_rate": 1.9982190011177066e-06, "loss": 20.5312, "step": 1459 }, { "epoch": 0.09696486683934383, "grad_norm": 528.0913696289062, "learning_rate": 1.998212579236076e-06, "loss": 19.2812, "step": 1460 }, { "epoch": 0.09703128113169954, "grad_norm": 295.069091796875, "learning_rate": 1.998206145807698e-06, "loss": 28.0, "step": 1461 }, { "epoch": 0.09709769542405526, "grad_norm": 230.5514678955078, "learning_rate": 1.998199700832647e-06, "loss": 21.25, "step": 1462 }, { "epoch": 0.09716410971641097, "grad_norm": 234.8374481201172, "learning_rate": 1.9981932443109973e-06, "loss": 24.7188, "step": 1463 }, { "epoch": 0.09723052400876668, "grad_norm": 379.7082824707031, "learning_rate": 1.9981867762428237e-06, "loss": 22.25, "step": 1464 }, { "epoch": 0.0972969383011224, "grad_norm": 199.88876342773438, "learning_rate": 1.998180296628201e-06, "loss": 26.4062, "step": 1465 }, { "epoch": 0.09736335259347811, "grad_norm": 278.5506591796875, "learning_rate": 1.9981738054672043e-06, "loss": 18.6094, "step": 1466 }, { "epoch": 0.09742976688583384, "grad_norm": 275.688720703125, "learning_rate": 1.998167302759909e-06, "loss": 21.2812, "step": 1467 }, { "epoch": 0.09749618117818955, "grad_norm": 153.4736785888672, "learning_rate": 1.998160788506389e-06, "loss": 20.0625, "step": 1468 }, { "epoch": 0.09756259547054526, "grad_norm": 212.88499450683594, "learning_rate": 1.998154262706721e-06, "loss": 25.25, "step": 1469 }, { "epoch": 0.09762900976290098, "grad_norm": 154.14808654785156, "learning_rate": 1.9981477253609802e-06, "loss": 25.2188, "step": 1470 }, { "epoch": 0.09769542405525669, "grad_norm": 578.03857421875, "learning_rate": 1.998141176469242e-06, "loss": 21.9375, "step": 1471 }, { "epoch": 0.0977618383476124, "grad_norm": 205.81980895996094, "learning_rate": 1.9981346160315822e-06, "loss": 19.8281, "step": 1472 }, { "epoch": 0.09782825263996812, "grad_norm": 441.4497985839844, "learning_rate": 1.998128044048077e-06, "loss": 21.0, "step": 1473 }, { "epoch": 0.09789466693232383, "grad_norm": 323.1939392089844, "learning_rate": 1.998121460518802e-06, "loss": 22.3594, "step": 1474 }, { "epoch": 0.09796108122467954, "grad_norm": 156.04188537597656, "learning_rate": 1.9981148654438334e-06, "loss": 23.6562, "step": 1475 }, { "epoch": 0.09802749551703527, "grad_norm": 208.4091339111328, "learning_rate": 1.9981082588232475e-06, "loss": 21.8125, "step": 1476 }, { "epoch": 0.09809390980939098, "grad_norm": 253.94581604003906, "learning_rate": 1.998101640657121e-06, "loss": 21.375, "step": 1477 }, { "epoch": 0.0981603241017467, "grad_norm": 331.6153869628906, "learning_rate": 1.9980950109455305e-06, "loss": 25.0625, "step": 1478 }, { "epoch": 0.09822673839410241, "grad_norm": 214.0850830078125, "learning_rate": 1.998088369688552e-06, "loss": 27.3125, "step": 1479 }, { "epoch": 0.09829315268645812, "grad_norm": 245.70706176757812, "learning_rate": 1.9980817168862626e-06, "loss": 27.6406, "step": 1480 }, { "epoch": 0.09835956697881384, "grad_norm": 211.748046875, "learning_rate": 1.99807505253874e-06, "loss": 20.3438, "step": 1481 }, { "epoch": 0.09842598127116955, "grad_norm": 198.3880615234375, "learning_rate": 1.99806837664606e-06, "loss": 18.4375, "step": 1482 }, { "epoch": 0.09849239556352526, "grad_norm": 336.55218505859375, "learning_rate": 1.9980616892083013e-06, "loss": 25.9062, "step": 1483 }, { "epoch": 0.09855880985588099, "grad_norm": 202.80410766601562, "learning_rate": 1.99805499022554e-06, "loss": 21.2812, "step": 1484 }, { "epoch": 0.0986252241482367, "grad_norm": 216.99684143066406, "learning_rate": 1.9980482796978544e-06, "loss": 24.0312, "step": 1485 }, { "epoch": 0.09869163844059242, "grad_norm": 178.0451202392578, "learning_rate": 1.9980415576253217e-06, "loss": 19.5, "step": 1486 }, { "epoch": 0.09875805273294813, "grad_norm": 168.47706604003906, "learning_rate": 1.9980348240080196e-06, "loss": 17.5156, "step": 1487 }, { "epoch": 0.09882446702530384, "grad_norm": 315.3368835449219, "learning_rate": 1.998028078846026e-06, "loss": 26.6562, "step": 1488 }, { "epoch": 0.09889088131765957, "grad_norm": 254.5510711669922, "learning_rate": 1.9980213221394197e-06, "loss": 22.1562, "step": 1489 }, { "epoch": 0.09895729561001528, "grad_norm": 318.07440185546875, "learning_rate": 1.998014553888278e-06, "loss": 29.25, "step": 1490 }, { "epoch": 0.09902370990237098, "grad_norm": 206.23497009277344, "learning_rate": 1.998007774092679e-06, "loss": 21.125, "step": 1491 }, { "epoch": 0.09909012419472671, "grad_norm": 208.21287536621094, "learning_rate": 1.998000982752702e-06, "loss": 24.7812, "step": 1492 }, { "epoch": 0.09915653848708242, "grad_norm": 274.8124084472656, "learning_rate": 1.9979941798684247e-06, "loss": 22.0625, "step": 1493 }, { "epoch": 0.09922295277943813, "grad_norm": 257.7064208984375, "learning_rate": 1.997987365439927e-06, "loss": 18.9531, "step": 1494 }, { "epoch": 0.09928936707179385, "grad_norm": 163.2344207763672, "learning_rate": 1.997980539467286e-06, "loss": 22.9688, "step": 1495 }, { "epoch": 0.09935578136414956, "grad_norm": 259.8163757324219, "learning_rate": 1.997973701950582e-06, "loss": 21.1094, "step": 1496 }, { "epoch": 0.09942219565650529, "grad_norm": 148.92208862304688, "learning_rate": 1.997966852889894e-06, "loss": 20.125, "step": 1497 }, { "epoch": 0.099488609948861, "grad_norm": 377.7610168457031, "learning_rate": 1.9979599922853005e-06, "loss": 27.9062, "step": 1498 }, { "epoch": 0.0995550242412167, "grad_norm": 391.6260681152344, "learning_rate": 1.997953120136881e-06, "loss": 23.7812, "step": 1499 }, { "epoch": 0.09962143853357243, "grad_norm": 190.37451171875, "learning_rate": 1.997946236444716e-06, "loss": 19.8281, "step": 1500 }, { "epoch": 0.09968785282592814, "grad_norm": 261.8497619628906, "learning_rate": 1.9979393412088837e-06, "loss": 23.3125, "step": 1501 }, { "epoch": 0.09975426711828385, "grad_norm": 2272.673095703125, "learning_rate": 1.997932434429465e-06, "loss": 21.9062, "step": 1502 }, { "epoch": 0.09982068141063957, "grad_norm": 355.45849609375, "learning_rate": 1.997925516106539e-06, "loss": 30.0312, "step": 1503 }, { "epoch": 0.09988709570299528, "grad_norm": 182.92750549316406, "learning_rate": 1.997918586240186e-06, "loss": 19.375, "step": 1504 }, { "epoch": 0.099953509995351, "grad_norm": 167.6757354736328, "learning_rate": 1.997911644830487e-06, "loss": 23.4688, "step": 1505 }, { "epoch": 0.10001992428770672, "grad_norm": 226.5205535888672, "learning_rate": 1.997904691877521e-06, "loss": 30.0312, "step": 1506 }, { "epoch": 0.10008633858006243, "grad_norm": 262.0826416015625, "learning_rate": 1.997897727381369e-06, "loss": 21.8906, "step": 1507 }, { "epoch": 0.10015275287241815, "grad_norm": 170.22962951660156, "learning_rate": 1.9978907513421115e-06, "loss": 23.1719, "step": 1508 }, { "epoch": 0.10021916716477386, "grad_norm": 376.7835388183594, "learning_rate": 1.997883763759829e-06, "loss": 21.1562, "step": 1509 }, { "epoch": 0.10028558145712957, "grad_norm": 211.77322387695312, "learning_rate": 1.9978767646346025e-06, "loss": 18.0781, "step": 1510 }, { "epoch": 0.10035199574948529, "grad_norm": 169.96762084960938, "learning_rate": 1.997869753966513e-06, "loss": 23.5938, "step": 1511 }, { "epoch": 0.100418410041841, "grad_norm": 500.7065734863281, "learning_rate": 1.9978627317556423e-06, "loss": 19.5, "step": 1512 }, { "epoch": 0.10048482433419671, "grad_norm": 143.7285919189453, "learning_rate": 1.99785569800207e-06, "loss": 24.5938, "step": 1513 }, { "epoch": 0.10055123862655244, "grad_norm": 136.61199951171875, "learning_rate": 1.997848652705879e-06, "loss": 18.6094, "step": 1514 }, { "epoch": 0.10061765291890815, "grad_norm": 204.26107788085938, "learning_rate": 1.99784159586715e-06, "loss": 18.25, "step": 1515 }, { "epoch": 0.10068406721126387, "grad_norm": 807.0415649414062, "learning_rate": 1.9978345274859646e-06, "loss": 17.2188, "step": 1516 }, { "epoch": 0.10075048150361958, "grad_norm": 145.24363708496094, "learning_rate": 1.997827447562405e-06, "loss": 22.7969, "step": 1517 }, { "epoch": 0.10081689579597529, "grad_norm": 355.1551513671875, "learning_rate": 1.997820356096553e-06, "loss": 24.2188, "step": 1518 }, { "epoch": 0.10088331008833101, "grad_norm": 181.17108154296875, "learning_rate": 1.9978132530884902e-06, "loss": 20.8125, "step": 1519 }, { "epoch": 0.10094972438068672, "grad_norm": 133.9143524169922, "learning_rate": 1.997806138538299e-06, "loss": 17.25, "step": 1520 }, { "epoch": 0.10101613867304243, "grad_norm": 478.0348815917969, "learning_rate": 1.997799012446062e-06, "loss": 28.75, "step": 1521 }, { "epoch": 0.10108255296539816, "grad_norm": 644.55224609375, "learning_rate": 1.9977918748118613e-06, "loss": 21.3125, "step": 1522 }, { "epoch": 0.10114896725775387, "grad_norm": 266.1358642578125, "learning_rate": 1.9977847256357797e-06, "loss": 21.2812, "step": 1523 }, { "epoch": 0.10121538155010959, "grad_norm": 909.0255737304688, "learning_rate": 1.9977775649178996e-06, "loss": 20.5625, "step": 1524 }, { "epoch": 0.1012817958424653, "grad_norm": 186.61448669433594, "learning_rate": 1.997770392658304e-06, "loss": 20.6875, "step": 1525 }, { "epoch": 0.10134821013482101, "grad_norm": 314.75518798828125, "learning_rate": 1.997763208857076e-06, "loss": 25.8594, "step": 1526 }, { "epoch": 0.10141462442717673, "grad_norm": 158.10235595703125, "learning_rate": 1.9977560135142982e-06, "loss": 17.75, "step": 1527 }, { "epoch": 0.10148103871953244, "grad_norm": 186.5385284423828, "learning_rate": 1.997748806630054e-06, "loss": 21.7812, "step": 1528 }, { "epoch": 0.10154745301188815, "grad_norm": 136.16757202148438, "learning_rate": 1.9977415882044276e-06, "loss": 22.9375, "step": 1529 }, { "epoch": 0.10161386730424388, "grad_norm": 197.1947784423828, "learning_rate": 1.9977343582375015e-06, "loss": 21.7656, "step": 1530 }, { "epoch": 0.10168028159659959, "grad_norm": 226.98623657226562, "learning_rate": 1.99772711672936e-06, "loss": 25.9375, "step": 1531 }, { "epoch": 0.1017466958889553, "grad_norm": 172.3445587158203, "learning_rate": 1.9977198636800863e-06, "loss": 23.2188, "step": 1532 }, { "epoch": 0.10181311018131102, "grad_norm": 507.36279296875, "learning_rate": 1.9977125990897643e-06, "loss": 21.2188, "step": 1533 }, { "epoch": 0.10187952447366673, "grad_norm": 265.4617919921875, "learning_rate": 1.9977053229584783e-06, "loss": 18.0156, "step": 1534 }, { "epoch": 0.10194593876602245, "grad_norm": 303.9537048339844, "learning_rate": 1.9976980352863125e-06, "loss": 25.0625, "step": 1535 }, { "epoch": 0.10201235305837816, "grad_norm": 516.6340942382812, "learning_rate": 1.9976907360733516e-06, "loss": 32.4375, "step": 1536 }, { "epoch": 0.10207876735073387, "grad_norm": 282.47802734375, "learning_rate": 1.997683425319679e-06, "loss": 22.9375, "step": 1537 }, { "epoch": 0.1021451816430896, "grad_norm": 251.77194213867188, "learning_rate": 1.99767610302538e-06, "loss": 28.0, "step": 1538 }, { "epoch": 0.1022115959354453, "grad_norm": 331.79815673828125, "learning_rate": 1.9976687691905393e-06, "loss": 24.7188, "step": 1539 }, { "epoch": 0.10227801022780102, "grad_norm": 227.31092834472656, "learning_rate": 1.997661423815241e-06, "loss": 21.5781, "step": 1540 }, { "epoch": 0.10234442452015674, "grad_norm": 1521.718017578125, "learning_rate": 1.9976540668995714e-06, "loss": 16.7812, "step": 1541 }, { "epoch": 0.10241083881251245, "grad_norm": 256.3233337402344, "learning_rate": 1.9976466984436144e-06, "loss": 25.9062, "step": 1542 }, { "epoch": 0.10247725310486817, "grad_norm": 230.05050659179688, "learning_rate": 1.997639318447456e-06, "loss": 23.0312, "step": 1543 }, { "epoch": 0.10254366739722388, "grad_norm": 220.56845092773438, "learning_rate": 1.9976319269111806e-06, "loss": 28.5625, "step": 1544 }, { "epoch": 0.1026100816895796, "grad_norm": 280.1962585449219, "learning_rate": 1.997624523834875e-06, "loss": 22.5938, "step": 1545 }, { "epoch": 0.10267649598193532, "grad_norm": 970.9983520507812, "learning_rate": 1.9976171092186237e-06, "loss": 24.75, "step": 1546 }, { "epoch": 0.10274291027429103, "grad_norm": 470.2358093261719, "learning_rate": 1.997609683062513e-06, "loss": 30.4375, "step": 1547 }, { "epoch": 0.10280932456664674, "grad_norm": 409.1690673828125, "learning_rate": 1.997602245366629e-06, "loss": 27.25, "step": 1548 }, { "epoch": 0.10287573885900246, "grad_norm": 143.43850708007812, "learning_rate": 1.9975947961310575e-06, "loss": 19.75, "step": 1549 }, { "epoch": 0.10294215315135817, "grad_norm": 345.5434265136719, "learning_rate": 1.9975873353558843e-06, "loss": 19.6562, "step": 1550 }, { "epoch": 0.10300856744371388, "grad_norm": 284.042724609375, "learning_rate": 1.9975798630411964e-06, "loss": 24.25, "step": 1551 }, { "epoch": 0.1030749817360696, "grad_norm": 287.4996337890625, "learning_rate": 1.9975723791870795e-06, "loss": 26.5312, "step": 1552 }, { "epoch": 0.10314139602842531, "grad_norm": 267.5904541015625, "learning_rate": 1.997564883793621e-06, "loss": 32.1562, "step": 1553 }, { "epoch": 0.10320781032078104, "grad_norm": 216.26161193847656, "learning_rate": 1.997557376860907e-06, "loss": 21.0781, "step": 1554 }, { "epoch": 0.10327422461313675, "grad_norm": 1344.2569580078125, "learning_rate": 1.997549858389024e-06, "loss": 19.875, "step": 1555 }, { "epoch": 0.10334063890549246, "grad_norm": 243.79067993164062, "learning_rate": 1.99754232837806e-06, "loss": 22.9219, "step": 1556 }, { "epoch": 0.10340705319784818, "grad_norm": 654.909912109375, "learning_rate": 1.9975347868281015e-06, "loss": 30.5625, "step": 1557 }, { "epoch": 0.10347346749020389, "grad_norm": 260.2639465332031, "learning_rate": 1.9975272337392357e-06, "loss": 19.5, "step": 1558 }, { "epoch": 0.1035398817825596, "grad_norm": 247.3903045654297, "learning_rate": 1.99751966911155e-06, "loss": 22.3438, "step": 1559 }, { "epoch": 0.10360629607491532, "grad_norm": 314.0259094238281, "learning_rate": 1.9975120929451322e-06, "loss": 19.9062, "step": 1560 }, { "epoch": 0.10367271036727103, "grad_norm": 174.32952880859375, "learning_rate": 1.9975045052400695e-06, "loss": 20.1094, "step": 1561 }, { "epoch": 0.10373912465962676, "grad_norm": 287.849853515625, "learning_rate": 1.99749690599645e-06, "loss": 27.0312, "step": 1562 }, { "epoch": 0.10380553895198247, "grad_norm": 217.73533630371094, "learning_rate": 1.9974892952143618e-06, "loss": 23.375, "step": 1563 }, { "epoch": 0.10387195324433818, "grad_norm": 186.92552185058594, "learning_rate": 1.997481672893892e-06, "loss": 24.7812, "step": 1564 }, { "epoch": 0.1039383675366939, "grad_norm": 258.2301940917969, "learning_rate": 1.9974740390351297e-06, "loss": 22.5625, "step": 1565 }, { "epoch": 0.10400478182904961, "grad_norm": 184.13075256347656, "learning_rate": 1.9974663936381626e-06, "loss": 24.0469, "step": 1566 }, { "epoch": 0.10407119612140532, "grad_norm": 145.48007202148438, "learning_rate": 1.99745873670308e-06, "loss": 22.75, "step": 1567 }, { "epoch": 0.10413761041376104, "grad_norm": 450.9101257324219, "learning_rate": 1.9974510682299693e-06, "loss": 25.8125, "step": 1568 }, { "epoch": 0.10420402470611675, "grad_norm": 257.53460693359375, "learning_rate": 1.9974433882189203e-06, "loss": 25.4062, "step": 1569 }, { "epoch": 0.10427043899847246, "grad_norm": 278.0014343261719, "learning_rate": 1.997435696670021e-06, "loss": 23.4062, "step": 1570 }, { "epoch": 0.10433685329082819, "grad_norm": 459.15087890625, "learning_rate": 1.997427993583361e-06, "loss": 20.2812, "step": 1571 }, { "epoch": 0.1044032675831839, "grad_norm": 174.6844940185547, "learning_rate": 1.9974202789590287e-06, "loss": 15.3594, "step": 1572 }, { "epoch": 0.10446968187553962, "grad_norm": 913.6152954101562, "learning_rate": 1.997412552797114e-06, "loss": 28.2812, "step": 1573 }, { "epoch": 0.10453609616789533, "grad_norm": 375.7596130371094, "learning_rate": 1.997404815097706e-06, "loss": 24.1094, "step": 1574 }, { "epoch": 0.10460251046025104, "grad_norm": 445.3517150878906, "learning_rate": 1.997397065860894e-06, "loss": 21.1562, "step": 1575 }, { "epoch": 0.10466892475260677, "grad_norm": 199.9519500732422, "learning_rate": 1.9973893050867686e-06, "loss": 22.5625, "step": 1576 }, { "epoch": 0.10473533904496247, "grad_norm": 180.1151885986328, "learning_rate": 1.997381532775418e-06, "loss": 20.4688, "step": 1577 }, { "epoch": 0.10480175333731818, "grad_norm": 288.3316955566406, "learning_rate": 1.9973737489269338e-06, "loss": 22.2188, "step": 1578 }, { "epoch": 0.10486816762967391, "grad_norm": 147.00784301757812, "learning_rate": 1.9973659535414046e-06, "loss": 17.0, "step": 1579 }, { "epoch": 0.10493458192202962, "grad_norm": 368.962158203125, "learning_rate": 1.997358146618921e-06, "loss": 23.0, "step": 1580 }, { "epoch": 0.10500099621438534, "grad_norm": 271.216064453125, "learning_rate": 1.997350328159574e-06, "loss": 20.75, "step": 1581 }, { "epoch": 0.10506741050674105, "grad_norm": 710.9257202148438, "learning_rate": 1.9973424981634533e-06, "loss": 26.8594, "step": 1582 }, { "epoch": 0.10513382479909676, "grad_norm": 205.1395263671875, "learning_rate": 1.9973346566306498e-06, "loss": 19.1875, "step": 1583 }, { "epoch": 0.10520023909145249, "grad_norm": 272.72210693359375, "learning_rate": 1.997326803561254e-06, "loss": 24.0312, "step": 1584 }, { "epoch": 0.1052666533838082, "grad_norm": 193.73507690429688, "learning_rate": 1.997318938955357e-06, "loss": 24.4844, "step": 1585 }, { "epoch": 0.1053330676761639, "grad_norm": 228.90155029296875, "learning_rate": 1.997311062813049e-06, "loss": 22.8438, "step": 1586 }, { "epoch": 0.10539948196851963, "grad_norm": 221.60546875, "learning_rate": 1.997303175134422e-06, "loss": 26.7812, "step": 1587 }, { "epoch": 0.10546589626087534, "grad_norm": 182.29263305664062, "learning_rate": 1.997295275919567e-06, "loss": 23.9688, "step": 1588 }, { "epoch": 0.10553231055323105, "grad_norm": 145.5023193359375, "learning_rate": 1.9972873651685754e-06, "loss": 18.1562, "step": 1589 }, { "epoch": 0.10559872484558677, "grad_norm": 128.9724578857422, "learning_rate": 1.9972794428815386e-06, "loss": 15.8281, "step": 1590 }, { "epoch": 0.10566513913794248, "grad_norm": 153.8639678955078, "learning_rate": 1.9972715090585484e-06, "loss": 20.4062, "step": 1591 }, { "epoch": 0.1057315534302982, "grad_norm": 206.13497924804688, "learning_rate": 1.997263563699696e-06, "loss": 21.4062, "step": 1592 }, { "epoch": 0.10579796772265392, "grad_norm": 415.8521423339844, "learning_rate": 1.9972556068050744e-06, "loss": 22.2188, "step": 1593 }, { "epoch": 0.10586438201500963, "grad_norm": 188.17794799804688, "learning_rate": 1.9972476383747743e-06, "loss": 16.1406, "step": 1594 }, { "epoch": 0.10593079630736535, "grad_norm": 122.41138458251953, "learning_rate": 1.9972396584088893e-06, "loss": 16.5312, "step": 1595 }, { "epoch": 0.10599721059972106, "grad_norm": 224.8682098388672, "learning_rate": 1.9972316669075103e-06, "loss": 21.9062, "step": 1596 }, { "epoch": 0.10606362489207677, "grad_norm": 154.4534454345703, "learning_rate": 1.997223663870731e-06, "loss": 15.8438, "step": 1597 }, { "epoch": 0.10613003918443249, "grad_norm": 323.1102294921875, "learning_rate": 1.997215649298643e-06, "loss": 24.5938, "step": 1598 }, { "epoch": 0.1061964534767882, "grad_norm": 391.8900451660156, "learning_rate": 1.997207623191339e-06, "loss": 23.9375, "step": 1599 }, { "epoch": 0.10626286776914393, "grad_norm": 140.21136474609375, "learning_rate": 1.997199585548913e-06, "loss": 21.2031, "step": 1600 }, { "epoch": 0.10632928206149964, "grad_norm": 183.54898071289062, "learning_rate": 1.997191536371457e-06, "loss": 23.3125, "step": 1601 }, { "epoch": 0.10639569635385535, "grad_norm": 168.448486328125, "learning_rate": 1.997183475659064e-06, "loss": 22.6562, "step": 1602 }, { "epoch": 0.10646211064621107, "grad_norm": 282.4515075683594, "learning_rate": 1.997175403411828e-06, "loss": 16.9531, "step": 1603 }, { "epoch": 0.10652852493856678, "grad_norm": 182.2367401123047, "learning_rate": 1.9971673196298417e-06, "loss": 20.6875, "step": 1604 }, { "epoch": 0.10659493923092249, "grad_norm": 205.86322021484375, "learning_rate": 1.997159224313199e-06, "loss": 20.1875, "step": 1605 }, { "epoch": 0.10666135352327821, "grad_norm": 167.13226318359375, "learning_rate": 1.9971511174619935e-06, "loss": 21.2969, "step": 1606 }, { "epoch": 0.10672776781563392, "grad_norm": 285.2275085449219, "learning_rate": 1.9971429990763186e-06, "loss": 16.5, "step": 1607 }, { "epoch": 0.10679418210798963, "grad_norm": 224.0365447998047, "learning_rate": 1.9971348691562683e-06, "loss": 20.7812, "step": 1608 }, { "epoch": 0.10686059640034536, "grad_norm": 1037.5743408203125, "learning_rate": 1.997126727701937e-06, "loss": 19.7344, "step": 1609 }, { "epoch": 0.10692701069270107, "grad_norm": 154.72857666015625, "learning_rate": 1.9971185747134187e-06, "loss": 21.5938, "step": 1610 }, { "epoch": 0.10699342498505679, "grad_norm": 244.57952880859375, "learning_rate": 1.997110410190808e-06, "loss": 20.2031, "step": 1611 }, { "epoch": 0.1070598392774125, "grad_norm": 295.03326416015625, "learning_rate": 1.9971022341341986e-06, "loss": 21.8125, "step": 1612 }, { "epoch": 0.10712625356976821, "grad_norm": 408.24761962890625, "learning_rate": 1.997094046543686e-06, "loss": 24.4062, "step": 1613 }, { "epoch": 0.10719266786212393, "grad_norm": 247.67457580566406, "learning_rate": 1.997085847419364e-06, "loss": 25.2188, "step": 1614 }, { "epoch": 0.10725908215447964, "grad_norm": 856.4574584960938, "learning_rate": 1.997077636761328e-06, "loss": 30.3438, "step": 1615 }, { "epoch": 0.10732549644683535, "grad_norm": 277.27923583984375, "learning_rate": 1.9970694145696726e-06, "loss": 24.375, "step": 1616 }, { "epoch": 0.10739191073919108, "grad_norm": 278.73492431640625, "learning_rate": 1.9970611808444937e-06, "loss": 18.6406, "step": 1617 }, { "epoch": 0.10745832503154679, "grad_norm": 215.30844116210938, "learning_rate": 1.9970529355858854e-06, "loss": 24.5938, "step": 1618 }, { "epoch": 0.10752473932390251, "grad_norm": 328.51708984375, "learning_rate": 1.997044678793944e-06, "loss": 29.0938, "step": 1619 }, { "epoch": 0.10759115361625822, "grad_norm": 281.4698181152344, "learning_rate": 1.997036410468765e-06, "loss": 18.5312, "step": 1620 }, { "epoch": 0.10765756790861393, "grad_norm": 249.30078125, "learning_rate": 1.997028130610443e-06, "loss": 24.4688, "step": 1621 }, { "epoch": 0.10772398220096965, "grad_norm": 296.86328125, "learning_rate": 1.997019839219075e-06, "loss": 19.75, "step": 1622 }, { "epoch": 0.10779039649332536, "grad_norm": 190.27389526367188, "learning_rate": 1.997011536294756e-06, "loss": 17.0469, "step": 1623 }, { "epoch": 0.10785681078568107, "grad_norm": 442.9554443359375, "learning_rate": 1.997003221837583e-06, "loss": 22.9375, "step": 1624 }, { "epoch": 0.1079232250780368, "grad_norm": 142.9203643798828, "learning_rate": 1.9969948958476516e-06, "loss": 20.4062, "step": 1625 }, { "epoch": 0.1079896393703925, "grad_norm": 131.65675354003906, "learning_rate": 1.9969865583250576e-06, "loss": 19.9062, "step": 1626 }, { "epoch": 0.10805605366274822, "grad_norm": 206.30422973632812, "learning_rate": 1.9969782092698983e-06, "loss": 24.4375, "step": 1627 }, { "epoch": 0.10812246795510394, "grad_norm": 184.028564453125, "learning_rate": 1.9969698486822702e-06, "loss": 19.4844, "step": 1628 }, { "epoch": 0.10818888224745965, "grad_norm": 530.6934204101562, "learning_rate": 1.9969614765622695e-06, "loss": 30.625, "step": 1629 }, { "epoch": 0.10825529653981537, "grad_norm": 237.66001892089844, "learning_rate": 1.9969530929099935e-06, "loss": 23.625, "step": 1630 }, { "epoch": 0.10832171083217108, "grad_norm": 397.0396423339844, "learning_rate": 1.9969446977255385e-06, "loss": 22.0312, "step": 1631 }, { "epoch": 0.1083881251245268, "grad_norm": 377.8217468261719, "learning_rate": 1.9969362910090026e-06, "loss": 23.3125, "step": 1632 }, { "epoch": 0.10845453941688252, "grad_norm": 161.82736206054688, "learning_rate": 1.9969278727604825e-06, "loss": 18.3281, "step": 1633 }, { "epoch": 0.10852095370923823, "grad_norm": 292.3719787597656, "learning_rate": 1.9969194429800755e-06, "loss": 24.875, "step": 1634 }, { "epoch": 0.10858736800159394, "grad_norm": 226.81045532226562, "learning_rate": 1.9969110016678794e-06, "loss": 21.2031, "step": 1635 }, { "epoch": 0.10865378229394966, "grad_norm": 192.5828857421875, "learning_rate": 1.9969025488239913e-06, "loss": 20.125, "step": 1636 }, { "epoch": 0.10872019658630537, "grad_norm": 199.06578063964844, "learning_rate": 1.99689408444851e-06, "loss": 21.4062, "step": 1637 }, { "epoch": 0.1087866108786611, "grad_norm": 246.3956298828125, "learning_rate": 1.9968856085415325e-06, "loss": 21.3125, "step": 1638 }, { "epoch": 0.1088530251710168, "grad_norm": 1409.875732421875, "learning_rate": 1.9968771211031566e-06, "loss": 22.2188, "step": 1639 }, { "epoch": 0.10891943946337251, "grad_norm": 283.62359619140625, "learning_rate": 1.9968686221334816e-06, "loss": 19.4062, "step": 1640 }, { "epoch": 0.10898585375572824, "grad_norm": 302.1219177246094, "learning_rate": 1.996860111632605e-06, "loss": 24.3125, "step": 1641 }, { "epoch": 0.10905226804808395, "grad_norm": 134.72634887695312, "learning_rate": 1.9968515896006255e-06, "loss": 16.7656, "step": 1642 }, { "epoch": 0.10911868234043966, "grad_norm": 403.3640441894531, "learning_rate": 1.9968430560376417e-06, "loss": 25.7188, "step": 1643 }, { "epoch": 0.10918509663279538, "grad_norm": 190.345703125, "learning_rate": 1.9968345109437524e-06, "loss": 23.0938, "step": 1644 }, { "epoch": 0.10925151092515109, "grad_norm": 490.8495788574219, "learning_rate": 1.9968259543190557e-06, "loss": 26.1875, "step": 1645 }, { "epoch": 0.1093179252175068, "grad_norm": 332.8659362792969, "learning_rate": 1.9968173861636517e-06, "loss": 22.0312, "step": 1646 }, { "epoch": 0.10938433950986252, "grad_norm": 197.9927215576172, "learning_rate": 1.9968088064776386e-06, "loss": 21.0625, "step": 1647 }, { "epoch": 0.10945075380221823, "grad_norm": 423.71038818359375, "learning_rate": 1.9968002152611162e-06, "loss": 23.8281, "step": 1648 }, { "epoch": 0.10951716809457396, "grad_norm": 222.7073516845703, "learning_rate": 1.996791612514184e-06, "loss": 22.2188, "step": 1649 }, { "epoch": 0.10958358238692967, "grad_norm": 404.8847351074219, "learning_rate": 1.9967829982369406e-06, "loss": 31.25, "step": 1650 }, { "epoch": 0.10964999667928538, "grad_norm": 246.7508544921875, "learning_rate": 1.9967743724294867e-06, "loss": 21.375, "step": 1651 }, { "epoch": 0.1097164109716411, "grad_norm": 684.0834350585938, "learning_rate": 1.9967657350919213e-06, "loss": 28.9375, "step": 1652 }, { "epoch": 0.10978282526399681, "grad_norm": 125.03050231933594, "learning_rate": 1.996757086224345e-06, "loss": 20.9375, "step": 1653 }, { "epoch": 0.10984923955635252, "grad_norm": 197.2208709716797, "learning_rate": 1.9967484258268576e-06, "loss": 17.7656, "step": 1654 }, { "epoch": 0.10991565384870824, "grad_norm": 541.977294921875, "learning_rate": 1.9967397538995587e-06, "loss": 29.8281, "step": 1655 }, { "epoch": 0.10998206814106395, "grad_norm": 337.7115173339844, "learning_rate": 1.9967310704425494e-06, "loss": 35.7812, "step": 1656 }, { "epoch": 0.11004848243341968, "grad_norm": 225.32260131835938, "learning_rate": 1.9967223754559293e-06, "loss": 25.3125, "step": 1657 }, { "epoch": 0.11011489672577539, "grad_norm": 388.2222595214844, "learning_rate": 1.9967136689398e-06, "loss": 23.6562, "step": 1658 }, { "epoch": 0.1101813110181311, "grad_norm": 203.23770141601562, "learning_rate": 1.9967049508942616e-06, "loss": 22.5938, "step": 1659 }, { "epoch": 0.11024772531048682, "grad_norm": 597.4645385742188, "learning_rate": 1.9966962213194153e-06, "loss": 15.8594, "step": 1660 }, { "epoch": 0.11031413960284253, "grad_norm": 215.5852813720703, "learning_rate": 1.9966874802153617e-06, "loss": 23.1094, "step": 1661 }, { "epoch": 0.11038055389519824, "grad_norm": 511.96441650390625, "learning_rate": 1.996678727582202e-06, "loss": 26.3906, "step": 1662 }, { "epoch": 0.11044696818755396, "grad_norm": 420.703369140625, "learning_rate": 1.996669963420037e-06, "loss": 19.1562, "step": 1663 }, { "epoch": 0.11051338247990967, "grad_norm": 280.721435546875, "learning_rate": 1.9966611877289695e-06, "loss": 23.3438, "step": 1664 }, { "epoch": 0.11057979677226538, "grad_norm": 206.20529174804688, "learning_rate": 1.9966524005091e-06, "loss": 22.375, "step": 1665 }, { "epoch": 0.11064621106462111, "grad_norm": 291.99749755859375, "learning_rate": 1.9966436017605294e-06, "loss": 28.9688, "step": 1666 }, { "epoch": 0.11071262535697682, "grad_norm": 171.37254333496094, "learning_rate": 1.996634791483361e-06, "loss": 18.0938, "step": 1667 }, { "epoch": 0.11077903964933254, "grad_norm": 304.2980041503906, "learning_rate": 1.9966259696776955e-06, "loss": 19.9844, "step": 1668 }, { "epoch": 0.11084545394168825, "grad_norm": 152.2075958251953, "learning_rate": 1.9966171363436357e-06, "loss": 17.2969, "step": 1669 }, { "epoch": 0.11091186823404396, "grad_norm": 168.9300994873047, "learning_rate": 1.996608291481284e-06, "loss": 20.4375, "step": 1670 }, { "epoch": 0.11097828252639969, "grad_norm": 307.06494140625, "learning_rate": 1.9965994350907416e-06, "loss": 26.1875, "step": 1671 }, { "epoch": 0.1110446968187554, "grad_norm": 332.0226745605469, "learning_rate": 1.9965905671721123e-06, "loss": 26.625, "step": 1672 }, { "epoch": 0.1111111111111111, "grad_norm": 473.7713317871094, "learning_rate": 1.996581687725497e-06, "loss": 22.0156, "step": 1673 }, { "epoch": 0.11117752540346683, "grad_norm": 320.0482482910156, "learning_rate": 1.9965727967510002e-06, "loss": 25.375, "step": 1674 }, { "epoch": 0.11124393969582254, "grad_norm": 1887.6585693359375, "learning_rate": 1.9965638942487238e-06, "loss": 23.9375, "step": 1675 }, { "epoch": 0.11131035398817826, "grad_norm": 254.75648498535156, "learning_rate": 1.996554980218771e-06, "loss": 23.8438, "step": 1676 }, { "epoch": 0.11137676828053397, "grad_norm": 453.2525939941406, "learning_rate": 1.9965460546612446e-06, "loss": 22.8594, "step": 1677 }, { "epoch": 0.11144318257288968, "grad_norm": 241.05105590820312, "learning_rate": 1.996537117576248e-06, "loss": 22.7969, "step": 1678 }, { "epoch": 0.1115095968652454, "grad_norm": 417.9183349609375, "learning_rate": 1.996528168963885e-06, "loss": 15.6875, "step": 1679 }, { "epoch": 0.11157601115760112, "grad_norm": 144.6858367919922, "learning_rate": 1.9965192088242584e-06, "loss": 23.5938, "step": 1680 }, { "epoch": 0.11164242544995683, "grad_norm": 287.0624694824219, "learning_rate": 1.9965102371574724e-06, "loss": 21.7812, "step": 1681 }, { "epoch": 0.11170883974231255, "grad_norm": 491.481689453125, "learning_rate": 1.996501253963631e-06, "loss": 30.0938, "step": 1682 }, { "epoch": 0.11177525403466826, "grad_norm": 352.0125732421875, "learning_rate": 1.9964922592428374e-06, "loss": 22.4688, "step": 1683 }, { "epoch": 0.11184166832702397, "grad_norm": 510.2582702636719, "learning_rate": 1.9964832529951956e-06, "loss": 35.2812, "step": 1684 }, { "epoch": 0.11190808261937969, "grad_norm": 128.15206909179688, "learning_rate": 1.9964742352208106e-06, "loss": 16.25, "step": 1685 }, { "epoch": 0.1119744969117354, "grad_norm": 222.72085571289062, "learning_rate": 1.996465205919786e-06, "loss": 13.7031, "step": 1686 }, { "epoch": 0.11204091120409113, "grad_norm": 467.7706298828125, "learning_rate": 1.9964561650922266e-06, "loss": 24.375, "step": 1687 }, { "epoch": 0.11210732549644684, "grad_norm": 332.3263854980469, "learning_rate": 1.996447112738237e-06, "loss": 33.625, "step": 1688 }, { "epoch": 0.11217373978880255, "grad_norm": 275.4239807128906, "learning_rate": 1.996438048857921e-06, "loss": 20.6406, "step": 1689 }, { "epoch": 0.11224015408115827, "grad_norm": 157.6586151123047, "learning_rate": 1.996428973451385e-06, "loss": 17.2031, "step": 1690 }, { "epoch": 0.11230656837351398, "grad_norm": 187.49476623535156, "learning_rate": 1.996419886518733e-06, "loss": 24.7188, "step": 1691 }, { "epoch": 0.11237298266586969, "grad_norm": 182.06362915039062, "learning_rate": 1.9964107880600705e-06, "loss": 20.2969, "step": 1692 }, { "epoch": 0.11243939695822541, "grad_norm": 338.83416748046875, "learning_rate": 1.9964016780755024e-06, "loss": 23.125, "step": 1693 }, { "epoch": 0.11250581125058112, "grad_norm": 370.3644714355469, "learning_rate": 1.9963925565651346e-06, "loss": 24.4688, "step": 1694 }, { "epoch": 0.11257222554293685, "grad_norm": 170.19944763183594, "learning_rate": 1.9963834235290718e-06, "loss": 18.4375, "step": 1695 }, { "epoch": 0.11263863983529256, "grad_norm": 184.51869201660156, "learning_rate": 1.99637427896742e-06, "loss": 24.4062, "step": 1696 }, { "epoch": 0.11270505412764827, "grad_norm": 229.6938934326172, "learning_rate": 1.9963651228802853e-06, "loss": 17.8594, "step": 1697 }, { "epoch": 0.11277146842000399, "grad_norm": 307.3039245605469, "learning_rate": 1.9963559552677734e-06, "loss": 27.8125, "step": 1698 }, { "epoch": 0.1128378827123597, "grad_norm": 128.30992126464844, "learning_rate": 1.9963467761299905e-06, "loss": 20.6562, "step": 1699 }, { "epoch": 0.11290429700471541, "grad_norm": 176.5413360595703, "learning_rate": 1.9963375854670427e-06, "loss": 20.25, "step": 1700 }, { "epoch": 0.11297071129707113, "grad_norm": 278.53997802734375, "learning_rate": 1.996328383279036e-06, "loss": 25.6875, "step": 1701 }, { "epoch": 0.11303712558942684, "grad_norm": 181.85079956054688, "learning_rate": 1.996319169566077e-06, "loss": 23.3125, "step": 1702 }, { "epoch": 0.11310353988178255, "grad_norm": 545.8676147460938, "learning_rate": 1.9963099443282726e-06, "loss": 22.875, "step": 1703 }, { "epoch": 0.11316995417413828, "grad_norm": 248.78573608398438, "learning_rate": 1.996300707565729e-06, "loss": 19.8906, "step": 1704 }, { "epoch": 0.11323636846649399, "grad_norm": 361.3298645019531, "learning_rate": 1.9962914592785534e-06, "loss": 28.375, "step": 1705 }, { "epoch": 0.11330278275884971, "grad_norm": 199.1438751220703, "learning_rate": 1.9962821994668525e-06, "loss": 39.625, "step": 1706 }, { "epoch": 0.11336919705120542, "grad_norm": 184.18296813964844, "learning_rate": 1.9962729281307343e-06, "loss": 24.0312, "step": 1707 }, { "epoch": 0.11343561134356113, "grad_norm": 444.39080810546875, "learning_rate": 1.9962636452703047e-06, "loss": 26.7188, "step": 1708 }, { "epoch": 0.11350202563591685, "grad_norm": 356.4766845703125, "learning_rate": 1.996254350885672e-06, "loss": 27.875, "step": 1709 }, { "epoch": 0.11356843992827256, "grad_norm": 1094.9716796875, "learning_rate": 1.996245044976943e-06, "loss": 22.125, "step": 1710 }, { "epoch": 0.11363485422062827, "grad_norm": 231.3066864013672, "learning_rate": 1.9962357275442264e-06, "loss": 28.0312, "step": 1711 }, { "epoch": 0.113701268512984, "grad_norm": 387.0046691894531, "learning_rate": 1.996226398587629e-06, "loss": 29.375, "step": 1712 }, { "epoch": 0.1137676828053397, "grad_norm": 180.70440673828125, "learning_rate": 1.9962170581072597e-06, "loss": 20.5938, "step": 1713 }, { "epoch": 0.11383409709769543, "grad_norm": 280.73828125, "learning_rate": 1.996207706103225e-06, "loss": 23.8125, "step": 1714 }, { "epoch": 0.11390051139005114, "grad_norm": 231.3749542236328, "learning_rate": 1.9961983425756344e-06, "loss": 21.125, "step": 1715 }, { "epoch": 0.11396692568240685, "grad_norm": 260.9252624511719, "learning_rate": 1.996188967524596e-06, "loss": 17.875, "step": 1716 }, { "epoch": 0.11403333997476257, "grad_norm": 304.0097961425781, "learning_rate": 1.9961795809502177e-06, "loss": 20.5781, "step": 1717 }, { "epoch": 0.11409975426711828, "grad_norm": 247.62928771972656, "learning_rate": 1.9961701828526086e-06, "loss": 20.25, "step": 1718 }, { "epoch": 0.114166168559474, "grad_norm": 234.69635009765625, "learning_rate": 1.9961607732318773e-06, "loss": 22.7812, "step": 1719 }, { "epoch": 0.11423258285182972, "grad_norm": 214.07762145996094, "learning_rate": 1.9961513520881327e-06, "loss": 23.8438, "step": 1720 }, { "epoch": 0.11429899714418543, "grad_norm": 199.51394653320312, "learning_rate": 1.9961419194214835e-06, "loss": 16.7812, "step": 1721 }, { "epoch": 0.11436541143654114, "grad_norm": 152.00730895996094, "learning_rate": 1.996132475232039e-06, "loss": 21.375, "step": 1722 }, { "epoch": 0.11443182572889686, "grad_norm": 157.16050720214844, "learning_rate": 1.9961230195199084e-06, "loss": 17.3594, "step": 1723 }, { "epoch": 0.11449824002125257, "grad_norm": 276.9367370605469, "learning_rate": 1.9961135522852014e-06, "loss": 31.875, "step": 1724 }, { "epoch": 0.1145646543136083, "grad_norm": 404.4113464355469, "learning_rate": 1.996104073528027e-06, "loss": 23.9062, "step": 1725 }, { "epoch": 0.114631068605964, "grad_norm": 277.0516052246094, "learning_rate": 1.996094583248495e-06, "loss": 28.4688, "step": 1726 }, { "epoch": 0.11469748289831971, "grad_norm": 286.62982177734375, "learning_rate": 1.9960850814467152e-06, "loss": 24.0312, "step": 1727 }, { "epoch": 0.11476389719067544, "grad_norm": 312.6169738769531, "learning_rate": 1.9960755681227975e-06, "loss": 26.5, "step": 1728 }, { "epoch": 0.11483031148303115, "grad_norm": 217.31192016601562, "learning_rate": 1.9960660432768523e-06, "loss": 24.4688, "step": 1729 }, { "epoch": 0.11489672577538686, "grad_norm": 321.53546142578125, "learning_rate": 1.996056506908989e-06, "loss": 25.375, "step": 1730 }, { "epoch": 0.11496314006774258, "grad_norm": 336.2167663574219, "learning_rate": 1.9960469590193194e-06, "loss": 29.25, "step": 1731 }, { "epoch": 0.11502955436009829, "grad_norm": 259.7498779296875, "learning_rate": 1.996037399607952e-06, "loss": 27.0, "step": 1732 }, { "epoch": 0.11509596865245401, "grad_norm": 257.66119384765625, "learning_rate": 1.9960278286749986e-06, "loss": 24.3906, "step": 1733 }, { "epoch": 0.11516238294480972, "grad_norm": 192.08116149902344, "learning_rate": 1.9960182462205694e-06, "loss": 18.6406, "step": 1734 }, { "epoch": 0.11522879723716543, "grad_norm": 269.9233703613281, "learning_rate": 1.9960086522447757e-06, "loss": 23.1875, "step": 1735 }, { "epoch": 0.11529521152952116, "grad_norm": 251.20338439941406, "learning_rate": 1.9959990467477282e-06, "loss": 22.5625, "step": 1736 }, { "epoch": 0.11536162582187687, "grad_norm": 219.7432403564453, "learning_rate": 1.9959894297295377e-06, "loss": 27.4375, "step": 1737 }, { "epoch": 0.11542804011423258, "grad_norm": 392.4218444824219, "learning_rate": 1.9959798011903164e-06, "loss": 24.8125, "step": 1738 }, { "epoch": 0.1154944544065883, "grad_norm": 123.12520599365234, "learning_rate": 1.9959701611301747e-06, "loss": 14.0, "step": 1739 }, { "epoch": 0.11556086869894401, "grad_norm": 157.98983764648438, "learning_rate": 1.995960509549225e-06, "loss": 26.4609, "step": 1740 }, { "epoch": 0.11562728299129973, "grad_norm": 135.9591522216797, "learning_rate": 1.995950846447578e-06, "loss": 21.6719, "step": 1741 }, { "epoch": 0.11569369728365544, "grad_norm": 364.8240966796875, "learning_rate": 1.9959411718253457e-06, "loss": 22.5469, "step": 1742 }, { "epoch": 0.11576011157601115, "grad_norm": 248.50253295898438, "learning_rate": 1.9959314856826404e-06, "loss": 23.2812, "step": 1743 }, { "epoch": 0.11582652586836688, "grad_norm": 490.4991149902344, "learning_rate": 1.995921788019574e-06, "loss": 32.0781, "step": 1744 }, { "epoch": 0.11589294016072259, "grad_norm": 524.9898071289062, "learning_rate": 1.9959120788362585e-06, "loss": 38.875, "step": 1745 }, { "epoch": 0.1159593544530783, "grad_norm": 321.8814392089844, "learning_rate": 1.9959023581328064e-06, "loss": 23.6094, "step": 1746 }, { "epoch": 0.11602576874543402, "grad_norm": 12591.5751953125, "learning_rate": 1.9958926259093306e-06, "loss": 23.5938, "step": 1747 }, { "epoch": 0.11609218303778973, "grad_norm": 132.2279815673828, "learning_rate": 1.9958828821659424e-06, "loss": 16.6406, "step": 1748 }, { "epoch": 0.11615859733014544, "grad_norm": 197.79103088378906, "learning_rate": 1.995873126902756e-06, "loss": 26.6562, "step": 1749 }, { "epoch": 0.11622501162250116, "grad_norm": 191.12754821777344, "learning_rate": 1.995863360119883e-06, "loss": 20.2656, "step": 1750 }, { "epoch": 0.11629142591485687, "grad_norm": 474.49371337890625, "learning_rate": 1.995853581817437e-06, "loss": 17.7188, "step": 1751 }, { "epoch": 0.1163578402072126, "grad_norm": 298.37384033203125, "learning_rate": 1.995843791995531e-06, "loss": 33.25, "step": 1752 }, { "epoch": 0.11642425449956831, "grad_norm": 210.37420654296875, "learning_rate": 1.995833990654278e-06, "loss": 26.0938, "step": 1753 }, { "epoch": 0.11649066879192402, "grad_norm": 242.4628143310547, "learning_rate": 1.9958241777937922e-06, "loss": 23.625, "step": 1754 }, { "epoch": 0.11655708308427974, "grad_norm": 337.200927734375, "learning_rate": 1.9958143534141863e-06, "loss": 30.0938, "step": 1755 }, { "epoch": 0.11662349737663545, "grad_norm": 217.36143493652344, "learning_rate": 1.995804517515574e-06, "loss": 22.1562, "step": 1756 }, { "epoch": 0.11668991166899116, "grad_norm": 160.3636016845703, "learning_rate": 1.9957946700980694e-06, "loss": 25.9375, "step": 1757 }, { "epoch": 0.11675632596134689, "grad_norm": 287.1412353515625, "learning_rate": 1.9957848111617863e-06, "loss": 30.4375, "step": 1758 }, { "epoch": 0.1168227402537026, "grad_norm": 204.01119995117188, "learning_rate": 1.9957749407068386e-06, "loss": 21.5, "step": 1759 }, { "epoch": 0.11688915454605832, "grad_norm": 297.60211181640625, "learning_rate": 1.995765058733341e-06, "loss": 28.8438, "step": 1760 }, { "epoch": 0.11695556883841403, "grad_norm": 1017.2622680664062, "learning_rate": 1.995755165241407e-06, "loss": 18.0469, "step": 1761 }, { "epoch": 0.11702198313076974, "grad_norm": 221.7760467529297, "learning_rate": 1.995745260231151e-06, "loss": 21.9062, "step": 1762 }, { "epoch": 0.11708839742312546, "grad_norm": 787.6292724609375, "learning_rate": 1.9957353437026886e-06, "loss": 31.375, "step": 1763 }, { "epoch": 0.11715481171548117, "grad_norm": 256.6094665527344, "learning_rate": 1.995725415656134e-06, "loss": 24.3438, "step": 1764 }, { "epoch": 0.11722122600783688, "grad_norm": 190.4317626953125, "learning_rate": 1.9957154760916016e-06, "loss": 21.1562, "step": 1765 }, { "epoch": 0.1172876403001926, "grad_norm": 521.09423828125, "learning_rate": 1.995705525009207e-06, "loss": 21.2812, "step": 1766 }, { "epoch": 0.11735405459254832, "grad_norm": 363.77166748046875, "learning_rate": 1.995695562409065e-06, "loss": 23.0938, "step": 1767 }, { "epoch": 0.11742046888490403, "grad_norm": 263.17071533203125, "learning_rate": 1.995685588291291e-06, "loss": 20.7812, "step": 1768 }, { "epoch": 0.11748688317725975, "grad_norm": 206.57705688476562, "learning_rate": 1.995675602656e-06, "loss": 24.5312, "step": 1769 }, { "epoch": 0.11755329746961546, "grad_norm": 554.7630615234375, "learning_rate": 1.9956656055033077e-06, "loss": 20.2812, "step": 1770 }, { "epoch": 0.11761971176197118, "grad_norm": 175.32472229003906, "learning_rate": 1.99565559683333e-06, "loss": 25.3438, "step": 1771 }, { "epoch": 0.11768612605432689, "grad_norm": 182.73765563964844, "learning_rate": 1.9956455766461825e-06, "loss": 26.2344, "step": 1772 }, { "epoch": 0.1177525403466826, "grad_norm": 302.540771484375, "learning_rate": 1.995635544941981e-06, "loss": 25.7656, "step": 1773 }, { "epoch": 0.11781895463903833, "grad_norm": 315.4385681152344, "learning_rate": 1.9956255017208416e-06, "loss": 24.3438, "step": 1774 }, { "epoch": 0.11788536893139404, "grad_norm": 308.4131774902344, "learning_rate": 1.995615446982881e-06, "loss": 27.8438, "step": 1775 }, { "epoch": 0.11795178322374975, "grad_norm": 285.35845947265625, "learning_rate": 1.9956053807282146e-06, "loss": 25.625, "step": 1776 }, { "epoch": 0.11801819751610547, "grad_norm": 323.4344177246094, "learning_rate": 1.995595302956959e-06, "loss": 21.9375, "step": 1777 }, { "epoch": 0.11808461180846118, "grad_norm": 221.97610473632812, "learning_rate": 1.9955852136692315e-06, "loss": 24.1875, "step": 1778 }, { "epoch": 0.1181510261008169, "grad_norm": 156.1031951904297, "learning_rate": 1.995575112865148e-06, "loss": 17.9688, "step": 1779 }, { "epoch": 0.11821744039317261, "grad_norm": 351.8843994140625, "learning_rate": 1.9955650005448257e-06, "loss": 24.3125, "step": 1780 }, { "epoch": 0.11828385468552832, "grad_norm": 223.0792999267578, "learning_rate": 1.9955548767083815e-06, "loss": 22.75, "step": 1781 }, { "epoch": 0.11835026897788405, "grad_norm": 255.06842041015625, "learning_rate": 1.9955447413559328e-06, "loss": 26.4375, "step": 1782 }, { "epoch": 0.11841668327023976, "grad_norm": 155.29090881347656, "learning_rate": 1.9955345944875963e-06, "loss": 18.1719, "step": 1783 }, { "epoch": 0.11848309756259547, "grad_norm": 151.24319458007812, "learning_rate": 1.9955244361034904e-06, "loss": 23.1562, "step": 1784 }, { "epoch": 0.11854951185495119, "grad_norm": 169.85354614257812, "learning_rate": 1.995514266203731e-06, "loss": 20.4844, "step": 1785 }, { "epoch": 0.1186159261473069, "grad_norm": 307.48040771484375, "learning_rate": 1.995504084788437e-06, "loss": 21.0938, "step": 1786 }, { "epoch": 0.11868234043966261, "grad_norm": 192.00709533691406, "learning_rate": 1.9954938918577256e-06, "loss": 27.3125, "step": 1787 }, { "epoch": 0.11874875473201833, "grad_norm": 232.84988403320312, "learning_rate": 1.995483687411715e-06, "loss": 25.0625, "step": 1788 }, { "epoch": 0.11881516902437404, "grad_norm": 184.11956787109375, "learning_rate": 1.995473471450523e-06, "loss": 24.875, "step": 1789 }, { "epoch": 0.11888158331672977, "grad_norm": 222.83303833007812, "learning_rate": 1.9954632439742683e-06, "loss": 26.1562, "step": 1790 }, { "epoch": 0.11894799760908548, "grad_norm": 157.75567626953125, "learning_rate": 1.9954530049830685e-06, "loss": 21.2031, "step": 1791 }, { "epoch": 0.11901441190144119, "grad_norm": 197.47213745117188, "learning_rate": 1.995442754477043e-06, "loss": 27.0312, "step": 1792 }, { "epoch": 0.11908082619379691, "grad_norm": 178.04742431640625, "learning_rate": 1.9954324924563086e-06, "loss": 25.0312, "step": 1793 }, { "epoch": 0.11914724048615262, "grad_norm": 380.6228942871094, "learning_rate": 1.9954222189209856e-06, "loss": 26.75, "step": 1794 }, { "epoch": 0.11921365477850833, "grad_norm": 772.1321411132812, "learning_rate": 1.9954119338711924e-06, "loss": 24.375, "step": 1795 }, { "epoch": 0.11928006907086405, "grad_norm": 131.86143493652344, "learning_rate": 1.995401637307048e-06, "loss": 19.7188, "step": 1796 }, { "epoch": 0.11934648336321976, "grad_norm": 174.26272583007812, "learning_rate": 1.9953913292286716e-06, "loss": 22.625, "step": 1797 }, { "epoch": 0.11941289765557549, "grad_norm": 207.0204620361328, "learning_rate": 1.995381009636182e-06, "loss": 21.9375, "step": 1798 }, { "epoch": 0.1194793119479312, "grad_norm": 178.8519744873047, "learning_rate": 1.995370678529699e-06, "loss": 20.7188, "step": 1799 }, { "epoch": 0.1195457262402869, "grad_norm": 143.06231689453125, "learning_rate": 1.995360335909342e-06, "loss": 17.375, "step": 1800 }, { "epoch": 0.11961214053264263, "grad_norm": 239.27772521972656, "learning_rate": 1.9953499817752305e-06, "loss": 21.0, "step": 1801 }, { "epoch": 0.11967855482499834, "grad_norm": 229.27574157714844, "learning_rate": 1.9953396161274843e-06, "loss": 19.3906, "step": 1802 }, { "epoch": 0.11974496911735405, "grad_norm": 193.1858673095703, "learning_rate": 1.9953292389662233e-06, "loss": 16.2031, "step": 1803 }, { "epoch": 0.11981138340970977, "grad_norm": 1640.1600341796875, "learning_rate": 1.995318850291568e-06, "loss": 24.25, "step": 1804 }, { "epoch": 0.11987779770206548, "grad_norm": 160.81353759765625, "learning_rate": 1.995308450103638e-06, "loss": 18.5469, "step": 1805 }, { "epoch": 0.1199442119944212, "grad_norm": 220.97747802734375, "learning_rate": 1.9952980384025533e-06, "loss": 23.7969, "step": 1806 }, { "epoch": 0.12001062628677692, "grad_norm": 282.2904357910156, "learning_rate": 1.9952876151884354e-06, "loss": 24.75, "step": 1807 }, { "epoch": 0.12007704057913263, "grad_norm": 245.73471069335938, "learning_rate": 1.9952771804614043e-06, "loss": 20.2969, "step": 1808 }, { "epoch": 0.12014345487148835, "grad_norm": 236.42129516601562, "learning_rate": 1.9952667342215804e-06, "loss": 29.3438, "step": 1809 }, { "epoch": 0.12020986916384406, "grad_norm": 197.71348571777344, "learning_rate": 1.9952562764690847e-06, "loss": 21.7031, "step": 1810 }, { "epoch": 0.12027628345619977, "grad_norm": 384.9868469238281, "learning_rate": 1.995245807204038e-06, "loss": 23.375, "step": 1811 }, { "epoch": 0.1203426977485555, "grad_norm": 329.7610778808594, "learning_rate": 1.995235326426563e-06, "loss": 26.4688, "step": 1812 }, { "epoch": 0.1204091120409112, "grad_norm": 154.4685516357422, "learning_rate": 1.9952248341367785e-06, "loss": 23.4844, "step": 1813 }, { "epoch": 0.12047552633326691, "grad_norm": 126.10415649414062, "learning_rate": 1.995214330334807e-06, "loss": 18.7969, "step": 1814 }, { "epoch": 0.12054194062562264, "grad_norm": 276.7767333984375, "learning_rate": 1.9952038150207705e-06, "loss": 37.5312, "step": 1815 }, { "epoch": 0.12060835491797835, "grad_norm": 299.3113098144531, "learning_rate": 1.9951932881947902e-06, "loss": 17.3906, "step": 1816 }, { "epoch": 0.12067476921033407, "grad_norm": 148.435302734375, "learning_rate": 1.9951827498569872e-06, "loss": 18.4688, "step": 1817 }, { "epoch": 0.12074118350268978, "grad_norm": 228.6193389892578, "learning_rate": 1.9951722000074843e-06, "loss": 23.4062, "step": 1818 }, { "epoch": 0.12080759779504549, "grad_norm": 211.6616668701172, "learning_rate": 1.9951616386464034e-06, "loss": 26.375, "step": 1819 }, { "epoch": 0.12087401208740121, "grad_norm": 190.0148468017578, "learning_rate": 1.9951510657738665e-06, "loss": 22.5, "step": 1820 }, { "epoch": 0.12094042637975692, "grad_norm": 167.4160614013672, "learning_rate": 1.9951404813899955e-06, "loss": 18.5781, "step": 1821 }, { "epoch": 0.12100684067211263, "grad_norm": 196.4353485107422, "learning_rate": 1.9951298854949138e-06, "loss": 21.9375, "step": 1822 }, { "epoch": 0.12107325496446836, "grad_norm": 209.11851501464844, "learning_rate": 1.9951192780887432e-06, "loss": 29.3438, "step": 1823 }, { "epoch": 0.12113966925682407, "grad_norm": 307.5131530761719, "learning_rate": 1.9951086591716067e-06, "loss": 33.5625, "step": 1824 }, { "epoch": 0.12120608354917978, "grad_norm": 142.75186157226562, "learning_rate": 1.9950980287436265e-06, "loss": 23.1406, "step": 1825 }, { "epoch": 0.1212724978415355, "grad_norm": 219.5154266357422, "learning_rate": 1.995087386804927e-06, "loss": 25.9062, "step": 1826 }, { "epoch": 0.12133891213389121, "grad_norm": 410.679443359375, "learning_rate": 1.9950767333556297e-06, "loss": 17.375, "step": 1827 }, { "epoch": 0.12140532642624693, "grad_norm": 933.0557861328125, "learning_rate": 1.9950660683958586e-06, "loss": 20.5469, "step": 1828 }, { "epoch": 0.12147174071860264, "grad_norm": 505.5788879394531, "learning_rate": 1.9950553919257374e-06, "loss": 22.0625, "step": 1829 }, { "epoch": 0.12153815501095835, "grad_norm": 278.4473571777344, "learning_rate": 1.9950447039453887e-06, "loss": 23.4375, "step": 1830 }, { "epoch": 0.12160456930331408, "grad_norm": 197.75564575195312, "learning_rate": 1.9950340044549372e-06, "loss": 23.9062, "step": 1831 }, { "epoch": 0.12167098359566979, "grad_norm": 205.3637237548828, "learning_rate": 1.9950232934545057e-06, "loss": 23.625, "step": 1832 }, { "epoch": 0.1217373978880255, "grad_norm": 179.45840454101562, "learning_rate": 1.9950125709442186e-06, "loss": 19.4219, "step": 1833 }, { "epoch": 0.12180381218038122, "grad_norm": 156.12881469726562, "learning_rate": 1.9950018369242e-06, "loss": 22.625, "step": 1834 }, { "epoch": 0.12187022647273693, "grad_norm": 196.95521545410156, "learning_rate": 1.9949910913945737e-06, "loss": 21.1875, "step": 1835 }, { "epoch": 0.12193664076509265, "grad_norm": 310.4176330566406, "learning_rate": 1.994980334355464e-06, "loss": 21.2812, "step": 1836 }, { "epoch": 0.12200305505744836, "grad_norm": 288.4142761230469, "learning_rate": 1.9949695658069962e-06, "loss": 21.9688, "step": 1837 }, { "epoch": 0.12206946934980407, "grad_norm": 503.69195556640625, "learning_rate": 1.9949587857492937e-06, "loss": 29.25, "step": 1838 }, { "epoch": 0.1221358836421598, "grad_norm": 186.2353973388672, "learning_rate": 1.994947994182482e-06, "loss": 23.625, "step": 1839 }, { "epoch": 0.12220229793451551, "grad_norm": 149.114501953125, "learning_rate": 1.994937191106685e-06, "loss": 19.5781, "step": 1840 }, { "epoch": 0.12226871222687122, "grad_norm": 297.4740905761719, "learning_rate": 1.9949263765220286e-06, "loss": 20.875, "step": 1841 }, { "epoch": 0.12233512651922694, "grad_norm": 137.52035522460938, "learning_rate": 1.994915550428638e-06, "loss": 21.2812, "step": 1842 }, { "epoch": 0.12240154081158265, "grad_norm": 677.1524047851562, "learning_rate": 1.9949047128266376e-06, "loss": 27.75, "step": 1843 }, { "epoch": 0.12246795510393836, "grad_norm": 223.38392639160156, "learning_rate": 1.9948938637161533e-06, "loss": 21.875, "step": 1844 }, { "epoch": 0.12253436939629408, "grad_norm": 169.35960388183594, "learning_rate": 1.9948830030973106e-06, "loss": 23.9375, "step": 1845 }, { "epoch": 0.1226007836886498, "grad_norm": 509.3945007324219, "learning_rate": 1.9948721309702344e-06, "loss": 22.875, "step": 1846 }, { "epoch": 0.12266719798100552, "grad_norm": 201.19677734375, "learning_rate": 1.994861247335052e-06, "loss": 20.7188, "step": 1847 }, { "epoch": 0.12273361227336123, "grad_norm": 158.891357421875, "learning_rate": 1.9948503521918877e-06, "loss": 23.5938, "step": 1848 }, { "epoch": 0.12280002656571694, "grad_norm": 135.9833984375, "learning_rate": 1.9948394455408684e-06, "loss": 21.2188, "step": 1849 }, { "epoch": 0.12286644085807266, "grad_norm": 149.69212341308594, "learning_rate": 1.99482852738212e-06, "loss": 22.6875, "step": 1850 }, { "epoch": 0.12293285515042837, "grad_norm": 214.3806610107422, "learning_rate": 1.994817597715769e-06, "loss": 21.625, "step": 1851 }, { "epoch": 0.12299926944278408, "grad_norm": 170.30361938476562, "learning_rate": 1.9948066565419414e-06, "loss": 21.2188, "step": 1852 }, { "epoch": 0.1230656837351398, "grad_norm": 341.9070129394531, "learning_rate": 1.9947957038607643e-06, "loss": 18.1562, "step": 1853 }, { "epoch": 0.12313209802749552, "grad_norm": 259.5361633300781, "learning_rate": 1.994784739672364e-06, "loss": 22.75, "step": 1854 }, { "epoch": 0.12319851231985124, "grad_norm": 251.7302703857422, "learning_rate": 1.994773763976867e-06, "loss": 18.6562, "step": 1855 }, { "epoch": 0.12326492661220695, "grad_norm": 197.31517028808594, "learning_rate": 1.994762776774401e-06, "loss": 24.0, "step": 1856 }, { "epoch": 0.12333134090456266, "grad_norm": 525.7755126953125, "learning_rate": 1.9947517780650934e-06, "loss": 19.1562, "step": 1857 }, { "epoch": 0.12339775519691838, "grad_norm": 221.1438446044922, "learning_rate": 1.9947407678490704e-06, "loss": 22.2031, "step": 1858 }, { "epoch": 0.12346416948927409, "grad_norm": 174.5354766845703, "learning_rate": 1.9947297461264597e-06, "loss": 22.6875, "step": 1859 }, { "epoch": 0.1235305837816298, "grad_norm": 172.6741943359375, "learning_rate": 1.9947187128973893e-06, "loss": 25.625, "step": 1860 }, { "epoch": 0.12359699807398553, "grad_norm": 372.4442138671875, "learning_rate": 1.994707668161986e-06, "loss": 18.8125, "step": 1861 }, { "epoch": 0.12366341236634124, "grad_norm": 148.99618530273438, "learning_rate": 1.994696611920378e-06, "loss": 21.3438, "step": 1862 }, { "epoch": 0.12372982665869695, "grad_norm": 186.8201446533203, "learning_rate": 1.9946855441726937e-06, "loss": 20.25, "step": 1863 }, { "epoch": 0.12379624095105267, "grad_norm": 176.2028045654297, "learning_rate": 1.99467446491906e-06, "loss": 27.7812, "step": 1864 }, { "epoch": 0.12386265524340838, "grad_norm": 167.161376953125, "learning_rate": 1.9946633741596056e-06, "loss": 26.4375, "step": 1865 }, { "epoch": 0.1239290695357641, "grad_norm": 154.92189025878906, "learning_rate": 1.994652271894459e-06, "loss": 23.2188, "step": 1866 }, { "epoch": 0.12399548382811981, "grad_norm": 215.2190704345703, "learning_rate": 1.994641158123749e-06, "loss": 19.8438, "step": 1867 }, { "epoch": 0.12406189812047552, "grad_norm": 173.1608123779297, "learning_rate": 1.9946300328476026e-06, "loss": 23.5156, "step": 1868 }, { "epoch": 0.12412831241283125, "grad_norm": 205.5380096435547, "learning_rate": 1.99461889606615e-06, "loss": 17.7188, "step": 1869 }, { "epoch": 0.12419472670518696, "grad_norm": 152.84597778320312, "learning_rate": 1.99460774777952e-06, "loss": 21.2812, "step": 1870 }, { "epoch": 0.12426114099754267, "grad_norm": 219.92221069335938, "learning_rate": 1.9945965879878407e-06, "loss": 19.3281, "step": 1871 }, { "epoch": 0.12432755528989839, "grad_norm": 202.32627868652344, "learning_rate": 1.9945854166912415e-06, "loss": 24.875, "step": 1872 }, { "epoch": 0.1243939695822541, "grad_norm": 246.4282989501953, "learning_rate": 1.994574233889852e-06, "loss": 22.5156, "step": 1873 }, { "epoch": 0.12446038387460982, "grad_norm": 226.50608825683594, "learning_rate": 1.9945630395838007e-06, "loss": 20.7344, "step": 1874 }, { "epoch": 0.12452679816696553, "grad_norm": 153.58425903320312, "learning_rate": 1.9945518337732183e-06, "loss": 27.0625, "step": 1875 }, { "epoch": 0.12459321245932124, "grad_norm": 264.78961181640625, "learning_rate": 1.9945406164582335e-06, "loss": 17.1406, "step": 1876 }, { "epoch": 0.12465962675167697, "grad_norm": 254.66571044921875, "learning_rate": 1.9945293876389764e-06, "loss": 24.9375, "step": 1877 }, { "epoch": 0.12472604104403268, "grad_norm": 211.19326782226562, "learning_rate": 1.994518147315577e-06, "loss": 21.0625, "step": 1878 }, { "epoch": 0.12479245533638839, "grad_norm": 243.61260986328125, "learning_rate": 1.9945068954881644e-06, "loss": 19.1875, "step": 1879 }, { "epoch": 0.12485886962874411, "grad_norm": 219.12855529785156, "learning_rate": 1.99449563215687e-06, "loss": 21.25, "step": 1880 }, { "epoch": 0.12492528392109982, "grad_norm": 265.75677490234375, "learning_rate": 1.994484357321824e-06, "loss": 20.5156, "step": 1881 }, { "epoch": 0.12499169821345553, "grad_norm": 273.8396301269531, "learning_rate": 1.994473070983156e-06, "loss": 26.2812, "step": 1882 }, { "epoch": 0.12505811250581125, "grad_norm": 175.79266357421875, "learning_rate": 1.9944617731409965e-06, "loss": 17.0, "step": 1883 }, { "epoch": 0.12512452679816696, "grad_norm": 308.5451965332031, "learning_rate": 1.994450463795477e-06, "loss": 21.5625, "step": 1884 }, { "epoch": 0.12519094109052267, "grad_norm": 136.62887573242188, "learning_rate": 1.9944391429467285e-06, "loss": 23.375, "step": 1885 }, { "epoch": 0.12525735538287838, "grad_norm": 316.02325439453125, "learning_rate": 1.9944278105948808e-06, "loss": 25.0938, "step": 1886 }, { "epoch": 0.12532376967523412, "grad_norm": 848.6605834960938, "learning_rate": 1.994416466740066e-06, "loss": 18.2188, "step": 1887 }, { "epoch": 0.12539018396758983, "grad_norm": 385.95660400390625, "learning_rate": 1.9944051113824145e-06, "loss": 23.75, "step": 1888 }, { "epoch": 0.12545659825994554, "grad_norm": 198.2176055908203, "learning_rate": 1.9943937445220587e-06, "loss": 18.8125, "step": 1889 }, { "epoch": 0.12552301255230125, "grad_norm": 184.8277587890625, "learning_rate": 1.9943823661591292e-06, "loss": 19.0781, "step": 1890 }, { "epoch": 0.12558942684465696, "grad_norm": 368.75592041015625, "learning_rate": 1.9943709762937575e-06, "loss": 22.5938, "step": 1891 }, { "epoch": 0.1256558411370127, "grad_norm": 189.06207275390625, "learning_rate": 1.994359574926076e-06, "loss": 19.7188, "step": 1892 }, { "epoch": 0.1257222554293684, "grad_norm": 1178.186279296875, "learning_rate": 1.9943481620562164e-06, "loss": 20.9375, "step": 1893 }, { "epoch": 0.12578866972172412, "grad_norm": 304.9593811035156, "learning_rate": 1.9943367376843106e-06, "loss": 25.9375, "step": 1894 }, { "epoch": 0.12585508401407983, "grad_norm": 217.3447723388672, "learning_rate": 1.9943253018104906e-06, "loss": 21.0312, "step": 1895 }, { "epoch": 0.12592149830643554, "grad_norm": 194.34666442871094, "learning_rate": 1.994313854434889e-06, "loss": 19.2969, "step": 1896 }, { "epoch": 0.12598791259879125, "grad_norm": 260.43756103515625, "learning_rate": 1.994302395557638e-06, "loss": 21.7188, "step": 1897 }, { "epoch": 0.12605432689114698, "grad_norm": 177.82945251464844, "learning_rate": 1.99429092517887e-06, "loss": 23.1562, "step": 1898 }, { "epoch": 0.1261207411835027, "grad_norm": 136.92710876464844, "learning_rate": 1.9942794432987186e-06, "loss": 17.5781, "step": 1899 }, { "epoch": 0.1261871554758584, "grad_norm": 156.05435180664062, "learning_rate": 1.9942679499173157e-06, "loss": 18.7344, "step": 1900 }, { "epoch": 0.1262535697682141, "grad_norm": 166.51974487304688, "learning_rate": 1.9942564450347944e-06, "loss": 20.5469, "step": 1901 }, { "epoch": 0.12631998406056982, "grad_norm": 339.4131774902344, "learning_rate": 1.9942449286512873e-06, "loss": 20.7031, "step": 1902 }, { "epoch": 0.12638639835292556, "grad_norm": 157.02001953125, "learning_rate": 1.994233400766929e-06, "loss": 21.9375, "step": 1903 }, { "epoch": 0.12645281264528127, "grad_norm": 388.5343933105469, "learning_rate": 1.9942218613818517e-06, "loss": 28.1875, "step": 1904 }, { "epoch": 0.12651922693763698, "grad_norm": 262.827392578125, "learning_rate": 1.994210310496189e-06, "loss": 20.4219, "step": 1905 }, { "epoch": 0.1265856412299927, "grad_norm": 171.6873321533203, "learning_rate": 1.9941987481100753e-06, "loss": 17.1875, "step": 1906 }, { "epoch": 0.1266520555223484, "grad_norm": 249.5217742919922, "learning_rate": 1.9941871742236435e-06, "loss": 19.4375, "step": 1907 }, { "epoch": 0.12671846981470414, "grad_norm": 309.5939636230469, "learning_rate": 1.9941755888370277e-06, "loss": 31.2812, "step": 1908 }, { "epoch": 0.12678488410705985, "grad_norm": 162.7752685546875, "learning_rate": 1.9941639919503615e-06, "loss": 21.5, "step": 1909 }, { "epoch": 0.12685129839941556, "grad_norm": 551.6049194335938, "learning_rate": 1.9941523835637805e-06, "loss": 24.2188, "step": 1910 }, { "epoch": 0.12691771269177127, "grad_norm": 207.69309997558594, "learning_rate": 1.9941407636774174e-06, "loss": 18.4062, "step": 1911 }, { "epoch": 0.12698412698412698, "grad_norm": 244.74404907226562, "learning_rate": 1.994129132291407e-06, "loss": 26.125, "step": 1912 }, { "epoch": 0.1270505412764827, "grad_norm": 253.59571838378906, "learning_rate": 1.994117489405884e-06, "loss": 24.2812, "step": 1913 }, { "epoch": 0.12711695556883842, "grad_norm": 166.61073303222656, "learning_rate": 1.9941058350209837e-06, "loss": 18.0312, "step": 1914 }, { "epoch": 0.12718336986119413, "grad_norm": 175.12725830078125, "learning_rate": 1.99409416913684e-06, "loss": 28.2188, "step": 1915 }, { "epoch": 0.12724978415354984, "grad_norm": 115.55463409423828, "learning_rate": 1.9940824917535885e-06, "loss": 19.1875, "step": 1916 }, { "epoch": 0.12731619844590555, "grad_norm": 285.6515808105469, "learning_rate": 1.9940708028713634e-06, "loss": 20.5938, "step": 1917 }, { "epoch": 0.12738261273826126, "grad_norm": 178.01502990722656, "learning_rate": 1.994059102490301e-06, "loss": 20.875, "step": 1918 }, { "epoch": 0.127449027030617, "grad_norm": 428.6481628417969, "learning_rate": 1.994047390610536e-06, "loss": 27.0625, "step": 1919 }, { "epoch": 0.1275154413229727, "grad_norm": 397.20904541015625, "learning_rate": 1.9940356672322033e-06, "loss": 23.7188, "step": 1920 }, { "epoch": 0.12758185561532842, "grad_norm": 429.7027893066406, "learning_rate": 1.99402393235544e-06, "loss": 22.5781, "step": 1921 }, { "epoch": 0.12764826990768413, "grad_norm": 3024.3837890625, "learning_rate": 1.994012185980381e-06, "loss": 23.1562, "step": 1922 }, { "epoch": 0.12771468420003984, "grad_norm": 328.7854919433594, "learning_rate": 1.9940004281071617e-06, "loss": 18.6875, "step": 1923 }, { "epoch": 0.12778109849239555, "grad_norm": 210.23989868164062, "learning_rate": 1.9939886587359188e-06, "loss": 19.3281, "step": 1924 }, { "epoch": 0.1278475127847513, "grad_norm": 116.7938232421875, "learning_rate": 1.9939768778667885e-06, "loss": 16.1719, "step": 1925 }, { "epoch": 0.127913927077107, "grad_norm": 349.0055847167969, "learning_rate": 1.9939650854999067e-06, "loss": 23.4062, "step": 1926 }, { "epoch": 0.1279803413694627, "grad_norm": 186.71029663085938, "learning_rate": 1.99395328163541e-06, "loss": 18.6875, "step": 1927 }, { "epoch": 0.12804675566181842, "grad_norm": 180.9275665283203, "learning_rate": 1.993941466273435e-06, "loss": 20.0312, "step": 1928 }, { "epoch": 0.12811316995417413, "grad_norm": 253.83853149414062, "learning_rate": 1.993929639414118e-06, "loss": 26.6562, "step": 1929 }, { "epoch": 0.12817958424652987, "grad_norm": 271.4775085449219, "learning_rate": 1.9939178010575963e-06, "loss": 15.9844, "step": 1930 }, { "epoch": 0.12824599853888557, "grad_norm": 162.36581420898438, "learning_rate": 1.9939059512040064e-06, "loss": 18.9219, "step": 1931 }, { "epoch": 0.12831241283124128, "grad_norm": 217.13275146484375, "learning_rate": 1.9938940898534854e-06, "loss": 23.9531, "step": 1932 }, { "epoch": 0.128378827123597, "grad_norm": 326.76214599609375, "learning_rate": 1.993882217006171e-06, "loss": 28.0781, "step": 1933 }, { "epoch": 0.1284452414159527, "grad_norm": 384.0521240234375, "learning_rate": 1.9938703326622002e-06, "loss": 19.9688, "step": 1934 }, { "epoch": 0.12851165570830841, "grad_norm": 278.9498596191406, "learning_rate": 1.9938584368217106e-06, "loss": 22.1875, "step": 1935 }, { "epoch": 0.12857807000066415, "grad_norm": 133.10023498535156, "learning_rate": 1.9938465294848395e-06, "loss": 23.1562, "step": 1936 }, { "epoch": 0.12864448429301986, "grad_norm": 338.2537536621094, "learning_rate": 1.9938346106517248e-06, "loss": 21.25, "step": 1937 }, { "epoch": 0.12871089858537557, "grad_norm": 203.42886352539062, "learning_rate": 1.9938226803225044e-06, "loss": 20.8906, "step": 1938 }, { "epoch": 0.12877731287773128, "grad_norm": 133.7837371826172, "learning_rate": 1.9938107384973164e-06, "loss": 19.8125, "step": 1939 }, { "epoch": 0.128843727170087, "grad_norm": 208.34909057617188, "learning_rate": 1.9937987851762985e-06, "loss": 21.75, "step": 1940 }, { "epoch": 0.12891014146244273, "grad_norm": 305.49359130859375, "learning_rate": 1.99378682035959e-06, "loss": 22.875, "step": 1941 }, { "epoch": 0.12897655575479844, "grad_norm": 291.63836669921875, "learning_rate": 1.993774844047328e-06, "loss": 25.3281, "step": 1942 }, { "epoch": 0.12904297004715415, "grad_norm": 305.1022033691406, "learning_rate": 1.9937628562396517e-06, "loss": 23.125, "step": 1943 }, { "epoch": 0.12910938433950986, "grad_norm": 261.8619384765625, "learning_rate": 1.9937508569367e-06, "loss": 22.375, "step": 1944 }, { "epoch": 0.12917579863186557, "grad_norm": 196.00039672851562, "learning_rate": 1.993738846138611e-06, "loss": 18.2656, "step": 1945 }, { "epoch": 0.1292422129242213, "grad_norm": 260.1197509765625, "learning_rate": 1.993726823845525e-06, "loss": 30.1875, "step": 1946 }, { "epoch": 0.12930862721657702, "grad_norm": 136.4261932373047, "learning_rate": 1.993714790057579e-06, "loss": 16.0312, "step": 1947 }, { "epoch": 0.12937504150893273, "grad_norm": 248.73358154296875, "learning_rate": 1.993702744774914e-06, "loss": 22.3438, "step": 1948 }, { "epoch": 0.12944145580128844, "grad_norm": 194.79701232910156, "learning_rate": 1.9936906879976683e-06, "loss": 19.2812, "step": 1949 }, { "epoch": 0.12950787009364415, "grad_norm": 191.54136657714844, "learning_rate": 1.993678619725982e-06, "loss": 20.75, "step": 1950 }, { "epoch": 0.12957428438599985, "grad_norm": 187.32884216308594, "learning_rate": 1.993666539959994e-06, "loss": 23.2188, "step": 1951 }, { "epoch": 0.1296406986783556, "grad_norm": 257.5623779296875, "learning_rate": 1.993654448699845e-06, "loss": 19.4531, "step": 1952 }, { "epoch": 0.1297071129707113, "grad_norm": 238.80361938476562, "learning_rate": 1.993642345945674e-06, "loss": 24.0938, "step": 1953 }, { "epoch": 0.129773527263067, "grad_norm": 412.3127746582031, "learning_rate": 1.9936302316976213e-06, "loss": 21.8125, "step": 1954 }, { "epoch": 0.12983994155542272, "grad_norm": 156.72332763671875, "learning_rate": 1.9936181059558273e-06, "loss": 18.7188, "step": 1955 }, { "epoch": 0.12990635584777843, "grad_norm": 224.2266082763672, "learning_rate": 1.9936059687204315e-06, "loss": 24.4531, "step": 1956 }, { "epoch": 0.12997277014013417, "grad_norm": 410.99554443359375, "learning_rate": 1.9935938199915753e-06, "loss": 26.7812, "step": 1957 }, { "epoch": 0.13003918443248988, "grad_norm": 181.49916076660156, "learning_rate": 1.9935816597693985e-06, "loss": 25.5938, "step": 1958 }, { "epoch": 0.1301055987248456, "grad_norm": 322.39959716796875, "learning_rate": 1.993569488054042e-06, "loss": 23.1562, "step": 1959 }, { "epoch": 0.1301720130172013, "grad_norm": 168.55584716796875, "learning_rate": 1.9935573048456463e-06, "loss": 22.1094, "step": 1960 }, { "epoch": 0.130238427309557, "grad_norm": 393.86260986328125, "learning_rate": 1.993545110144353e-06, "loss": 25.6562, "step": 1961 }, { "epoch": 0.13030484160191272, "grad_norm": 566.1396484375, "learning_rate": 1.993532903950303e-06, "loss": 20.8281, "step": 1962 }, { "epoch": 0.13037125589426846, "grad_norm": 216.4644012451172, "learning_rate": 1.993520686263637e-06, "loss": 27.1875, "step": 1963 }, { "epoch": 0.13043767018662417, "grad_norm": 180.14854431152344, "learning_rate": 1.9935084570844963e-06, "loss": 20.0625, "step": 1964 }, { "epoch": 0.13050408447897988, "grad_norm": 315.7114562988281, "learning_rate": 1.993496216413023e-06, "loss": 24.1562, "step": 1965 }, { "epoch": 0.13057049877133559, "grad_norm": 159.04608154296875, "learning_rate": 1.9934839642493584e-06, "loss": 18.6406, "step": 1966 }, { "epoch": 0.1306369130636913, "grad_norm": 161.88076782226562, "learning_rate": 1.9934717005936438e-06, "loss": 23.4688, "step": 1967 }, { "epoch": 0.13070332735604703, "grad_norm": 121.61004638671875, "learning_rate": 1.9934594254460214e-06, "loss": 20.7344, "step": 1968 }, { "epoch": 0.13076974164840274, "grad_norm": 197.8072052001953, "learning_rate": 1.9934471388066337e-06, "loss": 20.6406, "step": 1969 }, { "epoch": 0.13083615594075845, "grad_norm": 165.68727111816406, "learning_rate": 1.993434840675622e-06, "loss": 15.1719, "step": 1970 }, { "epoch": 0.13090257023311416, "grad_norm": 193.65469360351562, "learning_rate": 1.9934225310531292e-06, "loss": 21.2188, "step": 1971 }, { "epoch": 0.13096898452546987, "grad_norm": 242.3997802734375, "learning_rate": 1.993410209939297e-06, "loss": 18.2188, "step": 1972 }, { "epoch": 0.13103539881782558, "grad_norm": 565.9468994140625, "learning_rate": 1.993397877334269e-06, "loss": 20.6562, "step": 1973 }, { "epoch": 0.13110181311018132, "grad_norm": 147.25001525878906, "learning_rate": 1.9933855332381867e-06, "loss": 21.4531, "step": 1974 }, { "epoch": 0.13116822740253703, "grad_norm": 304.7337646484375, "learning_rate": 1.9933731776511937e-06, "loss": 29.5469, "step": 1975 }, { "epoch": 0.13123464169489274, "grad_norm": 174.0998992919922, "learning_rate": 1.993360810573432e-06, "loss": 22.6875, "step": 1976 }, { "epoch": 0.13130105598724845, "grad_norm": 114.03954315185547, "learning_rate": 1.9933484320050457e-06, "loss": 18.125, "step": 1977 }, { "epoch": 0.13136747027960416, "grad_norm": 235.8745880126953, "learning_rate": 1.9933360419461775e-06, "loss": 20.9375, "step": 1978 }, { "epoch": 0.1314338845719599, "grad_norm": 214.83102416992188, "learning_rate": 1.9933236403969707e-06, "loss": 24.0938, "step": 1979 }, { "epoch": 0.1315002988643156, "grad_norm": 403.0351257324219, "learning_rate": 1.9933112273575684e-06, "loss": 27.2188, "step": 1980 }, { "epoch": 0.13156671315667132, "grad_norm": 259.2928466796875, "learning_rate": 1.9932988028281155e-06, "loss": 23.0625, "step": 1981 }, { "epoch": 0.13163312744902703, "grad_norm": 208.356689453125, "learning_rate": 1.993286366808754e-06, "loss": 25.6094, "step": 1982 }, { "epoch": 0.13169954174138274, "grad_norm": 178.82374572753906, "learning_rate": 1.993273919299629e-06, "loss": 21.6094, "step": 1983 }, { "epoch": 0.13176595603373847, "grad_norm": 369.6868591308594, "learning_rate": 1.993261460300884e-06, "loss": 24.5781, "step": 1984 }, { "epoch": 0.13183237032609418, "grad_norm": 157.21401977539062, "learning_rate": 1.993248989812663e-06, "loss": 22.5625, "step": 1985 }, { "epoch": 0.1318987846184499, "grad_norm": 159.07958984375, "learning_rate": 1.9932365078351108e-06, "loss": 20.2969, "step": 1986 }, { "epoch": 0.1319651989108056, "grad_norm": 245.06854248046875, "learning_rate": 1.993224014368371e-06, "loss": 26.4375, "step": 1987 }, { "epoch": 0.1320316132031613, "grad_norm": 185.50527954101562, "learning_rate": 1.9932115094125884e-06, "loss": 18.9688, "step": 1988 }, { "epoch": 0.13209802749551702, "grad_norm": 229.743408203125, "learning_rate": 1.9931989929679083e-06, "loss": 16.6562, "step": 1989 }, { "epoch": 0.13216444178787276, "grad_norm": 166.2001953125, "learning_rate": 1.9931864650344744e-06, "loss": 19.2812, "step": 1990 }, { "epoch": 0.13223085608022847, "grad_norm": 172.22381591796875, "learning_rate": 1.9931739256124328e-06, "loss": 20.2031, "step": 1991 }, { "epoch": 0.13229727037258418, "grad_norm": 224.2855682373047, "learning_rate": 1.9931613747019274e-06, "loss": 22.0469, "step": 1992 }, { "epoch": 0.1323636846649399, "grad_norm": 160.0260009765625, "learning_rate": 1.993148812303104e-06, "loss": 22.0938, "step": 1993 }, { "epoch": 0.1324300989572956, "grad_norm": 743.9744873046875, "learning_rate": 1.9931362384161083e-06, "loss": 32.8125, "step": 1994 }, { "epoch": 0.13249651324965134, "grad_norm": 210.54820251464844, "learning_rate": 1.993123653041085e-06, "loss": 21.4688, "step": 1995 }, { "epoch": 0.13256292754200705, "grad_norm": 536.2852172851562, "learning_rate": 1.9931110561781796e-06, "loss": 17.3438, "step": 1996 }, { "epoch": 0.13262934183436276, "grad_norm": 234.9747772216797, "learning_rate": 1.9930984478275385e-06, "loss": 21.1875, "step": 1997 }, { "epoch": 0.13269575612671847, "grad_norm": 261.9476013183594, "learning_rate": 1.993085827989307e-06, "loss": 19.7812, "step": 1998 }, { "epoch": 0.13276217041907418, "grad_norm": 323.4515380859375, "learning_rate": 1.9930731966636315e-06, "loss": 21.375, "step": 1999 }, { "epoch": 0.1328285847114299, "grad_norm": 171.5908660888672, "learning_rate": 1.993060553850658e-06, "loss": 18.5938, "step": 2000 }, { "epoch": 0.13289499900378562, "grad_norm": 392.37530517578125, "learning_rate": 1.993047899550532e-06, "loss": 16.9844, "step": 2001 }, { "epoch": 0.13296141329614133, "grad_norm": 211.21324157714844, "learning_rate": 1.993035233763401e-06, "loss": 25.6562, "step": 2002 }, { "epoch": 0.13302782758849704, "grad_norm": 180.43238830566406, "learning_rate": 1.9930225564894113e-06, "loss": 20.6562, "step": 2003 }, { "epoch": 0.13309424188085275, "grad_norm": 251.68746948242188, "learning_rate": 1.993009867728709e-06, "loss": 21.2188, "step": 2004 }, { "epoch": 0.13316065617320846, "grad_norm": 259.63238525390625, "learning_rate": 1.992997167481441e-06, "loss": 26.8125, "step": 2005 }, { "epoch": 0.1332270704655642, "grad_norm": 325.36749267578125, "learning_rate": 1.9929844557477546e-06, "loss": 20.75, "step": 2006 }, { "epoch": 0.1332934847579199, "grad_norm": 353.82647705078125, "learning_rate": 1.9929717325277966e-06, "loss": 36.3438, "step": 2007 }, { "epoch": 0.13335989905027562, "grad_norm": 171.2276611328125, "learning_rate": 1.992958997821714e-06, "loss": 22.9062, "step": 2008 }, { "epoch": 0.13342631334263133, "grad_norm": 236.307373046875, "learning_rate": 1.9929462516296544e-06, "loss": 18.875, "step": 2009 }, { "epoch": 0.13349272763498704, "grad_norm": 166.2023468017578, "learning_rate": 1.992933493951765e-06, "loss": 22.0469, "step": 2010 }, { "epoch": 0.13355914192734275, "grad_norm": 192.1544647216797, "learning_rate": 1.9929207247881936e-06, "loss": 16.4531, "step": 2011 }, { "epoch": 0.1336255562196985, "grad_norm": 182.3130340576172, "learning_rate": 1.992907944139088e-06, "loss": 23.2812, "step": 2012 }, { "epoch": 0.1336919705120542, "grad_norm": 216.1997833251953, "learning_rate": 1.9928951520045954e-06, "loss": 22.8906, "step": 2013 }, { "epoch": 0.1337583848044099, "grad_norm": 225.98284912109375, "learning_rate": 1.9928823483848646e-06, "loss": 18.9844, "step": 2014 }, { "epoch": 0.13382479909676562, "grad_norm": 236.13291931152344, "learning_rate": 1.992869533280043e-06, "loss": 23.4375, "step": 2015 }, { "epoch": 0.13389121338912133, "grad_norm": 274.46405029296875, "learning_rate": 1.9928567066902796e-06, "loss": 24.5, "step": 2016 }, { "epoch": 0.13395762768147707, "grad_norm": 216.22189331054688, "learning_rate": 1.992843868615722e-06, "loss": 20.8125, "step": 2017 }, { "epoch": 0.13402404197383277, "grad_norm": 225.1204833984375, "learning_rate": 1.9928310190565193e-06, "loss": 21.0, "step": 2018 }, { "epoch": 0.13409045626618848, "grad_norm": 188.8235626220703, "learning_rate": 1.9928181580128192e-06, "loss": 17.9531, "step": 2019 }, { "epoch": 0.1341568705585442, "grad_norm": 212.99842834472656, "learning_rate": 1.992805285484772e-06, "loss": 21.7812, "step": 2020 }, { "epoch": 0.1342232848508999, "grad_norm": 618.8545532226562, "learning_rate": 1.9927924014725254e-06, "loss": 20.3438, "step": 2021 }, { "epoch": 0.13428969914325564, "grad_norm": 350.3206481933594, "learning_rate": 1.9927795059762286e-06, "loss": 29.8438, "step": 2022 }, { "epoch": 0.13435611343561135, "grad_norm": 260.69586181640625, "learning_rate": 1.992766598996031e-06, "loss": 18.75, "step": 2023 }, { "epoch": 0.13442252772796706, "grad_norm": 394.770263671875, "learning_rate": 1.9927536805320823e-06, "loss": 29.1875, "step": 2024 }, { "epoch": 0.13448894202032277, "grad_norm": 392.4474182128906, "learning_rate": 1.992740750584531e-06, "loss": 27.0938, "step": 2025 }, { "epoch": 0.13455535631267848, "grad_norm": 171.07078552246094, "learning_rate": 1.9927278091535272e-06, "loss": 18.0938, "step": 2026 }, { "epoch": 0.1346217706050342, "grad_norm": 479.7737121582031, "learning_rate": 1.992714856239221e-06, "loss": 18.4531, "step": 2027 }, { "epoch": 0.13468818489738993, "grad_norm": 191.7958984375, "learning_rate": 1.992701891841761e-06, "loss": 25.75, "step": 2028 }, { "epoch": 0.13475459918974564, "grad_norm": 255.0034637451172, "learning_rate": 1.9926889159612982e-06, "loss": 20.7812, "step": 2029 }, { "epoch": 0.13482101348210135, "grad_norm": 304.0769958496094, "learning_rate": 1.9926759285979827e-06, "loss": 21.0938, "step": 2030 }, { "epoch": 0.13488742777445706, "grad_norm": 222.74571228027344, "learning_rate": 1.992662929751964e-06, "loss": 22.5312, "step": 2031 }, { "epoch": 0.13495384206681277, "grad_norm": 331.25897216796875, "learning_rate": 1.9926499194233934e-06, "loss": 30.7188, "step": 2032 }, { "epoch": 0.1350202563591685, "grad_norm": 728.052001953125, "learning_rate": 1.992636897612421e-06, "loss": 31.4688, "step": 2033 }, { "epoch": 0.13508667065152422, "grad_norm": 199.08782958984375, "learning_rate": 1.992623864319197e-06, "loss": 21.875, "step": 2034 }, { "epoch": 0.13515308494387993, "grad_norm": 271.82293701171875, "learning_rate": 1.992610819543873e-06, "loss": 25.6875, "step": 2035 }, { "epoch": 0.13521949923623564, "grad_norm": 315.6239013671875, "learning_rate": 1.9925977632865985e-06, "loss": 20.625, "step": 2036 }, { "epoch": 0.13528591352859135, "grad_norm": 176.0443115234375, "learning_rate": 1.992584695547526e-06, "loss": 21.7812, "step": 2037 }, { "epoch": 0.13535232782094705, "grad_norm": 277.8758544921875, "learning_rate": 1.9925716163268062e-06, "loss": 22.5312, "step": 2038 }, { "epoch": 0.1354187421133028, "grad_norm": 391.6315002441406, "learning_rate": 1.9925585256245902e-06, "loss": 20.5156, "step": 2039 }, { "epoch": 0.1354851564056585, "grad_norm": 203.8223419189453, "learning_rate": 1.99254542344103e-06, "loss": 23.3438, "step": 2040 }, { "epoch": 0.1355515706980142, "grad_norm": 219.0701904296875, "learning_rate": 1.992532309776276e-06, "loss": 18.5625, "step": 2041 }, { "epoch": 0.13561798499036992, "grad_norm": 325.5034484863281, "learning_rate": 1.9925191846304806e-06, "loss": 24.3438, "step": 2042 }, { "epoch": 0.13568439928272563, "grad_norm": 252.09034729003906, "learning_rate": 1.992506048003796e-06, "loss": 24.75, "step": 2043 }, { "epoch": 0.13575081357508137, "grad_norm": 249.10638427734375, "learning_rate": 1.992492899896373e-06, "loss": 26.9062, "step": 2044 }, { "epoch": 0.13581722786743708, "grad_norm": 153.88067626953125, "learning_rate": 1.992479740308365e-06, "loss": 18.2188, "step": 2045 }, { "epoch": 0.1358836421597928, "grad_norm": 390.8125305175781, "learning_rate": 1.992466569239924e-06, "loss": 22.0156, "step": 2046 }, { "epoch": 0.1359500564521485, "grad_norm": 179.03793334960938, "learning_rate": 1.9924533866912014e-06, "loss": 21.375, "step": 2047 }, { "epoch": 0.1360164707445042, "grad_norm": 209.4019012451172, "learning_rate": 1.992440192662351e-06, "loss": 18.4688, "step": 2048 }, { "epoch": 0.13608288503685992, "grad_norm": 186.9613494873047, "learning_rate": 1.992426987153524e-06, "loss": 20.5938, "step": 2049 }, { "epoch": 0.13614929932921566, "grad_norm": 350.0854187011719, "learning_rate": 1.992413770164874e-06, "loss": 23.0312, "step": 2050 }, { "epoch": 0.13621571362157137, "grad_norm": 441.62042236328125, "learning_rate": 1.9924005416965537e-06, "loss": 22.2031, "step": 2051 }, { "epoch": 0.13628212791392708, "grad_norm": 280.9316101074219, "learning_rate": 1.9923873017487163e-06, "loss": 20.5938, "step": 2052 }, { "epoch": 0.13634854220628279, "grad_norm": 109.88761901855469, "learning_rate": 1.9923740503215154e-06, "loss": 15.1406, "step": 2053 }, { "epoch": 0.1364149564986385, "grad_norm": 179.8908233642578, "learning_rate": 1.992360787415103e-06, "loss": 22.3125, "step": 2054 }, { "epoch": 0.13648137079099423, "grad_norm": 161.27186584472656, "learning_rate": 1.9923475130296333e-06, "loss": 22.2656, "step": 2055 }, { "epoch": 0.13654778508334994, "grad_norm": 461.8307800292969, "learning_rate": 1.99233422716526e-06, "loss": 19.25, "step": 2056 }, { "epoch": 0.13661419937570565, "grad_norm": 242.4347381591797, "learning_rate": 1.9923209298221365e-06, "loss": 23.3438, "step": 2057 }, { "epoch": 0.13668061366806136, "grad_norm": 453.9687805175781, "learning_rate": 1.9923076210004167e-06, "loss": 20.6875, "step": 2058 }, { "epoch": 0.13674702796041707, "grad_norm": 578.8563232421875, "learning_rate": 1.9922943007002545e-06, "loss": 26.8594, "step": 2059 }, { "epoch": 0.1368134422527728, "grad_norm": 447.66094970703125, "learning_rate": 1.992280968921804e-06, "loss": 18.6562, "step": 2060 }, { "epoch": 0.13687985654512852, "grad_norm": 399.61993408203125, "learning_rate": 1.9922676256652196e-06, "loss": 20.3281, "step": 2061 }, { "epoch": 0.13694627083748423, "grad_norm": 288.50360107421875, "learning_rate": 1.9922542709306557e-06, "loss": 23.75, "step": 2062 }, { "epoch": 0.13701268512983994, "grad_norm": 235.5913543701172, "learning_rate": 1.992240904718266e-06, "loss": 24.0156, "step": 2063 }, { "epoch": 0.13707909942219565, "grad_norm": 266.3539123535156, "learning_rate": 1.992227527028206e-06, "loss": 23.7188, "step": 2064 }, { "epoch": 0.13714551371455136, "grad_norm": 418.372314453125, "learning_rate": 1.99221413786063e-06, "loss": 20.2969, "step": 2065 }, { "epoch": 0.1372119280069071, "grad_norm": 273.30633544921875, "learning_rate": 1.9922007372156933e-06, "loss": 17.9375, "step": 2066 }, { "epoch": 0.1372783422992628, "grad_norm": 226.50086975097656, "learning_rate": 1.99218732509355e-06, "loss": 18.8125, "step": 2067 }, { "epoch": 0.13734475659161852, "grad_norm": 216.04275512695312, "learning_rate": 1.9921739014943562e-06, "loss": 19.8438, "step": 2068 }, { "epoch": 0.13741117088397423, "grad_norm": 214.04762268066406, "learning_rate": 1.992160466418267e-06, "loss": 29.2812, "step": 2069 }, { "epoch": 0.13747758517632994, "grad_norm": 253.69923400878906, "learning_rate": 1.9921470198654372e-06, "loss": 26.7344, "step": 2070 }, { "epoch": 0.13754399946868567, "grad_norm": 120.73892211914062, "learning_rate": 1.9921335618360233e-06, "loss": 16.4688, "step": 2071 }, { "epoch": 0.13761041376104138, "grad_norm": 237.52859497070312, "learning_rate": 1.99212009233018e-06, "loss": 18.75, "step": 2072 }, { "epoch": 0.1376768280533971, "grad_norm": 198.71434020996094, "learning_rate": 1.992106611348064e-06, "loss": 22.3125, "step": 2073 }, { "epoch": 0.1377432423457528, "grad_norm": 241.21945190429688, "learning_rate": 1.9920931188898303e-06, "loss": 23.0625, "step": 2074 }, { "epoch": 0.1378096566381085, "grad_norm": 183.90084838867188, "learning_rate": 1.992079614955636e-06, "loss": 19.5156, "step": 2075 }, { "epoch": 0.13787607093046422, "grad_norm": 288.9510803222656, "learning_rate": 1.9920660995456365e-06, "loss": 22.2188, "step": 2076 }, { "epoch": 0.13794248522281996, "grad_norm": 272.4867248535156, "learning_rate": 1.9920525726599884e-06, "loss": 17.2969, "step": 2077 }, { "epoch": 0.13800889951517567, "grad_norm": 193.0972442626953, "learning_rate": 1.992039034298848e-06, "loss": 13.0938, "step": 2078 }, { "epoch": 0.13807531380753138, "grad_norm": 161.8263702392578, "learning_rate": 1.9920254844623727e-06, "loss": 28.7188, "step": 2079 }, { "epoch": 0.1381417280998871, "grad_norm": 337.63885498046875, "learning_rate": 1.9920119231507184e-06, "loss": 26.8438, "step": 2080 }, { "epoch": 0.1382081423922428, "grad_norm": 301.87200927734375, "learning_rate": 1.991998350364042e-06, "loss": 22.5312, "step": 2081 }, { "epoch": 0.13827455668459854, "grad_norm": 367.000732421875, "learning_rate": 1.9919847661025005e-06, "loss": 25.5312, "step": 2082 }, { "epoch": 0.13834097097695425, "grad_norm": 343.4363098144531, "learning_rate": 1.9919711703662515e-06, "loss": 24.3125, "step": 2083 }, { "epoch": 0.13840738526930996, "grad_norm": 197.2932891845703, "learning_rate": 1.9919575631554518e-06, "loss": 21.4844, "step": 2084 }, { "epoch": 0.13847379956166567, "grad_norm": 253.35411071777344, "learning_rate": 1.9919439444702592e-06, "loss": 20.75, "step": 2085 }, { "epoch": 0.13854021385402138, "grad_norm": 247.6868896484375, "learning_rate": 1.991930314310831e-06, "loss": 18.2656, "step": 2086 }, { "epoch": 0.1386066281463771, "grad_norm": 229.44461059570312, "learning_rate": 1.991916672677325e-06, "loss": 19.2656, "step": 2087 }, { "epoch": 0.13867304243873282, "grad_norm": 231.517822265625, "learning_rate": 1.991903019569899e-06, "loss": 18.8906, "step": 2088 }, { "epoch": 0.13873945673108853, "grad_norm": 139.0255126953125, "learning_rate": 1.9918893549887107e-06, "loss": 23.0312, "step": 2089 }, { "epoch": 0.13880587102344424, "grad_norm": 241.36781311035156, "learning_rate": 1.991875678933918e-06, "loss": 21.2188, "step": 2090 }, { "epoch": 0.13887228531579995, "grad_norm": 188.43955993652344, "learning_rate": 1.99186199140568e-06, "loss": 21.6406, "step": 2091 }, { "epoch": 0.13893869960815566, "grad_norm": 176.6198272705078, "learning_rate": 1.991848292404154e-06, "loss": 18.7656, "step": 2092 }, { "epoch": 0.1390051139005114, "grad_norm": 486.08917236328125, "learning_rate": 1.991834581929499e-06, "loss": 17.3906, "step": 2093 }, { "epoch": 0.1390715281928671, "grad_norm": 320.98846435546875, "learning_rate": 1.9918208599818735e-06, "loss": 27.0312, "step": 2094 }, { "epoch": 0.13913794248522282, "grad_norm": 382.79656982421875, "learning_rate": 1.991807126561436e-06, "loss": 21.375, "step": 2095 }, { "epoch": 0.13920435677757853, "grad_norm": 309.7762451171875, "learning_rate": 1.9917933816683456e-06, "loss": 22.5781, "step": 2096 }, { "epoch": 0.13927077106993424, "grad_norm": 294.1110534667969, "learning_rate": 1.9917796253027616e-06, "loss": 22.1562, "step": 2097 }, { "epoch": 0.13933718536228998, "grad_norm": 303.4586486816406, "learning_rate": 1.9917658574648426e-06, "loss": 26.0625, "step": 2098 }, { "epoch": 0.1394035996546457, "grad_norm": 156.3911590576172, "learning_rate": 1.991752078154748e-06, "loss": 18.7188, "step": 2099 }, { "epoch": 0.1394700139470014, "grad_norm": 276.936767578125, "learning_rate": 1.9917382873726373e-06, "loss": 26.875, "step": 2100 }, { "epoch": 0.1395364282393571, "grad_norm": 147.22247314453125, "learning_rate": 1.99172448511867e-06, "loss": 18.25, "step": 2101 }, { "epoch": 0.13960284253171282, "grad_norm": 475.1858215332031, "learning_rate": 1.9917106713930053e-06, "loss": 25.0625, "step": 2102 }, { "epoch": 0.13966925682406853, "grad_norm": 198.73178100585938, "learning_rate": 1.991696846195804e-06, "loss": 24.9062, "step": 2103 }, { "epoch": 0.13973567111642426, "grad_norm": 388.79364013671875, "learning_rate": 1.991683009527225e-06, "loss": 20.4375, "step": 2104 }, { "epoch": 0.13980208540877997, "grad_norm": 282.3170166015625, "learning_rate": 1.991669161387429e-06, "loss": 27.7188, "step": 2105 }, { "epoch": 0.13986849970113568, "grad_norm": 150.53065490722656, "learning_rate": 1.991655301776576e-06, "loss": 21.0, "step": 2106 }, { "epoch": 0.1399349139934914, "grad_norm": 222.64878845214844, "learning_rate": 1.9916414306948265e-06, "loss": 19.25, "step": 2107 }, { "epoch": 0.1400013282858471, "grad_norm": 289.6336975097656, "learning_rate": 1.99162754814234e-06, "loss": 23.0625, "step": 2108 }, { "epoch": 0.14006774257820284, "grad_norm": 569.1845092773438, "learning_rate": 1.9916136541192786e-06, "loss": 18.4062, "step": 2109 }, { "epoch": 0.14013415687055855, "grad_norm": 225.4589385986328, "learning_rate": 1.991599748625802e-06, "loss": 25.1562, "step": 2110 }, { "epoch": 0.14020057116291426, "grad_norm": 874.2415161132812, "learning_rate": 1.991585831662071e-06, "loss": 15.7969, "step": 2111 }, { "epoch": 0.14026698545526997, "grad_norm": 232.9945831298828, "learning_rate": 1.9915719032282474e-06, "loss": 22.9688, "step": 2112 }, { "epoch": 0.14033339974762568, "grad_norm": 6513.03564453125, "learning_rate": 1.9915579633244913e-06, "loss": 19.4062, "step": 2113 }, { "epoch": 0.1403998140399814, "grad_norm": 265.5475158691406, "learning_rate": 1.9915440119509647e-06, "loss": 24.9062, "step": 2114 }, { "epoch": 0.14046622833233713, "grad_norm": 392.7364807128906, "learning_rate": 1.9915300491078285e-06, "loss": 22.7344, "step": 2115 }, { "epoch": 0.14053264262469284, "grad_norm": 192.9947967529297, "learning_rate": 1.991516074795245e-06, "loss": 21.5781, "step": 2116 }, { "epoch": 0.14059905691704855, "grad_norm": 619.211669921875, "learning_rate": 1.991502089013375e-06, "loss": 17.6406, "step": 2117 }, { "epoch": 0.14066547120940426, "grad_norm": 166.25315856933594, "learning_rate": 1.99148809176238e-06, "loss": 20.5, "step": 2118 }, { "epoch": 0.14073188550175997, "grad_norm": 302.8773498535156, "learning_rate": 1.9914740830424232e-06, "loss": 28.4531, "step": 2119 }, { "epoch": 0.1407982997941157, "grad_norm": 169.34600830078125, "learning_rate": 1.9914600628536657e-06, "loss": 20.0625, "step": 2120 }, { "epoch": 0.14086471408647142, "grad_norm": 528.6930541992188, "learning_rate": 1.9914460311962697e-06, "loss": 18.2969, "step": 2121 }, { "epoch": 0.14093112837882713, "grad_norm": 685.5098266601562, "learning_rate": 1.991431988070398e-06, "loss": 20.4375, "step": 2122 }, { "epoch": 0.14099754267118284, "grad_norm": 228.32479858398438, "learning_rate": 1.9914179334762128e-06, "loss": 19.5625, "step": 2123 }, { "epoch": 0.14106395696353854, "grad_norm": 221.23165893554688, "learning_rate": 1.9914038674138762e-06, "loss": 17.7344, "step": 2124 }, { "epoch": 0.14113037125589425, "grad_norm": 233.4306182861328, "learning_rate": 1.9913897898835517e-06, "loss": 20.7812, "step": 2125 }, { "epoch": 0.14119678554825, "grad_norm": 166.40286254882812, "learning_rate": 1.991375700885402e-06, "loss": 18.8906, "step": 2126 }, { "epoch": 0.1412631998406057, "grad_norm": 253.73452758789062, "learning_rate": 1.9913616004195894e-06, "loss": 18.2188, "step": 2127 }, { "epoch": 0.1413296141329614, "grad_norm": 286.8411560058594, "learning_rate": 1.9913474884862776e-06, "loss": 21.5625, "step": 2128 }, { "epoch": 0.14139602842531712, "grad_norm": 347.189208984375, "learning_rate": 1.9913333650856296e-06, "loss": 19.0938, "step": 2129 }, { "epoch": 0.14146244271767283, "grad_norm": 258.65765380859375, "learning_rate": 1.991319230217809e-06, "loss": 21.1094, "step": 2130 }, { "epoch": 0.14152885701002857, "grad_norm": 411.49560546875, "learning_rate": 1.991305083882979e-06, "loss": 21.2188, "step": 2131 }, { "epoch": 0.14159527130238428, "grad_norm": 162.8927764892578, "learning_rate": 1.991290926081304e-06, "loss": 15.875, "step": 2132 }, { "epoch": 0.14166168559474, "grad_norm": 275.2839660644531, "learning_rate": 1.9912767568129465e-06, "loss": 19.5781, "step": 2133 }, { "epoch": 0.1417280998870957, "grad_norm": 480.0184631347656, "learning_rate": 1.991262576078071e-06, "loss": 20.875, "step": 2134 }, { "epoch": 0.1417945141794514, "grad_norm": 237.65989685058594, "learning_rate": 1.991248383876842e-06, "loss": 21.5938, "step": 2135 }, { "epoch": 0.14186092847180715, "grad_norm": 189.04786682128906, "learning_rate": 1.9912341802094234e-06, "loss": 24.6875, "step": 2136 }, { "epoch": 0.14192734276416286, "grad_norm": 358.12237548828125, "learning_rate": 1.9912199650759793e-06, "loss": 28.375, "step": 2137 }, { "epoch": 0.14199375705651857, "grad_norm": 368.61505126953125, "learning_rate": 1.9912057384766737e-06, "loss": 22.6875, "step": 2138 }, { "epoch": 0.14206017134887428, "grad_norm": 131.36427307128906, "learning_rate": 1.9911915004116726e-06, "loss": 18.9688, "step": 2139 }, { "epoch": 0.14212658564122999, "grad_norm": 451.03155517578125, "learning_rate": 1.991177250881139e-06, "loss": 20.75, "step": 2140 }, { "epoch": 0.1421929999335857, "grad_norm": 242.71896362304688, "learning_rate": 1.9911629898852388e-06, "loss": 24.6406, "step": 2141 }, { "epoch": 0.14225941422594143, "grad_norm": 198.3270721435547, "learning_rate": 1.991148717424137e-06, "loss": 21.375, "step": 2142 }, { "epoch": 0.14232582851829714, "grad_norm": 280.8441467285156, "learning_rate": 1.9911344334979975e-06, "loss": 24.125, "step": 2143 }, { "epoch": 0.14239224281065285, "grad_norm": 221.64739990234375, "learning_rate": 1.991120138106987e-06, "loss": 21.3438, "step": 2144 }, { "epoch": 0.14245865710300856, "grad_norm": 354.2916259765625, "learning_rate": 1.99110583125127e-06, "loss": 25.0625, "step": 2145 }, { "epoch": 0.14252507139536427, "grad_norm": 273.2012023925781, "learning_rate": 1.9910915129310125e-06, "loss": 19.375, "step": 2146 }, { "epoch": 0.14259148568772, "grad_norm": 244.37144470214844, "learning_rate": 1.99107718314638e-06, "loss": 21.4531, "step": 2147 }, { "epoch": 0.14265789998007572, "grad_norm": 124.92252349853516, "learning_rate": 1.9910628418975378e-06, "loss": 19.9688, "step": 2148 }, { "epoch": 0.14272431427243143, "grad_norm": 140.49412536621094, "learning_rate": 1.9910484891846524e-06, "loss": 22.5156, "step": 2149 }, { "epoch": 0.14279072856478714, "grad_norm": 302.91656494140625, "learning_rate": 1.9910341250078894e-06, "loss": 25.5, "step": 2150 }, { "epoch": 0.14285714285714285, "grad_norm": 288.45867919921875, "learning_rate": 1.991019749367415e-06, "loss": 21.1406, "step": 2151 }, { "epoch": 0.14292355714949856, "grad_norm": 254.25210571289062, "learning_rate": 1.9910053622633957e-06, "loss": 22.5312, "step": 2152 }, { "epoch": 0.1429899714418543, "grad_norm": 158.59722900390625, "learning_rate": 1.9909909636959977e-06, "loss": 20.8125, "step": 2153 }, { "epoch": 0.14305638573421, "grad_norm": 293.8788146972656, "learning_rate": 1.9909765536653876e-06, "loss": 21.1094, "step": 2154 }, { "epoch": 0.14312280002656572, "grad_norm": 332.20941162109375, "learning_rate": 1.9909621321717325e-06, "loss": 19.6641, "step": 2155 }, { "epoch": 0.14318921431892143, "grad_norm": 275.2470397949219, "learning_rate": 1.9909476992151987e-06, "loss": 24.9375, "step": 2156 }, { "epoch": 0.14325562861127714, "grad_norm": 102.42245483398438, "learning_rate": 1.990933254795953e-06, "loss": 16.625, "step": 2157 }, { "epoch": 0.14332204290363287, "grad_norm": 350.7252502441406, "learning_rate": 1.9909187989141633e-06, "loss": 17.8438, "step": 2158 }, { "epoch": 0.14338845719598858, "grad_norm": 421.07037353515625, "learning_rate": 1.9909043315699964e-06, "loss": 25.4062, "step": 2159 }, { "epoch": 0.1434548714883443, "grad_norm": 251.33331298828125, "learning_rate": 1.990889852763619e-06, "loss": 25.3125, "step": 2160 }, { "epoch": 0.1435212857807, "grad_norm": 467.2099609375, "learning_rate": 1.9908753624952e-06, "loss": 23.2812, "step": 2161 }, { "epoch": 0.1435877000730557, "grad_norm": 428.1255187988281, "learning_rate": 1.9908608607649057e-06, "loss": 22.8125, "step": 2162 }, { "epoch": 0.14365411436541142, "grad_norm": 235.9037628173828, "learning_rate": 1.990846347572904e-06, "loss": 19.6094, "step": 2163 }, { "epoch": 0.14372052865776716, "grad_norm": 258.2474060058594, "learning_rate": 1.9908318229193637e-06, "loss": 18.2969, "step": 2164 }, { "epoch": 0.14378694295012287, "grad_norm": 261.1006164550781, "learning_rate": 1.990817286804452e-06, "loss": 29.5625, "step": 2165 }, { "epoch": 0.14385335724247858, "grad_norm": 239.29266357421875, "learning_rate": 1.990802739228337e-06, "loss": 19.9062, "step": 2166 }, { "epoch": 0.1439197715348343, "grad_norm": 294.4657287597656, "learning_rate": 1.9907881801911876e-06, "loss": 29.1719, "step": 2167 }, { "epoch": 0.14398618582719, "grad_norm": 389.4125061035156, "learning_rate": 1.990773609693172e-06, "loss": 22.2344, "step": 2168 }, { "epoch": 0.14405260011954574, "grad_norm": 295.5668640136719, "learning_rate": 1.990759027734458e-06, "loss": 18.1562, "step": 2169 }, { "epoch": 0.14411901441190145, "grad_norm": 199.111083984375, "learning_rate": 1.990744434315215e-06, "loss": 17.5156, "step": 2170 }, { "epoch": 0.14418542870425716, "grad_norm": 322.2822265625, "learning_rate": 1.990729829435612e-06, "loss": 30.4062, "step": 2171 }, { "epoch": 0.14425184299661287, "grad_norm": 313.6325378417969, "learning_rate": 1.9907152130958175e-06, "loss": 18.5781, "step": 2172 }, { "epoch": 0.14431825728896858, "grad_norm": 151.46725463867188, "learning_rate": 1.9907005852960005e-06, "loss": 18.875, "step": 2173 }, { "epoch": 0.14438467158132431, "grad_norm": 182.58026123046875, "learning_rate": 1.9906859460363304e-06, "loss": 20.0625, "step": 2174 }, { "epoch": 0.14445108587368002, "grad_norm": 204.6225128173828, "learning_rate": 1.9906712953169767e-06, "loss": 18.0312, "step": 2175 }, { "epoch": 0.14451750016603573, "grad_norm": 163.34686279296875, "learning_rate": 1.990656633138109e-06, "loss": 20.6562, "step": 2176 }, { "epoch": 0.14458391445839144, "grad_norm": 398.35150146484375, "learning_rate": 1.9906419594998963e-06, "loss": 19.9375, "step": 2177 }, { "epoch": 0.14465032875074715, "grad_norm": 238.12269592285156, "learning_rate": 1.9906272744025085e-06, "loss": 20.4844, "step": 2178 }, { "epoch": 0.14471674304310286, "grad_norm": 289.58477783203125, "learning_rate": 1.990612577846116e-06, "loss": 27.9062, "step": 2179 }, { "epoch": 0.1447831573354586, "grad_norm": 230.09579467773438, "learning_rate": 1.9905978698308883e-06, "loss": 22.8125, "step": 2180 }, { "epoch": 0.1448495716278143, "grad_norm": 291.2015380859375, "learning_rate": 1.9905831503569957e-06, "loss": 23.6562, "step": 2181 }, { "epoch": 0.14491598592017002, "grad_norm": 1100.292236328125, "learning_rate": 1.990568419424608e-06, "loss": 23.2344, "step": 2182 }, { "epoch": 0.14498240021252573, "grad_norm": 250.6841583251953, "learning_rate": 1.9905536770338966e-06, "loss": 19.7812, "step": 2183 }, { "epoch": 0.14504881450488144, "grad_norm": 149.06103515625, "learning_rate": 1.990538923185031e-06, "loss": 16.7812, "step": 2184 }, { "epoch": 0.14511522879723718, "grad_norm": 257.6304016113281, "learning_rate": 1.9905241578781823e-06, "loss": 20.7344, "step": 2185 }, { "epoch": 0.1451816430895929, "grad_norm": 154.88247680664062, "learning_rate": 1.9905093811135218e-06, "loss": 18.0156, "step": 2186 }, { "epoch": 0.1452480573819486, "grad_norm": 204.49314880371094, "learning_rate": 1.9904945928912194e-06, "loss": 25.4062, "step": 2187 }, { "epoch": 0.1453144716743043, "grad_norm": 215.80096435546875, "learning_rate": 1.990479793211447e-06, "loss": 20.125, "step": 2188 }, { "epoch": 0.14538088596666002, "grad_norm": 251.28587341308594, "learning_rate": 1.9904649820743755e-06, "loss": 25.25, "step": 2189 }, { "epoch": 0.14544730025901573, "grad_norm": 267.25433349609375, "learning_rate": 1.990450159480176e-06, "loss": 19.3906, "step": 2190 }, { "epoch": 0.14551371455137146, "grad_norm": 135.71556091308594, "learning_rate": 1.9904353254290202e-06, "loss": 16.0625, "step": 2191 }, { "epoch": 0.14558012884372717, "grad_norm": 163.41448974609375, "learning_rate": 1.99042047992108e-06, "loss": 20.7344, "step": 2192 }, { "epoch": 0.14564654313608288, "grad_norm": 246.14378356933594, "learning_rate": 1.9904056229565267e-06, "loss": 22.8438, "step": 2193 }, { "epoch": 0.1457129574284386, "grad_norm": 224.11546325683594, "learning_rate": 1.9903907545355317e-06, "loss": 22.5312, "step": 2194 }, { "epoch": 0.1457793717207943, "grad_norm": 369.57965087890625, "learning_rate": 1.9903758746582682e-06, "loss": 18.9062, "step": 2195 }, { "epoch": 0.14584578601315004, "grad_norm": 293.4088134765625, "learning_rate": 1.9903609833249075e-06, "loss": 21.3906, "step": 2196 }, { "epoch": 0.14591220030550575, "grad_norm": 415.41522216796875, "learning_rate": 1.9903460805356222e-06, "loss": 23.0625, "step": 2197 }, { "epoch": 0.14597861459786146, "grad_norm": 124.77845001220703, "learning_rate": 1.990331166290584e-06, "loss": 19.2188, "step": 2198 }, { "epoch": 0.14604502889021717, "grad_norm": 293.950927734375, "learning_rate": 1.9903162405899664e-06, "loss": 16.4688, "step": 2199 }, { "epoch": 0.14611144318257288, "grad_norm": 312.6014709472656, "learning_rate": 1.9903013034339413e-06, "loss": 18.5469, "step": 2200 }, { "epoch": 0.1461778574749286, "grad_norm": 330.74053955078125, "learning_rate": 1.9902863548226817e-06, "loss": 26.5156, "step": 2201 }, { "epoch": 0.14624427176728433, "grad_norm": 163.4642791748047, "learning_rate": 1.9902713947563606e-06, "loss": 18.3125, "step": 2202 }, { "epoch": 0.14631068605964004, "grad_norm": 208.42530822753906, "learning_rate": 1.9902564232351514e-06, "loss": 22.9062, "step": 2203 }, { "epoch": 0.14637710035199575, "grad_norm": 437.2677307128906, "learning_rate": 1.990241440259227e-06, "loss": 18.5156, "step": 2204 }, { "epoch": 0.14644351464435146, "grad_norm": 320.3740234375, "learning_rate": 1.99022644582876e-06, "loss": 20.875, "step": 2205 }, { "epoch": 0.14650992893670717, "grad_norm": 267.2911071777344, "learning_rate": 1.9902114399439247e-06, "loss": 23.9062, "step": 2206 }, { "epoch": 0.1465763432290629, "grad_norm": 365.0127868652344, "learning_rate": 1.9901964226048946e-06, "loss": 18.8438, "step": 2207 }, { "epoch": 0.14664275752141862, "grad_norm": 497.83294677734375, "learning_rate": 1.990181393811843e-06, "loss": 26.5312, "step": 2208 }, { "epoch": 0.14670917181377433, "grad_norm": 152.0484161376953, "learning_rate": 1.990166353564944e-06, "loss": 15.9219, "step": 2209 }, { "epoch": 0.14677558610613003, "grad_norm": 403.9521484375, "learning_rate": 1.990151301864372e-06, "loss": 20.3906, "step": 2210 }, { "epoch": 0.14684200039848574, "grad_norm": 143.48831176757812, "learning_rate": 1.990136238710301e-06, "loss": 14.3281, "step": 2211 }, { "epoch": 0.14690841469084148, "grad_norm": 227.50184631347656, "learning_rate": 1.990121164102904e-06, "loss": 20.2969, "step": 2212 }, { "epoch": 0.1469748289831972, "grad_norm": 134.81787109375, "learning_rate": 1.9901060780423568e-06, "loss": 22.7812, "step": 2213 }, { "epoch": 0.1470412432755529, "grad_norm": 262.168701171875, "learning_rate": 1.9900909805288333e-06, "loss": 27.2188, "step": 2214 }, { "epoch": 0.1471076575679086, "grad_norm": 317.20294189453125, "learning_rate": 1.9900758715625083e-06, "loss": 20.75, "step": 2215 }, { "epoch": 0.14717407186026432, "grad_norm": 345.4319763183594, "learning_rate": 1.9900607511435562e-06, "loss": 17.9531, "step": 2216 }, { "epoch": 0.14724048615262003, "grad_norm": 189.05503845214844, "learning_rate": 1.990045619272153e-06, "loss": 22.75, "step": 2217 }, { "epoch": 0.14730690044497577, "grad_norm": 598.2943115234375, "learning_rate": 1.9900304759484725e-06, "loss": 23.4531, "step": 2218 }, { "epoch": 0.14737331473733148, "grad_norm": 408.8236999511719, "learning_rate": 1.99001532117269e-06, "loss": 26.5, "step": 2219 }, { "epoch": 0.1474397290296872, "grad_norm": 163.43045043945312, "learning_rate": 1.9900001549449816e-06, "loss": 17.6562, "step": 2220 }, { "epoch": 0.1475061433220429, "grad_norm": 302.757080078125, "learning_rate": 1.9899849772655224e-06, "loss": 22.9375, "step": 2221 }, { "epoch": 0.1475725576143986, "grad_norm": 202.50523376464844, "learning_rate": 1.9899697881344873e-06, "loss": 23.2188, "step": 2222 }, { "epoch": 0.14763897190675435, "grad_norm": 200.63278198242188, "learning_rate": 1.989954587552053e-06, "loss": 23.5625, "step": 2223 }, { "epoch": 0.14770538619911006, "grad_norm": 242.4264373779297, "learning_rate": 1.9899393755183948e-06, "loss": 20.4375, "step": 2224 }, { "epoch": 0.14777180049146577, "grad_norm": 223.93186950683594, "learning_rate": 1.9899241520336883e-06, "loss": 19.6875, "step": 2225 }, { "epoch": 0.14783821478382148, "grad_norm": 146.2649383544922, "learning_rate": 1.989908917098111e-06, "loss": 18.4219, "step": 2226 }, { "epoch": 0.14790462907617719, "grad_norm": 226.04214477539062, "learning_rate": 1.989893670711837e-06, "loss": 21.7188, "step": 2227 }, { "epoch": 0.1479710433685329, "grad_norm": 251.86331176757812, "learning_rate": 1.9898784128750446e-06, "loss": 22.5156, "step": 2228 }, { "epoch": 0.14803745766088863, "grad_norm": 239.9044189453125, "learning_rate": 1.989863143587909e-06, "loss": 25.1562, "step": 2229 }, { "epoch": 0.14810387195324434, "grad_norm": 304.61810302734375, "learning_rate": 1.989847862850608e-06, "loss": 31.2188, "step": 2230 }, { "epoch": 0.14817028624560005, "grad_norm": 170.96096801757812, "learning_rate": 1.989832570663317e-06, "loss": 21.0938, "step": 2231 }, { "epoch": 0.14823670053795576, "grad_norm": 300.91961669921875, "learning_rate": 1.9898172670262145e-06, "loss": 18.4688, "step": 2232 }, { "epoch": 0.14830311483031147, "grad_norm": 283.8065490722656, "learning_rate": 1.9898019519394756e-06, "loss": 21.4844, "step": 2233 }, { "epoch": 0.1483695291226672, "grad_norm": 146.84117126464844, "learning_rate": 1.9897866254032792e-06, "loss": 18.9375, "step": 2234 }, { "epoch": 0.14843594341502292, "grad_norm": 309.73297119140625, "learning_rate": 1.9897712874178017e-06, "loss": 21.7656, "step": 2235 }, { "epoch": 0.14850235770737863, "grad_norm": 3031.14990234375, "learning_rate": 1.9897559379832203e-06, "loss": 19.8594, "step": 2236 }, { "epoch": 0.14856877199973434, "grad_norm": 258.3597106933594, "learning_rate": 1.989740577099713e-06, "loss": 17.2031, "step": 2237 }, { "epoch": 0.14863518629209005, "grad_norm": 201.1004638671875, "learning_rate": 1.989725204767458e-06, "loss": 21.9219, "step": 2238 }, { "epoch": 0.14870160058444576, "grad_norm": 276.77130126953125, "learning_rate": 1.989709820986632e-06, "loss": 19.2188, "step": 2239 }, { "epoch": 0.1487680148768015, "grad_norm": 173.17742919921875, "learning_rate": 1.9896944257574137e-06, "loss": 24.1406, "step": 2240 }, { "epoch": 0.1488344291691572, "grad_norm": 261.83245849609375, "learning_rate": 1.9896790190799807e-06, "loss": 24.4844, "step": 2241 }, { "epoch": 0.14890084346151292, "grad_norm": 404.13800048828125, "learning_rate": 1.989663600954512e-06, "loss": 29.6094, "step": 2242 }, { "epoch": 0.14896725775386863, "grad_norm": 1043.0369873046875, "learning_rate": 1.989648171381185e-06, "loss": 20.875, "step": 2243 }, { "epoch": 0.14903367204622434, "grad_norm": 403.8919372558594, "learning_rate": 1.9896327303601786e-06, "loss": 22.2188, "step": 2244 }, { "epoch": 0.14910008633858007, "grad_norm": 235.02114868164062, "learning_rate": 1.989617277891672e-06, "loss": 24.2969, "step": 2245 }, { "epoch": 0.14916650063093578, "grad_norm": 203.4889373779297, "learning_rate": 1.9896018139758426e-06, "loss": 24.1875, "step": 2246 }, { "epoch": 0.1492329149232915, "grad_norm": 169.3493194580078, "learning_rate": 1.9895863386128704e-06, "loss": 16.6406, "step": 2247 }, { "epoch": 0.1492993292156472, "grad_norm": 199.01425170898438, "learning_rate": 1.9895708518029345e-06, "loss": 22.0781, "step": 2248 }, { "epoch": 0.1493657435080029, "grad_norm": 140.9053497314453, "learning_rate": 1.9895553535462127e-06, "loss": 18.9844, "step": 2249 }, { "epoch": 0.14943215780035865, "grad_norm": 205.46127319335938, "learning_rate": 1.989539843842886e-06, "loss": 23.9219, "step": 2250 }, { "epoch": 0.14949857209271436, "grad_norm": 164.22605895996094, "learning_rate": 1.9895243226931327e-06, "loss": 17.4531, "step": 2251 }, { "epoch": 0.14956498638507007, "grad_norm": 253.81427001953125, "learning_rate": 1.9895087900971323e-06, "loss": 23.6094, "step": 2252 }, { "epoch": 0.14963140067742578, "grad_norm": 248.38421630859375, "learning_rate": 1.9894932460550652e-06, "loss": 23.2656, "step": 2253 }, { "epoch": 0.1496978149697815, "grad_norm": 93.02574157714844, "learning_rate": 1.989477690567111e-06, "loss": 15.5938, "step": 2254 }, { "epoch": 0.1497642292621372, "grad_norm": 155.19058227539062, "learning_rate": 1.9894621236334486e-06, "loss": 19.5312, "step": 2255 }, { "epoch": 0.14983064355449294, "grad_norm": 307.08135986328125, "learning_rate": 1.9894465452542595e-06, "loss": 20.1562, "step": 2256 }, { "epoch": 0.14989705784684865, "grad_norm": 191.31906127929688, "learning_rate": 1.9894309554297237e-06, "loss": 24.8125, "step": 2257 }, { "epoch": 0.14996347213920436, "grad_norm": 168.09634399414062, "learning_rate": 1.9894153541600206e-06, "loss": 23.4062, "step": 2258 }, { "epoch": 0.15002988643156007, "grad_norm": 294.7003479003906, "learning_rate": 1.989399741445331e-06, "loss": 26.4688, "step": 2259 }, { "epoch": 0.15009630072391578, "grad_norm": 142.7393798828125, "learning_rate": 1.989384117285836e-06, "loss": 18.3906, "step": 2260 }, { "epoch": 0.15016271501627151, "grad_norm": 409.6452331542969, "learning_rate": 1.9893684816817165e-06, "loss": 22.5, "step": 2261 }, { "epoch": 0.15022912930862722, "grad_norm": 340.8608093261719, "learning_rate": 1.9893528346331522e-06, "loss": 23.8125, "step": 2262 }, { "epoch": 0.15029554360098293, "grad_norm": 270.0186767578125, "learning_rate": 1.9893371761403253e-06, "loss": 26.2188, "step": 2263 }, { "epoch": 0.15036195789333864, "grad_norm": 130.88206481933594, "learning_rate": 1.9893215062034163e-06, "loss": 20.7656, "step": 2264 }, { "epoch": 0.15042837218569435, "grad_norm": 181.55084228515625, "learning_rate": 1.9893058248226063e-06, "loss": 21.5312, "step": 2265 }, { "epoch": 0.15049478647805006, "grad_norm": 222.0379180908203, "learning_rate": 1.989290131998077e-06, "loss": 25.8438, "step": 2266 }, { "epoch": 0.1505612007704058, "grad_norm": 216.14259338378906, "learning_rate": 1.9892744277300105e-06, "loss": 20.7812, "step": 2267 }, { "epoch": 0.1506276150627615, "grad_norm": 505.40234375, "learning_rate": 1.989258712018587e-06, "loss": 22.7188, "step": 2268 }, { "epoch": 0.15069402935511722, "grad_norm": 144.4697723388672, "learning_rate": 1.9892429848639897e-06, "loss": 23.7344, "step": 2269 }, { "epoch": 0.15076044364747293, "grad_norm": 137.51600646972656, "learning_rate": 1.9892272462664002e-06, "loss": 14.9219, "step": 2270 }, { "epoch": 0.15082685793982864, "grad_norm": 390.1280517578125, "learning_rate": 1.989211496226e-06, "loss": 18.8281, "step": 2271 }, { "epoch": 0.15089327223218438, "grad_norm": 235.4618682861328, "learning_rate": 1.9891957347429715e-06, "loss": 21.4375, "step": 2272 }, { "epoch": 0.1509596865245401, "grad_norm": 102.98108673095703, "learning_rate": 1.9891799618174973e-06, "loss": 16.2188, "step": 2273 }, { "epoch": 0.1510261008168958, "grad_norm": 141.96839904785156, "learning_rate": 1.98916417744976e-06, "loss": 23.5312, "step": 2274 }, { "epoch": 0.1510925151092515, "grad_norm": 308.327880859375, "learning_rate": 1.9891483816399416e-06, "loss": 19.4844, "step": 2275 }, { "epoch": 0.15115892940160722, "grad_norm": 288.2676696777344, "learning_rate": 1.9891325743882248e-06, "loss": 24.3125, "step": 2276 }, { "epoch": 0.15122534369396293, "grad_norm": 206.29139709472656, "learning_rate": 1.989116755694793e-06, "loss": 19.7656, "step": 2277 }, { "epoch": 0.15129175798631866, "grad_norm": 355.12347412109375, "learning_rate": 1.989100925559829e-06, "loss": 16.7812, "step": 2278 }, { "epoch": 0.15135817227867437, "grad_norm": 370.97564697265625, "learning_rate": 1.9890850839835157e-06, "loss": 24.9688, "step": 2279 }, { "epoch": 0.15142458657103008, "grad_norm": 318.1725158691406, "learning_rate": 1.9890692309660365e-06, "loss": 29.25, "step": 2280 }, { "epoch": 0.1514910008633858, "grad_norm": 172.35215759277344, "learning_rate": 1.989053366507575e-06, "loss": 17.8281, "step": 2281 }, { "epoch": 0.1515574151557415, "grad_norm": 254.23672485351562, "learning_rate": 1.9890374906083147e-06, "loss": 18.7656, "step": 2282 }, { "epoch": 0.15162382944809724, "grad_norm": 1465.0810546875, "learning_rate": 1.9890216032684383e-06, "loss": 22.7812, "step": 2283 }, { "epoch": 0.15169024374045295, "grad_norm": 274.3401794433594, "learning_rate": 1.9890057044881306e-06, "loss": 23.9688, "step": 2284 }, { "epoch": 0.15175665803280866, "grad_norm": 114.15382385253906, "learning_rate": 1.988989794267575e-06, "loss": 20.3125, "step": 2285 }, { "epoch": 0.15182307232516437, "grad_norm": 221.3769073486328, "learning_rate": 1.9889738726069563e-06, "loss": 26.2188, "step": 2286 }, { "epoch": 0.15188948661752008, "grad_norm": 157.0824737548828, "learning_rate": 1.988957939506458e-06, "loss": 20.625, "step": 2287 }, { "epoch": 0.15195590090987582, "grad_norm": 163.29617309570312, "learning_rate": 1.988941994966264e-06, "loss": 21.6875, "step": 2288 }, { "epoch": 0.15202231520223153, "grad_norm": 239.69627380371094, "learning_rate": 1.9889260389865595e-06, "loss": 21.5156, "step": 2289 }, { "epoch": 0.15208872949458724, "grad_norm": 280.15545654296875, "learning_rate": 1.988910071567529e-06, "loss": 22.25, "step": 2290 }, { "epoch": 0.15215514378694295, "grad_norm": 270.456298828125, "learning_rate": 1.9888940927093574e-06, "loss": 25.75, "step": 2291 }, { "epoch": 0.15222155807929866, "grad_norm": 193.64761352539062, "learning_rate": 1.988878102412228e-06, "loss": 22.0, "step": 2292 }, { "epoch": 0.15228797237165437, "grad_norm": 219.55734252929688, "learning_rate": 1.988862100676328e-06, "loss": 20.9844, "step": 2293 }, { "epoch": 0.1523543866640101, "grad_norm": 270.7129211425781, "learning_rate": 1.988846087501841e-06, "loss": 23.6094, "step": 2294 }, { "epoch": 0.15242080095636582, "grad_norm": 193.88417053222656, "learning_rate": 1.9888300628889525e-06, "loss": 18.8438, "step": 2295 }, { "epoch": 0.15248721524872152, "grad_norm": 253.81207275390625, "learning_rate": 1.9888140268378488e-06, "loss": 22.25, "step": 2296 }, { "epoch": 0.15255362954107723, "grad_norm": 208.48072814941406, "learning_rate": 1.988797979348714e-06, "loss": 19.9375, "step": 2297 }, { "epoch": 0.15262004383343294, "grad_norm": 203.0089874267578, "learning_rate": 1.9887819204217343e-06, "loss": 19.625, "step": 2298 }, { "epoch": 0.15268645812578868, "grad_norm": 471.6549377441406, "learning_rate": 1.9887658500570954e-06, "loss": 30.6875, "step": 2299 }, { "epoch": 0.1527528724181444, "grad_norm": 218.5962371826172, "learning_rate": 1.988749768254984e-06, "loss": 19.0, "step": 2300 }, { "epoch": 0.1528192867105001, "grad_norm": 916.2042846679688, "learning_rate": 1.9887336750155845e-06, "loss": 28.1562, "step": 2301 }, { "epoch": 0.1528857010028558, "grad_norm": 305.9306945800781, "learning_rate": 1.9887175703390848e-06, "loss": 21.7812, "step": 2302 }, { "epoch": 0.15295211529521152, "grad_norm": 132.9132537841797, "learning_rate": 1.98870145422567e-06, "loss": 15.1562, "step": 2303 }, { "epoch": 0.15301852958756723, "grad_norm": 181.39979553222656, "learning_rate": 1.9886853266755266e-06, "loss": 23.125, "step": 2304 }, { "epoch": 0.15308494387992297, "grad_norm": 263.02008056640625, "learning_rate": 1.9886691876888418e-06, "loss": 25.2188, "step": 2305 }, { "epoch": 0.15315135817227868, "grad_norm": 151.82070922851562, "learning_rate": 1.9886530372658022e-06, "loss": 20.0938, "step": 2306 }, { "epoch": 0.1532177724646344, "grad_norm": 210.14402770996094, "learning_rate": 1.988636875406594e-06, "loss": 20.9844, "step": 2307 }, { "epoch": 0.1532841867569901, "grad_norm": 548.9359741210938, "learning_rate": 1.9886207021114046e-06, "loss": 23.4844, "step": 2308 }, { "epoch": 0.1533506010493458, "grad_norm": 141.5565185546875, "learning_rate": 1.988604517380421e-06, "loss": 15.0312, "step": 2309 }, { "epoch": 0.15341701534170155, "grad_norm": 1132.348388671875, "learning_rate": 1.9885883212138303e-06, "loss": 18.9219, "step": 2310 }, { "epoch": 0.15348342963405726, "grad_norm": 220.3311004638672, "learning_rate": 1.98857211361182e-06, "loss": 18.7188, "step": 2311 }, { "epoch": 0.15354984392641297, "grad_norm": 588.7526245117188, "learning_rate": 1.9885558945745774e-06, "loss": 20.6875, "step": 2312 }, { "epoch": 0.15361625821876868, "grad_norm": 209.12164306640625, "learning_rate": 1.9885396641022907e-06, "loss": 18.3438, "step": 2313 }, { "epoch": 0.15368267251112439, "grad_norm": 225.9146728515625, "learning_rate": 1.9885234221951464e-06, "loss": 20.9375, "step": 2314 }, { "epoch": 0.1537490868034801, "grad_norm": 201.63720703125, "learning_rate": 1.988507168853334e-06, "loss": 28.5938, "step": 2315 }, { "epoch": 0.15381550109583583, "grad_norm": 192.87400817871094, "learning_rate": 1.9884909040770403e-06, "loss": 27.4062, "step": 2316 }, { "epoch": 0.15388191538819154, "grad_norm": 309.74505615234375, "learning_rate": 1.988474627866454e-06, "loss": 29.6562, "step": 2317 }, { "epoch": 0.15394832968054725, "grad_norm": 319.4832763671875, "learning_rate": 1.9884583402217627e-06, "loss": 17.1875, "step": 2318 }, { "epoch": 0.15401474397290296, "grad_norm": 223.8599395751953, "learning_rate": 1.9884420411431556e-06, "loss": 23.1406, "step": 2319 }, { "epoch": 0.15408115826525867, "grad_norm": 663.4998779296875, "learning_rate": 1.988425730630821e-06, "loss": 24.1406, "step": 2320 }, { "epoch": 0.1541475725576144, "grad_norm": 235.3331756591797, "learning_rate": 1.9884094086849474e-06, "loss": 19.4688, "step": 2321 }, { "epoch": 0.15421398684997012, "grad_norm": 226.42308044433594, "learning_rate": 1.9883930753057237e-06, "loss": 20.2344, "step": 2322 }, { "epoch": 0.15428040114232583, "grad_norm": 194.1866912841797, "learning_rate": 1.988376730493339e-06, "loss": 16.9375, "step": 2323 }, { "epoch": 0.15434681543468154, "grad_norm": 143.13926696777344, "learning_rate": 1.9883603742479824e-06, "loss": 18.7812, "step": 2324 }, { "epoch": 0.15441322972703725, "grad_norm": 212.00811767578125, "learning_rate": 1.9883440065698425e-06, "loss": 26.5312, "step": 2325 }, { "epoch": 0.154479644019393, "grad_norm": 154.84805297851562, "learning_rate": 1.9883276274591093e-06, "loss": 19.0625, "step": 2326 }, { "epoch": 0.1545460583117487, "grad_norm": 369.04217529296875, "learning_rate": 1.988311236915972e-06, "loss": 21.625, "step": 2327 }, { "epoch": 0.1546124726041044, "grad_norm": 145.0797119140625, "learning_rate": 1.9882948349406197e-06, "loss": 27.5625, "step": 2328 }, { "epoch": 0.15467888689646012, "grad_norm": 189.28956604003906, "learning_rate": 1.9882784215332433e-06, "loss": 20.2031, "step": 2329 }, { "epoch": 0.15474530118881583, "grad_norm": 266.5901184082031, "learning_rate": 1.9882619966940316e-06, "loss": 20.9844, "step": 2330 }, { "epoch": 0.15481171548117154, "grad_norm": 180.90042114257812, "learning_rate": 1.9882455604231755e-06, "loss": 25.125, "step": 2331 }, { "epoch": 0.15487812977352727, "grad_norm": 207.79135131835938, "learning_rate": 1.988229112720864e-06, "loss": 22.1562, "step": 2332 }, { "epoch": 0.15494454406588298, "grad_norm": 458.1941223144531, "learning_rate": 1.988212653587288e-06, "loss": 20.1562, "step": 2333 }, { "epoch": 0.1550109583582387, "grad_norm": 285.005615234375, "learning_rate": 1.9881961830226383e-06, "loss": 19.9688, "step": 2334 }, { "epoch": 0.1550773726505944, "grad_norm": 401.6538391113281, "learning_rate": 1.988179701027105e-06, "loss": 22.8438, "step": 2335 }, { "epoch": 0.1551437869429501, "grad_norm": 636.338623046875, "learning_rate": 1.9881632076008783e-06, "loss": 19.5312, "step": 2336 }, { "epoch": 0.15521020123530585, "grad_norm": 262.7098388671875, "learning_rate": 1.9881467027441492e-06, "loss": 22.7812, "step": 2337 }, { "epoch": 0.15527661552766156, "grad_norm": 1008.966552734375, "learning_rate": 1.9881301864571095e-06, "loss": 18.8594, "step": 2338 }, { "epoch": 0.15534302982001727, "grad_norm": 134.3524169921875, "learning_rate": 1.988113658739949e-06, "loss": 20.0469, "step": 2339 }, { "epoch": 0.15540944411237298, "grad_norm": 196.59439086914062, "learning_rate": 1.9880971195928603e-06, "loss": 25.375, "step": 2340 }, { "epoch": 0.1554758584047287, "grad_norm": 142.83468627929688, "learning_rate": 1.9880805690160333e-06, "loss": 21.4375, "step": 2341 }, { "epoch": 0.1555422726970844, "grad_norm": 142.06817626953125, "learning_rate": 1.98806400700966e-06, "loss": 18.6406, "step": 2342 }, { "epoch": 0.15560868698944014, "grad_norm": 1121.8065185546875, "learning_rate": 1.988047433573932e-06, "loss": 16.5469, "step": 2343 }, { "epoch": 0.15567510128179585, "grad_norm": 623.6990966796875, "learning_rate": 1.9880308487090415e-06, "loss": 26.625, "step": 2344 }, { "epoch": 0.15574151557415156, "grad_norm": 249.9058074951172, "learning_rate": 1.9880142524151794e-06, "loss": 29.5, "step": 2345 }, { "epoch": 0.15580792986650727, "grad_norm": 212.0081024169922, "learning_rate": 1.9879976446925386e-06, "loss": 19.5781, "step": 2346 }, { "epoch": 0.15587434415886298, "grad_norm": 216.08419799804688, "learning_rate": 1.9879810255413103e-06, "loss": 23.7812, "step": 2347 }, { "epoch": 0.15594075845121871, "grad_norm": 200.3809814453125, "learning_rate": 1.9879643949616874e-06, "loss": 21.8125, "step": 2348 }, { "epoch": 0.15600717274357442, "grad_norm": 244.19586181640625, "learning_rate": 1.987947752953862e-06, "loss": 21.7969, "step": 2349 }, { "epoch": 0.15607358703593013, "grad_norm": 244.76991271972656, "learning_rate": 1.9879310995180266e-06, "loss": 29.7188, "step": 2350 }, { "epoch": 0.15614000132828584, "grad_norm": 233.24392700195312, "learning_rate": 1.987914434654374e-06, "loss": 23.75, "step": 2351 }, { "epoch": 0.15620641562064155, "grad_norm": 263.7277526855469, "learning_rate": 1.987897758363097e-06, "loss": 21.5156, "step": 2352 }, { "epoch": 0.1562728299129973, "grad_norm": 391.84246826171875, "learning_rate": 1.987881070644388e-06, "loss": 20.6406, "step": 2353 }, { "epoch": 0.156339244205353, "grad_norm": 150.55528259277344, "learning_rate": 1.9878643714984413e-06, "loss": 20.6562, "step": 2354 }, { "epoch": 0.1564056584977087, "grad_norm": 343.82080078125, "learning_rate": 1.9878476609254482e-06, "loss": 26.5312, "step": 2355 }, { "epoch": 0.15647207279006442, "grad_norm": 204.33306884765625, "learning_rate": 1.9878309389256038e-06, "loss": 23.0, "step": 2356 }, { "epoch": 0.15653848708242013, "grad_norm": 206.8057098388672, "learning_rate": 1.9878142054991e-06, "loss": 20.0938, "step": 2357 }, { "epoch": 0.15660490137477584, "grad_norm": 395.5665588378906, "learning_rate": 1.9877974606461315e-06, "loss": 22.9375, "step": 2358 }, { "epoch": 0.15667131566713158, "grad_norm": 183.28231811523438, "learning_rate": 1.987780704366891e-06, "loss": 22.8281, "step": 2359 }, { "epoch": 0.1567377299594873, "grad_norm": 274.2291259765625, "learning_rate": 1.9877639366615737e-06, "loss": 23.0469, "step": 2360 }, { "epoch": 0.156804144251843, "grad_norm": 2801.653076171875, "learning_rate": 1.9877471575303725e-06, "loss": 20.9844, "step": 2361 }, { "epoch": 0.1568705585441987, "grad_norm": 232.42355346679688, "learning_rate": 1.9877303669734817e-06, "loss": 21.0469, "step": 2362 }, { "epoch": 0.15693697283655442, "grad_norm": 186.33016967773438, "learning_rate": 1.9877135649910956e-06, "loss": 21.6719, "step": 2363 }, { "epoch": 0.15700338712891015, "grad_norm": 412.49395751953125, "learning_rate": 1.9876967515834086e-06, "loss": 24.7031, "step": 2364 }, { "epoch": 0.15706980142126586, "grad_norm": 320.3381652832031, "learning_rate": 1.9876799267506146e-06, "loss": 20.4375, "step": 2365 }, { "epoch": 0.15713621571362157, "grad_norm": 193.25941467285156, "learning_rate": 1.987663090492909e-06, "loss": 22.8281, "step": 2366 }, { "epoch": 0.15720263000597728, "grad_norm": 227.1564483642578, "learning_rate": 1.9876462428104865e-06, "loss": 21.9062, "step": 2367 }, { "epoch": 0.157269044298333, "grad_norm": 261.9468688964844, "learning_rate": 1.987629383703542e-06, "loss": 21.875, "step": 2368 }, { "epoch": 0.1573354585906887, "grad_norm": 269.460693359375, "learning_rate": 1.9876125131722698e-06, "loss": 24.4531, "step": 2369 }, { "epoch": 0.15740187288304444, "grad_norm": 205.4748992919922, "learning_rate": 1.9875956312168656e-06, "loss": 21.9688, "step": 2370 }, { "epoch": 0.15746828717540015, "grad_norm": 238.83021545410156, "learning_rate": 1.9875787378375247e-06, "loss": 18.7188, "step": 2371 }, { "epoch": 0.15753470146775586, "grad_norm": 456.5072937011719, "learning_rate": 1.9875618330344427e-06, "loss": 29.25, "step": 2372 }, { "epoch": 0.15760111576011157, "grad_norm": 220.8404998779297, "learning_rate": 1.9875449168078144e-06, "loss": 21.1719, "step": 2373 }, { "epoch": 0.15766753005246728, "grad_norm": 318.3719787597656, "learning_rate": 1.987527989157836e-06, "loss": 20.5625, "step": 2374 }, { "epoch": 0.15773394434482302, "grad_norm": 252.58482360839844, "learning_rate": 1.987511050084704e-06, "loss": 23.7188, "step": 2375 }, { "epoch": 0.15780035863717873, "grad_norm": 574.1576538085938, "learning_rate": 1.987494099588613e-06, "loss": 23.5625, "step": 2376 }, { "epoch": 0.15786677292953444, "grad_norm": 194.5956573486328, "learning_rate": 1.9874771376697595e-06, "loss": 19.75, "step": 2377 }, { "epoch": 0.15793318722189015, "grad_norm": 2951.792236328125, "learning_rate": 1.98746016432834e-06, "loss": 23.9062, "step": 2378 }, { "epoch": 0.15799960151424586, "grad_norm": 331.897216796875, "learning_rate": 1.987443179564551e-06, "loss": 30.4062, "step": 2379 }, { "epoch": 0.15806601580660157, "grad_norm": 155.6643524169922, "learning_rate": 1.987426183378588e-06, "loss": 21.0312, "step": 2380 }, { "epoch": 0.1581324300989573, "grad_norm": 179.00271606445312, "learning_rate": 1.987409175770649e-06, "loss": 19.4531, "step": 2381 }, { "epoch": 0.15819884439131301, "grad_norm": 247.71812438964844, "learning_rate": 1.98739215674093e-06, "loss": 19.0625, "step": 2382 }, { "epoch": 0.15826525868366872, "grad_norm": 252.96697998046875, "learning_rate": 1.9873751262896277e-06, "loss": 22.5156, "step": 2383 }, { "epoch": 0.15833167297602443, "grad_norm": 264.8354797363281, "learning_rate": 1.9873580844169395e-06, "loss": 23.9375, "step": 2384 }, { "epoch": 0.15839808726838014, "grad_norm": 242.86813354492188, "learning_rate": 1.987341031123062e-06, "loss": 24.0312, "step": 2385 }, { "epoch": 0.15846450156073588, "grad_norm": 155.5728759765625, "learning_rate": 1.9873239664081927e-06, "loss": 20.625, "step": 2386 }, { "epoch": 0.1585309158530916, "grad_norm": 273.58306884765625, "learning_rate": 1.9873068902725297e-06, "loss": 20.875, "step": 2387 }, { "epoch": 0.1585973301454473, "grad_norm": 153.4441375732422, "learning_rate": 1.9872898027162694e-06, "loss": 18.8438, "step": 2388 }, { "epoch": 0.158663744437803, "grad_norm": 119.7273178100586, "learning_rate": 1.98727270373961e-06, "loss": 19.5938, "step": 2389 }, { "epoch": 0.15873015873015872, "grad_norm": 182.0042266845703, "learning_rate": 1.987255593342749e-06, "loss": 16.8281, "step": 2390 }, { "epoch": 0.15879657302251446, "grad_norm": 260.3619384765625, "learning_rate": 1.9872384715258856e-06, "loss": 27.8438, "step": 2391 }, { "epoch": 0.15886298731487017, "grad_norm": 180.0740966796875, "learning_rate": 1.987221338289216e-06, "loss": 18.2031, "step": 2392 }, { "epoch": 0.15892940160722588, "grad_norm": 1119.900390625, "learning_rate": 1.9872041936329395e-06, "loss": 27.875, "step": 2393 }, { "epoch": 0.1589958158995816, "grad_norm": 217.8234100341797, "learning_rate": 1.9871870375572538e-06, "loss": 19.8906, "step": 2394 }, { "epoch": 0.1590622301919373, "grad_norm": 265.94342041015625, "learning_rate": 1.9871698700623577e-06, "loss": 19.4844, "step": 2395 }, { "epoch": 0.159128644484293, "grad_norm": 288.1989440917969, "learning_rate": 1.9871526911484504e-06, "loss": 24.6719, "step": 2396 }, { "epoch": 0.15919505877664875, "grad_norm": 408.87445068359375, "learning_rate": 1.9871355008157296e-06, "loss": 20.5, "step": 2397 }, { "epoch": 0.15926147306900446, "grad_norm": 272.51739501953125, "learning_rate": 1.9871182990643944e-06, "loss": 21.625, "step": 2398 }, { "epoch": 0.15932788736136017, "grad_norm": 195.4495086669922, "learning_rate": 1.987101085894644e-06, "loss": 19.75, "step": 2399 }, { "epoch": 0.15939430165371588, "grad_norm": 118.07098388671875, "learning_rate": 1.9870838613066774e-06, "loss": 18.7188, "step": 2400 }, { "epoch": 0.15946071594607159, "grad_norm": 171.55642700195312, "learning_rate": 1.987066625300694e-06, "loss": 19.0625, "step": 2401 }, { "epoch": 0.15952713023842732, "grad_norm": 227.10374450683594, "learning_rate": 1.987049377876893e-06, "loss": 21.9375, "step": 2402 }, { "epoch": 0.15959354453078303, "grad_norm": 157.12916564941406, "learning_rate": 1.987032119035474e-06, "loss": 16.5156, "step": 2403 }, { "epoch": 0.15965995882313874, "grad_norm": 292.9057922363281, "learning_rate": 1.9870148487766367e-06, "loss": 19.3438, "step": 2404 }, { "epoch": 0.15972637311549445, "grad_norm": 228.7513427734375, "learning_rate": 1.986997567100581e-06, "loss": 20.6875, "step": 2405 }, { "epoch": 0.15979278740785016, "grad_norm": 249.1290283203125, "learning_rate": 1.986980274007506e-06, "loss": 27.7969, "step": 2406 }, { "epoch": 0.15985920170020587, "grad_norm": 256.9765930175781, "learning_rate": 1.9869629694976124e-06, "loss": 18.3906, "step": 2407 }, { "epoch": 0.1599256159925616, "grad_norm": 418.3013000488281, "learning_rate": 1.9869456535711004e-06, "loss": 31.0156, "step": 2408 }, { "epoch": 0.15999203028491732, "grad_norm": 503.1205139160156, "learning_rate": 1.9869283262281704e-06, "loss": 25.0938, "step": 2409 }, { "epoch": 0.16005844457727303, "grad_norm": 402.20361328125, "learning_rate": 1.9869109874690226e-06, "loss": 29.6562, "step": 2410 }, { "epoch": 0.16012485886962874, "grad_norm": 271.3823547363281, "learning_rate": 1.986893637293857e-06, "loss": 15.8125, "step": 2411 }, { "epoch": 0.16019127316198445, "grad_norm": 275.8125, "learning_rate": 1.9868762757028756e-06, "loss": 20.2812, "step": 2412 }, { "epoch": 0.1602576874543402, "grad_norm": 156.171142578125, "learning_rate": 1.9868589026962782e-06, "loss": 17.1875, "step": 2413 }, { "epoch": 0.1603241017466959, "grad_norm": 254.63375854492188, "learning_rate": 1.986841518274266e-06, "loss": 26.125, "step": 2414 }, { "epoch": 0.1603905160390516, "grad_norm": 214.79351806640625, "learning_rate": 1.9868241224370404e-06, "loss": 21.9219, "step": 2415 }, { "epoch": 0.16045693033140732, "grad_norm": 215.1773223876953, "learning_rate": 1.986806715184802e-06, "loss": 26.1562, "step": 2416 }, { "epoch": 0.16052334462376303, "grad_norm": 130.8676300048828, "learning_rate": 1.986789296517753e-06, "loss": 20.5938, "step": 2417 }, { "epoch": 0.16058975891611874, "grad_norm": 175.10479736328125, "learning_rate": 1.986771866436094e-06, "loss": 20.4844, "step": 2418 }, { "epoch": 0.16065617320847447, "grad_norm": 148.7013397216797, "learning_rate": 1.986754424940027e-06, "loss": 18.6875, "step": 2419 }, { "epoch": 0.16072258750083018, "grad_norm": 350.3233337402344, "learning_rate": 1.9867369720297544e-06, "loss": 20.9844, "step": 2420 }, { "epoch": 0.1607890017931859, "grad_norm": 229.14547729492188, "learning_rate": 1.986719507705477e-06, "loss": 18.9375, "step": 2421 }, { "epoch": 0.1608554160855416, "grad_norm": 198.70736694335938, "learning_rate": 1.986702031967398e-06, "loss": 18.4844, "step": 2422 }, { "epoch": 0.1609218303778973, "grad_norm": 284.9127197265625, "learning_rate": 1.986684544815718e-06, "loss": 18.5625, "step": 2423 }, { "epoch": 0.16098824467025305, "grad_norm": 303.9956970214844, "learning_rate": 1.9866670462506405e-06, "loss": 19.9375, "step": 2424 }, { "epoch": 0.16105465896260876, "grad_norm": 179.23130798339844, "learning_rate": 1.9866495362723675e-06, "loss": 22.5781, "step": 2425 }, { "epoch": 0.16112107325496447, "grad_norm": 296.6505126953125, "learning_rate": 1.9866320148811015e-06, "loss": 20.5156, "step": 2426 }, { "epoch": 0.16118748754732018, "grad_norm": 185.57835388183594, "learning_rate": 1.9866144820770452e-06, "loss": 22.3594, "step": 2427 }, { "epoch": 0.1612539018396759, "grad_norm": 152.8699493408203, "learning_rate": 1.986596937860402e-06, "loss": 18.4375, "step": 2428 }, { "epoch": 0.16132031613203163, "grad_norm": 195.6515350341797, "learning_rate": 1.986579382231374e-06, "loss": 20.3125, "step": 2429 }, { "epoch": 0.16138673042438734, "grad_norm": 242.25970458984375, "learning_rate": 1.9865618151901647e-06, "loss": 23.1406, "step": 2430 }, { "epoch": 0.16145314471674305, "grad_norm": 151.78024291992188, "learning_rate": 1.986544236736977e-06, "loss": 20.2969, "step": 2431 }, { "epoch": 0.16151955900909876, "grad_norm": 128.36395263671875, "learning_rate": 1.9865266468720148e-06, "loss": 20.3125, "step": 2432 }, { "epoch": 0.16158597330145447, "grad_norm": 301.8663635253906, "learning_rate": 1.9865090455954807e-06, "loss": 17.7031, "step": 2433 }, { "epoch": 0.16165238759381018, "grad_norm": 248.7993927001953, "learning_rate": 1.986491432907579e-06, "loss": 23.3438, "step": 2434 }, { "epoch": 0.1617188018861659, "grad_norm": 164.7132568359375, "learning_rate": 1.9864738088085135e-06, "loss": 19.1094, "step": 2435 }, { "epoch": 0.16178521617852162, "grad_norm": 141.177001953125, "learning_rate": 1.9864561732984873e-06, "loss": 18.2188, "step": 2436 }, { "epoch": 0.16185163047087733, "grad_norm": 158.33529663085938, "learning_rate": 1.9864385263777055e-06, "loss": 16.875, "step": 2437 }, { "epoch": 0.16191804476323304, "grad_norm": 502.6157531738281, "learning_rate": 1.9864208680463713e-06, "loss": 22.6875, "step": 2438 }, { "epoch": 0.16198445905558875, "grad_norm": 175.84164428710938, "learning_rate": 1.9864031983046897e-06, "loss": 20.4062, "step": 2439 }, { "epoch": 0.1620508733479445, "grad_norm": 413.3014221191406, "learning_rate": 1.9863855171528645e-06, "loss": 31.4688, "step": 2440 }, { "epoch": 0.1621172876403002, "grad_norm": 226.91864013671875, "learning_rate": 1.9863678245911e-06, "loss": 26.6875, "step": 2441 }, { "epoch": 0.1621837019326559, "grad_norm": 228.88133239746094, "learning_rate": 1.9863501206196017e-06, "loss": 20.8125, "step": 2442 }, { "epoch": 0.16225011622501162, "grad_norm": 756.3630981445312, "learning_rate": 1.9863324052385737e-06, "loss": 23.5312, "step": 2443 }, { "epoch": 0.16231653051736733, "grad_norm": 150.285400390625, "learning_rate": 1.9863146784482215e-06, "loss": 17.4219, "step": 2444 }, { "epoch": 0.16238294480972304, "grad_norm": 351.5370788574219, "learning_rate": 1.9862969402487497e-06, "loss": 26.5312, "step": 2445 }, { "epoch": 0.16244935910207878, "grad_norm": 274.5664367675781, "learning_rate": 1.9862791906403637e-06, "loss": 21.1562, "step": 2446 }, { "epoch": 0.1625157733944345, "grad_norm": 244.28964233398438, "learning_rate": 1.9862614296232685e-06, "loss": 24.625, "step": 2447 }, { "epoch": 0.1625821876867902, "grad_norm": 382.95953369140625, "learning_rate": 1.98624365719767e-06, "loss": 21.8438, "step": 2448 }, { "epoch": 0.1626486019791459, "grad_norm": 181.26112365722656, "learning_rate": 1.9862258733637734e-06, "loss": 18.0938, "step": 2449 }, { "epoch": 0.16271501627150162, "grad_norm": 260.956787109375, "learning_rate": 1.9862080781217847e-06, "loss": 23.2969, "step": 2450 }, { "epoch": 0.16278143056385735, "grad_norm": 471.923095703125, "learning_rate": 1.9861902714719097e-06, "loss": 24.3125, "step": 2451 }, { "epoch": 0.16284784485621306, "grad_norm": 613.7515258789062, "learning_rate": 1.986172453414354e-06, "loss": 19.875, "step": 2452 }, { "epoch": 0.16291425914856877, "grad_norm": 236.00128173828125, "learning_rate": 1.9861546239493244e-06, "loss": 22.3125, "step": 2453 }, { "epoch": 0.16298067344092448, "grad_norm": 112.36930847167969, "learning_rate": 1.9861367830770267e-06, "loss": 13.5156, "step": 2454 }, { "epoch": 0.1630470877332802, "grad_norm": 180.29147338867188, "learning_rate": 1.9861189307976676e-06, "loss": 20.0312, "step": 2455 }, { "epoch": 0.1631135020256359, "grad_norm": 205.0133056640625, "learning_rate": 1.9861010671114527e-06, "loss": 21.625, "step": 2456 }, { "epoch": 0.16317991631799164, "grad_norm": 134.61972045898438, "learning_rate": 1.9860831920185898e-06, "loss": 19.1406, "step": 2457 }, { "epoch": 0.16324633061034735, "grad_norm": 360.95709228515625, "learning_rate": 1.9860653055192847e-06, "loss": 25.625, "step": 2458 }, { "epoch": 0.16331274490270306, "grad_norm": 328.8307800292969, "learning_rate": 1.986047407613745e-06, "loss": 21.4375, "step": 2459 }, { "epoch": 0.16337915919505877, "grad_norm": 276.03619384765625, "learning_rate": 1.9860294983021775e-06, "loss": 18.0469, "step": 2460 }, { "epoch": 0.16344557348741448, "grad_norm": 186.5058135986328, "learning_rate": 1.986011577584789e-06, "loss": 22.0312, "step": 2461 }, { "epoch": 0.16351198777977022, "grad_norm": 258.38519287109375, "learning_rate": 1.985993645461788e-06, "loss": 23.875, "step": 2462 }, { "epoch": 0.16357840207212593, "grad_norm": 264.5103759765625, "learning_rate": 1.9859757019333803e-06, "loss": 21.4219, "step": 2463 }, { "epoch": 0.16364481636448164, "grad_norm": 382.3150634765625, "learning_rate": 1.9859577469997745e-06, "loss": 18.3594, "step": 2464 }, { "epoch": 0.16371123065683735, "grad_norm": 254.125732421875, "learning_rate": 1.985939780661178e-06, "loss": 20.8438, "step": 2465 }, { "epoch": 0.16377764494919306, "grad_norm": 347.9848327636719, "learning_rate": 1.9859218029177983e-06, "loss": 20.625, "step": 2466 }, { "epoch": 0.1638440592415488, "grad_norm": 173.9341583251953, "learning_rate": 1.985903813769844e-06, "loss": 18.0938, "step": 2467 }, { "epoch": 0.1639104735339045, "grad_norm": 456.9164733886719, "learning_rate": 1.9858858132175227e-06, "loss": 20.0469, "step": 2468 }, { "epoch": 0.16397688782626021, "grad_norm": 862.9873046875, "learning_rate": 1.985867801261043e-06, "loss": 20.3906, "step": 2469 }, { "epoch": 0.16404330211861592, "grad_norm": 217.40086364746094, "learning_rate": 1.985849777900613e-06, "loss": 33.9062, "step": 2470 }, { "epoch": 0.16410971641097163, "grad_norm": 235.60830688476562, "learning_rate": 1.9858317431364415e-06, "loss": 19.3125, "step": 2471 }, { "epoch": 0.16417613070332734, "grad_norm": 115.62623596191406, "learning_rate": 1.9858136969687366e-06, "loss": 14.5625, "step": 2472 }, { "epoch": 0.16424254499568308, "grad_norm": 313.12530517578125, "learning_rate": 1.9857956393977074e-06, "loss": 22.4844, "step": 2473 }, { "epoch": 0.1643089592880388, "grad_norm": 264.7531433105469, "learning_rate": 1.9857775704235625e-06, "loss": 19.375, "step": 2474 }, { "epoch": 0.1643753735803945, "grad_norm": 218.1165771484375, "learning_rate": 1.985759490046511e-06, "loss": 21.5625, "step": 2475 }, { "epoch": 0.1644417878727502, "grad_norm": 305.6697692871094, "learning_rate": 1.9857413982667624e-06, "loss": 21.4844, "step": 2476 }, { "epoch": 0.16450820216510592, "grad_norm": 299.2579040527344, "learning_rate": 1.9857232950845256e-06, "loss": 18.6875, "step": 2477 }, { "epoch": 0.16457461645746166, "grad_norm": 157.64877319335938, "learning_rate": 1.9857051805000104e-06, "loss": 20.0156, "step": 2478 }, { "epoch": 0.16464103074981737, "grad_norm": 133.45974731445312, "learning_rate": 1.985687054513426e-06, "loss": 18.1719, "step": 2479 }, { "epoch": 0.16470744504217308, "grad_norm": 311.0762023925781, "learning_rate": 1.985668917124982e-06, "loss": 21.5625, "step": 2480 }, { "epoch": 0.1647738593345288, "grad_norm": 222.84437561035156, "learning_rate": 1.985650768334888e-06, "loss": 19.8281, "step": 2481 }, { "epoch": 0.1648402736268845, "grad_norm": 341.63507080078125, "learning_rate": 1.985632608143355e-06, "loss": 23.5469, "step": 2482 }, { "epoch": 0.1649066879192402, "grad_norm": 237.2863311767578, "learning_rate": 1.9856144365505917e-06, "loss": 17.4531, "step": 2483 }, { "epoch": 0.16497310221159595, "grad_norm": 136.16468811035156, "learning_rate": 1.9855962535568093e-06, "loss": 19.0, "step": 2484 }, { "epoch": 0.16503951650395166, "grad_norm": 160.37008666992188, "learning_rate": 1.9855780591622176e-06, "loss": 13.7812, "step": 2485 }, { "epoch": 0.16510593079630737, "grad_norm": 266.52227783203125, "learning_rate": 1.985559853367027e-06, "loss": 21.7031, "step": 2486 }, { "epoch": 0.16517234508866308, "grad_norm": 311.3379821777344, "learning_rate": 1.985541636171449e-06, "loss": 25.9688, "step": 2487 }, { "epoch": 0.16523875938101878, "grad_norm": 150.75631713867188, "learning_rate": 1.9855234075756933e-06, "loss": 14.5625, "step": 2488 }, { "epoch": 0.16530517367337452, "grad_norm": 175.53858947753906, "learning_rate": 1.985505167579971e-06, "loss": 18.9219, "step": 2489 }, { "epoch": 0.16537158796573023, "grad_norm": 229.6349334716797, "learning_rate": 1.985486916184493e-06, "loss": 23.5156, "step": 2490 }, { "epoch": 0.16543800225808594, "grad_norm": 240.15191650390625, "learning_rate": 1.985468653389471e-06, "loss": 23.1094, "step": 2491 }, { "epoch": 0.16550441655044165, "grad_norm": 627.8562622070312, "learning_rate": 1.9854503791951157e-06, "loss": 19.8125, "step": 2492 }, { "epoch": 0.16557083084279736, "grad_norm": 174.41807556152344, "learning_rate": 1.9854320936016385e-06, "loss": 19.6562, "step": 2493 }, { "epoch": 0.16563724513515307, "grad_norm": 805.0387573242188, "learning_rate": 1.9854137966092515e-06, "loss": 25.875, "step": 2494 }, { "epoch": 0.1657036594275088, "grad_norm": 194.16470336914062, "learning_rate": 1.9853954882181654e-06, "loss": 22.2812, "step": 2495 }, { "epoch": 0.16577007371986452, "grad_norm": 377.3269348144531, "learning_rate": 1.985377168428593e-06, "loss": 27.0625, "step": 2496 }, { "epoch": 0.16583648801222023, "grad_norm": 189.18174743652344, "learning_rate": 1.9853588372407456e-06, "loss": 17.8438, "step": 2497 }, { "epoch": 0.16590290230457594, "grad_norm": 301.7325744628906, "learning_rate": 1.9853404946548352e-06, "loss": 19.3125, "step": 2498 }, { "epoch": 0.16596931659693165, "grad_norm": 253.72119140625, "learning_rate": 1.9853221406710743e-06, "loss": 18.4688, "step": 2499 }, { "epoch": 0.1660357308892874, "grad_norm": 159.20619201660156, "learning_rate": 1.985303775289675e-06, "loss": 19.7188, "step": 2500 }, { "epoch": 0.1661021451816431, "grad_norm": 380.6506042480469, "learning_rate": 1.98528539851085e-06, "loss": 20.1094, "step": 2501 }, { "epoch": 0.1661685594739988, "grad_norm": 135.11785888671875, "learning_rate": 1.985267010334811e-06, "loss": 15.5938, "step": 2502 }, { "epoch": 0.16623497376635452, "grad_norm": 254.74826049804688, "learning_rate": 1.9852486107617715e-06, "loss": 27.8125, "step": 2503 }, { "epoch": 0.16630138805871023, "grad_norm": 149.3649444580078, "learning_rate": 1.985230199791945e-06, "loss": 21.2188, "step": 2504 }, { "epoch": 0.16636780235106596, "grad_norm": 135.83226013183594, "learning_rate": 1.985211777425543e-06, "loss": 16.4219, "step": 2505 }, { "epoch": 0.16643421664342167, "grad_norm": 197.71853637695312, "learning_rate": 1.9851933436627793e-06, "loss": 32.5625, "step": 2506 }, { "epoch": 0.16650063093577738, "grad_norm": 209.62030029296875, "learning_rate": 1.985174898503867e-06, "loss": 19.5625, "step": 2507 }, { "epoch": 0.1665670452281331, "grad_norm": 214.17701721191406, "learning_rate": 1.98515644194902e-06, "loss": 24.5625, "step": 2508 }, { "epoch": 0.1666334595204888, "grad_norm": 200.75311279296875, "learning_rate": 1.985137973998451e-06, "loss": 20.6406, "step": 2509 }, { "epoch": 0.1666998738128445, "grad_norm": 218.47267150878906, "learning_rate": 1.985119494652374e-06, "loss": 17.9531, "step": 2510 }, { "epoch": 0.16676628810520025, "grad_norm": 218.76925659179688, "learning_rate": 1.9851010039110028e-06, "loss": 25.0625, "step": 2511 }, { "epoch": 0.16683270239755596, "grad_norm": 148.60403442382812, "learning_rate": 1.9850825017745513e-06, "loss": 14.0469, "step": 2512 }, { "epoch": 0.16689911668991167, "grad_norm": 274.04608154296875, "learning_rate": 1.9850639882432332e-06, "loss": 19.9219, "step": 2513 }, { "epoch": 0.16696553098226738, "grad_norm": 178.3951416015625, "learning_rate": 1.985045463317263e-06, "loss": 19.875, "step": 2514 }, { "epoch": 0.1670319452746231, "grad_norm": 451.9935302734375, "learning_rate": 1.985026926996855e-06, "loss": 20.5156, "step": 2515 }, { "epoch": 0.16709835956697883, "grad_norm": 247.91558837890625, "learning_rate": 1.985008379282224e-06, "loss": 21.2812, "step": 2516 }, { "epoch": 0.16716477385933454, "grad_norm": 140.40579223632812, "learning_rate": 1.9849898201735833e-06, "loss": 22.625, "step": 2517 }, { "epoch": 0.16723118815169025, "grad_norm": 152.34808349609375, "learning_rate": 1.9849712496711485e-06, "loss": 15.2344, "step": 2518 }, { "epoch": 0.16729760244404596, "grad_norm": 190.95225524902344, "learning_rate": 1.9849526677751347e-06, "loss": 16.7812, "step": 2519 }, { "epoch": 0.16736401673640167, "grad_norm": 269.7373046875, "learning_rate": 1.9849340744857556e-06, "loss": 22.7188, "step": 2520 }, { "epoch": 0.16743043102875738, "grad_norm": 412.39923095703125, "learning_rate": 1.9849154698032277e-06, "loss": 33.375, "step": 2521 }, { "epoch": 0.1674968453211131, "grad_norm": 274.2735595703125, "learning_rate": 1.9848968537277654e-06, "loss": 16.3594, "step": 2522 }, { "epoch": 0.16756325961346882, "grad_norm": 420.475830078125, "learning_rate": 1.984878226259584e-06, "loss": 20.0625, "step": 2523 }, { "epoch": 0.16762967390582453, "grad_norm": 164.28077697753906, "learning_rate": 1.9848595873988995e-06, "loss": 18.875, "step": 2524 }, { "epoch": 0.16769608819818024, "grad_norm": 170.26992797851562, "learning_rate": 1.984840937145927e-06, "loss": 18.7031, "step": 2525 }, { "epoch": 0.16776250249053595, "grad_norm": 494.47247314453125, "learning_rate": 1.9848222755008824e-06, "loss": 29.5, "step": 2526 }, { "epoch": 0.1678289167828917, "grad_norm": 486.1859130859375, "learning_rate": 1.984803602463982e-06, "loss": 24.2344, "step": 2527 }, { "epoch": 0.1678953310752474, "grad_norm": 297.226806640625, "learning_rate": 1.984784918035441e-06, "loss": 18.4688, "step": 2528 }, { "epoch": 0.1679617453676031, "grad_norm": 134.833251953125, "learning_rate": 1.984766222215476e-06, "loss": 19.8906, "step": 2529 }, { "epoch": 0.16802815965995882, "grad_norm": 178.8464813232422, "learning_rate": 1.9847475150043033e-06, "loss": 18.7969, "step": 2530 }, { "epoch": 0.16809457395231453, "grad_norm": 237.25009155273438, "learning_rate": 1.984728796402139e-06, "loss": 20.6875, "step": 2531 }, { "epoch": 0.16816098824467024, "grad_norm": 209.0033416748047, "learning_rate": 1.9847100664092e-06, "loss": 17.6562, "step": 2532 }, { "epoch": 0.16822740253702598, "grad_norm": 168.0105438232422, "learning_rate": 1.984691325025703e-06, "loss": 17.9531, "step": 2533 }, { "epoch": 0.1682938168293817, "grad_norm": 446.9515686035156, "learning_rate": 1.984672572251864e-06, "loss": 23.1875, "step": 2534 }, { "epoch": 0.1683602311217374, "grad_norm": 320.3267517089844, "learning_rate": 1.984653808087901e-06, "loss": 17.8594, "step": 2535 }, { "epoch": 0.1684266454140931, "grad_norm": 146.18704223632812, "learning_rate": 1.98463503253403e-06, "loss": 15.6406, "step": 2536 }, { "epoch": 0.16849305970644882, "grad_norm": 365.7581481933594, "learning_rate": 1.984616245590469e-06, "loss": 31.0938, "step": 2537 }, { "epoch": 0.16855947399880455, "grad_norm": 319.6128234863281, "learning_rate": 1.984597447257435e-06, "loss": 27.0625, "step": 2538 }, { "epoch": 0.16862588829116026, "grad_norm": 154.08351135253906, "learning_rate": 1.9845786375351456e-06, "loss": 16.375, "step": 2539 }, { "epoch": 0.16869230258351597, "grad_norm": 146.5184783935547, "learning_rate": 1.9845598164238184e-06, "loss": 16.5, "step": 2540 }, { "epoch": 0.16875871687587168, "grad_norm": 420.5914306640625, "learning_rate": 1.9845409839236703e-06, "loss": 25.4062, "step": 2541 }, { "epoch": 0.1688251311682274, "grad_norm": 221.7621307373047, "learning_rate": 1.9845221400349203e-06, "loss": 22.4219, "step": 2542 }, { "epoch": 0.16889154546058313, "grad_norm": 134.037109375, "learning_rate": 1.984503284757786e-06, "loss": 18.4844, "step": 2543 }, { "epoch": 0.16895795975293884, "grad_norm": 305.3228759765625, "learning_rate": 1.9844844180924852e-06, "loss": 18.2969, "step": 2544 }, { "epoch": 0.16902437404529455, "grad_norm": 216.270751953125, "learning_rate": 1.9844655400392364e-06, "loss": 21.2969, "step": 2545 }, { "epoch": 0.16909078833765026, "grad_norm": 159.59002685546875, "learning_rate": 1.9844466505982577e-06, "loss": 20.3594, "step": 2546 }, { "epoch": 0.16915720263000597, "grad_norm": 213.5595703125, "learning_rate": 1.984427749769768e-06, "loss": 21.9688, "step": 2547 }, { "epoch": 0.16922361692236168, "grad_norm": 226.1920623779297, "learning_rate": 1.984408837553986e-06, "loss": 19.4375, "step": 2548 }, { "epoch": 0.16929003121471742, "grad_norm": 114.07215118408203, "learning_rate": 1.98438991395113e-06, "loss": 16.9219, "step": 2549 }, { "epoch": 0.16935644550707313, "grad_norm": 238.51560974121094, "learning_rate": 1.984370978961419e-06, "loss": 21.9688, "step": 2550 }, { "epoch": 0.16942285979942884, "grad_norm": 226.44964599609375, "learning_rate": 1.9843520325850724e-06, "loss": 19.2969, "step": 2551 }, { "epoch": 0.16948927409178455, "grad_norm": 211.40798950195312, "learning_rate": 1.9843330748223086e-06, "loss": 24.4062, "step": 2552 }, { "epoch": 0.16955568838414026, "grad_norm": 137.82907104492188, "learning_rate": 1.9843141056733476e-06, "loss": 17.0156, "step": 2553 }, { "epoch": 0.169622102676496, "grad_norm": 194.9602508544922, "learning_rate": 1.9842951251384088e-06, "loss": 17.1406, "step": 2554 }, { "epoch": 0.1696885169688517, "grad_norm": 191.5415496826172, "learning_rate": 1.9842761332177115e-06, "loss": 20.7031, "step": 2555 }, { "epoch": 0.16975493126120741, "grad_norm": 136.5316619873047, "learning_rate": 1.9842571299114752e-06, "loss": 18.4375, "step": 2556 }, { "epoch": 0.16982134555356312, "grad_norm": 238.9514923095703, "learning_rate": 1.9842381152199205e-06, "loss": 17.8125, "step": 2557 }, { "epoch": 0.16988775984591883, "grad_norm": 204.9121551513672, "learning_rate": 1.9842190891432664e-06, "loss": 18.3594, "step": 2558 }, { "epoch": 0.16995417413827454, "grad_norm": 246.60903930664062, "learning_rate": 1.9842000516817335e-06, "loss": 22.2031, "step": 2559 }, { "epoch": 0.17002058843063028, "grad_norm": 575.9030151367188, "learning_rate": 1.984181002835542e-06, "loss": 23.6875, "step": 2560 }, { "epoch": 0.170087002722986, "grad_norm": 220.0952911376953, "learning_rate": 1.984161942604912e-06, "loss": 22.5312, "step": 2561 }, { "epoch": 0.1701534170153417, "grad_norm": 135.20396423339844, "learning_rate": 1.9841428709900643e-06, "loss": 12.8125, "step": 2562 }, { "epoch": 0.1702198313076974, "grad_norm": 384.18780517578125, "learning_rate": 1.9841237879912193e-06, "loss": 24.6562, "step": 2563 }, { "epoch": 0.17028624560005312, "grad_norm": 262.11834716796875, "learning_rate": 1.984104693608598e-06, "loss": 25.6562, "step": 2564 }, { "epoch": 0.17035265989240886, "grad_norm": 296.3768615722656, "learning_rate": 1.9840855878424205e-06, "loss": 21.25, "step": 2565 }, { "epoch": 0.17041907418476457, "grad_norm": 184.827880859375, "learning_rate": 1.9840664706929093e-06, "loss": 16.2812, "step": 2566 }, { "epoch": 0.17048548847712028, "grad_norm": 196.4492645263672, "learning_rate": 1.984047342160284e-06, "loss": 18.2188, "step": 2567 }, { "epoch": 0.170551902769476, "grad_norm": 288.287841796875, "learning_rate": 1.9840282022447667e-06, "loss": 15.5625, "step": 2568 }, { "epoch": 0.1706183170618317, "grad_norm": 143.3534698486328, "learning_rate": 1.9840090509465784e-06, "loss": 14.6719, "step": 2569 }, { "epoch": 0.1706847313541874, "grad_norm": 361.34814453125, "learning_rate": 1.9839898882659405e-06, "loss": 22.9375, "step": 2570 }, { "epoch": 0.17075114564654315, "grad_norm": 396.5361633300781, "learning_rate": 1.9839707142030756e-06, "loss": 28.6406, "step": 2571 }, { "epoch": 0.17081755993889886, "grad_norm": 202.26210021972656, "learning_rate": 1.9839515287582045e-06, "loss": 20.1094, "step": 2572 }, { "epoch": 0.17088397423125457, "grad_norm": 184.99327087402344, "learning_rate": 1.9839323319315497e-06, "loss": 17.6562, "step": 2573 }, { "epoch": 0.17095038852361027, "grad_norm": 179.0675048828125, "learning_rate": 1.983913123723333e-06, "loss": 19.3594, "step": 2574 }, { "epoch": 0.17101680281596598, "grad_norm": 289.0082702636719, "learning_rate": 1.9838939041337767e-06, "loss": 23.1562, "step": 2575 }, { "epoch": 0.17108321710832172, "grad_norm": 273.8856201171875, "learning_rate": 1.9838746731631027e-06, "loss": 24.6875, "step": 2576 }, { "epoch": 0.17114963140067743, "grad_norm": 202.8294219970703, "learning_rate": 1.983855430811534e-06, "loss": 17.9688, "step": 2577 }, { "epoch": 0.17121604569303314, "grad_norm": 480.586181640625, "learning_rate": 1.983836177079293e-06, "loss": 18.1094, "step": 2578 }, { "epoch": 0.17128245998538885, "grad_norm": 119.42903137207031, "learning_rate": 1.9838169119666027e-06, "loss": 15.9219, "step": 2579 }, { "epoch": 0.17134887427774456, "grad_norm": 218.75096130371094, "learning_rate": 1.9837976354736858e-06, "loss": 24.2969, "step": 2580 }, { "epoch": 0.1714152885701003, "grad_norm": 555.5335083007812, "learning_rate": 1.9837783476007648e-06, "loss": 22.0, "step": 2581 }, { "epoch": 0.171481702862456, "grad_norm": 365.757568359375, "learning_rate": 1.983759048348063e-06, "loss": 23.6094, "step": 2582 }, { "epoch": 0.17154811715481172, "grad_norm": 183.74810791015625, "learning_rate": 1.983739737715804e-06, "loss": 19.4844, "step": 2583 }, { "epoch": 0.17161453144716743, "grad_norm": 291.3074035644531, "learning_rate": 1.9837204157042107e-06, "loss": 24.3906, "step": 2584 }, { "epoch": 0.17168094573952314, "grad_norm": 545.350830078125, "learning_rate": 1.983701082313507e-06, "loss": 28.6562, "step": 2585 }, { "epoch": 0.17174736003187885, "grad_norm": 246.78846740722656, "learning_rate": 1.9836817375439166e-06, "loss": 21.0938, "step": 2586 }, { "epoch": 0.17181377432423459, "grad_norm": 312.6759338378906, "learning_rate": 1.983662381395663e-06, "loss": 17.8281, "step": 2587 }, { "epoch": 0.1718801886165903, "grad_norm": 228.3465118408203, "learning_rate": 1.9836430138689703e-06, "loss": 16.7656, "step": 2588 }, { "epoch": 0.171946602908946, "grad_norm": 125.16068267822266, "learning_rate": 1.9836236349640623e-06, "loss": 14.8594, "step": 2589 }, { "epoch": 0.17201301720130172, "grad_norm": 533.5167236328125, "learning_rate": 1.9836042446811632e-06, "loss": 16.0312, "step": 2590 }, { "epoch": 0.17207943149365743, "grad_norm": 278.5078125, "learning_rate": 1.9835848430204975e-06, "loss": 20.7188, "step": 2591 }, { "epoch": 0.17214584578601316, "grad_norm": 234.77825927734375, "learning_rate": 1.983565429982289e-06, "loss": 26.9062, "step": 2592 }, { "epoch": 0.17221226007836887, "grad_norm": 133.47140502929688, "learning_rate": 1.9835460055667634e-06, "loss": 14.3438, "step": 2593 }, { "epoch": 0.17227867437072458, "grad_norm": 241.09193420410156, "learning_rate": 1.9835265697741448e-06, "loss": 16.8438, "step": 2594 }, { "epoch": 0.1723450886630803, "grad_norm": 196.537109375, "learning_rate": 1.983507122604658e-06, "loss": 22.25, "step": 2595 }, { "epoch": 0.172411502955436, "grad_norm": 215.40187072753906, "learning_rate": 1.9834876640585273e-06, "loss": 29.7344, "step": 2596 }, { "epoch": 0.1724779172477917, "grad_norm": 135.69454956054688, "learning_rate": 1.983468194135979e-06, "loss": 16.9688, "step": 2597 }, { "epoch": 0.17254433154014745, "grad_norm": 135.76747131347656, "learning_rate": 1.983448712837237e-06, "loss": 22.4375, "step": 2598 }, { "epoch": 0.17261074583250316, "grad_norm": 227.09027099609375, "learning_rate": 1.9834292201625283e-06, "loss": 20.75, "step": 2599 }, { "epoch": 0.17267716012485887, "grad_norm": 312.4831237792969, "learning_rate": 1.983409716112077e-06, "loss": 22.75, "step": 2600 }, { "epoch": 0.17274357441721458, "grad_norm": 375.4190673828125, "learning_rate": 1.9833902006861094e-06, "loss": 21.2812, "step": 2601 }, { "epoch": 0.1728099887095703, "grad_norm": 162.44253540039062, "learning_rate": 1.9833706738848506e-06, "loss": 18.25, "step": 2602 }, { "epoch": 0.17287640300192603, "grad_norm": 396.4747314453125, "learning_rate": 1.983351135708527e-06, "loss": 18.4219, "step": 2603 }, { "epoch": 0.17294281729428174, "grad_norm": 507.8039245605469, "learning_rate": 1.983331586157365e-06, "loss": 30.4688, "step": 2604 }, { "epoch": 0.17300923158663745, "grad_norm": 335.3951416015625, "learning_rate": 1.98331202523159e-06, "loss": 21.3125, "step": 2605 }, { "epoch": 0.17307564587899316, "grad_norm": 233.56207275390625, "learning_rate": 1.983292452931428e-06, "loss": 21.9844, "step": 2606 }, { "epoch": 0.17314206017134887, "grad_norm": 144.9734344482422, "learning_rate": 1.9832728692571066e-06, "loss": 17.1562, "step": 2607 }, { "epoch": 0.17320847446370458, "grad_norm": 400.0149230957031, "learning_rate": 1.983253274208851e-06, "loss": 24.3125, "step": 2608 }, { "epoch": 0.1732748887560603, "grad_norm": 260.43280029296875, "learning_rate": 1.9832336677868887e-06, "loss": 16.9688, "step": 2609 }, { "epoch": 0.17334130304841602, "grad_norm": 154.7200469970703, "learning_rate": 1.9832140499914465e-06, "loss": 14.9062, "step": 2610 }, { "epoch": 0.17340771734077173, "grad_norm": 146.2701416015625, "learning_rate": 1.983194420822751e-06, "loss": 18.2812, "step": 2611 }, { "epoch": 0.17347413163312744, "grad_norm": 200.33607482910156, "learning_rate": 1.9831747802810294e-06, "loss": 17.6094, "step": 2612 }, { "epoch": 0.17354054592548315, "grad_norm": 355.08111572265625, "learning_rate": 1.983155128366509e-06, "loss": 19.75, "step": 2613 }, { "epoch": 0.1736069602178389, "grad_norm": 198.92884826660156, "learning_rate": 1.983135465079417e-06, "loss": 23.4375, "step": 2614 }, { "epoch": 0.1736733745101946, "grad_norm": 515.45751953125, "learning_rate": 1.9831157904199805e-06, "loss": 16.9688, "step": 2615 }, { "epoch": 0.1737397888025503, "grad_norm": 258.0498352050781, "learning_rate": 1.9830961043884277e-06, "loss": 18.9375, "step": 2616 }, { "epoch": 0.17380620309490602, "grad_norm": 189.07948303222656, "learning_rate": 1.983076406984986e-06, "loss": 18.1719, "step": 2617 }, { "epoch": 0.17387261738726173, "grad_norm": 428.1692810058594, "learning_rate": 1.9830566982098833e-06, "loss": 20.4062, "step": 2618 }, { "epoch": 0.17393903167961747, "grad_norm": 357.2334899902344, "learning_rate": 1.9830369780633475e-06, "loss": 19.3281, "step": 2619 }, { "epoch": 0.17400544597197318, "grad_norm": 135.40989685058594, "learning_rate": 1.983017246545607e-06, "loss": 16.2969, "step": 2620 }, { "epoch": 0.1740718602643289, "grad_norm": 148.2538299560547, "learning_rate": 1.9829975036568896e-06, "loss": 17.1406, "step": 2621 }, { "epoch": 0.1741382745566846, "grad_norm": 154.00526428222656, "learning_rate": 1.982977749397424e-06, "loss": 17.0, "step": 2622 }, { "epoch": 0.1742046888490403, "grad_norm": 486.7516784667969, "learning_rate": 1.9829579837674386e-06, "loss": 21.375, "step": 2623 }, { "epoch": 0.17427110314139602, "grad_norm": 173.53396606445312, "learning_rate": 1.982938206767162e-06, "loss": 15.25, "step": 2624 }, { "epoch": 0.17433751743375175, "grad_norm": 202.17543029785156, "learning_rate": 1.982918418396823e-06, "loss": 19.25, "step": 2625 }, { "epoch": 0.17440393172610746, "grad_norm": 289.8376770019531, "learning_rate": 1.982898618656651e-06, "loss": 24.2812, "step": 2626 }, { "epoch": 0.17447034601846317, "grad_norm": 440.1038513183594, "learning_rate": 1.9828788075468737e-06, "loss": 28.0, "step": 2627 }, { "epoch": 0.17453676031081888, "grad_norm": 222.010498046875, "learning_rate": 1.982858985067722e-06, "loss": 18.2969, "step": 2628 }, { "epoch": 0.1746031746031746, "grad_norm": 294.97430419921875, "learning_rate": 1.9828391512194236e-06, "loss": 28.3281, "step": 2629 }, { "epoch": 0.17466958889553033, "grad_norm": 219.3023223876953, "learning_rate": 1.982819306002209e-06, "loss": 24.0781, "step": 2630 }, { "epoch": 0.17473600318788604, "grad_norm": 215.4972381591797, "learning_rate": 1.9827994494163073e-06, "loss": 19.6406, "step": 2631 }, { "epoch": 0.17480241748024175, "grad_norm": 379.9983825683594, "learning_rate": 1.9827795814619483e-06, "loss": 22.1094, "step": 2632 }, { "epoch": 0.17486883177259746, "grad_norm": 432.48779296875, "learning_rate": 1.982759702139362e-06, "loss": 24.2812, "step": 2633 }, { "epoch": 0.17493524606495317, "grad_norm": 304.6738586425781, "learning_rate": 1.9827398114487777e-06, "loss": 25.5, "step": 2634 }, { "epoch": 0.17500166035730888, "grad_norm": 152.4980926513672, "learning_rate": 1.9827199093904265e-06, "loss": 22.4219, "step": 2635 }, { "epoch": 0.17506807464966462, "grad_norm": 156.2458953857422, "learning_rate": 1.9826999959645378e-06, "loss": 15.8906, "step": 2636 }, { "epoch": 0.17513448894202033, "grad_norm": 266.2185974121094, "learning_rate": 1.982680071171342e-06, "loss": 23.625, "step": 2637 }, { "epoch": 0.17520090323437604, "grad_norm": 348.3039855957031, "learning_rate": 1.98266013501107e-06, "loss": 20.125, "step": 2638 }, { "epoch": 0.17526731752673175, "grad_norm": 506.1846923828125, "learning_rate": 1.982640187483952e-06, "loss": 23.125, "step": 2639 }, { "epoch": 0.17533373181908746, "grad_norm": 225.6895294189453, "learning_rate": 1.982620228590219e-06, "loss": 22.8125, "step": 2640 }, { "epoch": 0.1754001461114432, "grad_norm": 593.5206298828125, "learning_rate": 1.9826002583301016e-06, "loss": 27.2812, "step": 2641 }, { "epoch": 0.1754665604037989, "grad_norm": 431.5924987792969, "learning_rate": 1.9825802767038313e-06, "loss": 26.7344, "step": 2642 }, { "epoch": 0.17553297469615461, "grad_norm": 180.6752166748047, "learning_rate": 1.9825602837116393e-06, "loss": 16.75, "step": 2643 }, { "epoch": 0.17559938898851032, "grad_norm": 201.94430541992188, "learning_rate": 1.982540279353756e-06, "loss": 20.3125, "step": 2644 }, { "epoch": 0.17566580328086603, "grad_norm": 475.7239074707031, "learning_rate": 1.9825202636304136e-06, "loss": 25.0938, "step": 2645 }, { "epoch": 0.17573221757322174, "grad_norm": 216.81410217285156, "learning_rate": 1.9825002365418433e-06, "loss": 21.375, "step": 2646 }, { "epoch": 0.17579863186557748, "grad_norm": 350.6981201171875, "learning_rate": 1.9824801980882765e-06, "loss": 20.7344, "step": 2647 }, { "epoch": 0.1758650461579332, "grad_norm": 220.37405395507812, "learning_rate": 1.9824601482699455e-06, "loss": 18.9844, "step": 2648 }, { "epoch": 0.1759314604502889, "grad_norm": 179.44276428222656, "learning_rate": 1.9824400870870823e-06, "loss": 25.5938, "step": 2649 }, { "epoch": 0.1759978747426446, "grad_norm": 187.52957153320312, "learning_rate": 1.982420014539918e-06, "loss": 21.375, "step": 2650 }, { "epoch": 0.17606428903500032, "grad_norm": 335.41864013671875, "learning_rate": 1.982399930628686e-06, "loss": 23.1562, "step": 2651 }, { "epoch": 0.17613070332735606, "grad_norm": 240.03378295898438, "learning_rate": 1.982379835353618e-06, "loss": 21.1875, "step": 2652 }, { "epoch": 0.17619711761971177, "grad_norm": 203.6833038330078, "learning_rate": 1.9823597287149465e-06, "loss": 19.0156, "step": 2653 }, { "epoch": 0.17626353191206748, "grad_norm": 192.4134979248047, "learning_rate": 1.9823396107129044e-06, "loss": 20.2656, "step": 2654 }, { "epoch": 0.1763299462044232, "grad_norm": 355.00732421875, "learning_rate": 1.982319481347724e-06, "loss": 20.125, "step": 2655 }, { "epoch": 0.1763963604967789, "grad_norm": 238.49728393554688, "learning_rate": 1.982299340619638e-06, "loss": 20.0781, "step": 2656 }, { "epoch": 0.17646277478913464, "grad_norm": 213.85586547851562, "learning_rate": 1.9822791885288797e-06, "loss": 20.9375, "step": 2657 }, { "epoch": 0.17652918908149035, "grad_norm": 353.04486083984375, "learning_rate": 1.982259025075682e-06, "loss": 23.25, "step": 2658 }, { "epoch": 0.17659560337384606, "grad_norm": 249.14581298828125, "learning_rate": 1.9822388502602788e-06, "loss": 19.4219, "step": 2659 }, { "epoch": 0.17666201766620176, "grad_norm": 167.20053100585938, "learning_rate": 1.9822186640829027e-06, "loss": 15.7969, "step": 2660 }, { "epoch": 0.17672843195855747, "grad_norm": 196.63458251953125, "learning_rate": 1.9821984665437875e-06, "loss": 14.9375, "step": 2661 }, { "epoch": 0.17679484625091318, "grad_norm": 340.5465087890625, "learning_rate": 1.9821782576431666e-06, "loss": 22.5469, "step": 2662 }, { "epoch": 0.17686126054326892, "grad_norm": 141.75343322753906, "learning_rate": 1.9821580373812745e-06, "loss": 15.2344, "step": 2663 }, { "epoch": 0.17692767483562463, "grad_norm": 1454.0706787109375, "learning_rate": 1.982137805758344e-06, "loss": 15.5625, "step": 2664 }, { "epoch": 0.17699408912798034, "grad_norm": 320.5716552734375, "learning_rate": 1.98211756277461e-06, "loss": 21.2188, "step": 2665 }, { "epoch": 0.17706050342033605, "grad_norm": 183.6583251953125, "learning_rate": 1.982097308430306e-06, "loss": 20.3438, "step": 2666 }, { "epoch": 0.17712691771269176, "grad_norm": 209.59591674804688, "learning_rate": 1.982077042725667e-06, "loss": 20.9375, "step": 2667 }, { "epoch": 0.1771933320050475, "grad_norm": 410.9725036621094, "learning_rate": 1.982056765660927e-06, "loss": 21.6875, "step": 2668 }, { "epoch": 0.1772597462974032, "grad_norm": 199.61920166015625, "learning_rate": 1.982036477236321e-06, "loss": 26.1875, "step": 2669 }, { "epoch": 0.17732616058975892, "grad_norm": 179.9371795654297, "learning_rate": 1.9820161774520826e-06, "loss": 21.4688, "step": 2670 }, { "epoch": 0.17739257488211463, "grad_norm": 1890.7181396484375, "learning_rate": 1.9819958663084478e-06, "loss": 20.125, "step": 2671 }, { "epoch": 0.17745898917447034, "grad_norm": 205.68971252441406, "learning_rate": 1.981975543805651e-06, "loss": 13.1406, "step": 2672 }, { "epoch": 0.17752540346682605, "grad_norm": 271.9710693359375, "learning_rate": 1.981955209943927e-06, "loss": 17.6719, "step": 2673 }, { "epoch": 0.17759181775918179, "grad_norm": 252.2249755859375, "learning_rate": 1.9819348647235117e-06, "loss": 16.2188, "step": 2674 }, { "epoch": 0.1776582320515375, "grad_norm": 250.26707458496094, "learning_rate": 1.9819145081446403e-06, "loss": 22.0625, "step": 2675 }, { "epoch": 0.1777246463438932, "grad_norm": 330.3987731933594, "learning_rate": 1.9818941402075476e-06, "loss": 21.3906, "step": 2676 }, { "epoch": 0.17779106063624892, "grad_norm": 317.5622253417969, "learning_rate": 1.98187376091247e-06, "loss": 22.2188, "step": 2677 }, { "epoch": 0.17785747492860463, "grad_norm": 309.198486328125, "learning_rate": 1.9818533702596428e-06, "loss": 21.2812, "step": 2678 }, { "epoch": 0.17792388922096036, "grad_norm": 221.84744262695312, "learning_rate": 1.981832968249302e-06, "loss": 21.2188, "step": 2679 }, { "epoch": 0.17799030351331607, "grad_norm": 240.98121643066406, "learning_rate": 1.9818125548816836e-06, "loss": 22.5312, "step": 2680 }, { "epoch": 0.17805671780567178, "grad_norm": 176.71278381347656, "learning_rate": 1.9817921301570237e-06, "loss": 17.3281, "step": 2681 }, { "epoch": 0.1781231320980275, "grad_norm": 223.33935546875, "learning_rate": 1.9817716940755586e-06, "loss": 26.9688, "step": 2682 }, { "epoch": 0.1781895463903832, "grad_norm": 291.02288818359375, "learning_rate": 1.9817512466375246e-06, "loss": 18.75, "step": 2683 }, { "epoch": 0.1782559606827389, "grad_norm": 296.79620361328125, "learning_rate": 1.981730787843158e-06, "loss": 17.1719, "step": 2684 }, { "epoch": 0.17832237497509465, "grad_norm": 362.5989074707031, "learning_rate": 1.981710317692696e-06, "loss": 21.2969, "step": 2685 }, { "epoch": 0.17838878926745036, "grad_norm": 172.30462646484375, "learning_rate": 1.9816898361863756e-06, "loss": 13.9141, "step": 2686 }, { "epoch": 0.17845520355980607, "grad_norm": 184.87628173828125, "learning_rate": 1.9816693433244326e-06, "loss": 18.9219, "step": 2687 }, { "epoch": 0.17852161785216178, "grad_norm": 282.01318359375, "learning_rate": 1.981648839107105e-06, "loss": 16.625, "step": 2688 }, { "epoch": 0.1785880321445175, "grad_norm": 560.7064819335938, "learning_rate": 1.9816283235346296e-06, "loss": 21.6719, "step": 2689 }, { "epoch": 0.17865444643687323, "grad_norm": 209.82518005371094, "learning_rate": 1.981607796607244e-06, "loss": 18.6875, "step": 2690 }, { "epoch": 0.17872086072922894, "grad_norm": 245.3760528564453, "learning_rate": 1.9815872583251852e-06, "loss": 23.4688, "step": 2691 }, { "epoch": 0.17878727502158465, "grad_norm": 128.12351989746094, "learning_rate": 1.981566708688691e-06, "loss": 17.6094, "step": 2692 }, { "epoch": 0.17885368931394036, "grad_norm": 307.13677978515625, "learning_rate": 1.981546147697999e-06, "loss": 27.25, "step": 2693 }, { "epoch": 0.17892010360629607, "grad_norm": 597.7212524414062, "learning_rate": 1.9815255753533475e-06, "loss": 22.1562, "step": 2694 }, { "epoch": 0.1789865178986518, "grad_norm": 167.15956115722656, "learning_rate": 1.9815049916549737e-06, "loss": 18.8438, "step": 2695 }, { "epoch": 0.1790529321910075, "grad_norm": 257.77288818359375, "learning_rate": 1.9814843966031164e-06, "loss": 19.9062, "step": 2696 }, { "epoch": 0.17911934648336322, "grad_norm": 237.71408081054688, "learning_rate": 1.9814637901980136e-06, "loss": 23.2188, "step": 2697 }, { "epoch": 0.17918576077571893, "grad_norm": 272.1268005371094, "learning_rate": 1.9814431724399037e-06, "loss": 27.0, "step": 2698 }, { "epoch": 0.17925217506807464, "grad_norm": 190.8462677001953, "learning_rate": 1.9814225433290252e-06, "loss": 21.4688, "step": 2699 }, { "epoch": 0.17931858936043035, "grad_norm": 206.00872802734375, "learning_rate": 1.981401902865616e-06, "loss": 32.0312, "step": 2700 }, { "epoch": 0.1793850036527861, "grad_norm": 455.3744201660156, "learning_rate": 1.981381251049916e-06, "loss": 23.3125, "step": 2701 }, { "epoch": 0.1794514179451418, "grad_norm": 237.31222534179688, "learning_rate": 1.9813605878821637e-06, "loss": 25.1094, "step": 2702 }, { "epoch": 0.1795178322374975, "grad_norm": 276.8252868652344, "learning_rate": 1.9813399133625977e-06, "loss": 30.5625, "step": 2703 }, { "epoch": 0.17958424652985322, "grad_norm": 332.04547119140625, "learning_rate": 1.9813192274914574e-06, "loss": 19.0781, "step": 2704 }, { "epoch": 0.17965066082220893, "grad_norm": 309.2504577636719, "learning_rate": 1.981298530268982e-06, "loss": 23.4688, "step": 2705 }, { "epoch": 0.17971707511456467, "grad_norm": 256.1448669433594, "learning_rate": 1.9812778216954114e-06, "loss": 18.3906, "step": 2706 }, { "epoch": 0.17978348940692038, "grad_norm": 206.68466186523438, "learning_rate": 1.981257101770985e-06, "loss": 24.0312, "step": 2707 }, { "epoch": 0.1798499036992761, "grad_norm": 167.9184112548828, "learning_rate": 1.981236370495942e-06, "loss": 19.3281, "step": 2708 }, { "epoch": 0.1799163179916318, "grad_norm": 301.2571716308594, "learning_rate": 1.9812156278705223e-06, "loss": 18.9375, "step": 2709 }, { "epoch": 0.1799827322839875, "grad_norm": 111.36860656738281, "learning_rate": 1.9811948738949662e-06, "loss": 15.0, "step": 2710 }, { "epoch": 0.18004914657634322, "grad_norm": 170.75192260742188, "learning_rate": 1.981174108569514e-06, "loss": 19.9688, "step": 2711 }, { "epoch": 0.18011556086869895, "grad_norm": 142.23828125, "learning_rate": 1.9811533318944045e-06, "loss": 18.7812, "step": 2712 }, { "epoch": 0.18018197516105466, "grad_norm": 284.7915344238281, "learning_rate": 1.98113254386988e-06, "loss": 22.3438, "step": 2713 }, { "epoch": 0.18024838945341037, "grad_norm": 185.1977081298828, "learning_rate": 1.9811117444961794e-06, "loss": 16.9062, "step": 2714 }, { "epoch": 0.18031480374576608, "grad_norm": 146.49630737304688, "learning_rate": 1.9810909337735444e-06, "loss": 17.5, "step": 2715 }, { "epoch": 0.1803812180381218, "grad_norm": 295.56207275390625, "learning_rate": 1.9810701117022145e-06, "loss": 26.0312, "step": 2716 }, { "epoch": 0.18044763233047753, "grad_norm": 278.7732849121094, "learning_rate": 1.9810492782824317e-06, "loss": 29.3281, "step": 2717 }, { "epoch": 0.18051404662283324, "grad_norm": 281.7614440917969, "learning_rate": 1.9810284335144364e-06, "loss": 19.7656, "step": 2718 }, { "epoch": 0.18058046091518895, "grad_norm": 169.17034912109375, "learning_rate": 1.98100757739847e-06, "loss": 20.6562, "step": 2719 }, { "epoch": 0.18064687520754466, "grad_norm": 186.4748992919922, "learning_rate": 1.980986709934774e-06, "loss": 21.0625, "step": 2720 }, { "epoch": 0.18071328949990037, "grad_norm": 251.09689331054688, "learning_rate": 1.980965831123589e-06, "loss": 21.25, "step": 2721 }, { "epoch": 0.18077970379225608, "grad_norm": 411.7676696777344, "learning_rate": 1.9809449409651567e-06, "loss": 13.9688, "step": 2722 }, { "epoch": 0.18084611808461182, "grad_norm": 553.6217041015625, "learning_rate": 1.980924039459719e-06, "loss": 17.4531, "step": 2723 }, { "epoch": 0.18091253237696753, "grad_norm": 384.0298156738281, "learning_rate": 1.980903126607518e-06, "loss": 23.5156, "step": 2724 }, { "epoch": 0.18097894666932324, "grad_norm": 393.4729919433594, "learning_rate": 1.980882202408795e-06, "loss": 27.7969, "step": 2725 }, { "epoch": 0.18104536096167895, "grad_norm": 403.046630859375, "learning_rate": 1.9808612668637926e-06, "loss": 25.6094, "step": 2726 }, { "epoch": 0.18111177525403466, "grad_norm": 190.22654724121094, "learning_rate": 1.9808403199727525e-06, "loss": 16.0, "step": 2727 }, { "epoch": 0.1811781895463904, "grad_norm": 251.2268524169922, "learning_rate": 1.980819361735917e-06, "loss": 21.6875, "step": 2728 }, { "epoch": 0.1812446038387461, "grad_norm": 129.64768981933594, "learning_rate": 1.980798392153529e-06, "loss": 15.4375, "step": 2729 }, { "epoch": 0.18131101813110181, "grad_norm": 199.44943237304688, "learning_rate": 1.9807774112258305e-06, "loss": 22.0, "step": 2730 }, { "epoch": 0.18137743242345752, "grad_norm": 391.0553894042969, "learning_rate": 1.9807564189530644e-06, "loss": 34.75, "step": 2731 }, { "epoch": 0.18144384671581323, "grad_norm": 200.32717895507812, "learning_rate": 1.9807354153354735e-06, "loss": 20.25, "step": 2732 }, { "epoch": 0.18151026100816897, "grad_norm": 195.91404724121094, "learning_rate": 1.980714400373301e-06, "loss": 19.4062, "step": 2733 }, { "epoch": 0.18157667530052468, "grad_norm": 269.65411376953125, "learning_rate": 1.98069337406679e-06, "loss": 23.9844, "step": 2734 }, { "epoch": 0.1816430895928804, "grad_norm": 193.98797607421875, "learning_rate": 1.9806723364161837e-06, "loss": 18.1094, "step": 2735 }, { "epoch": 0.1817095038852361, "grad_norm": 392.7337951660156, "learning_rate": 1.980651287421725e-06, "loss": 16.8438, "step": 2736 }, { "epoch": 0.1817759181775918, "grad_norm": 275.1130676269531, "learning_rate": 1.9806302270836574e-06, "loss": 22.7188, "step": 2737 }, { "epoch": 0.18184233246994752, "grad_norm": 322.15179443359375, "learning_rate": 1.980609155402225e-06, "loss": 20.6094, "step": 2738 }, { "epoch": 0.18190874676230326, "grad_norm": 233.04364013671875, "learning_rate": 1.9805880723776717e-06, "loss": 26.8438, "step": 2739 }, { "epoch": 0.18197516105465897, "grad_norm": 318.4563903808594, "learning_rate": 1.9805669780102406e-06, "loss": 16.25, "step": 2740 }, { "epoch": 0.18204157534701468, "grad_norm": 233.11392211914062, "learning_rate": 1.980545872300176e-06, "loss": 17.5938, "step": 2741 }, { "epoch": 0.1821079896393704, "grad_norm": 222.4332275390625, "learning_rate": 1.980524755247723e-06, "loss": 21.0312, "step": 2742 }, { "epoch": 0.1821744039317261, "grad_norm": 233.6787109375, "learning_rate": 1.980503626853124e-06, "loss": 17.4688, "step": 2743 }, { "epoch": 0.18224081822408184, "grad_norm": 156.75967407226562, "learning_rate": 1.980482487116625e-06, "loss": 15.8906, "step": 2744 }, { "epoch": 0.18230723251643755, "grad_norm": 336.552001953125, "learning_rate": 1.98046133603847e-06, "loss": 17.6406, "step": 2745 }, { "epoch": 0.18237364680879325, "grad_norm": 164.01803588867188, "learning_rate": 1.9804401736189037e-06, "loss": 17.9062, "step": 2746 }, { "epoch": 0.18244006110114896, "grad_norm": 181.3779754638672, "learning_rate": 1.9804189998581707e-06, "loss": 23.0938, "step": 2747 }, { "epoch": 0.18250647539350467, "grad_norm": 300.9369812011719, "learning_rate": 1.980397814756516e-06, "loss": 22.5469, "step": 2748 }, { "epoch": 0.18257288968586038, "grad_norm": 465.8500061035156, "learning_rate": 1.9803766183141847e-06, "loss": 23.875, "step": 2749 }, { "epoch": 0.18263930397821612, "grad_norm": 138.9194793701172, "learning_rate": 1.980355410531422e-06, "loss": 15.2969, "step": 2750 }, { "epoch": 0.18270571827057183, "grad_norm": 208.67300415039062, "learning_rate": 1.980334191408473e-06, "loss": 17.2031, "step": 2751 }, { "epoch": 0.18277213256292754, "grad_norm": 151.38861083984375, "learning_rate": 1.9803129609455835e-06, "loss": 16.8125, "step": 2752 }, { "epoch": 0.18283854685528325, "grad_norm": 319.7602233886719, "learning_rate": 1.980291719142999e-06, "loss": 25.625, "step": 2753 }, { "epoch": 0.18290496114763896, "grad_norm": 152.8848876953125, "learning_rate": 1.9802704660009653e-06, "loss": 21.2812, "step": 2754 }, { "epoch": 0.1829713754399947, "grad_norm": 135.73655700683594, "learning_rate": 1.980249201519728e-06, "loss": 18.6406, "step": 2755 }, { "epoch": 0.1830377897323504, "grad_norm": 190.17967224121094, "learning_rate": 1.9802279256995333e-06, "loss": 22.625, "step": 2756 }, { "epoch": 0.18310420402470612, "grad_norm": 285.03936767578125, "learning_rate": 1.980206638540627e-06, "loss": 19.1562, "step": 2757 }, { "epoch": 0.18317061831706183, "grad_norm": 362.739501953125, "learning_rate": 1.9801853400432557e-06, "loss": 29.7812, "step": 2758 }, { "epoch": 0.18323703260941754, "grad_norm": 255.28575134277344, "learning_rate": 1.9801640302076652e-06, "loss": 20.4062, "step": 2759 }, { "epoch": 0.18330344690177325, "grad_norm": 268.14215087890625, "learning_rate": 1.980142709034103e-06, "loss": 24.9844, "step": 2760 }, { "epoch": 0.18336986119412899, "grad_norm": 204.3937530517578, "learning_rate": 1.9801213765228145e-06, "loss": 25.1875, "step": 2761 }, { "epoch": 0.1834362754864847, "grad_norm": 248.2540740966797, "learning_rate": 1.9801000326740477e-06, "loss": 22.3906, "step": 2762 }, { "epoch": 0.1835026897788404, "grad_norm": 255.26507568359375, "learning_rate": 1.9800786774880485e-06, "loss": 16.9062, "step": 2763 }, { "epoch": 0.18356910407119612, "grad_norm": 282.8362731933594, "learning_rate": 1.9800573109650643e-06, "loss": 19.9062, "step": 2764 }, { "epoch": 0.18363551836355183, "grad_norm": 308.49078369140625, "learning_rate": 1.9800359331053424e-06, "loss": 16.9844, "step": 2765 }, { "epoch": 0.18370193265590756, "grad_norm": 938.9805908203125, "learning_rate": 1.98001454390913e-06, "loss": 22.25, "step": 2766 }, { "epoch": 0.18376834694826327, "grad_norm": 255.21682739257812, "learning_rate": 1.979993143376674e-06, "loss": 23.5781, "step": 2767 }, { "epoch": 0.18383476124061898, "grad_norm": 869.3424682617188, "learning_rate": 1.9799717315082233e-06, "loss": 20.2969, "step": 2768 }, { "epoch": 0.1839011755329747, "grad_norm": 225.57223510742188, "learning_rate": 1.9799503083040237e-06, "loss": 22.3125, "step": 2769 }, { "epoch": 0.1839675898253304, "grad_norm": 172.216064453125, "learning_rate": 1.9799288737643243e-06, "loss": 19.2188, "step": 2770 }, { "epoch": 0.18403400411768614, "grad_norm": 281.62420654296875, "learning_rate": 1.9799074278893734e-06, "loss": 21.2188, "step": 2771 }, { "epoch": 0.18410041841004185, "grad_norm": 367.1312561035156, "learning_rate": 1.979885970679418e-06, "loss": 20.125, "step": 2772 }, { "epoch": 0.18416683270239756, "grad_norm": 208.20166015625, "learning_rate": 1.9798645021347063e-06, "loss": 16.5156, "step": 2773 }, { "epoch": 0.18423324699475327, "grad_norm": 140.46762084960938, "learning_rate": 1.9798430222554877e-06, "loss": 20.7812, "step": 2774 }, { "epoch": 0.18429966128710898, "grad_norm": 301.0347595214844, "learning_rate": 1.97982153104201e-06, "loss": 22.6406, "step": 2775 }, { "epoch": 0.1843660755794647, "grad_norm": 196.781982421875, "learning_rate": 1.9798000284945213e-06, "loss": 24.4688, "step": 2776 }, { "epoch": 0.18443248987182043, "grad_norm": 478.6892395019531, "learning_rate": 1.979778514613271e-06, "loss": 19.6719, "step": 2777 }, { "epoch": 0.18449890416417614, "grad_norm": 280.2893981933594, "learning_rate": 1.979756989398508e-06, "loss": 18.125, "step": 2778 }, { "epoch": 0.18456531845653185, "grad_norm": 185.7694091796875, "learning_rate": 1.979735452850481e-06, "loss": 20.3438, "step": 2779 }, { "epoch": 0.18463173274888756, "grad_norm": 194.98074340820312, "learning_rate": 1.9797139049694393e-06, "loss": 22.2812, "step": 2780 }, { "epoch": 0.18469814704124327, "grad_norm": 183.53036499023438, "learning_rate": 1.979692345755632e-06, "loss": 19.7812, "step": 2781 }, { "epoch": 0.184764561333599, "grad_norm": 149.34512329101562, "learning_rate": 1.979670775209309e-06, "loss": 14.5469, "step": 2782 }, { "epoch": 0.1848309756259547, "grad_norm": 179.9938201904297, "learning_rate": 1.9796491933307186e-06, "loss": 20.0625, "step": 2783 }, { "epoch": 0.18489738991831042, "grad_norm": 369.4339904785156, "learning_rate": 1.9796276001201116e-06, "loss": 16.6562, "step": 2784 }, { "epoch": 0.18496380421066613, "grad_norm": 242.05982971191406, "learning_rate": 1.9796059955777374e-06, "loss": 22.2188, "step": 2785 }, { "epoch": 0.18503021850302184, "grad_norm": 287.7071838378906, "learning_rate": 1.979584379703846e-06, "loss": 16.1094, "step": 2786 }, { "epoch": 0.18509663279537755, "grad_norm": 217.9933624267578, "learning_rate": 1.9795627524986877e-06, "loss": 33.0312, "step": 2787 }, { "epoch": 0.1851630470877333, "grad_norm": 292.9227294921875, "learning_rate": 1.9795411139625117e-06, "loss": 19.0, "step": 2788 }, { "epoch": 0.185229461380089, "grad_norm": 207.5773162841797, "learning_rate": 1.979519464095569e-06, "loss": 21.3438, "step": 2789 }, { "epoch": 0.1852958756724447, "grad_norm": 672.6174926757812, "learning_rate": 1.9794978028981104e-06, "loss": 33.125, "step": 2790 }, { "epoch": 0.18536228996480042, "grad_norm": 186.84837341308594, "learning_rate": 1.9794761303703858e-06, "loss": 19.5312, "step": 2791 }, { "epoch": 0.18542870425715613, "grad_norm": 173.3604736328125, "learning_rate": 1.979454446512646e-06, "loss": 14.4688, "step": 2792 }, { "epoch": 0.18549511854951187, "grad_norm": 196.0865936279297, "learning_rate": 1.979432751325142e-06, "loss": 24.8594, "step": 2793 }, { "epoch": 0.18556153284186758, "grad_norm": 111.78138732910156, "learning_rate": 1.979411044808125e-06, "loss": 12.6875, "step": 2794 }, { "epoch": 0.1856279471342233, "grad_norm": 314.72113037109375, "learning_rate": 1.9793893269618454e-06, "loss": 21.4531, "step": 2795 }, { "epoch": 0.185694361426579, "grad_norm": 145.91400146484375, "learning_rate": 1.979367597786555e-06, "loss": 14.5156, "step": 2796 }, { "epoch": 0.1857607757189347, "grad_norm": 995.7431030273438, "learning_rate": 1.9793458572825046e-06, "loss": 18.9844, "step": 2797 }, { "epoch": 0.18582719001129042, "grad_norm": 172.70213317871094, "learning_rate": 1.9793241054499464e-06, "loss": 19.375, "step": 2798 }, { "epoch": 0.18589360430364615, "grad_norm": 382.3808288574219, "learning_rate": 1.979302342289132e-06, "loss": 22.5938, "step": 2799 }, { "epoch": 0.18596001859600186, "grad_norm": 185.99220275878906, "learning_rate": 1.979280567800312e-06, "loss": 20.3281, "step": 2800 }, { "epoch": 0.18602643288835757, "grad_norm": 164.6732177734375, "learning_rate": 1.9792587819837395e-06, "loss": 19.25, "step": 2801 }, { "epoch": 0.18609284718071328, "grad_norm": 266.5915222167969, "learning_rate": 1.9792369848396662e-06, "loss": 21.4219, "step": 2802 }, { "epoch": 0.186159261473069, "grad_norm": 231.90089416503906, "learning_rate": 1.979215176368344e-06, "loss": 19.9062, "step": 2803 }, { "epoch": 0.18622567576542473, "grad_norm": 270.9974365234375, "learning_rate": 1.9791933565700253e-06, "loss": 17.0312, "step": 2804 }, { "epoch": 0.18629209005778044, "grad_norm": 319.1421813964844, "learning_rate": 1.9791715254449625e-06, "loss": 23.5625, "step": 2805 }, { "epoch": 0.18635850435013615, "grad_norm": 162.50634765625, "learning_rate": 1.979149682993408e-06, "loss": 15.2188, "step": 2806 }, { "epoch": 0.18642491864249186, "grad_norm": 247.89613342285156, "learning_rate": 1.9791278292156146e-06, "loss": 23.875, "step": 2807 }, { "epoch": 0.18649133293484757, "grad_norm": 188.51620483398438, "learning_rate": 1.9791059641118353e-06, "loss": 22.0312, "step": 2808 }, { "epoch": 0.1865577472272033, "grad_norm": 260.4329833984375, "learning_rate": 1.979084087682323e-06, "loss": 19.6094, "step": 2809 }, { "epoch": 0.18662416151955902, "grad_norm": 206.31398010253906, "learning_rate": 1.97906219992733e-06, "loss": 20.0, "step": 2810 }, { "epoch": 0.18669057581191473, "grad_norm": 1110.5469970703125, "learning_rate": 1.97904030084711e-06, "loss": 17.8438, "step": 2811 }, { "epoch": 0.18675699010427044, "grad_norm": 164.55711364746094, "learning_rate": 1.979018390441917e-06, "loss": 19.6875, "step": 2812 }, { "epoch": 0.18682340439662615, "grad_norm": 224.5111846923828, "learning_rate": 1.978996468712003e-06, "loss": 16.5312, "step": 2813 }, { "epoch": 0.18688981868898186, "grad_norm": 200.76686096191406, "learning_rate": 1.978974535657623e-06, "loss": 21.5156, "step": 2814 }, { "epoch": 0.1869562329813376, "grad_norm": 150.1851348876953, "learning_rate": 1.9789525912790297e-06, "loss": 18.5938, "step": 2815 }, { "epoch": 0.1870226472736933, "grad_norm": 159.46630859375, "learning_rate": 1.978930635576477e-06, "loss": 18.5938, "step": 2816 }, { "epoch": 0.18708906156604901, "grad_norm": 193.51817321777344, "learning_rate": 1.9789086685502202e-06, "loss": 28.1562, "step": 2817 }, { "epoch": 0.18715547585840472, "grad_norm": 345.3730773925781, "learning_rate": 1.9788866902005114e-06, "loss": 21.25, "step": 2818 }, { "epoch": 0.18722189015076043, "grad_norm": 230.1956024169922, "learning_rate": 1.9788647005276063e-06, "loss": 22.375, "step": 2819 }, { "epoch": 0.18728830444311617, "grad_norm": 315.4277648925781, "learning_rate": 1.978842699531759e-06, "loss": 16.9219, "step": 2820 }, { "epoch": 0.18735471873547188, "grad_norm": 442.7695617675781, "learning_rate": 1.9788206872132234e-06, "loss": 21.0, "step": 2821 }, { "epoch": 0.1874211330278276, "grad_norm": 136.68179321289062, "learning_rate": 1.9787986635722545e-06, "loss": 17.0, "step": 2822 }, { "epoch": 0.1874875473201833, "grad_norm": 223.85006713867188, "learning_rate": 1.9787766286091073e-06, "loss": 18.7188, "step": 2823 }, { "epoch": 0.187553961612539, "grad_norm": 216.1411590576172, "learning_rate": 1.9787545823240365e-06, "loss": 23.625, "step": 2824 }, { "epoch": 0.18762037590489472, "grad_norm": 412.2438659667969, "learning_rate": 1.9787325247172968e-06, "loss": 24.5938, "step": 2825 }, { "epoch": 0.18768679019725046, "grad_norm": 174.92152404785156, "learning_rate": 1.9787104557891435e-06, "loss": 15.875, "step": 2826 }, { "epoch": 0.18775320448960617, "grad_norm": 465.6986389160156, "learning_rate": 1.9786883755398324e-06, "loss": 16.4219, "step": 2827 }, { "epoch": 0.18781961878196188, "grad_norm": 432.4377136230469, "learning_rate": 1.9786662839696185e-06, "loss": 20.0547, "step": 2828 }, { "epoch": 0.1878860330743176, "grad_norm": 313.71124267578125, "learning_rate": 1.978644181078757e-06, "loss": 20.2969, "step": 2829 }, { "epoch": 0.1879524473666733, "grad_norm": 194.23550415039062, "learning_rate": 1.9786220668675042e-06, "loss": 17.625, "step": 2830 }, { "epoch": 0.18801886165902904, "grad_norm": 242.5272979736328, "learning_rate": 1.9785999413361155e-06, "loss": 20.8125, "step": 2831 }, { "epoch": 0.18808527595138474, "grad_norm": 126.34123992919922, "learning_rate": 1.978577804484847e-06, "loss": 17.1406, "step": 2832 }, { "epoch": 0.18815169024374045, "grad_norm": 149.64706420898438, "learning_rate": 1.978555656313955e-06, "loss": 18.5781, "step": 2833 }, { "epoch": 0.18821810453609616, "grad_norm": 222.2075653076172, "learning_rate": 1.9785334968236952e-06, "loss": 25.1875, "step": 2834 }, { "epoch": 0.18828451882845187, "grad_norm": 192.60731506347656, "learning_rate": 1.978511326014324e-06, "loss": 16.9531, "step": 2835 }, { "epoch": 0.18835093312080758, "grad_norm": 293.822509765625, "learning_rate": 1.9784891438860984e-06, "loss": 21.8438, "step": 2836 }, { "epoch": 0.18841734741316332, "grad_norm": 150.3720245361328, "learning_rate": 1.978466950439274e-06, "loss": 17.7812, "step": 2837 }, { "epoch": 0.18848376170551903, "grad_norm": 239.53016662597656, "learning_rate": 1.978444745674109e-06, "loss": 16.2969, "step": 2838 }, { "epoch": 0.18855017599787474, "grad_norm": 230.95257568359375, "learning_rate": 1.978422529590859e-06, "loss": 17.9688, "step": 2839 }, { "epoch": 0.18861659029023045, "grad_norm": 128.5941619873047, "learning_rate": 1.978400302189781e-06, "loss": 13.9688, "step": 2840 }, { "epoch": 0.18868300458258616, "grad_norm": 176.41148376464844, "learning_rate": 1.978378063471133e-06, "loss": 17.4219, "step": 2841 }, { "epoch": 0.1887494188749419, "grad_norm": 158.03805541992188, "learning_rate": 1.9783558134351717e-06, "loss": 18.125, "step": 2842 }, { "epoch": 0.1888158331672976, "grad_norm": 182.74818420410156, "learning_rate": 1.978333552082154e-06, "loss": 21.7812, "step": 2843 }, { "epoch": 0.18888224745965332, "grad_norm": 187.21717834472656, "learning_rate": 1.9783112794123387e-06, "loss": 19.75, "step": 2844 }, { "epoch": 0.18894866175200903, "grad_norm": 171.8429412841797, "learning_rate": 1.978288995425982e-06, "loss": 17.4688, "step": 2845 }, { "epoch": 0.18901507604436474, "grad_norm": 184.21929931640625, "learning_rate": 1.9782667001233425e-06, "loss": 16.0625, "step": 2846 }, { "epoch": 0.18908149033672048, "grad_norm": 298.3687438964844, "learning_rate": 1.978244393504678e-06, "loss": 18.6094, "step": 2847 }, { "epoch": 0.18914790462907619, "grad_norm": 211.98304748535156, "learning_rate": 1.9782220755702464e-06, "loss": 14.8516, "step": 2848 }, { "epoch": 0.1892143189214319, "grad_norm": 226.9105987548828, "learning_rate": 1.978199746320306e-06, "loss": 18.0781, "step": 2849 }, { "epoch": 0.1892807332137876, "grad_norm": 185.75376892089844, "learning_rate": 1.9781774057551147e-06, "loss": 19.75, "step": 2850 }, { "epoch": 0.18934714750614332, "grad_norm": 291.1747131347656, "learning_rate": 1.978155053874931e-06, "loss": 15.1719, "step": 2851 }, { "epoch": 0.18941356179849902, "grad_norm": 234.7545166015625, "learning_rate": 1.9781326906800145e-06, "loss": 19.4062, "step": 2852 }, { "epoch": 0.18947997609085476, "grad_norm": 249.39466857910156, "learning_rate": 1.9781103161706224e-06, "loss": 17.0625, "step": 2853 }, { "epoch": 0.18954639038321047, "grad_norm": 141.72190856933594, "learning_rate": 1.9780879303470144e-06, "loss": 18.2031, "step": 2854 }, { "epoch": 0.18961280467556618, "grad_norm": 304.1312561035156, "learning_rate": 1.978065533209449e-06, "loss": 19.5, "step": 2855 }, { "epoch": 0.1896792189679219, "grad_norm": 357.4737548828125, "learning_rate": 1.978043124758186e-06, "loss": 28.7031, "step": 2856 }, { "epoch": 0.1897456332602776, "grad_norm": 212.70645141601562, "learning_rate": 1.9780207049934838e-06, "loss": 19.5469, "step": 2857 }, { "epoch": 0.18981204755263334, "grad_norm": 476.0568542480469, "learning_rate": 1.977998273915602e-06, "loss": 23.9375, "step": 2858 }, { "epoch": 0.18987846184498905, "grad_norm": 437.1686096191406, "learning_rate": 1.9779758315248005e-06, "loss": 17.75, "step": 2859 }, { "epoch": 0.18994487613734476, "grad_norm": 251.00892639160156, "learning_rate": 1.977953377821338e-06, "loss": 22.8438, "step": 2860 }, { "epoch": 0.19001129042970047, "grad_norm": 206.97906494140625, "learning_rate": 1.977930912805475e-06, "loss": 21.0781, "step": 2861 }, { "epoch": 0.19007770472205618, "grad_norm": 194.4879913330078, "learning_rate": 1.9779084364774706e-06, "loss": 27.9062, "step": 2862 }, { "epoch": 0.1901441190144119, "grad_norm": 215.31021118164062, "learning_rate": 1.977885948837586e-06, "loss": 19.6094, "step": 2863 }, { "epoch": 0.19021053330676763, "grad_norm": 303.87060546875, "learning_rate": 1.9778634498860803e-06, "loss": 25.2188, "step": 2864 }, { "epoch": 0.19027694759912334, "grad_norm": 165.1896514892578, "learning_rate": 1.977840939623214e-06, "loss": 14.875, "step": 2865 }, { "epoch": 0.19034336189147905, "grad_norm": 422.62615966796875, "learning_rate": 1.977818418049248e-06, "loss": 25.8125, "step": 2866 }, { "epoch": 0.19040977618383476, "grad_norm": 178.31024169921875, "learning_rate": 1.9777958851644423e-06, "loss": 19.3125, "step": 2867 }, { "epoch": 0.19047619047619047, "grad_norm": 121.96979522705078, "learning_rate": 1.977773340969057e-06, "loss": 16.1094, "step": 2868 }, { "epoch": 0.1905426047685462, "grad_norm": 589.1714477539062, "learning_rate": 1.977750785463354e-06, "loss": 23.25, "step": 2869 }, { "epoch": 0.1906090190609019, "grad_norm": 368.1482849121094, "learning_rate": 1.9777282186475938e-06, "loss": 17.3438, "step": 2870 }, { "epoch": 0.19067543335325762, "grad_norm": 162.99472045898438, "learning_rate": 1.977705640522037e-06, "loss": 18.9375, "step": 2871 }, { "epoch": 0.19074184764561333, "grad_norm": 181.65184020996094, "learning_rate": 1.9776830510869456e-06, "loss": 24.4062, "step": 2872 }, { "epoch": 0.19080826193796904, "grad_norm": 306.0671081542969, "learning_rate": 1.97766045034258e-06, "loss": 18.125, "step": 2873 }, { "epoch": 0.19087467623032475, "grad_norm": 1221.0072021484375, "learning_rate": 1.977637838289202e-06, "loss": 19.1406, "step": 2874 }, { "epoch": 0.1909410905226805, "grad_norm": 353.27789306640625, "learning_rate": 1.9776152149270736e-06, "loss": 22.1719, "step": 2875 }, { "epoch": 0.1910075048150362, "grad_norm": 294.3923034667969, "learning_rate": 1.9775925802564563e-06, "loss": 32.4062, "step": 2876 }, { "epoch": 0.1910739191073919, "grad_norm": 144.16818237304688, "learning_rate": 1.977569934277611e-06, "loss": 21.0625, "step": 2877 }, { "epoch": 0.19114033339974762, "grad_norm": 2079.208984375, "learning_rate": 1.977547276990801e-06, "loss": 21.4062, "step": 2878 }, { "epoch": 0.19120674769210333, "grad_norm": 401.4871826171875, "learning_rate": 1.9775246083962874e-06, "loss": 28.0938, "step": 2879 }, { "epoch": 0.19127316198445907, "grad_norm": 214.4935302734375, "learning_rate": 1.977501928494333e-06, "loss": 20.5156, "step": 2880 }, { "epoch": 0.19133957627681478, "grad_norm": 400.1295166015625, "learning_rate": 1.9774792372852e-06, "loss": 22.0312, "step": 2881 }, { "epoch": 0.1914059905691705, "grad_norm": 217.3887481689453, "learning_rate": 1.9774565347691506e-06, "loss": 15.2031, "step": 2882 }, { "epoch": 0.1914724048615262, "grad_norm": 266.9554138183594, "learning_rate": 1.977433820946448e-06, "loss": 18.3594, "step": 2883 }, { "epoch": 0.1915388191538819, "grad_norm": 227.71530151367188, "learning_rate": 1.9774110958173546e-06, "loss": 20.6094, "step": 2884 }, { "epoch": 0.19160523344623764, "grad_norm": 159.37664794921875, "learning_rate": 1.9773883593821327e-06, "loss": 21.4688, "step": 2885 }, { "epoch": 0.19167164773859335, "grad_norm": 964.6797485351562, "learning_rate": 1.9773656116410466e-06, "loss": 22.7812, "step": 2886 }, { "epoch": 0.19173806203094906, "grad_norm": 423.961181640625, "learning_rate": 1.977342852594358e-06, "loss": 20.6562, "step": 2887 }, { "epoch": 0.19180447632330477, "grad_norm": 253.05760192871094, "learning_rate": 1.9773200822423314e-06, "loss": 19.3438, "step": 2888 }, { "epoch": 0.19187089061566048, "grad_norm": 177.80284118652344, "learning_rate": 1.977297300585229e-06, "loss": 17.2031, "step": 2889 }, { "epoch": 0.1919373049080162, "grad_norm": 326.5075988769531, "learning_rate": 1.9772745076233155e-06, "loss": 26.5625, "step": 2890 }, { "epoch": 0.19200371920037193, "grad_norm": 495.17034912109375, "learning_rate": 1.977251703356854e-06, "loss": 20.875, "step": 2891 }, { "epoch": 0.19207013349272764, "grad_norm": 203.29440307617188, "learning_rate": 1.977228887786108e-06, "loss": 18.3594, "step": 2892 }, { "epoch": 0.19213654778508335, "grad_norm": 458.2315979003906, "learning_rate": 1.977206060911342e-06, "loss": 23.125, "step": 2893 }, { "epoch": 0.19220296207743906, "grad_norm": 166.44436645507812, "learning_rate": 1.9771832227328197e-06, "loss": 20.6562, "step": 2894 }, { "epoch": 0.19226937636979477, "grad_norm": 202.51611328125, "learning_rate": 1.9771603732508052e-06, "loss": 20.1875, "step": 2895 }, { "epoch": 0.1923357906621505, "grad_norm": 312.4392395019531, "learning_rate": 1.977137512465563e-06, "loss": 26.2656, "step": 2896 }, { "epoch": 0.19240220495450622, "grad_norm": 154.22251892089844, "learning_rate": 1.977114640377358e-06, "loss": 21.0625, "step": 2897 }, { "epoch": 0.19246861924686193, "grad_norm": 158.43386840820312, "learning_rate": 1.9770917569864536e-06, "loss": 17.3281, "step": 2898 }, { "epoch": 0.19253503353921764, "grad_norm": 126.4160385131836, "learning_rate": 1.9770688622931153e-06, "loss": 15.7344, "step": 2899 }, { "epoch": 0.19260144783157335, "grad_norm": 290.1101989746094, "learning_rate": 1.977045956297608e-06, "loss": 21.4531, "step": 2900 }, { "epoch": 0.19266786212392906, "grad_norm": 298.58685302734375, "learning_rate": 1.9770230390001966e-06, "loss": 27.5, "step": 2901 }, { "epoch": 0.1927342764162848, "grad_norm": 403.2672424316406, "learning_rate": 1.9770001104011464e-06, "loss": 26.1719, "step": 2902 }, { "epoch": 0.1928006907086405, "grad_norm": 292.51043701171875, "learning_rate": 1.9769771705007217e-06, "loss": 18.2812, "step": 2903 }, { "epoch": 0.19286710500099621, "grad_norm": 182.16177368164062, "learning_rate": 1.9769542192991885e-06, "loss": 21.3125, "step": 2904 }, { "epoch": 0.19293351929335192, "grad_norm": 173.60379028320312, "learning_rate": 1.9769312567968123e-06, "loss": 24.1562, "step": 2905 }, { "epoch": 0.19299993358570763, "grad_norm": 440.4042053222656, "learning_rate": 1.9769082829938583e-06, "loss": 27.7188, "step": 2906 }, { "epoch": 0.19306634787806337, "grad_norm": 201.07196044921875, "learning_rate": 1.9768852978905933e-06, "loss": 21.75, "step": 2907 }, { "epoch": 0.19313276217041908, "grad_norm": 203.34490966796875, "learning_rate": 1.976862301487282e-06, "loss": 24.625, "step": 2908 }, { "epoch": 0.1931991764627748, "grad_norm": 261.75177001953125, "learning_rate": 1.976839293784191e-06, "loss": 27.7969, "step": 2909 }, { "epoch": 0.1932655907551305, "grad_norm": 327.8131408691406, "learning_rate": 1.9768162747815863e-06, "loss": 31.0469, "step": 2910 }, { "epoch": 0.1933320050474862, "grad_norm": 108.94833374023438, "learning_rate": 1.9767932444797344e-06, "loss": 17.2656, "step": 2911 }, { "epoch": 0.19339841933984192, "grad_norm": 288.9325256347656, "learning_rate": 1.9767702028789013e-06, "loss": 18.6406, "step": 2912 }, { "epoch": 0.19346483363219766, "grad_norm": 160.3654327392578, "learning_rate": 1.9767471499793538e-06, "loss": 19.9375, "step": 2913 }, { "epoch": 0.19353124792455337, "grad_norm": 185.69403076171875, "learning_rate": 1.9767240857813583e-06, "loss": 18.7344, "step": 2914 }, { "epoch": 0.19359766221690908, "grad_norm": 174.83287048339844, "learning_rate": 1.976701010285182e-06, "loss": 20.9688, "step": 2915 }, { "epoch": 0.1936640765092648, "grad_norm": 371.37408447265625, "learning_rate": 1.9766779234910916e-06, "loss": 28.0312, "step": 2916 }, { "epoch": 0.1937304908016205, "grad_norm": 208.0980224609375, "learning_rate": 1.976654825399354e-06, "loss": 18.7188, "step": 2917 }, { "epoch": 0.19379690509397623, "grad_norm": 265.0218505859375, "learning_rate": 1.976631716010237e-06, "loss": 16.4219, "step": 2918 }, { "epoch": 0.19386331938633194, "grad_norm": 912.6685180664062, "learning_rate": 1.976608595324007e-06, "loss": 18.7656, "step": 2919 }, { "epoch": 0.19392973367868765, "grad_norm": 226.2706298828125, "learning_rate": 1.976585463340932e-06, "loss": 22.1875, "step": 2920 }, { "epoch": 0.19399614797104336, "grad_norm": 388.71270751953125, "learning_rate": 1.9765623200612794e-06, "loss": 24.9062, "step": 2921 }, { "epoch": 0.19406256226339907, "grad_norm": 224.26614379882812, "learning_rate": 1.9765391654853174e-06, "loss": 21.6406, "step": 2922 }, { "epoch": 0.1941289765557548, "grad_norm": 529.0150756835938, "learning_rate": 1.976515999613313e-06, "loss": 21.3125, "step": 2923 }, { "epoch": 0.19419539084811052, "grad_norm": 361.7767333984375, "learning_rate": 1.976492822445535e-06, "loss": 19.9062, "step": 2924 }, { "epoch": 0.19426180514046623, "grad_norm": 363.49908447265625, "learning_rate": 1.976469633982251e-06, "loss": 25.1406, "step": 2925 }, { "epoch": 0.19432821943282194, "grad_norm": 154.4506378173828, "learning_rate": 1.976446434223729e-06, "loss": 17.5781, "step": 2926 }, { "epoch": 0.19439463372517765, "grad_norm": 1170.6123046875, "learning_rate": 1.9764232231702383e-06, "loss": 20.875, "step": 2927 }, { "epoch": 0.19446104801753336, "grad_norm": 231.29983520507812, "learning_rate": 1.976400000822046e-06, "loss": 22.6094, "step": 2928 }, { "epoch": 0.1945274623098891, "grad_norm": 264.0614318847656, "learning_rate": 1.9763767671794223e-06, "loss": 20.6562, "step": 2929 }, { "epoch": 0.1945938766022448, "grad_norm": 260.1766052246094, "learning_rate": 1.9763535222426347e-06, "loss": 17.1094, "step": 2930 }, { "epoch": 0.19466029089460052, "grad_norm": 393.04522705078125, "learning_rate": 1.976330266011953e-06, "loss": 22.3594, "step": 2931 }, { "epoch": 0.19472670518695623, "grad_norm": 209.56497192382812, "learning_rate": 1.976306998487645e-06, "loss": 19.875, "step": 2932 }, { "epoch": 0.19479311947931194, "grad_norm": 198.93960571289062, "learning_rate": 1.9762837196699814e-06, "loss": 21.3125, "step": 2933 }, { "epoch": 0.19485953377166768, "grad_norm": 201.28811645507812, "learning_rate": 1.97626042955923e-06, "loss": 19.5312, "step": 2934 }, { "epoch": 0.19492594806402339, "grad_norm": 355.85076904296875, "learning_rate": 1.976237128155661e-06, "loss": 18.0312, "step": 2935 }, { "epoch": 0.1949923623563791, "grad_norm": 269.4599609375, "learning_rate": 1.9762138154595447e-06, "loss": 25.3438, "step": 2936 }, { "epoch": 0.1950587766487348, "grad_norm": 268.4314880371094, "learning_rate": 1.976190491471149e-06, "loss": 19.9219, "step": 2937 }, { "epoch": 0.19512519094109051, "grad_norm": 245.72549438476562, "learning_rate": 1.9761671561907447e-06, "loss": 22.9219, "step": 2938 }, { "epoch": 0.19519160523344622, "grad_norm": 576.0595092773438, "learning_rate": 1.976143809618602e-06, "loss": 30.1875, "step": 2939 }, { "epoch": 0.19525801952580196, "grad_norm": 194.51449584960938, "learning_rate": 1.9761204517549907e-06, "loss": 20.0469, "step": 2940 }, { "epoch": 0.19532443381815767, "grad_norm": 422.78887939453125, "learning_rate": 1.9760970826001803e-06, "loss": 26.75, "step": 2941 }, { "epoch": 0.19539084811051338, "grad_norm": 347.6509704589844, "learning_rate": 1.976073702154442e-06, "loss": 22.7656, "step": 2942 }, { "epoch": 0.1954572624028691, "grad_norm": 166.3556365966797, "learning_rate": 1.976050310418046e-06, "loss": 16.75, "step": 2943 }, { "epoch": 0.1955236766952248, "grad_norm": 767.3701171875, "learning_rate": 1.9760269073912624e-06, "loss": 18.7812, "step": 2944 }, { "epoch": 0.19559009098758054, "grad_norm": 278.057373046875, "learning_rate": 1.976003493074363e-06, "loss": 20.0781, "step": 2945 }, { "epoch": 0.19565650527993625, "grad_norm": 509.6607971191406, "learning_rate": 1.9759800674676173e-06, "loss": 21.125, "step": 2946 }, { "epoch": 0.19572291957229196, "grad_norm": 135.49229431152344, "learning_rate": 1.9759566305712977e-06, "loss": 20.9062, "step": 2947 }, { "epoch": 0.19578933386464767, "grad_norm": 272.2037658691406, "learning_rate": 1.975933182385674e-06, "loss": 21.5312, "step": 2948 }, { "epoch": 0.19585574815700338, "grad_norm": 226.49449157714844, "learning_rate": 1.975909722911018e-06, "loss": 17.75, "step": 2949 }, { "epoch": 0.1959221624493591, "grad_norm": 184.36570739746094, "learning_rate": 1.9758862521476015e-06, "loss": 23.0938, "step": 2950 }, { "epoch": 0.19598857674171483, "grad_norm": 195.53936767578125, "learning_rate": 1.9758627700956955e-06, "loss": 21.4688, "step": 2951 }, { "epoch": 0.19605499103407054, "grad_norm": 272.25225830078125, "learning_rate": 1.9758392767555716e-06, "loss": 23.125, "step": 2952 }, { "epoch": 0.19612140532642625, "grad_norm": 188.25155639648438, "learning_rate": 1.975815772127501e-06, "loss": 19.625, "step": 2953 }, { "epoch": 0.19618781961878196, "grad_norm": 271.9825744628906, "learning_rate": 1.9757922562117575e-06, "loss": 17.7188, "step": 2954 }, { "epoch": 0.19625423391113767, "grad_norm": 123.41973876953125, "learning_rate": 1.975768729008611e-06, "loss": 13.75, "step": 2955 }, { "epoch": 0.1963206482034934, "grad_norm": 243.95960998535156, "learning_rate": 1.975745190518335e-06, "loss": 16.1094, "step": 2956 }, { "epoch": 0.1963870624958491, "grad_norm": 363.5257263183594, "learning_rate": 1.9757216407412006e-06, "loss": 18.7031, "step": 2957 }, { "epoch": 0.19645347678820482, "grad_norm": 125.87448120117188, "learning_rate": 1.975698079677481e-06, "loss": 16.7969, "step": 2958 }, { "epoch": 0.19651989108056053, "grad_norm": 280.79827880859375, "learning_rate": 1.975674507327449e-06, "loss": 24.4688, "step": 2959 }, { "epoch": 0.19658630537291624, "grad_norm": 187.85189819335938, "learning_rate": 1.975650923691377e-06, "loss": 18.6719, "step": 2960 }, { "epoch": 0.19665271966527198, "grad_norm": 171.8811798095703, "learning_rate": 1.9756273287695373e-06, "loss": 25.7812, "step": 2961 }, { "epoch": 0.1967191339576277, "grad_norm": 117.46649932861328, "learning_rate": 1.9756037225622032e-06, "loss": 17.7812, "step": 2962 }, { "epoch": 0.1967855482499834, "grad_norm": 154.02838134765625, "learning_rate": 1.9755801050696485e-06, "loss": 15.2188, "step": 2963 }, { "epoch": 0.1968519625423391, "grad_norm": 210.7464599609375, "learning_rate": 1.975556476292145e-06, "loss": 24.7031, "step": 2964 }, { "epoch": 0.19691837683469482, "grad_norm": 211.4983367919922, "learning_rate": 1.975532836229967e-06, "loss": 24.0, "step": 2965 }, { "epoch": 0.19698479112705053, "grad_norm": 263.67669677734375, "learning_rate": 1.9755091848833874e-06, "loss": 18.125, "step": 2966 }, { "epoch": 0.19705120541940627, "grad_norm": 906.86328125, "learning_rate": 1.9754855222526807e-06, "loss": 21.5469, "step": 2967 }, { "epoch": 0.19711761971176198, "grad_norm": 173.62779235839844, "learning_rate": 1.9754618483381195e-06, "loss": 19.6562, "step": 2968 }, { "epoch": 0.1971840340041177, "grad_norm": 193.52943420410156, "learning_rate": 1.975438163139978e-06, "loss": 19.3438, "step": 2969 }, { "epoch": 0.1972504482964734, "grad_norm": 127.17327880859375, "learning_rate": 1.9754144666585308e-06, "loss": 17.8906, "step": 2970 }, { "epoch": 0.1973168625888291, "grad_norm": 291.7408142089844, "learning_rate": 1.9753907588940512e-06, "loss": 17.5625, "step": 2971 }, { "epoch": 0.19738327688118484, "grad_norm": 177.52210998535156, "learning_rate": 1.9753670398468133e-06, "loss": 22.875, "step": 2972 }, { "epoch": 0.19744969117354055, "grad_norm": 130.50955200195312, "learning_rate": 1.9753433095170923e-06, "loss": 14.7188, "step": 2973 }, { "epoch": 0.19751610546589626, "grad_norm": 675.5511474609375, "learning_rate": 1.9753195679051626e-06, "loss": 20.9688, "step": 2974 }, { "epoch": 0.19758251975825197, "grad_norm": 165.65322875976562, "learning_rate": 1.9752958150112983e-06, "loss": 21.6719, "step": 2975 }, { "epoch": 0.19764893405060768, "grad_norm": 565.95654296875, "learning_rate": 1.975272050835774e-06, "loss": 20.0938, "step": 2976 }, { "epoch": 0.1977153483429634, "grad_norm": 360.9781799316406, "learning_rate": 1.9752482753788656e-06, "loss": 19.0156, "step": 2977 }, { "epoch": 0.19778176263531913, "grad_norm": 302.24884033203125, "learning_rate": 1.9752244886408474e-06, "loss": 23.2344, "step": 2978 }, { "epoch": 0.19784817692767484, "grad_norm": 447.6671142578125, "learning_rate": 1.975200690621995e-06, "loss": 14.9219, "step": 2979 }, { "epoch": 0.19791459122003055, "grad_norm": 271.0928039550781, "learning_rate": 1.9751768813225824e-06, "loss": 26.6875, "step": 2980 }, { "epoch": 0.19798100551238626, "grad_norm": 407.8655700683594, "learning_rate": 1.9751530607428866e-06, "loss": 20.0469, "step": 2981 }, { "epoch": 0.19804741980474197, "grad_norm": 286.4283142089844, "learning_rate": 1.9751292288831823e-06, "loss": 19.7969, "step": 2982 }, { "epoch": 0.1981138340970977, "grad_norm": 299.3741760253906, "learning_rate": 1.9751053857437452e-06, "loss": 23.4844, "step": 2983 }, { "epoch": 0.19818024838945342, "grad_norm": 318.4007263183594, "learning_rate": 1.9750815313248515e-06, "loss": 23.7031, "step": 2984 }, { "epoch": 0.19824666268180913, "grad_norm": 7177.4189453125, "learning_rate": 1.9750576656267773e-06, "loss": 21.125, "step": 2985 }, { "epoch": 0.19831307697416484, "grad_norm": 229.2625274658203, "learning_rate": 1.975033788649798e-06, "loss": 15.2031, "step": 2986 }, { "epoch": 0.19837949126652055, "grad_norm": 295.79949951171875, "learning_rate": 1.9750099003941897e-06, "loss": 21.625, "step": 2987 }, { "epoch": 0.19844590555887626, "grad_norm": 150.15628051757812, "learning_rate": 1.9749860008602295e-06, "loss": 17.6562, "step": 2988 }, { "epoch": 0.198512319851232, "grad_norm": 729.5923461914062, "learning_rate": 1.9749620900481933e-06, "loss": 23.125, "step": 2989 }, { "epoch": 0.1985787341435877, "grad_norm": 329.8515625, "learning_rate": 1.9749381679583576e-06, "loss": 26.7656, "step": 2990 }, { "epoch": 0.1986451484359434, "grad_norm": 612.2587280273438, "learning_rate": 1.9749142345909996e-06, "loss": 27.0625, "step": 2991 }, { "epoch": 0.19871156272829912, "grad_norm": 443.2149658203125, "learning_rate": 1.974890289946396e-06, "loss": 26.2812, "step": 2992 }, { "epoch": 0.19877797702065483, "grad_norm": 212.19284057617188, "learning_rate": 1.9748663340248236e-06, "loss": 19.3125, "step": 2993 }, { "epoch": 0.19884439131301057, "grad_norm": 187.50540161132812, "learning_rate": 1.9748423668265596e-06, "loss": 19.4219, "step": 2994 }, { "epoch": 0.19891080560536628, "grad_norm": 263.8198547363281, "learning_rate": 1.974818388351881e-06, "loss": 28.9062, "step": 2995 }, { "epoch": 0.198977219897722, "grad_norm": 231.6014862060547, "learning_rate": 1.9747943986010655e-06, "loss": 18.8438, "step": 2996 }, { "epoch": 0.1990436341900777, "grad_norm": 195.43934631347656, "learning_rate": 1.9747703975743906e-06, "loss": 17.25, "step": 2997 }, { "epoch": 0.1991100484824334, "grad_norm": 249.44825744628906, "learning_rate": 1.974746385272134e-06, "loss": 19.4375, "step": 2998 }, { "epoch": 0.19917646277478915, "grad_norm": 640.5941772460938, "learning_rate": 1.974722361694573e-06, "loss": 31.1875, "step": 2999 }, { "epoch": 0.19924287706714486, "grad_norm": 191.59996032714844, "learning_rate": 1.974698326841986e-06, "loss": 15.9688, "step": 3000 }, { "epoch": 0.19930929135950057, "grad_norm": 287.4737243652344, "learning_rate": 1.9746742807146504e-06, "loss": 27.5938, "step": 3001 }, { "epoch": 0.19937570565185628, "grad_norm": 336.15216064453125, "learning_rate": 1.974650223312845e-06, "loss": 21.9844, "step": 3002 }, { "epoch": 0.199442119944212, "grad_norm": 307.6324157714844, "learning_rate": 1.974626154636848e-06, "loss": 18.7969, "step": 3003 }, { "epoch": 0.1995085342365677, "grad_norm": 839.64208984375, "learning_rate": 1.9746020746869376e-06, "loss": 29.6719, "step": 3004 }, { "epoch": 0.19957494852892343, "grad_norm": 129.23941040039062, "learning_rate": 1.9745779834633926e-06, "loss": 14.6719, "step": 3005 }, { "epoch": 0.19964136282127914, "grad_norm": 198.0585479736328, "learning_rate": 1.974553880966491e-06, "loss": 20.2656, "step": 3006 }, { "epoch": 0.19970777711363485, "grad_norm": 537.5272216796875, "learning_rate": 1.974529767196512e-06, "loss": 34.5938, "step": 3007 }, { "epoch": 0.19977419140599056, "grad_norm": 361.9762878417969, "learning_rate": 1.9745056421537346e-06, "loss": 31.3906, "step": 3008 }, { "epoch": 0.19984060569834627, "grad_norm": 500.02569580078125, "learning_rate": 1.974481505838438e-06, "loss": 27.1562, "step": 3009 }, { "epoch": 0.199907019990702, "grad_norm": 294.19598388671875, "learning_rate": 1.9744573582509018e-06, "loss": 16.6719, "step": 3010 }, { "epoch": 0.19997343428305772, "grad_norm": 218.32144165039062, "learning_rate": 1.974433199391404e-06, "loss": 19.3125, "step": 3011 }, { "epoch": 0.20003984857541343, "grad_norm": 249.05987548828125, "learning_rate": 1.974409029260225e-06, "loss": 25.5156, "step": 3012 }, { "epoch": 0.20010626286776914, "grad_norm": 194.61288452148438, "learning_rate": 1.974384847857645e-06, "loss": 21.6094, "step": 3013 }, { "epoch": 0.20017267716012485, "grad_norm": 182.63148498535156, "learning_rate": 1.974360655183942e-06, "loss": 18.1406, "step": 3014 }, { "epoch": 0.20023909145248056, "grad_norm": 227.32711791992188, "learning_rate": 1.974336451239397e-06, "loss": 24.7188, "step": 3015 }, { "epoch": 0.2003055057448363, "grad_norm": 371.9510498046875, "learning_rate": 1.97431223602429e-06, "loss": 18.4844, "step": 3016 }, { "epoch": 0.200371920037192, "grad_norm": 222.36294555664062, "learning_rate": 1.974288009538901e-06, "loss": 18.7812, "step": 3017 }, { "epoch": 0.20043833432954772, "grad_norm": 236.46595764160156, "learning_rate": 1.9742637717835096e-06, "loss": 25.7812, "step": 3018 }, { "epoch": 0.20050474862190343, "grad_norm": 133.73162841796875, "learning_rate": 1.9742395227583974e-06, "loss": 20.8125, "step": 3019 }, { "epoch": 0.20057116291425914, "grad_norm": 226.71636962890625, "learning_rate": 1.9742152624638435e-06, "loss": 23.1875, "step": 3020 }, { "epoch": 0.20063757720661488, "grad_norm": 184.5520477294922, "learning_rate": 1.9741909909001294e-06, "loss": 23.3906, "step": 3021 }, { "epoch": 0.20070399149897059, "grad_norm": 227.044921875, "learning_rate": 1.9741667080675357e-06, "loss": 20.8594, "step": 3022 }, { "epoch": 0.2007704057913263, "grad_norm": 160.11231994628906, "learning_rate": 1.9741424139663433e-06, "loss": 16.5625, "step": 3023 }, { "epoch": 0.200836820083682, "grad_norm": 314.803955078125, "learning_rate": 1.974118108596833e-06, "loss": 27.5312, "step": 3024 }, { "epoch": 0.20090323437603771, "grad_norm": 357.7435607910156, "learning_rate": 1.9740937919592863e-06, "loss": 20.5312, "step": 3025 }, { "epoch": 0.20096964866839342, "grad_norm": 677.8616943359375, "learning_rate": 1.9740694640539844e-06, "loss": 21.75, "step": 3026 }, { "epoch": 0.20103606296074916, "grad_norm": 158.1779327392578, "learning_rate": 1.9740451248812085e-06, "loss": 16.9531, "step": 3027 }, { "epoch": 0.20110247725310487, "grad_norm": 280.7584228515625, "learning_rate": 1.9740207744412405e-06, "loss": 19.2969, "step": 3028 }, { "epoch": 0.20116889154546058, "grad_norm": 426.9720153808594, "learning_rate": 1.9739964127343612e-06, "loss": 35.625, "step": 3029 }, { "epoch": 0.2012353058378163, "grad_norm": 301.1280517578125, "learning_rate": 1.9739720397608537e-06, "loss": 22.9375, "step": 3030 }, { "epoch": 0.201301720130172, "grad_norm": 260.9956970214844, "learning_rate": 1.973947655520999e-06, "loss": 16.6094, "step": 3031 }, { "epoch": 0.20136813442252774, "grad_norm": 176.09628295898438, "learning_rate": 1.9739232600150793e-06, "loss": 18.8594, "step": 3032 }, { "epoch": 0.20143454871488345, "grad_norm": 476.5358581542969, "learning_rate": 1.973898853243377e-06, "loss": 22.1094, "step": 3033 }, { "epoch": 0.20150096300723916, "grad_norm": 292.4134216308594, "learning_rate": 1.9738744352061746e-06, "loss": 19.4219, "step": 3034 }, { "epoch": 0.20156737729959487, "grad_norm": 253.26095581054688, "learning_rate": 1.9738500059037537e-06, "loss": 20.6406, "step": 3035 }, { "epoch": 0.20163379159195058, "grad_norm": 282.7866516113281, "learning_rate": 1.9738255653363982e-06, "loss": 19.2031, "step": 3036 }, { "epoch": 0.20170020588430632, "grad_norm": 258.386962890625, "learning_rate": 1.9738011135043897e-06, "loss": 19.9062, "step": 3037 }, { "epoch": 0.20176662017666203, "grad_norm": 229.55899047851562, "learning_rate": 1.9737766504080114e-06, "loss": 19.0156, "step": 3038 }, { "epoch": 0.20183303446901774, "grad_norm": 195.7372283935547, "learning_rate": 1.9737521760475467e-06, "loss": 18.3438, "step": 3039 }, { "epoch": 0.20189944876137345, "grad_norm": 334.0324401855469, "learning_rate": 1.9737276904232777e-06, "loss": 25.3125, "step": 3040 }, { "epoch": 0.20196586305372916, "grad_norm": 158.0963592529297, "learning_rate": 1.973703193535489e-06, "loss": 26.9062, "step": 3041 }, { "epoch": 0.20203227734608487, "grad_norm": 352.9104919433594, "learning_rate": 1.973678685384463e-06, "loss": 20.0938, "step": 3042 }, { "epoch": 0.2020986916384406, "grad_norm": 376.6772155761719, "learning_rate": 1.9736541659704832e-06, "loss": 19.75, "step": 3043 }, { "epoch": 0.2021651059307963, "grad_norm": 199.4482421875, "learning_rate": 1.9736296352938333e-06, "loss": 19.5625, "step": 3044 }, { "epoch": 0.20223152022315202, "grad_norm": 312.88897705078125, "learning_rate": 1.9736050933547977e-06, "loss": 20.3438, "step": 3045 }, { "epoch": 0.20229793451550773, "grad_norm": 134.87547302246094, "learning_rate": 1.9735805401536596e-06, "loss": 19.6719, "step": 3046 }, { "epoch": 0.20236434880786344, "grad_norm": 272.6955871582031, "learning_rate": 1.973555975690703e-06, "loss": 33.3438, "step": 3047 }, { "epoch": 0.20243076310021918, "grad_norm": 370.68438720703125, "learning_rate": 1.9735313999662125e-06, "loss": 24.9219, "step": 3048 }, { "epoch": 0.2024971773925749, "grad_norm": 388.2519836425781, "learning_rate": 1.973506812980472e-06, "loss": 21.1875, "step": 3049 }, { "epoch": 0.2025635916849306, "grad_norm": 423.8336486816406, "learning_rate": 1.973482214733766e-06, "loss": 27.2188, "step": 3050 }, { "epoch": 0.2026300059772863, "grad_norm": 164.5847930908203, "learning_rate": 1.973457605226379e-06, "loss": 18.125, "step": 3051 }, { "epoch": 0.20269642026964202, "grad_norm": 384.44696044921875, "learning_rate": 1.973432984458596e-06, "loss": 23.625, "step": 3052 }, { "epoch": 0.20276283456199773, "grad_norm": 461.38824462890625, "learning_rate": 1.9734083524307013e-06, "loss": 21.5781, "step": 3053 }, { "epoch": 0.20282924885435347, "grad_norm": 167.258056640625, "learning_rate": 1.9733837091429806e-06, "loss": 18.8125, "step": 3054 }, { "epoch": 0.20289566314670918, "grad_norm": 182.32351684570312, "learning_rate": 1.9733590545957178e-06, "loss": 22.4688, "step": 3055 }, { "epoch": 0.2029620774390649, "grad_norm": 345.068115234375, "learning_rate": 1.9733343887891993e-06, "loss": 23.125, "step": 3056 }, { "epoch": 0.2030284917314206, "grad_norm": 277.8550109863281, "learning_rate": 1.9733097117237096e-06, "loss": 19.3438, "step": 3057 }, { "epoch": 0.2030949060237763, "grad_norm": 321.29840087890625, "learning_rate": 1.9732850233995342e-06, "loss": 22.8438, "step": 3058 }, { "epoch": 0.20316132031613204, "grad_norm": 188.1409149169922, "learning_rate": 1.973260323816959e-06, "loss": 20.25, "step": 3059 }, { "epoch": 0.20322773460848775, "grad_norm": 147.99398803710938, "learning_rate": 1.97323561297627e-06, "loss": 21.2031, "step": 3060 }, { "epoch": 0.20329414890084346, "grad_norm": 392.09075927734375, "learning_rate": 1.9732108908777518e-06, "loss": 25.6094, "step": 3061 }, { "epoch": 0.20336056319319917, "grad_norm": 446.7231140136719, "learning_rate": 1.973186157521692e-06, "loss": 29.6094, "step": 3062 }, { "epoch": 0.20342697748555488, "grad_norm": 455.4176330566406, "learning_rate": 1.9731614129083753e-06, "loss": 26.7188, "step": 3063 }, { "epoch": 0.2034933917779106, "grad_norm": 196.48471069335938, "learning_rate": 1.973136657038089e-06, "loss": 21.5, "step": 3064 }, { "epoch": 0.20355980607026633, "grad_norm": 389.0236511230469, "learning_rate": 1.9731118899111187e-06, "loss": 22.0156, "step": 3065 }, { "epoch": 0.20362622036262204, "grad_norm": 189.86441040039062, "learning_rate": 1.9730871115277512e-06, "loss": 17.5312, "step": 3066 }, { "epoch": 0.20369263465497775, "grad_norm": 171.37498474121094, "learning_rate": 1.973062321888273e-06, "loss": 18.5625, "step": 3067 }, { "epoch": 0.20375904894733346, "grad_norm": 527.2545166015625, "learning_rate": 1.9730375209929714e-06, "loss": 20.25, "step": 3068 }, { "epoch": 0.20382546323968917, "grad_norm": 382.46722412109375, "learning_rate": 1.9730127088421324e-06, "loss": 19.4688, "step": 3069 }, { "epoch": 0.2038918775320449, "grad_norm": 166.26834106445312, "learning_rate": 1.9729878854360437e-06, "loss": 18.75, "step": 3070 }, { "epoch": 0.20395829182440062, "grad_norm": 268.5863037109375, "learning_rate": 1.972963050774992e-06, "loss": 20.0938, "step": 3071 }, { "epoch": 0.20402470611675633, "grad_norm": 348.60125732421875, "learning_rate": 1.9729382048592652e-06, "loss": 28.2656, "step": 3072 }, { "epoch": 0.20409112040911204, "grad_norm": 208.75621032714844, "learning_rate": 1.97291334768915e-06, "loss": 25.0625, "step": 3073 }, { "epoch": 0.20415753470146775, "grad_norm": 174.9657440185547, "learning_rate": 1.972888479264934e-06, "loss": 22.1094, "step": 3074 }, { "epoch": 0.20422394899382348, "grad_norm": 210.43121337890625, "learning_rate": 1.9728635995869053e-06, "loss": 22.625, "step": 3075 }, { "epoch": 0.2042903632861792, "grad_norm": 366.7779541015625, "learning_rate": 1.9728387086553517e-06, "loss": 22.5781, "step": 3076 }, { "epoch": 0.2043567775785349, "grad_norm": 243.61248779296875, "learning_rate": 1.9728138064705606e-06, "loss": 19.4062, "step": 3077 }, { "epoch": 0.2044231918708906, "grad_norm": 148.1136474609375, "learning_rate": 1.9727888930328202e-06, "loss": 19.1562, "step": 3078 }, { "epoch": 0.20448960616324632, "grad_norm": 260.79248046875, "learning_rate": 1.9727639683424187e-06, "loss": 21.0312, "step": 3079 }, { "epoch": 0.20455602045560203, "grad_norm": 194.2239532470703, "learning_rate": 1.972739032399645e-06, "loss": 19.9062, "step": 3080 }, { "epoch": 0.20462243474795777, "grad_norm": 189.1469268798828, "learning_rate": 1.972714085204787e-06, "loss": 19.3125, "step": 3081 }, { "epoch": 0.20468884904031348, "grad_norm": 271.6827392578125, "learning_rate": 1.972689126758133e-06, "loss": 19.0312, "step": 3082 }, { "epoch": 0.2047552633326692, "grad_norm": 149.7052764892578, "learning_rate": 1.9726641570599726e-06, "loss": 19.875, "step": 3083 }, { "epoch": 0.2048216776250249, "grad_norm": 136.30421447753906, "learning_rate": 1.9726391761105935e-06, "loss": 17.2188, "step": 3084 }, { "epoch": 0.2048880919173806, "grad_norm": 192.8924102783203, "learning_rate": 1.972614183910286e-06, "loss": 19.4531, "step": 3085 }, { "epoch": 0.20495450620973635, "grad_norm": 252.7989959716797, "learning_rate": 1.9725891804593376e-06, "loss": 16.5469, "step": 3086 }, { "epoch": 0.20502092050209206, "grad_norm": 289.88189697265625, "learning_rate": 1.9725641657580393e-06, "loss": 17.8906, "step": 3087 }, { "epoch": 0.20508733479444777, "grad_norm": 222.32730102539062, "learning_rate": 1.972539139806679e-06, "loss": 19.9219, "step": 3088 }, { "epoch": 0.20515374908680348, "grad_norm": 268.6097106933594, "learning_rate": 1.972514102605547e-06, "loss": 25.9688, "step": 3089 }, { "epoch": 0.2052201633791592, "grad_norm": 161.27850341796875, "learning_rate": 1.9724890541549326e-06, "loss": 17.3281, "step": 3090 }, { "epoch": 0.2052865776715149, "grad_norm": 199.7257843017578, "learning_rate": 1.972463994455125e-06, "loss": 17.9062, "step": 3091 }, { "epoch": 0.20535299196387063, "grad_norm": 192.49725341796875, "learning_rate": 1.972438923506415e-06, "loss": 24.5, "step": 3092 }, { "epoch": 0.20541940625622634, "grad_norm": 330.55682373046875, "learning_rate": 1.9724138413090926e-06, "loss": 24.3906, "step": 3093 }, { "epoch": 0.20548582054858205, "grad_norm": 223.6302032470703, "learning_rate": 1.9723887478634475e-06, "loss": 23.9062, "step": 3094 }, { "epoch": 0.20555223484093776, "grad_norm": 339.7419738769531, "learning_rate": 1.97236364316977e-06, "loss": 21.9688, "step": 3095 }, { "epoch": 0.20561864913329347, "grad_norm": 232.93408203125, "learning_rate": 1.9723385272283506e-06, "loss": 18.2188, "step": 3096 }, { "epoch": 0.2056850634256492, "grad_norm": 162.25814819335938, "learning_rate": 1.97231340003948e-06, "loss": 18.5938, "step": 3097 }, { "epoch": 0.20575147771800492, "grad_norm": 298.29815673828125, "learning_rate": 1.972288261603448e-06, "loss": 20.1875, "step": 3098 }, { "epoch": 0.20581789201036063, "grad_norm": 192.61680603027344, "learning_rate": 1.972263111920547e-06, "loss": 13.3281, "step": 3099 }, { "epoch": 0.20588430630271634, "grad_norm": 279.95709228515625, "learning_rate": 1.972237950991066e-06, "loss": 20.0, "step": 3100 }, { "epoch": 0.20595072059507205, "grad_norm": 165.56199645996094, "learning_rate": 1.9722127788152975e-06, "loss": 19.2031, "step": 3101 }, { "epoch": 0.20601713488742776, "grad_norm": 173.04148864746094, "learning_rate": 1.972187595393532e-06, "loss": 16.5625, "step": 3102 }, { "epoch": 0.2060835491797835, "grad_norm": 265.2159729003906, "learning_rate": 1.9721624007260613e-06, "loss": 19.0156, "step": 3103 }, { "epoch": 0.2061499634721392, "grad_norm": 325.2077941894531, "learning_rate": 1.9721371948131764e-06, "loss": 22.2188, "step": 3104 }, { "epoch": 0.20621637776449492, "grad_norm": 180.5459442138672, "learning_rate": 1.972111977655169e-06, "loss": 19.3438, "step": 3105 }, { "epoch": 0.20628279205685063, "grad_norm": 500.8667297363281, "learning_rate": 1.9720867492523305e-06, "loss": 31.8438, "step": 3106 }, { "epoch": 0.20634920634920634, "grad_norm": 232.73573303222656, "learning_rate": 1.9720615096049532e-06, "loss": 22.1719, "step": 3107 }, { "epoch": 0.20641562064156208, "grad_norm": 207.1402130126953, "learning_rate": 1.972036258713329e-06, "loss": 25.1406, "step": 3108 }, { "epoch": 0.20648203493391779, "grad_norm": 175.887451171875, "learning_rate": 1.9720109965777497e-06, "loss": 17.4531, "step": 3109 }, { "epoch": 0.2065484492262735, "grad_norm": 175.69223022460938, "learning_rate": 1.9719857231985076e-06, "loss": 23.6875, "step": 3110 }, { "epoch": 0.2066148635186292, "grad_norm": 1043.3798828125, "learning_rate": 1.9719604385758958e-06, "loss": 27.625, "step": 3111 }, { "epoch": 0.20668127781098491, "grad_norm": 261.6875305175781, "learning_rate": 1.9719351427102053e-06, "loss": 18.2969, "step": 3112 }, { "epoch": 0.20674769210334065, "grad_norm": 163.314208984375, "learning_rate": 1.97190983560173e-06, "loss": 20.125, "step": 3113 }, { "epoch": 0.20681410639569636, "grad_norm": 189.03216552734375, "learning_rate": 1.9718845172507617e-06, "loss": 15.5, "step": 3114 }, { "epoch": 0.20688052068805207, "grad_norm": 156.64376831054688, "learning_rate": 1.9718591876575944e-06, "loss": 15.9531, "step": 3115 }, { "epoch": 0.20694693498040778, "grad_norm": 177.6959228515625, "learning_rate": 1.97183384682252e-06, "loss": 18.1719, "step": 3116 }, { "epoch": 0.2070133492727635, "grad_norm": 252.45932006835938, "learning_rate": 1.971808494745832e-06, "loss": 20.4062, "step": 3117 }, { "epoch": 0.2070797635651192, "grad_norm": 351.6132507324219, "learning_rate": 1.971783131427824e-06, "loss": 26.6562, "step": 3118 }, { "epoch": 0.20714617785747494, "grad_norm": 216.62741088867188, "learning_rate": 1.9717577568687885e-06, "loss": 16.7969, "step": 3119 }, { "epoch": 0.20721259214983065, "grad_norm": 316.06683349609375, "learning_rate": 1.97173237106902e-06, "loss": 19.2969, "step": 3120 }, { "epoch": 0.20727900644218636, "grad_norm": 245.4031524658203, "learning_rate": 1.971706974028812e-06, "loss": 29.6562, "step": 3121 }, { "epoch": 0.20734542073454207, "grad_norm": 137.0437469482422, "learning_rate": 1.971681565748458e-06, "loss": 18.9375, "step": 3122 }, { "epoch": 0.20741183502689778, "grad_norm": 207.69140625, "learning_rate": 1.9716561462282517e-06, "loss": 18.8438, "step": 3123 }, { "epoch": 0.20747824931925352, "grad_norm": 210.85606384277344, "learning_rate": 1.9716307154684876e-06, "loss": 18.1406, "step": 3124 }, { "epoch": 0.20754466361160923, "grad_norm": 367.84686279296875, "learning_rate": 1.9716052734694594e-06, "loss": 33.9219, "step": 3125 }, { "epoch": 0.20761107790396494, "grad_norm": 287.81732177734375, "learning_rate": 1.971579820231462e-06, "loss": 24.75, "step": 3126 }, { "epoch": 0.20767749219632065, "grad_norm": 368.9500732421875, "learning_rate": 1.971554355754789e-06, "loss": 27.6562, "step": 3127 }, { "epoch": 0.20774390648867636, "grad_norm": 178.35614013671875, "learning_rate": 1.971528880039736e-06, "loss": 20.4844, "step": 3128 }, { "epoch": 0.20781032078103207, "grad_norm": 245.56097412109375, "learning_rate": 1.971503393086597e-06, "loss": 18.4219, "step": 3129 }, { "epoch": 0.2078767350733878, "grad_norm": 167.9539337158203, "learning_rate": 1.9714778948956664e-06, "loss": 20.0, "step": 3130 }, { "epoch": 0.2079431493657435, "grad_norm": 479.2478332519531, "learning_rate": 1.97145238546724e-06, "loss": 24.75, "step": 3131 }, { "epoch": 0.20800956365809922, "grad_norm": 642.9826049804688, "learning_rate": 1.971426864801613e-06, "loss": 18.7344, "step": 3132 }, { "epoch": 0.20807597795045493, "grad_norm": 531.3994750976562, "learning_rate": 1.9714013328990794e-06, "loss": 26.5, "step": 3133 }, { "epoch": 0.20814239224281064, "grad_norm": 158.7611083984375, "learning_rate": 1.971375789759936e-06, "loss": 17.4375, "step": 3134 }, { "epoch": 0.20820880653516638, "grad_norm": 446.41485595703125, "learning_rate": 1.9713502353844775e-06, "loss": 25.9531, "step": 3135 }, { "epoch": 0.2082752208275221, "grad_norm": 248.8684539794922, "learning_rate": 1.9713246697729994e-06, "loss": 21.0625, "step": 3136 }, { "epoch": 0.2083416351198778, "grad_norm": 193.82090759277344, "learning_rate": 1.971299092925798e-06, "loss": 17.5469, "step": 3137 }, { "epoch": 0.2084080494122335, "grad_norm": 174.26014709472656, "learning_rate": 1.971273504843168e-06, "loss": 18.8281, "step": 3138 }, { "epoch": 0.20847446370458922, "grad_norm": 117.61024475097656, "learning_rate": 1.971247905525407e-06, "loss": 21.0156, "step": 3139 }, { "epoch": 0.20854087799694493, "grad_norm": 292.94805908203125, "learning_rate": 1.9712222949728098e-06, "loss": 24.9062, "step": 3140 }, { "epoch": 0.20860729228930067, "grad_norm": 876.9987182617188, "learning_rate": 1.971196673185673e-06, "loss": 33.0312, "step": 3141 }, { "epoch": 0.20867370658165638, "grad_norm": 160.775634765625, "learning_rate": 1.9711710401642933e-06, "loss": 17.5625, "step": 3142 }, { "epoch": 0.2087401208740121, "grad_norm": 298.9833679199219, "learning_rate": 1.9711453959089672e-06, "loss": 17.6406, "step": 3143 }, { "epoch": 0.2088065351663678, "grad_norm": 271.67181396484375, "learning_rate": 1.971119740419991e-06, "loss": 22.4062, "step": 3144 }, { "epoch": 0.2088729494587235, "grad_norm": 304.4277038574219, "learning_rate": 1.9710940736976617e-06, "loss": 25.5938, "step": 3145 }, { "epoch": 0.20893936375107924, "grad_norm": 425.69781494140625, "learning_rate": 1.971068395742276e-06, "loss": 32.0312, "step": 3146 }, { "epoch": 0.20900577804343495, "grad_norm": 135.25303649902344, "learning_rate": 1.971042706554131e-06, "loss": 22.4531, "step": 3147 }, { "epoch": 0.20907219233579066, "grad_norm": 236.74009704589844, "learning_rate": 1.971017006133524e-06, "loss": 15.2812, "step": 3148 }, { "epoch": 0.20913860662814637, "grad_norm": 215.96875, "learning_rate": 1.9709912944807524e-06, "loss": 27.5312, "step": 3149 }, { "epoch": 0.20920502092050208, "grad_norm": 184.98934936523438, "learning_rate": 1.970965571596113e-06, "loss": 18.1094, "step": 3150 }, { "epoch": 0.20927143521285782, "grad_norm": 252.1166229248047, "learning_rate": 1.970939837479904e-06, "loss": 21.125, "step": 3151 }, { "epoch": 0.20933784950521353, "grad_norm": 206.14413452148438, "learning_rate": 1.970914092132423e-06, "loss": 18.9688, "step": 3152 }, { "epoch": 0.20940426379756924, "grad_norm": 276.75323486328125, "learning_rate": 1.9708883355539674e-06, "loss": 17.875, "step": 3153 }, { "epoch": 0.20947067808992495, "grad_norm": 333.0087585449219, "learning_rate": 1.9708625677448354e-06, "loss": 21.0938, "step": 3154 }, { "epoch": 0.20953709238228066, "grad_norm": 404.2521667480469, "learning_rate": 1.970836788705325e-06, "loss": 16.5, "step": 3155 }, { "epoch": 0.20960350667463637, "grad_norm": 1436.36865234375, "learning_rate": 1.9708109984357346e-06, "loss": 23.8594, "step": 3156 }, { "epoch": 0.2096699209669921, "grad_norm": 472.603515625, "learning_rate": 1.9707851969363622e-06, "loss": 20.9688, "step": 3157 }, { "epoch": 0.20973633525934782, "grad_norm": 407.2914123535156, "learning_rate": 1.9707593842075064e-06, "loss": 20.3125, "step": 3158 }, { "epoch": 0.20980274955170353, "grad_norm": 272.23577880859375, "learning_rate": 1.9707335602494663e-06, "loss": 19.7969, "step": 3159 }, { "epoch": 0.20986916384405924, "grad_norm": 146.7065887451172, "learning_rate": 1.97070772506254e-06, "loss": 22.4375, "step": 3160 }, { "epoch": 0.20993557813641495, "grad_norm": 386.40679931640625, "learning_rate": 1.9706818786470258e-06, "loss": 22.0, "step": 3161 }, { "epoch": 0.21000199242877068, "grad_norm": 221.40457153320312, "learning_rate": 1.970656021003224e-06, "loss": 17.4531, "step": 3162 }, { "epoch": 0.2100684067211264, "grad_norm": 243.64300537109375, "learning_rate": 1.9706301521314328e-06, "loss": 30.6562, "step": 3163 }, { "epoch": 0.2101348210134821, "grad_norm": 415.9472351074219, "learning_rate": 1.970604272031952e-06, "loss": 20.7188, "step": 3164 }, { "epoch": 0.2102012353058378, "grad_norm": 198.6611785888672, "learning_rate": 1.97057838070508e-06, "loss": 17.625, "step": 3165 }, { "epoch": 0.21026764959819352, "grad_norm": 655.2493286132812, "learning_rate": 1.9705524781511175e-06, "loss": 18.0312, "step": 3166 }, { "epoch": 0.21033406389054923, "grad_norm": 212.21383666992188, "learning_rate": 1.9705265643703633e-06, "loss": 18.0312, "step": 3167 }, { "epoch": 0.21040047818290497, "grad_norm": 375.58074951171875, "learning_rate": 1.970500639363118e-06, "loss": 25.3438, "step": 3168 }, { "epoch": 0.21046689247526068, "grad_norm": 492.4415588378906, "learning_rate": 1.9704747031296806e-06, "loss": 25.2812, "step": 3169 }, { "epoch": 0.2105333067676164, "grad_norm": 289.4626159667969, "learning_rate": 1.9704487556703514e-06, "loss": 17.1094, "step": 3170 }, { "epoch": 0.2105997210599721, "grad_norm": 190.9415740966797, "learning_rate": 1.9704227969854304e-06, "loss": 18.8594, "step": 3171 }, { "epoch": 0.2106661353523278, "grad_norm": 475.982666015625, "learning_rate": 1.970396827075218e-06, "loss": 20.6875, "step": 3172 }, { "epoch": 0.21073254964468355, "grad_norm": 137.0337677001953, "learning_rate": 1.970370845940015e-06, "loss": 16.0469, "step": 3173 }, { "epoch": 0.21079896393703926, "grad_norm": 319.4803466796875, "learning_rate": 1.9703448535801214e-06, "loss": 25.6875, "step": 3174 }, { "epoch": 0.21086537822939497, "grad_norm": 278.80047607421875, "learning_rate": 1.9703188499958385e-06, "loss": 27.4062, "step": 3175 }, { "epoch": 0.21093179252175068, "grad_norm": 153.18475341796875, "learning_rate": 1.970292835187466e-06, "loss": 18.8281, "step": 3176 }, { "epoch": 0.2109982068141064, "grad_norm": 244.2712860107422, "learning_rate": 1.970266809155306e-06, "loss": 18.2656, "step": 3177 }, { "epoch": 0.2110646211064621, "grad_norm": 721.4285888671875, "learning_rate": 1.9702407718996587e-06, "loss": 24.4375, "step": 3178 }, { "epoch": 0.21113103539881783, "grad_norm": 503.7388000488281, "learning_rate": 1.970214723420826e-06, "loss": 25.7969, "step": 3179 }, { "epoch": 0.21119744969117354, "grad_norm": 605.3494873046875, "learning_rate": 1.9701886637191084e-06, "loss": 18.7188, "step": 3180 }, { "epoch": 0.21126386398352925, "grad_norm": 300.32318115234375, "learning_rate": 1.970162592794808e-06, "loss": 30.0625, "step": 3181 }, { "epoch": 0.21133027827588496, "grad_norm": 247.15090942382812, "learning_rate": 1.9701365106482264e-06, "loss": 19.4375, "step": 3182 }, { "epoch": 0.21139669256824067, "grad_norm": 399.91839599609375, "learning_rate": 1.970110417279665e-06, "loss": 19.0156, "step": 3183 }, { "epoch": 0.2114631068605964, "grad_norm": 629.1046752929688, "learning_rate": 1.9700843126894254e-06, "loss": 18.3906, "step": 3184 }, { "epoch": 0.21152952115295212, "grad_norm": 267.08367919921875, "learning_rate": 1.97005819687781e-06, "loss": 18.9375, "step": 3185 }, { "epoch": 0.21159593544530783, "grad_norm": 176.89617919921875, "learning_rate": 1.9700320698451203e-06, "loss": 21.875, "step": 3186 }, { "epoch": 0.21166234973766354, "grad_norm": 209.76231384277344, "learning_rate": 1.9700059315916595e-06, "loss": 25.7188, "step": 3187 }, { "epoch": 0.21172876403001925, "grad_norm": 170.144287109375, "learning_rate": 1.9699797821177294e-06, "loss": 23.5312, "step": 3188 }, { "epoch": 0.211795178322375, "grad_norm": 297.1783447265625, "learning_rate": 1.9699536214236325e-06, "loss": 22.0312, "step": 3189 }, { "epoch": 0.2118615926147307, "grad_norm": 184.8455810546875, "learning_rate": 1.969927449509671e-06, "loss": 17.0, "step": 3190 }, { "epoch": 0.2119280069070864, "grad_norm": 213.03680419921875, "learning_rate": 1.9699012663761484e-06, "loss": 24.5938, "step": 3191 }, { "epoch": 0.21199442119944212, "grad_norm": 167.59765625, "learning_rate": 1.9698750720233673e-06, "loss": 17.1094, "step": 3192 }, { "epoch": 0.21206083549179783, "grad_norm": 353.62725830078125, "learning_rate": 1.9698488664516305e-06, "loss": 23.7344, "step": 3193 }, { "epoch": 0.21212724978415354, "grad_norm": 146.89678955078125, "learning_rate": 1.9698226496612414e-06, "loss": 18.7656, "step": 3194 }, { "epoch": 0.21219366407650928, "grad_norm": 140.98451232910156, "learning_rate": 1.9697964216525027e-06, "loss": 17.2812, "step": 3195 }, { "epoch": 0.21226007836886499, "grad_norm": 199.14088439941406, "learning_rate": 1.9697701824257186e-06, "loss": 23.4062, "step": 3196 }, { "epoch": 0.2123264926612207, "grad_norm": 147.9805908203125, "learning_rate": 1.9697439319811926e-06, "loss": 18.9375, "step": 3197 }, { "epoch": 0.2123929069535764, "grad_norm": 205.0626983642578, "learning_rate": 1.969717670319227e-06, "loss": 22.5625, "step": 3198 }, { "epoch": 0.21245932124593211, "grad_norm": 247.86920166015625, "learning_rate": 1.969691397440127e-06, "loss": 22.9062, "step": 3199 }, { "epoch": 0.21252573553828785, "grad_norm": 390.5672302246094, "learning_rate": 1.9696651133441964e-06, "loss": 24.625, "step": 3200 }, { "epoch": 0.21259214983064356, "grad_norm": 212.11651611328125, "learning_rate": 1.969638818031739e-06, "loss": 17.5625, "step": 3201 }, { "epoch": 0.21265856412299927, "grad_norm": 186.46949768066406, "learning_rate": 1.9696125115030583e-06, "loss": 27.1719, "step": 3202 }, { "epoch": 0.21272497841535498, "grad_norm": 223.49241638183594, "learning_rate": 1.9695861937584595e-06, "loss": 18.375, "step": 3203 }, { "epoch": 0.2127913927077107, "grad_norm": 152.18658447265625, "learning_rate": 1.9695598647982467e-06, "loss": 18.1875, "step": 3204 }, { "epoch": 0.2128578070000664, "grad_norm": 263.7134704589844, "learning_rate": 1.9695335246227242e-06, "loss": 24.75, "step": 3205 }, { "epoch": 0.21292422129242214, "grad_norm": 204.0601348876953, "learning_rate": 1.969507173232197e-06, "loss": 25.0625, "step": 3206 }, { "epoch": 0.21299063558477785, "grad_norm": 192.91793823242188, "learning_rate": 1.9694808106269704e-06, "loss": 21.4688, "step": 3207 }, { "epoch": 0.21305704987713356, "grad_norm": 230.29441833496094, "learning_rate": 1.9694544368073484e-06, "loss": 25.8438, "step": 3208 }, { "epoch": 0.21312346416948927, "grad_norm": 183.35389709472656, "learning_rate": 1.9694280517736362e-06, "loss": 18.9531, "step": 3209 }, { "epoch": 0.21318987846184498, "grad_norm": 367.25439453125, "learning_rate": 1.9694016555261395e-06, "loss": 14.7188, "step": 3210 }, { "epoch": 0.21325629275420072, "grad_norm": 1274.6708984375, "learning_rate": 1.9693752480651637e-06, "loss": 26.2188, "step": 3211 }, { "epoch": 0.21332270704655643, "grad_norm": 215.08554077148438, "learning_rate": 1.969348829391014e-06, "loss": 26.4688, "step": 3212 }, { "epoch": 0.21338912133891214, "grad_norm": 337.5348815917969, "learning_rate": 1.969322399503996e-06, "loss": 25.3438, "step": 3213 }, { "epoch": 0.21345553563126785, "grad_norm": 447.9641418457031, "learning_rate": 1.969295958404415e-06, "loss": 26.375, "step": 3214 }, { "epoch": 0.21352194992362356, "grad_norm": 540.29296875, "learning_rate": 1.969269506092578e-06, "loss": 20.8125, "step": 3215 }, { "epoch": 0.21358836421597927, "grad_norm": 169.72181701660156, "learning_rate": 1.9692430425687896e-06, "loss": 20.6875, "step": 3216 }, { "epoch": 0.213654778508335, "grad_norm": 337.6158447265625, "learning_rate": 1.969216567833357e-06, "loss": 24.625, "step": 3217 }, { "epoch": 0.2137211928006907, "grad_norm": 466.14935302734375, "learning_rate": 1.9691900818865856e-06, "loss": 21.9375, "step": 3218 }, { "epoch": 0.21378760709304642, "grad_norm": 181.685302734375, "learning_rate": 1.9691635847287827e-06, "loss": 29.3125, "step": 3219 }, { "epoch": 0.21385402138540213, "grad_norm": 248.41490173339844, "learning_rate": 1.969137076360254e-06, "loss": 17.2344, "step": 3220 }, { "epoch": 0.21392043567775784, "grad_norm": 217.8076629638672, "learning_rate": 1.9691105567813062e-06, "loss": 25.9688, "step": 3221 }, { "epoch": 0.21398684997011358, "grad_norm": 262.85687255859375, "learning_rate": 1.969084025992247e-06, "loss": 22.8281, "step": 3222 }, { "epoch": 0.2140532642624693, "grad_norm": 298.10430908203125, "learning_rate": 1.969057483993382e-06, "loss": 25.1875, "step": 3223 }, { "epoch": 0.214119678554825, "grad_norm": 195.97821044921875, "learning_rate": 1.969030930785019e-06, "loss": 20.125, "step": 3224 }, { "epoch": 0.2141860928471807, "grad_norm": 271.67620849609375, "learning_rate": 1.969004366367465e-06, "loss": 16.9844, "step": 3225 }, { "epoch": 0.21425250713953642, "grad_norm": 377.62457275390625, "learning_rate": 1.968977790741027e-06, "loss": 29.0156, "step": 3226 }, { "epoch": 0.21431892143189216, "grad_norm": 284.79833984375, "learning_rate": 1.968951203906013e-06, "loss": 21.8438, "step": 3227 }, { "epoch": 0.21438533572424787, "grad_norm": 271.219482421875, "learning_rate": 1.96892460586273e-06, "loss": 21.0, "step": 3228 }, { "epoch": 0.21445175001660358, "grad_norm": 275.115966796875, "learning_rate": 1.968897996611486e-06, "loss": 23.875, "step": 3229 }, { "epoch": 0.21451816430895929, "grad_norm": 308.77081298828125, "learning_rate": 1.9688713761525886e-06, "loss": 15.125, "step": 3230 }, { "epoch": 0.214584578601315, "grad_norm": 232.5425262451172, "learning_rate": 1.9688447444863458e-06, "loss": 30.25, "step": 3231 }, { "epoch": 0.2146509928936707, "grad_norm": 274.71826171875, "learning_rate": 1.968818101613066e-06, "loss": 28.8125, "step": 3232 }, { "epoch": 0.21471740718602644, "grad_norm": 651.3113403320312, "learning_rate": 1.9687914475330566e-06, "loss": 23.3125, "step": 3233 }, { "epoch": 0.21478382147838215, "grad_norm": 217.6052703857422, "learning_rate": 1.9687647822466264e-06, "loss": 22.7656, "step": 3234 }, { "epoch": 0.21485023577073786, "grad_norm": 183.305419921875, "learning_rate": 1.968738105754084e-06, "loss": 19.6562, "step": 3235 }, { "epoch": 0.21491665006309357, "grad_norm": 160.17396545410156, "learning_rate": 1.9687114180557377e-06, "loss": 16.5, "step": 3236 }, { "epoch": 0.21498306435544928, "grad_norm": 227.5284881591797, "learning_rate": 1.9686847191518963e-06, "loss": 26.6562, "step": 3237 }, { "epoch": 0.21504947864780502, "grad_norm": 244.2432098388672, "learning_rate": 1.9686580090428687e-06, "loss": 21.7656, "step": 3238 }, { "epoch": 0.21511589294016073, "grad_norm": 380.550537109375, "learning_rate": 1.968631287728964e-06, "loss": 25.125, "step": 3239 }, { "epoch": 0.21518230723251644, "grad_norm": 147.87794494628906, "learning_rate": 1.968604555210491e-06, "loss": 14.0312, "step": 3240 }, { "epoch": 0.21524872152487215, "grad_norm": 461.12933349609375, "learning_rate": 1.9685778114877586e-06, "loss": 33.0156, "step": 3241 }, { "epoch": 0.21531513581722786, "grad_norm": 220.2462158203125, "learning_rate": 1.968551056561077e-06, "loss": 17.0938, "step": 3242 }, { "epoch": 0.21538155010958357, "grad_norm": 177.3705291748047, "learning_rate": 1.9685242904307553e-06, "loss": 16.5, "step": 3243 }, { "epoch": 0.2154479644019393, "grad_norm": 163.79217529296875, "learning_rate": 1.968497513097103e-06, "loss": 19.2031, "step": 3244 }, { "epoch": 0.21551437869429502, "grad_norm": 261.0990905761719, "learning_rate": 1.96847072456043e-06, "loss": 22.6406, "step": 3245 }, { "epoch": 0.21558079298665073, "grad_norm": 163.37698364257812, "learning_rate": 1.968443924821046e-06, "loss": 20.0, "step": 3246 }, { "epoch": 0.21564720727900644, "grad_norm": 146.4748992919922, "learning_rate": 1.968417113879261e-06, "loss": 19.2188, "step": 3247 }, { "epoch": 0.21571362157136215, "grad_norm": 150.5200958251953, "learning_rate": 1.9683902917353856e-06, "loss": 17.5625, "step": 3248 }, { "epoch": 0.21578003586371788, "grad_norm": 259.0141296386719, "learning_rate": 1.9683634583897295e-06, "loss": 20.1562, "step": 3249 }, { "epoch": 0.2158464501560736, "grad_norm": 168.49066162109375, "learning_rate": 1.9683366138426034e-06, "loss": 17.2344, "step": 3250 }, { "epoch": 0.2159128644484293, "grad_norm": 846.3756713867188, "learning_rate": 1.9683097580943174e-06, "loss": 21.4062, "step": 3251 }, { "epoch": 0.215979278740785, "grad_norm": 245.92245483398438, "learning_rate": 1.968282891145183e-06, "loss": 20.2344, "step": 3252 }, { "epoch": 0.21604569303314072, "grad_norm": 324.67999267578125, "learning_rate": 1.96825601299551e-06, "loss": 24.9062, "step": 3253 }, { "epoch": 0.21611210732549643, "grad_norm": 226.2515106201172, "learning_rate": 1.96822912364561e-06, "loss": 25.6719, "step": 3254 }, { "epoch": 0.21617852161785217, "grad_norm": 306.7879333496094, "learning_rate": 1.968202223095794e-06, "loss": 23.5938, "step": 3255 }, { "epoch": 0.21624493591020788, "grad_norm": 341.0922546386719, "learning_rate": 1.9681753113463725e-06, "loss": 20.5625, "step": 3256 }, { "epoch": 0.2163113502025636, "grad_norm": 372.8132629394531, "learning_rate": 1.968148388397658e-06, "loss": 20.875, "step": 3257 }, { "epoch": 0.2163777644949193, "grad_norm": 167.9214630126953, "learning_rate": 1.9681214542499606e-06, "loss": 19.7969, "step": 3258 }, { "epoch": 0.216444178787275, "grad_norm": 192.0572052001953, "learning_rate": 1.968094508903592e-06, "loss": 23.8125, "step": 3259 }, { "epoch": 0.21651059307963075, "grad_norm": 579.015869140625, "learning_rate": 1.9680675523588653e-06, "loss": 25.4844, "step": 3260 }, { "epoch": 0.21657700737198646, "grad_norm": 444.17547607421875, "learning_rate": 1.9680405846160914e-06, "loss": 23.9062, "step": 3261 }, { "epoch": 0.21664342166434217, "grad_norm": 373.7348327636719, "learning_rate": 1.9680136056755818e-06, "loss": 27.0, "step": 3262 }, { "epoch": 0.21670983595669788, "grad_norm": 145.65267944335938, "learning_rate": 1.967986615537649e-06, "loss": 18.2188, "step": 3263 }, { "epoch": 0.2167762502490536, "grad_norm": 346.9366455078125, "learning_rate": 1.967959614202605e-06, "loss": 27.5312, "step": 3264 }, { "epoch": 0.21684266454140932, "grad_norm": 190.21832275390625, "learning_rate": 1.9679326016707627e-06, "loss": 23.9844, "step": 3265 }, { "epoch": 0.21690907883376503, "grad_norm": 182.5294647216797, "learning_rate": 1.9679055779424343e-06, "loss": 20.0938, "step": 3266 }, { "epoch": 0.21697549312612074, "grad_norm": 197.48008728027344, "learning_rate": 1.967878543017932e-06, "loss": 23.125, "step": 3267 }, { "epoch": 0.21704190741847645, "grad_norm": 243.1721649169922, "learning_rate": 1.9678514968975687e-06, "loss": 18.2969, "step": 3268 }, { "epoch": 0.21710832171083216, "grad_norm": 128.60128784179688, "learning_rate": 1.967824439581658e-06, "loss": 17.3594, "step": 3269 }, { "epoch": 0.21717473600318787, "grad_norm": 285.010498046875, "learning_rate": 1.9677973710705116e-06, "loss": 27.6406, "step": 3270 }, { "epoch": 0.2172411502955436, "grad_norm": 119.17457580566406, "learning_rate": 1.967770291364444e-06, "loss": 16.2812, "step": 3271 }, { "epoch": 0.21730756458789932, "grad_norm": 195.47645568847656, "learning_rate": 1.967743200463767e-06, "loss": 18.1875, "step": 3272 }, { "epoch": 0.21737397888025503, "grad_norm": 442.91754150390625, "learning_rate": 1.967716098368795e-06, "loss": 19.2656, "step": 3273 }, { "epoch": 0.21744039317261074, "grad_norm": 149.664306640625, "learning_rate": 1.967688985079841e-06, "loss": 18.25, "step": 3274 }, { "epoch": 0.21750680746496645, "grad_norm": 298.9350280761719, "learning_rate": 1.967661860597219e-06, "loss": 32.2188, "step": 3275 }, { "epoch": 0.2175732217573222, "grad_norm": 135.02688598632812, "learning_rate": 1.9676347249212425e-06, "loss": 18.7188, "step": 3276 }, { "epoch": 0.2176396360496779, "grad_norm": 257.3402099609375, "learning_rate": 1.9676075780522257e-06, "loss": 26.4062, "step": 3277 }, { "epoch": 0.2177060503420336, "grad_norm": 114.96286010742188, "learning_rate": 1.967580419990482e-06, "loss": 15.9375, "step": 3278 }, { "epoch": 0.21777246463438932, "grad_norm": 179.08035278320312, "learning_rate": 1.9675532507363262e-06, "loss": 20.8438, "step": 3279 }, { "epoch": 0.21783887892674503, "grad_norm": 1053.0643310546875, "learning_rate": 1.9675260702900725e-06, "loss": 27.0, "step": 3280 }, { "epoch": 0.21790529321910074, "grad_norm": 1661.1910400390625, "learning_rate": 1.9674988786520347e-06, "loss": 22.875, "step": 3281 }, { "epoch": 0.21797170751145648, "grad_norm": 438.0938415527344, "learning_rate": 1.967471675822528e-06, "loss": 19.7812, "step": 3282 }, { "epoch": 0.21803812180381218, "grad_norm": 416.5667724609375, "learning_rate": 1.967444461801867e-06, "loss": 14.3438, "step": 3283 }, { "epoch": 0.2181045360961679, "grad_norm": 159.5604248046875, "learning_rate": 1.967417236590366e-06, "loss": 15.5469, "step": 3284 }, { "epoch": 0.2181709503885236, "grad_norm": 179.99855041503906, "learning_rate": 1.9673900001883405e-06, "loss": 20.375, "step": 3285 }, { "epoch": 0.21823736468087931, "grad_norm": 188.43263244628906, "learning_rate": 1.967362752596105e-06, "loss": 19.0156, "step": 3286 }, { "epoch": 0.21830377897323505, "grad_norm": 348.9237365722656, "learning_rate": 1.967335493813975e-06, "loss": 21.9375, "step": 3287 }, { "epoch": 0.21837019326559076, "grad_norm": 260.56939697265625, "learning_rate": 1.9673082238422663e-06, "loss": 20.7969, "step": 3288 }, { "epoch": 0.21843660755794647, "grad_norm": 239.0149688720703, "learning_rate": 1.9672809426812935e-06, "loss": 24.875, "step": 3289 }, { "epoch": 0.21850302185030218, "grad_norm": 396.50250244140625, "learning_rate": 1.9672536503313724e-06, "loss": 19.5469, "step": 3290 }, { "epoch": 0.2185694361426579, "grad_norm": 318.0732727050781, "learning_rate": 1.967226346792819e-06, "loss": 25.875, "step": 3291 }, { "epoch": 0.2186358504350136, "grad_norm": 187.16595458984375, "learning_rate": 1.967199032065949e-06, "loss": 17.4375, "step": 3292 }, { "epoch": 0.21870226472736934, "grad_norm": 143.74815368652344, "learning_rate": 1.9671717061510785e-06, "loss": 24.0938, "step": 3293 }, { "epoch": 0.21876867901972505, "grad_norm": 200.87753295898438, "learning_rate": 1.967144369048523e-06, "loss": 18.7969, "step": 3294 }, { "epoch": 0.21883509331208076, "grad_norm": 211.70750427246094, "learning_rate": 1.967117020758599e-06, "loss": 21.375, "step": 3295 }, { "epoch": 0.21890150760443647, "grad_norm": 322.1913757324219, "learning_rate": 1.9670896612816235e-06, "loss": 23.3438, "step": 3296 }, { "epoch": 0.21896792189679218, "grad_norm": 134.13819885253906, "learning_rate": 1.967062290617912e-06, "loss": 17.0781, "step": 3297 }, { "epoch": 0.21903433618914792, "grad_norm": 449.9994201660156, "learning_rate": 1.967034908767782e-06, "loss": 25.3125, "step": 3298 }, { "epoch": 0.21910075048150363, "grad_norm": 312.1065673828125, "learning_rate": 1.9670075157315496e-06, "loss": 21.1094, "step": 3299 }, { "epoch": 0.21916716477385934, "grad_norm": 165.84884643554688, "learning_rate": 1.9669801115095318e-06, "loss": 17.2969, "step": 3300 }, { "epoch": 0.21923357906621505, "grad_norm": 300.2818908691406, "learning_rate": 1.9669526961020458e-06, "loss": 24.5312, "step": 3301 }, { "epoch": 0.21929999335857076, "grad_norm": 208.4765625, "learning_rate": 1.9669252695094084e-06, "loss": 21.2031, "step": 3302 }, { "epoch": 0.2193664076509265, "grad_norm": 370.1590576171875, "learning_rate": 1.9668978317319373e-06, "loss": 24.7188, "step": 3303 }, { "epoch": 0.2194328219432822, "grad_norm": 235.53390502929688, "learning_rate": 1.9668703827699492e-06, "loss": 17.8281, "step": 3304 }, { "epoch": 0.2194992362356379, "grad_norm": 301.66845703125, "learning_rate": 1.9668429226237623e-06, "loss": 34.125, "step": 3305 }, { "epoch": 0.21956565052799362, "grad_norm": 826.8306884765625, "learning_rate": 1.966815451293694e-06, "loss": 21.2188, "step": 3306 }, { "epoch": 0.21963206482034933, "grad_norm": 565.7451171875, "learning_rate": 1.9667879687800622e-06, "loss": 24.6094, "step": 3307 }, { "epoch": 0.21969847911270504, "grad_norm": 344.3348083496094, "learning_rate": 1.9667604750831844e-06, "loss": 21.5156, "step": 3308 }, { "epoch": 0.21976489340506078, "grad_norm": 229.87493896484375, "learning_rate": 1.966732970203379e-06, "loss": 18.8594, "step": 3309 }, { "epoch": 0.2198313076974165, "grad_norm": 226.00146484375, "learning_rate": 1.966705454140964e-06, "loss": 19.0938, "step": 3310 }, { "epoch": 0.2198977219897722, "grad_norm": 163.47938537597656, "learning_rate": 1.9666779268962578e-06, "loss": 16.0938, "step": 3311 }, { "epoch": 0.2199641362821279, "grad_norm": 163.0123291015625, "learning_rate": 1.9666503884695787e-06, "loss": 16.3594, "step": 3312 }, { "epoch": 0.22003055057448362, "grad_norm": 218.52859497070312, "learning_rate": 1.9666228388612453e-06, "loss": 17.9531, "step": 3313 }, { "epoch": 0.22009696486683936, "grad_norm": 324.09100341796875, "learning_rate": 1.9665952780715764e-06, "loss": 31.25, "step": 3314 }, { "epoch": 0.22016337915919507, "grad_norm": 518.175048828125, "learning_rate": 1.966567706100891e-06, "loss": 21.4688, "step": 3315 }, { "epoch": 0.22022979345155078, "grad_norm": 349.5177917480469, "learning_rate": 1.966540122949507e-06, "loss": 20.8906, "step": 3316 }, { "epoch": 0.22029620774390649, "grad_norm": 239.18466186523438, "learning_rate": 1.966512528617745e-06, "loss": 17.5312, "step": 3317 }, { "epoch": 0.2203626220362622, "grad_norm": 178.06393432617188, "learning_rate": 1.9664849231059225e-06, "loss": 20.75, "step": 3318 }, { "epoch": 0.2204290363286179, "grad_norm": 282.3436279296875, "learning_rate": 1.96645730641436e-06, "loss": 19.0781, "step": 3319 }, { "epoch": 0.22049545062097364, "grad_norm": 371.1149597167969, "learning_rate": 1.9664296785433774e-06, "loss": 18.7188, "step": 3320 }, { "epoch": 0.22056186491332935, "grad_norm": 343.2165832519531, "learning_rate": 1.9664020394932927e-06, "loss": 24.9219, "step": 3321 }, { "epoch": 0.22062827920568506, "grad_norm": 433.7078552246094, "learning_rate": 1.966374389264427e-06, "loss": 21.8906, "step": 3322 }, { "epoch": 0.22069469349804077, "grad_norm": 192.33567810058594, "learning_rate": 1.966346727857099e-06, "loss": 18.6094, "step": 3323 }, { "epoch": 0.22076110779039648, "grad_norm": 130.3797607421875, "learning_rate": 1.96631905527163e-06, "loss": 19.1875, "step": 3324 }, { "epoch": 0.22082752208275222, "grad_norm": 255.9453582763672, "learning_rate": 1.966291371508339e-06, "loss": 19.0469, "step": 3325 }, { "epoch": 0.22089393637510793, "grad_norm": 159.41163635253906, "learning_rate": 1.9662636765675465e-06, "loss": 18.3125, "step": 3326 }, { "epoch": 0.22096035066746364, "grad_norm": 488.5654602050781, "learning_rate": 1.966235970449573e-06, "loss": 25.0938, "step": 3327 }, { "epoch": 0.22102676495981935, "grad_norm": 134.0599365234375, "learning_rate": 1.9662082531547395e-06, "loss": 14.5625, "step": 3328 }, { "epoch": 0.22109317925217506, "grad_norm": 315.26312255859375, "learning_rate": 1.966180524683365e-06, "loss": 26.2969, "step": 3329 }, { "epoch": 0.22115959354453077, "grad_norm": 261.2496032714844, "learning_rate": 1.9661527850357724e-06, "loss": 22.5469, "step": 3330 }, { "epoch": 0.2212260078368865, "grad_norm": 230.92074584960938, "learning_rate": 1.966125034212281e-06, "loss": 18.8906, "step": 3331 }, { "epoch": 0.22129242212924222, "grad_norm": 202.90042114257812, "learning_rate": 1.966097272213212e-06, "loss": 19.0625, "step": 3332 }, { "epoch": 0.22135883642159793, "grad_norm": 241.00645446777344, "learning_rate": 1.9660694990388874e-06, "loss": 17.3281, "step": 3333 }, { "epoch": 0.22142525071395364, "grad_norm": 165.29176330566406, "learning_rate": 1.9660417146896278e-06, "loss": 26.0312, "step": 3334 }, { "epoch": 0.22149166500630935, "grad_norm": 263.555419921875, "learning_rate": 1.9660139191657547e-06, "loss": 16.2031, "step": 3335 }, { "epoch": 0.22155807929866508, "grad_norm": 247.54527282714844, "learning_rate": 1.965986112467589e-06, "loss": 21.5156, "step": 3336 }, { "epoch": 0.2216244935910208, "grad_norm": 356.85150146484375, "learning_rate": 1.965958294595454e-06, "loss": 21.6719, "step": 3337 }, { "epoch": 0.2216909078833765, "grad_norm": 161.14468383789062, "learning_rate": 1.96593046554967e-06, "loss": 15.2656, "step": 3338 }, { "epoch": 0.2217573221757322, "grad_norm": 263.84405517578125, "learning_rate": 1.965902625330559e-06, "loss": 20.8438, "step": 3339 }, { "epoch": 0.22182373646808792, "grad_norm": 274.1878662109375, "learning_rate": 1.965874773938444e-06, "loss": 19.4844, "step": 3340 }, { "epoch": 0.22189015076044366, "grad_norm": 282.43304443359375, "learning_rate": 1.965846911373646e-06, "loss": 16.7188, "step": 3341 }, { "epoch": 0.22195656505279937, "grad_norm": 152.96890258789062, "learning_rate": 1.9658190376364883e-06, "loss": 18.9375, "step": 3342 }, { "epoch": 0.22202297934515508, "grad_norm": 317.7041015625, "learning_rate": 1.965791152727293e-06, "loss": 24.6562, "step": 3343 }, { "epoch": 0.2220893936375108, "grad_norm": 589.1001586914062, "learning_rate": 1.9657632566463823e-06, "loss": 21.625, "step": 3344 }, { "epoch": 0.2221558079298665, "grad_norm": 221.69989013671875, "learning_rate": 1.965735349394079e-06, "loss": 13.3438, "step": 3345 }, { "epoch": 0.2222222222222222, "grad_norm": 476.80487060546875, "learning_rate": 1.9657074309707063e-06, "loss": 22.0, "step": 3346 }, { "epoch": 0.22228863651457795, "grad_norm": 175.90875244140625, "learning_rate": 1.9656795013765863e-06, "loss": 17.8438, "step": 3347 }, { "epoch": 0.22235505080693366, "grad_norm": 194.63287353515625, "learning_rate": 1.9656515606120433e-06, "loss": 19.0156, "step": 3348 }, { "epoch": 0.22242146509928937, "grad_norm": 404.4869689941406, "learning_rate": 1.9656236086773997e-06, "loss": 19.6406, "step": 3349 }, { "epoch": 0.22248787939164508, "grad_norm": 190.2107391357422, "learning_rate": 1.9655956455729793e-06, "loss": 19.6094, "step": 3350 }, { "epoch": 0.2225542936840008, "grad_norm": 527.6641845703125, "learning_rate": 1.965567671299105e-06, "loss": 28.7812, "step": 3351 }, { "epoch": 0.22262070797635652, "grad_norm": 148.62969970703125, "learning_rate": 1.9655396858561004e-06, "loss": 14.375, "step": 3352 }, { "epoch": 0.22268712226871223, "grad_norm": 241.3732147216797, "learning_rate": 1.96551168924429e-06, "loss": 21.9844, "step": 3353 }, { "epoch": 0.22275353656106794, "grad_norm": 248.48345947265625, "learning_rate": 1.9654836814639967e-06, "loss": 24.4062, "step": 3354 }, { "epoch": 0.22281995085342365, "grad_norm": 156.40286254882812, "learning_rate": 1.965455662515545e-06, "loss": 17.5781, "step": 3355 }, { "epoch": 0.22288636514577936, "grad_norm": 385.92596435546875, "learning_rate": 1.9654276323992593e-06, "loss": 23.6875, "step": 3356 }, { "epoch": 0.22295277943813507, "grad_norm": 190.1666259765625, "learning_rate": 1.965399591115463e-06, "loss": 22.3125, "step": 3357 }, { "epoch": 0.2230191937304908, "grad_norm": 394.4381408691406, "learning_rate": 1.965371538664481e-06, "loss": 21.125, "step": 3358 }, { "epoch": 0.22308560802284652, "grad_norm": 210.2529754638672, "learning_rate": 1.965343475046638e-06, "loss": 24.9062, "step": 3359 }, { "epoch": 0.22315202231520223, "grad_norm": 211.49679565429688, "learning_rate": 1.965315400262258e-06, "loss": 25.25, "step": 3360 }, { "epoch": 0.22321843660755794, "grad_norm": 266.4837341308594, "learning_rate": 1.9652873143116658e-06, "loss": 24.9062, "step": 3361 }, { "epoch": 0.22328485089991365, "grad_norm": 315.7974853515625, "learning_rate": 1.965259217195187e-06, "loss": 22.9375, "step": 3362 }, { "epoch": 0.2233512651922694, "grad_norm": 225.3711700439453, "learning_rate": 1.965231108913146e-06, "loss": 26.5312, "step": 3363 }, { "epoch": 0.2234176794846251, "grad_norm": 544.3736572265625, "learning_rate": 1.9652029894658685e-06, "loss": 27.2188, "step": 3364 }, { "epoch": 0.2234840937769808, "grad_norm": 169.13951110839844, "learning_rate": 1.965174858853679e-06, "loss": 14.1562, "step": 3365 }, { "epoch": 0.22355050806933652, "grad_norm": 264.0787353515625, "learning_rate": 1.9651467170769034e-06, "loss": 18.5312, "step": 3366 }, { "epoch": 0.22361692236169223, "grad_norm": 140.63470458984375, "learning_rate": 1.965118564135867e-06, "loss": 20.2188, "step": 3367 }, { "epoch": 0.22368333665404794, "grad_norm": 257.1209716796875, "learning_rate": 1.9650904000308955e-06, "loss": 17.5625, "step": 3368 }, { "epoch": 0.22374975094640367, "grad_norm": 177.84756469726562, "learning_rate": 1.965062224762315e-06, "loss": 18.6875, "step": 3369 }, { "epoch": 0.22381616523875938, "grad_norm": 256.984619140625, "learning_rate": 1.9650340383304507e-06, "loss": 20.8281, "step": 3370 }, { "epoch": 0.2238825795311151, "grad_norm": 303.581787109375, "learning_rate": 1.9650058407356297e-06, "loss": 18.2969, "step": 3371 }, { "epoch": 0.2239489938234708, "grad_norm": 223.33653259277344, "learning_rate": 1.9649776319781775e-06, "loss": 19.1719, "step": 3372 }, { "epoch": 0.22401540811582651, "grad_norm": 225.76998901367188, "learning_rate": 1.9649494120584204e-06, "loss": 18.4531, "step": 3373 }, { "epoch": 0.22408182240818225, "grad_norm": 453.0453186035156, "learning_rate": 1.964921180976685e-06, "loss": 26.3906, "step": 3374 }, { "epoch": 0.22414823670053796, "grad_norm": 443.1136779785156, "learning_rate": 1.9648929387332976e-06, "loss": 16.9375, "step": 3375 }, { "epoch": 0.22421465099289367, "grad_norm": 329.15234375, "learning_rate": 1.9648646853285854e-06, "loss": 20.9062, "step": 3376 }, { "epoch": 0.22428106528524938, "grad_norm": 264.6412048339844, "learning_rate": 1.964836420762874e-06, "loss": 21.0625, "step": 3377 }, { "epoch": 0.2243474795776051, "grad_norm": 451.8285217285156, "learning_rate": 1.9648081450364923e-06, "loss": 18.6719, "step": 3378 }, { "epoch": 0.22441389386996083, "grad_norm": 166.80494689941406, "learning_rate": 1.964779858149766e-06, "loss": 22.7969, "step": 3379 }, { "epoch": 0.22448030816231654, "grad_norm": 414.01910400390625, "learning_rate": 1.9647515601030223e-06, "loss": 18.3438, "step": 3380 }, { "epoch": 0.22454672245467225, "grad_norm": 540.11328125, "learning_rate": 1.9647232508965893e-06, "loss": 20.5312, "step": 3381 }, { "epoch": 0.22461313674702796, "grad_norm": 319.0933837890625, "learning_rate": 1.9646949305307937e-06, "loss": 19.3281, "step": 3382 }, { "epoch": 0.22467955103938367, "grad_norm": 308.8315734863281, "learning_rate": 1.9646665990059636e-06, "loss": 15.4531, "step": 3383 }, { "epoch": 0.22474596533173938, "grad_norm": 290.8159484863281, "learning_rate": 1.9646382563224266e-06, "loss": 21.3125, "step": 3384 }, { "epoch": 0.22481237962409512, "grad_norm": 200.96282958984375, "learning_rate": 1.9646099024805103e-06, "loss": 17.7656, "step": 3385 }, { "epoch": 0.22487879391645083, "grad_norm": 184.07373046875, "learning_rate": 1.964581537480543e-06, "loss": 18.1875, "step": 3386 }, { "epoch": 0.22494520820880654, "grad_norm": 326.31201171875, "learning_rate": 1.9645531613228526e-06, "loss": 26.0469, "step": 3387 }, { "epoch": 0.22501162250116225, "grad_norm": 349.23992919921875, "learning_rate": 1.9645247740077676e-06, "loss": 26.8438, "step": 3388 }, { "epoch": 0.22507803679351795, "grad_norm": 519.9393310546875, "learning_rate": 1.964496375535616e-06, "loss": 19.5625, "step": 3389 }, { "epoch": 0.2251444510858737, "grad_norm": 178.97634887695312, "learning_rate": 1.9644679659067265e-06, "loss": 17.3125, "step": 3390 }, { "epoch": 0.2252108653782294, "grad_norm": 221.3916778564453, "learning_rate": 1.9644395451214276e-06, "loss": 20.2812, "step": 3391 }, { "epoch": 0.2252772796705851, "grad_norm": 308.7911682128906, "learning_rate": 1.9644111131800485e-06, "loss": 19.5781, "step": 3392 }, { "epoch": 0.22534369396294082, "grad_norm": 173.21910095214844, "learning_rate": 1.9643826700829176e-06, "loss": 16.3906, "step": 3393 }, { "epoch": 0.22541010825529653, "grad_norm": 288.4114685058594, "learning_rate": 1.964354215830364e-06, "loss": 16.6406, "step": 3394 }, { "epoch": 0.22547652254765224, "grad_norm": 321.1859436035156, "learning_rate": 1.964325750422717e-06, "loss": 19.0469, "step": 3395 }, { "epoch": 0.22554293684000798, "grad_norm": 256.82293701171875, "learning_rate": 1.9642972738603057e-06, "loss": 23.5938, "step": 3396 }, { "epoch": 0.2256093511323637, "grad_norm": 359.90087890625, "learning_rate": 1.964268786143459e-06, "loss": 26.5156, "step": 3397 }, { "epoch": 0.2256757654247194, "grad_norm": 186.0602569580078, "learning_rate": 1.964240287272508e-06, "loss": 19.6875, "step": 3398 }, { "epoch": 0.2257421797170751, "grad_norm": 340.2216491699219, "learning_rate": 1.964211777247781e-06, "loss": 25.2188, "step": 3399 }, { "epoch": 0.22580859400943082, "grad_norm": 311.716064453125, "learning_rate": 1.9641832560696083e-06, "loss": 22.625, "step": 3400 }, { "epoch": 0.22587500830178656, "grad_norm": 321.1452331542969, "learning_rate": 1.964154723738319e-06, "loss": 21.7656, "step": 3401 }, { "epoch": 0.22594142259414227, "grad_norm": 210.1145477294922, "learning_rate": 1.9641261802542446e-06, "loss": 16.9062, "step": 3402 }, { "epoch": 0.22600783688649798, "grad_norm": 773.287841796875, "learning_rate": 1.9640976256177146e-06, "loss": 32.4062, "step": 3403 }, { "epoch": 0.22607425117885369, "grad_norm": 187.16697692871094, "learning_rate": 1.9640690598290585e-06, "loss": 18.8906, "step": 3404 }, { "epoch": 0.2261406654712094, "grad_norm": 102.62593841552734, "learning_rate": 1.9640404828886077e-06, "loss": 16.9688, "step": 3405 }, { "epoch": 0.2262070797635651, "grad_norm": 286.6242980957031, "learning_rate": 1.964011894796693e-06, "loss": 18.2188, "step": 3406 }, { "epoch": 0.22627349405592084, "grad_norm": 175.01373291015625, "learning_rate": 1.963983295553644e-06, "loss": 19.3125, "step": 3407 }, { "epoch": 0.22633990834827655, "grad_norm": 191.56687927246094, "learning_rate": 1.963954685159792e-06, "loss": 21.3125, "step": 3408 }, { "epoch": 0.22640632264063226, "grad_norm": 272.7580261230469, "learning_rate": 1.9639260636154683e-06, "loss": 18.2344, "step": 3409 }, { "epoch": 0.22647273693298797, "grad_norm": 211.822998046875, "learning_rate": 1.963897430921004e-06, "loss": 16.2031, "step": 3410 }, { "epoch": 0.22653915122534368, "grad_norm": 185.77395629882812, "learning_rate": 1.9638687870767297e-06, "loss": 18.5469, "step": 3411 }, { "epoch": 0.22660556551769942, "grad_norm": 411.8986511230469, "learning_rate": 1.963840132082977e-06, "loss": 20.0156, "step": 3412 }, { "epoch": 0.22667197981005513, "grad_norm": 160.89703369140625, "learning_rate": 1.9638114659400775e-06, "loss": 22.6562, "step": 3413 }, { "epoch": 0.22673839410241084, "grad_norm": 443.3297424316406, "learning_rate": 1.9637827886483626e-06, "loss": 19.1875, "step": 3414 }, { "epoch": 0.22680480839476655, "grad_norm": 205.2166748046875, "learning_rate": 1.9637541002081644e-06, "loss": 18.625, "step": 3415 }, { "epoch": 0.22687122268712226, "grad_norm": 240.0430450439453, "learning_rate": 1.963725400619815e-06, "loss": 19.1562, "step": 3416 }, { "epoch": 0.226937636979478, "grad_norm": 162.2563934326172, "learning_rate": 1.9636966898836448e-06, "loss": 19.9688, "step": 3417 }, { "epoch": 0.2270040512718337, "grad_norm": 454.6670837402344, "learning_rate": 1.9636679679999875e-06, "loss": 25.9062, "step": 3418 }, { "epoch": 0.22707046556418942, "grad_norm": 216.53944396972656, "learning_rate": 1.9636392349691746e-06, "loss": 26.0625, "step": 3419 }, { "epoch": 0.22713687985654513, "grad_norm": 135.07003784179688, "learning_rate": 1.9636104907915394e-06, "loss": 17.9375, "step": 3420 }, { "epoch": 0.22720329414890084, "grad_norm": 434.9657897949219, "learning_rate": 1.963581735467413e-06, "loss": 25.4062, "step": 3421 }, { "epoch": 0.22726970844125655, "grad_norm": 189.27175903320312, "learning_rate": 1.963552968997129e-06, "loss": 19.4062, "step": 3422 }, { "epoch": 0.22733612273361228, "grad_norm": 113.36161041259766, "learning_rate": 1.9635241913810197e-06, "loss": 17.3281, "step": 3423 }, { "epoch": 0.227402537025968, "grad_norm": 382.0986328125, "learning_rate": 1.9634954026194186e-06, "loss": 20.2812, "step": 3424 }, { "epoch": 0.2274689513183237, "grad_norm": 199.13404846191406, "learning_rate": 1.963466602712658e-06, "loss": 16.4531, "step": 3425 }, { "epoch": 0.2275353656106794, "grad_norm": 220.75009155273438, "learning_rate": 1.963437791661071e-06, "loss": 17.8125, "step": 3426 }, { "epoch": 0.22760177990303512, "grad_norm": 124.82218170166016, "learning_rate": 1.9634089694649914e-06, "loss": 16.4062, "step": 3427 }, { "epoch": 0.22766819419539086, "grad_norm": 259.4506530761719, "learning_rate": 1.9633801361247523e-06, "loss": 15.0156, "step": 3428 }, { "epoch": 0.22773460848774657, "grad_norm": 137.36471557617188, "learning_rate": 1.9633512916406874e-06, "loss": 17.3438, "step": 3429 }, { "epoch": 0.22780102278010228, "grad_norm": 213.8980712890625, "learning_rate": 1.9633224360131303e-06, "loss": 17.6406, "step": 3430 }, { "epoch": 0.227867437072458, "grad_norm": 229.81288146972656, "learning_rate": 1.9632935692424147e-06, "loss": 24.625, "step": 3431 }, { "epoch": 0.2279338513648137, "grad_norm": 236.37759399414062, "learning_rate": 1.9632646913288748e-06, "loss": 22.3906, "step": 3432 }, { "epoch": 0.2280002656571694, "grad_norm": 296.6187438964844, "learning_rate": 1.963235802272844e-06, "loss": 19.6094, "step": 3433 }, { "epoch": 0.22806667994952515, "grad_norm": 977.390625, "learning_rate": 1.963206902074657e-06, "loss": 18.9062, "step": 3434 }, { "epoch": 0.22813309424188086, "grad_norm": 145.71383666992188, "learning_rate": 1.963177990734648e-06, "loss": 17.8906, "step": 3435 }, { "epoch": 0.22819950853423657, "grad_norm": 542.1748657226562, "learning_rate": 1.9631490682531515e-06, "loss": 19.3906, "step": 3436 }, { "epoch": 0.22826592282659228, "grad_norm": 211.92347717285156, "learning_rate": 1.9631201346305015e-06, "loss": 23.0, "step": 3437 }, { "epoch": 0.228332337118948, "grad_norm": 253.8544158935547, "learning_rate": 1.9630911898670335e-06, "loss": 21.0625, "step": 3438 }, { "epoch": 0.22839875141130372, "grad_norm": 491.1603698730469, "learning_rate": 1.9630622339630824e-06, "loss": 29.0, "step": 3439 }, { "epoch": 0.22846516570365943, "grad_norm": 190.55715942382812, "learning_rate": 1.9630332669189823e-06, "loss": 17.3281, "step": 3440 }, { "epoch": 0.22853157999601514, "grad_norm": 309.915771484375, "learning_rate": 1.9630042887350683e-06, "loss": 33.9062, "step": 3441 }, { "epoch": 0.22859799428837085, "grad_norm": 163.21481323242188, "learning_rate": 1.9629752994116766e-06, "loss": 24.2656, "step": 3442 }, { "epoch": 0.22866440858072656, "grad_norm": 212.41281127929688, "learning_rate": 1.962946298949142e-06, "loss": 23.8438, "step": 3443 }, { "epoch": 0.22873082287308227, "grad_norm": 142.72779846191406, "learning_rate": 1.9629172873477994e-06, "loss": 17.5625, "step": 3444 }, { "epoch": 0.228797237165438, "grad_norm": 160.25830078125, "learning_rate": 1.9628882646079847e-06, "loss": 17.5469, "step": 3445 }, { "epoch": 0.22886365145779372, "grad_norm": 300.07147216796875, "learning_rate": 1.9628592307300344e-06, "loss": 23.8438, "step": 3446 }, { "epoch": 0.22893006575014943, "grad_norm": 207.6265411376953, "learning_rate": 1.962830185714283e-06, "loss": 19.7188, "step": 3447 }, { "epoch": 0.22899648004250514, "grad_norm": 358.50128173828125, "learning_rate": 1.962801129561068e-06, "loss": 15.5938, "step": 3448 }, { "epoch": 0.22906289433486085, "grad_norm": 279.5104064941406, "learning_rate": 1.962772062270724e-06, "loss": 20.4062, "step": 3449 }, { "epoch": 0.2291293086272166, "grad_norm": 238.20037841796875, "learning_rate": 1.9627429838435885e-06, "loss": 23.2188, "step": 3450 }, { "epoch": 0.2291957229195723, "grad_norm": 296.1814270019531, "learning_rate": 1.9627138942799974e-06, "loss": 22.2031, "step": 3451 }, { "epoch": 0.229262137211928, "grad_norm": 250.43116760253906, "learning_rate": 1.9626847935802867e-06, "loss": 21.375, "step": 3452 }, { "epoch": 0.22932855150428372, "grad_norm": 153.04454040527344, "learning_rate": 1.962655681744794e-06, "loss": 15.0469, "step": 3453 }, { "epoch": 0.22939496579663943, "grad_norm": 209.66854858398438, "learning_rate": 1.962626558773855e-06, "loss": 25.7812, "step": 3454 }, { "epoch": 0.22946138008899516, "grad_norm": 162.6425018310547, "learning_rate": 1.9625974246678067e-06, "loss": 17.5625, "step": 3455 }, { "epoch": 0.22952779438135087, "grad_norm": 180.2906494140625, "learning_rate": 1.962568279426987e-06, "loss": 18.9531, "step": 3456 }, { "epoch": 0.22959420867370658, "grad_norm": 254.0235137939453, "learning_rate": 1.962539123051732e-06, "loss": 23.9375, "step": 3457 }, { "epoch": 0.2296606229660623, "grad_norm": 129.69540405273438, "learning_rate": 1.9625099555423802e-06, "loss": 18.5312, "step": 3458 }, { "epoch": 0.229727037258418, "grad_norm": 228.0008087158203, "learning_rate": 1.9624807768992678e-06, "loss": 25.3125, "step": 3459 }, { "epoch": 0.22979345155077371, "grad_norm": 243.06776428222656, "learning_rate": 1.9624515871227326e-06, "loss": 22.5938, "step": 3460 }, { "epoch": 0.22985986584312945, "grad_norm": 153.43409729003906, "learning_rate": 1.962422386213113e-06, "loss": 20.0469, "step": 3461 }, { "epoch": 0.22992628013548516, "grad_norm": 172.86944580078125, "learning_rate": 1.962393174170746e-06, "loss": 22.625, "step": 3462 }, { "epoch": 0.22999269442784087, "grad_norm": 211.2798614501953, "learning_rate": 1.9623639509959692e-06, "loss": 22.0625, "step": 3463 }, { "epoch": 0.23005910872019658, "grad_norm": 285.3208312988281, "learning_rate": 1.9623347166891214e-06, "loss": 18.0312, "step": 3464 }, { "epoch": 0.2301255230125523, "grad_norm": 206.89622497558594, "learning_rate": 1.9623054712505408e-06, "loss": 21.4219, "step": 3465 }, { "epoch": 0.23019193730490803, "grad_norm": 216.87045288085938, "learning_rate": 1.9622762146805653e-06, "loss": 20.3438, "step": 3466 }, { "epoch": 0.23025835159726374, "grad_norm": 161.9148406982422, "learning_rate": 1.9622469469795333e-06, "loss": 13.5312, "step": 3467 }, { "epoch": 0.23032476588961945, "grad_norm": 188.9159393310547, "learning_rate": 1.9622176681477837e-06, "loss": 24.8906, "step": 3468 }, { "epoch": 0.23039118018197516, "grad_norm": 208.00872802734375, "learning_rate": 1.9621883781856544e-06, "loss": 21.25, "step": 3469 }, { "epoch": 0.23045759447433087, "grad_norm": 313.04302978515625, "learning_rate": 1.9621590770934855e-06, "loss": 24.0938, "step": 3470 }, { "epoch": 0.23052400876668658, "grad_norm": 415.6642150878906, "learning_rate": 1.962129764871615e-06, "loss": 23.1719, "step": 3471 }, { "epoch": 0.23059042305904232, "grad_norm": 179.07757568359375, "learning_rate": 1.962100441520382e-06, "loss": 15.9844, "step": 3472 }, { "epoch": 0.23065683735139803, "grad_norm": 243.00247192382812, "learning_rate": 1.962071107040126e-06, "loss": 21.9375, "step": 3473 }, { "epoch": 0.23072325164375374, "grad_norm": 185.17037963867188, "learning_rate": 1.9620417614311857e-06, "loss": 18.9531, "step": 3474 }, { "epoch": 0.23078966593610944, "grad_norm": 159.2083282470703, "learning_rate": 1.9620124046939014e-06, "loss": 16.0312, "step": 3475 }, { "epoch": 0.23085608022846515, "grad_norm": 581.606201171875, "learning_rate": 1.9619830368286124e-06, "loss": 15.1719, "step": 3476 }, { "epoch": 0.2309224945208209, "grad_norm": 243.7129364013672, "learning_rate": 1.9619536578356585e-06, "loss": 26.0938, "step": 3477 }, { "epoch": 0.2309889088131766, "grad_norm": 378.6918029785156, "learning_rate": 1.9619242677153793e-06, "loss": 28.75, "step": 3478 }, { "epoch": 0.2310553231055323, "grad_norm": 249.17330932617188, "learning_rate": 1.9618948664681145e-06, "loss": 16.0156, "step": 3479 }, { "epoch": 0.23112173739788802, "grad_norm": 155.82672119140625, "learning_rate": 1.961865454094205e-06, "loss": 18.0625, "step": 3480 }, { "epoch": 0.23118815169024373, "grad_norm": 260.1749572753906, "learning_rate": 1.9618360305939904e-06, "loss": 22.9688, "step": 3481 }, { "epoch": 0.23125456598259947, "grad_norm": 261.309814453125, "learning_rate": 1.9618065959678113e-06, "loss": 31.8125, "step": 3482 }, { "epoch": 0.23132098027495518, "grad_norm": 172.984619140625, "learning_rate": 1.9617771502160077e-06, "loss": 16.8281, "step": 3483 }, { "epoch": 0.2313873945673109, "grad_norm": 245.81053161621094, "learning_rate": 1.961747693338921e-06, "loss": 24.2188, "step": 3484 }, { "epoch": 0.2314538088596666, "grad_norm": 390.4872131347656, "learning_rate": 1.9617182253368914e-06, "loss": 16.5469, "step": 3485 }, { "epoch": 0.2315202231520223, "grad_norm": 249.70266723632812, "learning_rate": 1.96168874621026e-06, "loss": 19.0156, "step": 3486 }, { "epoch": 0.23158663744437802, "grad_norm": 344.181640625, "learning_rate": 1.961659255959368e-06, "loss": 29.3125, "step": 3487 }, { "epoch": 0.23165305173673376, "grad_norm": 190.64767456054688, "learning_rate": 1.9616297545845558e-06, "loss": 27.0625, "step": 3488 }, { "epoch": 0.23171946602908947, "grad_norm": 818.9393310546875, "learning_rate": 1.9616002420861655e-06, "loss": 14.3438, "step": 3489 }, { "epoch": 0.23178588032144518, "grad_norm": 271.7254638671875, "learning_rate": 1.9615707184645377e-06, "loss": 24.4688, "step": 3490 }, { "epoch": 0.23185229461380089, "grad_norm": 206.5594482421875, "learning_rate": 1.9615411837200146e-06, "loss": 19.9531, "step": 3491 }, { "epoch": 0.2319187089061566, "grad_norm": 298.71624755859375, "learning_rate": 1.961511637852937e-06, "loss": 18.3906, "step": 3492 }, { "epoch": 0.23198512319851233, "grad_norm": 417.2193298339844, "learning_rate": 1.961482080863648e-06, "loss": 28.3438, "step": 3493 }, { "epoch": 0.23205153749086804, "grad_norm": 149.15170288085938, "learning_rate": 1.9614525127524883e-06, "loss": 16.8594, "step": 3494 }, { "epoch": 0.23211795178322375, "grad_norm": 399.14593505859375, "learning_rate": 1.9614229335198005e-06, "loss": 20.9688, "step": 3495 }, { "epoch": 0.23218436607557946, "grad_norm": 231.86392211914062, "learning_rate": 1.9613933431659266e-06, "loss": 22.6094, "step": 3496 }, { "epoch": 0.23225078036793517, "grad_norm": 325.7566223144531, "learning_rate": 1.9613637416912086e-06, "loss": 16.2812, "step": 3497 }, { "epoch": 0.23231719466029088, "grad_norm": 425.24774169921875, "learning_rate": 1.9613341290959894e-06, "loss": 23.0469, "step": 3498 }, { "epoch": 0.23238360895264662, "grad_norm": 292.3743591308594, "learning_rate": 1.961304505380611e-06, "loss": 22.0625, "step": 3499 }, { "epoch": 0.23245002324500233, "grad_norm": 403.06427001953125, "learning_rate": 1.961274870545417e-06, "loss": 30.6562, "step": 3500 }, { "epoch": 0.23251643753735804, "grad_norm": 223.91986083984375, "learning_rate": 1.9612452245907493e-06, "loss": 22.5312, "step": 3501 }, { "epoch": 0.23258285182971375, "grad_norm": 217.10504150390625, "learning_rate": 1.961215567516951e-06, "loss": 18.1094, "step": 3502 }, { "epoch": 0.23264926612206946, "grad_norm": 230.6453857421875, "learning_rate": 1.9611858993243655e-06, "loss": 17.8594, "step": 3503 }, { "epoch": 0.2327156804144252, "grad_norm": 190.57162475585938, "learning_rate": 1.9611562200133357e-06, "loss": 19.2656, "step": 3504 }, { "epoch": 0.2327820947067809, "grad_norm": 180.51937866210938, "learning_rate": 1.961126529584205e-06, "loss": 18.5312, "step": 3505 }, { "epoch": 0.23284850899913662, "grad_norm": 991.2797241210938, "learning_rate": 1.961096828037317e-06, "loss": 28.5, "step": 3506 }, { "epoch": 0.23291492329149233, "grad_norm": 267.4385681152344, "learning_rate": 1.9610671153730147e-06, "loss": 29.3125, "step": 3507 }, { "epoch": 0.23298133758384804, "grad_norm": 893.0191650390625, "learning_rate": 1.961037391591642e-06, "loss": 18.0625, "step": 3508 }, { "epoch": 0.23304775187620375, "grad_norm": 169.79049682617188, "learning_rate": 1.9610076566935436e-06, "loss": 17.7812, "step": 3509 }, { "epoch": 0.23311416616855948, "grad_norm": 270.43536376953125, "learning_rate": 1.9609779106790626e-06, "loss": 20.0, "step": 3510 }, { "epoch": 0.2331805804609152, "grad_norm": 155.04037475585938, "learning_rate": 1.960948153548543e-06, "loss": 17.1094, "step": 3511 }, { "epoch": 0.2332469947532709, "grad_norm": 285.4990539550781, "learning_rate": 1.960918385302329e-06, "loss": 21.8125, "step": 3512 }, { "epoch": 0.2333134090456266, "grad_norm": 317.1846008300781, "learning_rate": 1.960888605940766e-06, "loss": 22.0, "step": 3513 }, { "epoch": 0.23337982333798232, "grad_norm": 163.5538787841797, "learning_rate": 1.9608588154641973e-06, "loss": 20.4844, "step": 3514 }, { "epoch": 0.23344623763033806, "grad_norm": 459.6637268066406, "learning_rate": 1.960829013872968e-06, "loss": 23.1875, "step": 3515 }, { "epoch": 0.23351265192269377, "grad_norm": 176.84341430664062, "learning_rate": 1.9607992011674223e-06, "loss": 17.9688, "step": 3516 }, { "epoch": 0.23357906621504948, "grad_norm": 257.8624572753906, "learning_rate": 1.9607693773479058e-06, "loss": 20.9219, "step": 3517 }, { "epoch": 0.2336454805074052, "grad_norm": 148.72372436523438, "learning_rate": 1.960739542414763e-06, "loss": 21.1875, "step": 3518 }, { "epoch": 0.2337118947997609, "grad_norm": 297.9092102050781, "learning_rate": 1.960709696368339e-06, "loss": 17.1406, "step": 3519 }, { "epoch": 0.23377830909211664, "grad_norm": 271.78546142578125, "learning_rate": 1.96067983920898e-06, "loss": 31.25, "step": 3520 }, { "epoch": 0.23384472338447235, "grad_norm": 219.64260864257812, "learning_rate": 1.96064997093703e-06, "loss": 16.125, "step": 3521 }, { "epoch": 0.23391113767682806, "grad_norm": 331.670654296875, "learning_rate": 1.960620091552835e-06, "loss": 25.6875, "step": 3522 }, { "epoch": 0.23397755196918377, "grad_norm": 354.8997802734375, "learning_rate": 1.960590201056741e-06, "loss": 18.8125, "step": 3523 }, { "epoch": 0.23404396626153948, "grad_norm": 384.59368896484375, "learning_rate": 1.9605602994490935e-06, "loss": 27.1562, "step": 3524 }, { "epoch": 0.2341103805538952, "grad_norm": 162.53494262695312, "learning_rate": 1.9605303867302377e-06, "loss": 16.3906, "step": 3525 }, { "epoch": 0.23417679484625092, "grad_norm": 194.2560272216797, "learning_rate": 1.9605004629005213e-06, "loss": 16.625, "step": 3526 }, { "epoch": 0.23424320913860663, "grad_norm": 138.76344299316406, "learning_rate": 1.9604705279602885e-06, "loss": 19.6562, "step": 3527 }, { "epoch": 0.23430962343096234, "grad_norm": 207.58067321777344, "learning_rate": 1.9604405819098866e-06, "loss": 21.8438, "step": 3528 }, { "epoch": 0.23437603772331805, "grad_norm": 428.592041015625, "learning_rate": 1.960410624749662e-06, "loss": 16.125, "step": 3529 }, { "epoch": 0.23444245201567376, "grad_norm": 169.6061553955078, "learning_rate": 1.9603806564799615e-06, "loss": 23.375, "step": 3530 }, { "epoch": 0.2345088663080295, "grad_norm": 124.49348449707031, "learning_rate": 1.960350677101131e-06, "loss": 17.1094, "step": 3531 }, { "epoch": 0.2345752806003852, "grad_norm": 394.1971130371094, "learning_rate": 1.9603206866135174e-06, "loss": 16.7188, "step": 3532 }, { "epoch": 0.23464169489274092, "grad_norm": 166.36354064941406, "learning_rate": 1.9602906850174684e-06, "loss": 18.25, "step": 3533 }, { "epoch": 0.23470810918509663, "grad_norm": 205.56671142578125, "learning_rate": 1.96026067231333e-06, "loss": 17.5625, "step": 3534 }, { "epoch": 0.23477452347745234, "grad_norm": 188.7243194580078, "learning_rate": 1.96023064850145e-06, "loss": 17.1875, "step": 3535 }, { "epoch": 0.23484093776980805, "grad_norm": 1969.989990234375, "learning_rate": 1.960200613582176e-06, "loss": 17.4297, "step": 3536 }, { "epoch": 0.2349073520621638, "grad_norm": 336.6967468261719, "learning_rate": 1.9601705675558543e-06, "loss": 20.2031, "step": 3537 }, { "epoch": 0.2349737663545195, "grad_norm": 484.6379089355469, "learning_rate": 1.9601405104228336e-06, "loss": 20.1094, "step": 3538 }, { "epoch": 0.2350401806468752, "grad_norm": 219.1975555419922, "learning_rate": 1.960110442183461e-06, "loss": 16.75, "step": 3539 }, { "epoch": 0.23510659493923092, "grad_norm": 204.6440887451172, "learning_rate": 1.9600803628380843e-06, "loss": 26.625, "step": 3540 }, { "epoch": 0.23517300923158663, "grad_norm": 142.07089233398438, "learning_rate": 1.960050272387052e-06, "loss": 17.8438, "step": 3541 }, { "epoch": 0.23523942352394236, "grad_norm": 134.87838745117188, "learning_rate": 1.960020170830711e-06, "loss": 12.4531, "step": 3542 }, { "epoch": 0.23530583781629807, "grad_norm": 294.9667663574219, "learning_rate": 1.959990058169411e-06, "loss": 18.75, "step": 3543 }, { "epoch": 0.23537225210865378, "grad_norm": 173.8145294189453, "learning_rate": 1.959959934403499e-06, "loss": 18.8281, "step": 3544 }, { "epoch": 0.2354386664010095, "grad_norm": 234.5760498046875, "learning_rate": 1.959929799533324e-06, "loss": 24.125, "step": 3545 }, { "epoch": 0.2355050806933652, "grad_norm": 224.0596160888672, "learning_rate": 1.9598996535592353e-06, "loss": 16.1406, "step": 3546 }, { "epoch": 0.23557149498572091, "grad_norm": 141.98492431640625, "learning_rate": 1.9598694964815805e-06, "loss": 17.3594, "step": 3547 }, { "epoch": 0.23563790927807665, "grad_norm": 176.05789184570312, "learning_rate": 1.9598393283007088e-06, "loss": 28.6562, "step": 3548 }, { "epoch": 0.23570432357043236, "grad_norm": 135.03379821777344, "learning_rate": 1.9598091490169694e-06, "loss": 18.4062, "step": 3549 }, { "epoch": 0.23577073786278807, "grad_norm": 426.3487854003906, "learning_rate": 1.959778958630711e-06, "loss": 28.9688, "step": 3550 }, { "epoch": 0.23583715215514378, "grad_norm": 155.75428771972656, "learning_rate": 1.959748757142283e-06, "loss": 16.5312, "step": 3551 }, { "epoch": 0.2359035664474995, "grad_norm": 1743.8995361328125, "learning_rate": 1.959718544552035e-06, "loss": 18.7031, "step": 3552 }, { "epoch": 0.23596998073985523, "grad_norm": 425.7135314941406, "learning_rate": 1.959688320860316e-06, "loss": 20.9062, "step": 3553 }, { "epoch": 0.23603639503221094, "grad_norm": 544.7344360351562, "learning_rate": 1.959658086067476e-06, "loss": 29.1875, "step": 3554 }, { "epoch": 0.23610280932456665, "grad_norm": 280.12530517578125, "learning_rate": 1.959627840173865e-06, "loss": 24.2344, "step": 3555 }, { "epoch": 0.23616922361692236, "grad_norm": 224.42556762695312, "learning_rate": 1.9595975831798324e-06, "loss": 19.3594, "step": 3556 }, { "epoch": 0.23623563790927807, "grad_norm": 201.96884155273438, "learning_rate": 1.9595673150857277e-06, "loss": 20.5938, "step": 3557 }, { "epoch": 0.2363020522016338, "grad_norm": 175.9623260498047, "learning_rate": 1.9595370358919022e-06, "loss": 15.5, "step": 3558 }, { "epoch": 0.23636846649398952, "grad_norm": 908.5472412109375, "learning_rate": 1.9595067455987056e-06, "loss": 18.6562, "step": 3559 }, { "epoch": 0.23643488078634523, "grad_norm": 140.8134765625, "learning_rate": 1.959476444206488e-06, "loss": 14.7969, "step": 3560 }, { "epoch": 0.23650129507870093, "grad_norm": 505.9639587402344, "learning_rate": 1.9594461317156005e-06, "loss": 21.9375, "step": 3561 }, { "epoch": 0.23656770937105664, "grad_norm": 282.47015380859375, "learning_rate": 1.959415808126393e-06, "loss": 18.1406, "step": 3562 }, { "epoch": 0.23663412366341235, "grad_norm": 219.86842346191406, "learning_rate": 1.9593854734392166e-06, "loss": 18.6562, "step": 3563 }, { "epoch": 0.2367005379557681, "grad_norm": 299.10125732421875, "learning_rate": 1.9593551276544225e-06, "loss": 20.3906, "step": 3564 }, { "epoch": 0.2367669522481238, "grad_norm": 310.7257080078125, "learning_rate": 1.9593247707723614e-06, "loss": 13.5781, "step": 3565 }, { "epoch": 0.2368333665404795, "grad_norm": 264.6969299316406, "learning_rate": 1.9592944027933847e-06, "loss": 21.5703, "step": 3566 }, { "epoch": 0.23689978083283522, "grad_norm": 582.1766357421875, "learning_rate": 1.959264023717843e-06, "loss": 35.75, "step": 3567 }, { "epoch": 0.23696619512519093, "grad_norm": 196.33682250976562, "learning_rate": 1.9592336335460887e-06, "loss": 20.4375, "step": 3568 }, { "epoch": 0.23703260941754667, "grad_norm": 1257.6683349609375, "learning_rate": 1.9592032322784728e-06, "loss": 17.3125, "step": 3569 }, { "epoch": 0.23709902370990238, "grad_norm": 414.0047302246094, "learning_rate": 1.9591728199153474e-06, "loss": 24.1875, "step": 3570 }, { "epoch": 0.2371654380022581, "grad_norm": 249.09861755371094, "learning_rate": 1.959142396457063e-06, "loss": 21.2812, "step": 3571 }, { "epoch": 0.2372318522946138, "grad_norm": 229.7623748779297, "learning_rate": 1.9591119619039727e-06, "loss": 23.1094, "step": 3572 }, { "epoch": 0.2372982665869695, "grad_norm": 181.740478515625, "learning_rate": 1.959081516256429e-06, "loss": 16.9844, "step": 3573 }, { "epoch": 0.23736468087932522, "grad_norm": 170.60842895507812, "learning_rate": 1.9590510595147826e-06, "loss": 15.0625, "step": 3574 }, { "epoch": 0.23743109517168096, "grad_norm": 213.76458740234375, "learning_rate": 1.959020591679387e-06, "loss": 17.6875, "step": 3575 }, { "epoch": 0.23749750946403667, "grad_norm": 335.84429931640625, "learning_rate": 1.9589901127505935e-06, "loss": 21.2188, "step": 3576 }, { "epoch": 0.23756392375639238, "grad_norm": 235.74351501464844, "learning_rate": 1.9589596227287563e-06, "loss": 24.1406, "step": 3577 }, { "epoch": 0.23763033804874809, "grad_norm": 252.10662841796875, "learning_rate": 1.9589291216142264e-06, "loss": 19.0469, "step": 3578 }, { "epoch": 0.2376967523411038, "grad_norm": 112.77571868896484, "learning_rate": 1.958898609407358e-06, "loss": 17.0469, "step": 3579 }, { "epoch": 0.23776316663345953, "grad_norm": 136.9547882080078, "learning_rate": 1.958868086108503e-06, "loss": 20.3438, "step": 3580 }, { "epoch": 0.23782958092581524, "grad_norm": 185.27798461914062, "learning_rate": 1.958837551718015e-06, "loss": 19.5, "step": 3581 }, { "epoch": 0.23789599521817095, "grad_norm": 798.1828002929688, "learning_rate": 1.9588070062362472e-06, "loss": 24.8594, "step": 3582 }, { "epoch": 0.23796240951052666, "grad_norm": 198.21530151367188, "learning_rate": 1.9587764496635527e-06, "loss": 22.3438, "step": 3583 }, { "epoch": 0.23802882380288237, "grad_norm": 150.28831481933594, "learning_rate": 1.9587458820002852e-06, "loss": 18.1719, "step": 3584 }, { "epoch": 0.23809523809523808, "grad_norm": 332.0624084472656, "learning_rate": 1.958715303246798e-06, "loss": 20.6562, "step": 3585 }, { "epoch": 0.23816165238759382, "grad_norm": 179.98526000976562, "learning_rate": 1.958684713403445e-06, "loss": 18.7969, "step": 3586 }, { "epoch": 0.23822806667994953, "grad_norm": 300.76788330078125, "learning_rate": 1.95865411247058e-06, "loss": 27.2344, "step": 3587 }, { "epoch": 0.23829448097230524, "grad_norm": 182.5250701904297, "learning_rate": 1.958623500448557e-06, "loss": 22.0312, "step": 3588 }, { "epoch": 0.23836089526466095, "grad_norm": 277.4201965332031, "learning_rate": 1.9585928773377307e-06, "loss": 21.0312, "step": 3589 }, { "epoch": 0.23842730955701666, "grad_norm": 167.0001678466797, "learning_rate": 1.9585622431384544e-06, "loss": 14.4531, "step": 3590 }, { "epoch": 0.2384937238493724, "grad_norm": 220.2885284423828, "learning_rate": 1.9585315978510827e-06, "loss": 19.7812, "step": 3591 }, { "epoch": 0.2385601381417281, "grad_norm": 441.00567626953125, "learning_rate": 1.95850094147597e-06, "loss": 21.7812, "step": 3592 }, { "epoch": 0.23862655243408382, "grad_norm": 809.8749389648438, "learning_rate": 1.9584702740134716e-06, "loss": 25.5625, "step": 3593 }, { "epoch": 0.23869296672643953, "grad_norm": 116.03091430664062, "learning_rate": 1.9584395954639415e-06, "loss": 16.4062, "step": 3594 }, { "epoch": 0.23875938101879524, "grad_norm": 769.3057250976562, "learning_rate": 1.9584089058277348e-06, "loss": 16.6094, "step": 3595 }, { "epoch": 0.23882579531115097, "grad_norm": 155.0812225341797, "learning_rate": 1.9583782051052064e-06, "loss": 16.8906, "step": 3596 }, { "epoch": 0.23889220960350668, "grad_norm": 199.63592529296875, "learning_rate": 1.9583474932967116e-06, "loss": 22.75, "step": 3597 }, { "epoch": 0.2389586238958624, "grad_norm": 318.4427795410156, "learning_rate": 1.958316770402606e-06, "loss": 18.2344, "step": 3598 }, { "epoch": 0.2390250381882181, "grad_norm": 292.3639831542969, "learning_rate": 1.9582860364232443e-06, "loss": 15.2188, "step": 3599 }, { "epoch": 0.2390914524805738, "grad_norm": 265.3907775878906, "learning_rate": 1.9582552913589824e-06, "loss": 17.5938, "step": 3600 }, { "epoch": 0.23915786677292952, "grad_norm": 277.4264221191406, "learning_rate": 1.9582245352101755e-06, "loss": 20.1562, "step": 3601 }, { "epoch": 0.23922428106528526, "grad_norm": 225.5330352783203, "learning_rate": 1.95819376797718e-06, "loss": 20.125, "step": 3602 }, { "epoch": 0.23929069535764097, "grad_norm": 157.2951202392578, "learning_rate": 1.9581629896603516e-06, "loss": 17.2656, "step": 3603 }, { "epoch": 0.23935710964999668, "grad_norm": 169.12013244628906, "learning_rate": 1.958132200260046e-06, "loss": 16.2656, "step": 3604 }, { "epoch": 0.2394235239423524, "grad_norm": 402.8316650390625, "learning_rate": 1.95810139977662e-06, "loss": 24.0, "step": 3605 }, { "epoch": 0.2394899382347081, "grad_norm": 237.32203674316406, "learning_rate": 1.9580705882104292e-06, "loss": 25.9688, "step": 3606 }, { "epoch": 0.23955635252706384, "grad_norm": 139.5574493408203, "learning_rate": 1.9580397655618305e-06, "loss": 17.7656, "step": 3607 }, { "epoch": 0.23962276681941955, "grad_norm": 430.9708251953125, "learning_rate": 1.95800893183118e-06, "loss": 22.5312, "step": 3608 }, { "epoch": 0.23968918111177526, "grad_norm": 823.7010498046875, "learning_rate": 1.9579780870188346e-06, "loss": 20.0, "step": 3609 }, { "epoch": 0.23975559540413097, "grad_norm": 253.47332763671875, "learning_rate": 1.957947231125151e-06, "loss": 13.2812, "step": 3610 }, { "epoch": 0.23982200969648668, "grad_norm": 335.40765380859375, "learning_rate": 1.9579163641504864e-06, "loss": 23.7188, "step": 3611 }, { "epoch": 0.2398884239888424, "grad_norm": 317.98712158203125, "learning_rate": 1.957885486095198e-06, "loss": 33.4062, "step": 3612 }, { "epoch": 0.23995483828119812, "grad_norm": 193.20631408691406, "learning_rate": 1.957854596959642e-06, "loss": 17.9531, "step": 3613 }, { "epoch": 0.24002125257355383, "grad_norm": 149.22760009765625, "learning_rate": 1.957823696744177e-06, "loss": 14.25, "step": 3614 }, { "epoch": 0.24008766686590954, "grad_norm": 307.0130310058594, "learning_rate": 1.9577927854491592e-06, "loss": 19.1406, "step": 3615 }, { "epoch": 0.24015408115826525, "grad_norm": 160.80459594726562, "learning_rate": 1.957761863074947e-06, "loss": 24.2812, "step": 3616 }, { "epoch": 0.24022049545062096, "grad_norm": 235.83700561523438, "learning_rate": 1.9577309296218977e-06, "loss": 23.0, "step": 3617 }, { "epoch": 0.2402869097429767, "grad_norm": 287.5814514160156, "learning_rate": 1.9576999850903697e-06, "loss": 24.3281, "step": 3618 }, { "epoch": 0.2403533240353324, "grad_norm": 307.6409606933594, "learning_rate": 1.95766902948072e-06, "loss": 14.0312, "step": 3619 }, { "epoch": 0.24041973832768812, "grad_norm": 191.9541015625, "learning_rate": 1.9576380627933077e-06, "loss": 21.875, "step": 3620 }, { "epoch": 0.24048615262004383, "grad_norm": 201.9121551513672, "learning_rate": 1.95760708502849e-06, "loss": 26.4219, "step": 3621 }, { "epoch": 0.24055256691239954, "grad_norm": 189.3588409423828, "learning_rate": 1.9575760961866257e-06, "loss": 20.8438, "step": 3622 }, { "epoch": 0.24061898120475525, "grad_norm": 203.60052490234375, "learning_rate": 1.9575450962680736e-06, "loss": 16.7969, "step": 3623 }, { "epoch": 0.240685395497111, "grad_norm": 893.42919921875, "learning_rate": 1.957514085273192e-06, "loss": 18.2031, "step": 3624 }, { "epoch": 0.2407518097894667, "grad_norm": 357.4256591796875, "learning_rate": 1.957483063202339e-06, "loss": 28.9688, "step": 3625 }, { "epoch": 0.2408182240818224, "grad_norm": 110.61547088623047, "learning_rate": 1.957452030055875e-06, "loss": 15.5625, "step": 3626 }, { "epoch": 0.24088463837417812, "grad_norm": 434.45269775390625, "learning_rate": 1.9574209858341575e-06, "loss": 20.7031, "step": 3627 }, { "epoch": 0.24095105266653383, "grad_norm": 542.1715087890625, "learning_rate": 1.957389930537546e-06, "loss": 14.0156, "step": 3628 }, { "epoch": 0.24101746695888956, "grad_norm": 198.32403564453125, "learning_rate": 1.9573588641664004e-06, "loss": 21.0781, "step": 3629 }, { "epoch": 0.24108388125124527, "grad_norm": 149.71432495117188, "learning_rate": 1.957327786721079e-06, "loss": 17.9531, "step": 3630 }, { "epoch": 0.24115029554360098, "grad_norm": 214.47047424316406, "learning_rate": 1.9572966982019415e-06, "loss": 17.1875, "step": 3631 }, { "epoch": 0.2412167098359567, "grad_norm": 158.68759155273438, "learning_rate": 1.957265598609348e-06, "loss": 16.0156, "step": 3632 }, { "epoch": 0.2412831241283124, "grad_norm": 159.87171936035156, "learning_rate": 1.9572344879436586e-06, "loss": 26.5781, "step": 3633 }, { "epoch": 0.24134953842066814, "grad_norm": 153.0856475830078, "learning_rate": 1.9572033662052325e-06, "loss": 17.0625, "step": 3634 }, { "epoch": 0.24141595271302385, "grad_norm": 188.9032745361328, "learning_rate": 1.9571722333944294e-06, "loss": 14.375, "step": 3635 }, { "epoch": 0.24148236700537956, "grad_norm": 246.317626953125, "learning_rate": 1.95714108951161e-06, "loss": 25.4375, "step": 3636 }, { "epoch": 0.24154878129773527, "grad_norm": 220.61929321289062, "learning_rate": 1.9571099345571343e-06, "loss": 20.1562, "step": 3637 }, { "epoch": 0.24161519559009098, "grad_norm": 162.0437774658203, "learning_rate": 1.957078768531363e-06, "loss": 19.1406, "step": 3638 }, { "epoch": 0.2416816098824467, "grad_norm": 218.09323120117188, "learning_rate": 1.9570475914346562e-06, "loss": 19.1562, "step": 3639 }, { "epoch": 0.24174802417480243, "grad_norm": 180.45286560058594, "learning_rate": 1.957016403267375e-06, "loss": 16.6406, "step": 3640 }, { "epoch": 0.24181443846715814, "grad_norm": 488.7325439453125, "learning_rate": 1.9569852040298797e-06, "loss": 17.4062, "step": 3641 }, { "epoch": 0.24188085275951385, "grad_norm": 167.5967254638672, "learning_rate": 1.9569539937225313e-06, "loss": 19.0469, "step": 3642 }, { "epoch": 0.24194726705186956, "grad_norm": 248.4351348876953, "learning_rate": 1.956922772345691e-06, "loss": 14.6406, "step": 3643 }, { "epoch": 0.24201368134422527, "grad_norm": 194.24424743652344, "learning_rate": 1.95689153989972e-06, "loss": 25.3125, "step": 3644 }, { "epoch": 0.242080095636581, "grad_norm": 138.9958953857422, "learning_rate": 1.9568602963849793e-06, "loss": 19.5781, "step": 3645 }, { "epoch": 0.24214650992893672, "grad_norm": 167.0458526611328, "learning_rate": 1.9568290418018304e-06, "loss": 24.0625, "step": 3646 }, { "epoch": 0.24221292422129242, "grad_norm": 238.09776306152344, "learning_rate": 1.956797776150635e-06, "loss": 22.0, "step": 3647 }, { "epoch": 0.24227933851364813, "grad_norm": 196.53933715820312, "learning_rate": 1.9567664994317545e-06, "loss": 23.2812, "step": 3648 }, { "epoch": 0.24234575280600384, "grad_norm": 192.6697235107422, "learning_rate": 1.9567352116455507e-06, "loss": 15.6719, "step": 3649 }, { "epoch": 0.24241216709835955, "grad_norm": 103.49105834960938, "learning_rate": 1.9567039127923862e-06, "loss": 16.2188, "step": 3650 }, { "epoch": 0.2424785813907153, "grad_norm": 207.0819091796875, "learning_rate": 1.956672602872622e-06, "loss": 16.9844, "step": 3651 }, { "epoch": 0.242544995683071, "grad_norm": 168.22698974609375, "learning_rate": 1.956641281886621e-06, "loss": 17.8281, "step": 3652 }, { "epoch": 0.2426114099754267, "grad_norm": 326.9307861328125, "learning_rate": 1.956609949834745e-06, "loss": 21.9844, "step": 3653 }, { "epoch": 0.24267782426778242, "grad_norm": 384.97344970703125, "learning_rate": 1.956578606717357e-06, "loss": 26.125, "step": 3654 }, { "epoch": 0.24274423856013813, "grad_norm": 285.8785400390625, "learning_rate": 1.9565472525348193e-06, "loss": 19.0156, "step": 3655 }, { "epoch": 0.24281065285249387, "grad_norm": 249.46478271484375, "learning_rate": 1.9565158872874944e-06, "loss": 16.4688, "step": 3656 }, { "epoch": 0.24287706714484958, "grad_norm": 239.85926818847656, "learning_rate": 1.9564845109757457e-06, "loss": 19.8125, "step": 3657 }, { "epoch": 0.2429434814372053, "grad_norm": 318.095458984375, "learning_rate": 1.9564531235999354e-06, "loss": 18.75, "step": 3658 }, { "epoch": 0.243009895729561, "grad_norm": 259.7831115722656, "learning_rate": 1.9564217251604267e-06, "loss": 32.8906, "step": 3659 }, { "epoch": 0.2430763100219167, "grad_norm": 519.3285522460938, "learning_rate": 1.9563903156575836e-06, "loss": 27.8281, "step": 3660 }, { "epoch": 0.24314272431427242, "grad_norm": 132.43292236328125, "learning_rate": 1.956358895091768e-06, "loss": 12.1406, "step": 3661 }, { "epoch": 0.24320913860662816, "grad_norm": 120.83287811279297, "learning_rate": 1.9563274634633444e-06, "loss": 17.9688, "step": 3662 }, { "epoch": 0.24327555289898387, "grad_norm": 212.9396514892578, "learning_rate": 1.9562960207726763e-06, "loss": 21.625, "step": 3663 }, { "epoch": 0.24334196719133958, "grad_norm": 208.98916625976562, "learning_rate": 1.9562645670201273e-06, "loss": 24.125, "step": 3664 }, { "epoch": 0.24340838148369529, "grad_norm": 211.18006896972656, "learning_rate": 1.9562331022060615e-06, "loss": 24.3125, "step": 3665 }, { "epoch": 0.243474795776051, "grad_norm": 167.28297424316406, "learning_rate": 1.956201626330842e-06, "loss": 17.375, "step": 3666 }, { "epoch": 0.24354121006840673, "grad_norm": 243.48143005371094, "learning_rate": 1.9561701393948336e-06, "loss": 17.8281, "step": 3667 }, { "epoch": 0.24360762436076244, "grad_norm": 156.84181213378906, "learning_rate": 1.9561386413984008e-06, "loss": 16.2344, "step": 3668 }, { "epoch": 0.24367403865311815, "grad_norm": 242.71644592285156, "learning_rate": 1.956107132341907e-06, "loss": 24.4062, "step": 3669 }, { "epoch": 0.24374045294547386, "grad_norm": 201.7278289794922, "learning_rate": 1.956075612225717e-06, "loss": 21.2031, "step": 3670 }, { "epoch": 0.24380686723782957, "grad_norm": 233.16314697265625, "learning_rate": 1.9560440810501966e-06, "loss": 23.125, "step": 3671 }, { "epoch": 0.2438732815301853, "grad_norm": 112.40972900390625, "learning_rate": 1.9560125388157088e-06, "loss": 13.0469, "step": 3672 }, { "epoch": 0.24393969582254102, "grad_norm": 218.37112426757812, "learning_rate": 1.9559809855226197e-06, "loss": 22.1562, "step": 3673 }, { "epoch": 0.24400611011489673, "grad_norm": 272.432861328125, "learning_rate": 1.9559494211712936e-06, "loss": 22.9219, "step": 3674 }, { "epoch": 0.24407252440725244, "grad_norm": 151.04954528808594, "learning_rate": 1.955917845762096e-06, "loss": 17.6875, "step": 3675 }, { "epoch": 0.24413893869960815, "grad_norm": 287.2369689941406, "learning_rate": 1.9558862592953913e-06, "loss": 23.0938, "step": 3676 }, { "epoch": 0.24420535299196386, "grad_norm": 173.56820678710938, "learning_rate": 1.9558546617715457e-06, "loss": 21.9688, "step": 3677 }, { "epoch": 0.2442717672843196, "grad_norm": 153.4658660888672, "learning_rate": 1.955823053190925e-06, "loss": 13.9766, "step": 3678 }, { "epoch": 0.2443381815766753, "grad_norm": 275.2692565917969, "learning_rate": 1.9557914335538945e-06, "loss": 21.2188, "step": 3679 }, { "epoch": 0.24440459586903102, "grad_norm": 219.2294158935547, "learning_rate": 1.9557598028608197e-06, "loss": 15.4062, "step": 3680 }, { "epoch": 0.24447101016138673, "grad_norm": 158.65113830566406, "learning_rate": 1.9557281611120664e-06, "loss": 19.0156, "step": 3681 }, { "epoch": 0.24453742445374244, "grad_norm": 190.5726776123047, "learning_rate": 1.955696508308001e-06, "loss": 23.5625, "step": 3682 }, { "epoch": 0.24460383874609817, "grad_norm": 172.319091796875, "learning_rate": 1.955664844448989e-06, "loss": 17.2031, "step": 3683 }, { "epoch": 0.24467025303845388, "grad_norm": 285.05560302734375, "learning_rate": 1.955633169535398e-06, "loss": 17.8438, "step": 3684 }, { "epoch": 0.2447366673308096, "grad_norm": 226.1590576171875, "learning_rate": 1.9556014835675925e-06, "loss": 16.6094, "step": 3685 }, { "epoch": 0.2448030816231653, "grad_norm": 467.8464050292969, "learning_rate": 1.955569786545941e-06, "loss": 24.5625, "step": 3686 }, { "epoch": 0.244869495915521, "grad_norm": 242.51315307617188, "learning_rate": 1.955538078470809e-06, "loss": 22.5312, "step": 3687 }, { "epoch": 0.24493591020787672, "grad_norm": 163.1824493408203, "learning_rate": 1.9555063593425635e-06, "loss": 27.8125, "step": 3688 }, { "epoch": 0.24500232450023246, "grad_norm": 240.364990234375, "learning_rate": 1.955474629161571e-06, "loss": 19.0938, "step": 3689 }, { "epoch": 0.24506873879258817, "grad_norm": 221.27359008789062, "learning_rate": 1.9554428879281997e-06, "loss": 25.4375, "step": 3690 }, { "epoch": 0.24513515308494388, "grad_norm": 156.1059112548828, "learning_rate": 1.955411135642815e-06, "loss": 15.4922, "step": 3691 }, { "epoch": 0.2452015673772996, "grad_norm": 138.56814575195312, "learning_rate": 1.9553793723057858e-06, "loss": 21.9688, "step": 3692 }, { "epoch": 0.2452679816696553, "grad_norm": 224.77516174316406, "learning_rate": 1.9553475979174793e-06, "loss": 28.3438, "step": 3693 }, { "epoch": 0.24533439596201104, "grad_norm": 209.3610076904297, "learning_rate": 1.9553158124782622e-06, "loss": 22.1562, "step": 3694 }, { "epoch": 0.24540081025436675, "grad_norm": 174.31863403320312, "learning_rate": 1.9552840159885026e-06, "loss": 22.6875, "step": 3695 }, { "epoch": 0.24546722454672246, "grad_norm": 122.50872039794922, "learning_rate": 1.9552522084485683e-06, "loss": 14.0, "step": 3696 }, { "epoch": 0.24553363883907817, "grad_norm": 240.30516052246094, "learning_rate": 1.9552203898588276e-06, "loss": 22.7031, "step": 3697 }, { "epoch": 0.24560005313143388, "grad_norm": 427.8479919433594, "learning_rate": 1.955188560219648e-06, "loss": 17.9688, "step": 3698 }, { "epoch": 0.2456664674237896, "grad_norm": 265.16583251953125, "learning_rate": 1.955156719531398e-06, "loss": 19.9844, "step": 3699 }, { "epoch": 0.24573288171614532, "grad_norm": 149.77357482910156, "learning_rate": 1.955124867794446e-06, "loss": 19.6406, "step": 3700 }, { "epoch": 0.24579929600850103, "grad_norm": 187.88278198242188, "learning_rate": 1.95509300500916e-06, "loss": 19.9688, "step": 3701 }, { "epoch": 0.24586571030085674, "grad_norm": 160.04566955566406, "learning_rate": 1.9550611311759087e-06, "loss": 16.625, "step": 3702 }, { "epoch": 0.24593212459321245, "grad_norm": 171.93927001953125, "learning_rate": 1.9550292462950617e-06, "loss": 18.1406, "step": 3703 }, { "epoch": 0.24599853888556816, "grad_norm": 231.1276397705078, "learning_rate": 1.9549973503669866e-06, "loss": 20.5625, "step": 3704 }, { "epoch": 0.2460649531779239, "grad_norm": 356.80865478515625, "learning_rate": 1.9549654433920526e-06, "loss": 23.3594, "step": 3705 }, { "epoch": 0.2461313674702796, "grad_norm": 248.3657989501953, "learning_rate": 1.9549335253706295e-06, "loss": 17.8281, "step": 3706 }, { "epoch": 0.24619778176263532, "grad_norm": 345.14007568359375, "learning_rate": 1.9549015963030857e-06, "loss": 25.1562, "step": 3707 }, { "epoch": 0.24626419605499103, "grad_norm": 170.58302307128906, "learning_rate": 1.954869656189791e-06, "loss": 24.1875, "step": 3708 }, { "epoch": 0.24633061034734674, "grad_norm": 259.1036071777344, "learning_rate": 1.9548377050311147e-06, "loss": 16.7812, "step": 3709 }, { "epoch": 0.24639702463970248, "grad_norm": 149.85186767578125, "learning_rate": 1.9548057428274264e-06, "loss": 17.25, "step": 3710 }, { "epoch": 0.2464634389320582, "grad_norm": 205.63499450683594, "learning_rate": 1.9547737695790956e-06, "loss": 25.0625, "step": 3711 }, { "epoch": 0.2465298532244139, "grad_norm": 308.3675537109375, "learning_rate": 1.9547417852864928e-06, "loss": 21.5938, "step": 3712 }, { "epoch": 0.2465962675167696, "grad_norm": 867.7876586914062, "learning_rate": 1.9547097899499876e-06, "loss": 17.2656, "step": 3713 }, { "epoch": 0.24666268180912532, "grad_norm": 219.2398223876953, "learning_rate": 1.95467778356995e-06, "loss": 18.0, "step": 3714 }, { "epoch": 0.24672909610148103, "grad_norm": 244.69056701660156, "learning_rate": 1.9546457661467502e-06, "loss": 15.2656, "step": 3715 }, { "epoch": 0.24679551039383676, "grad_norm": 186.77410888671875, "learning_rate": 1.9546137376807583e-06, "loss": 18.5781, "step": 3716 }, { "epoch": 0.24686192468619247, "grad_norm": 269.7684020996094, "learning_rate": 1.9545816981723456e-06, "loss": 22.875, "step": 3717 }, { "epoch": 0.24692833897854818, "grad_norm": 141.9998016357422, "learning_rate": 1.954549647621882e-06, "loss": 18.625, "step": 3718 }, { "epoch": 0.2469947532709039, "grad_norm": 445.9599914550781, "learning_rate": 1.954517586029739e-06, "loss": 29.4375, "step": 3719 }, { "epoch": 0.2470611675632596, "grad_norm": 256.8009033203125, "learning_rate": 1.9544855133962864e-06, "loss": 17.2812, "step": 3720 }, { "epoch": 0.24712758185561534, "grad_norm": 210.46923828125, "learning_rate": 1.954453429721896e-06, "loss": 27.2344, "step": 3721 }, { "epoch": 0.24719399614797105, "grad_norm": 247.38858032226562, "learning_rate": 1.9544213350069383e-06, "loss": 23.0938, "step": 3722 }, { "epoch": 0.24726041044032676, "grad_norm": 252.93563842773438, "learning_rate": 1.9543892292517857e-06, "loss": 18.25, "step": 3723 }, { "epoch": 0.24732682473268247, "grad_norm": 209.86770629882812, "learning_rate": 1.9543571124568084e-06, "loss": 21.7812, "step": 3724 }, { "epoch": 0.24739323902503818, "grad_norm": 209.67538452148438, "learning_rate": 1.9543249846223783e-06, "loss": 17.6875, "step": 3725 }, { "epoch": 0.2474596533173939, "grad_norm": 238.3377685546875, "learning_rate": 1.954292845748867e-06, "loss": 22.7812, "step": 3726 }, { "epoch": 0.24752606760974963, "grad_norm": 231.5930633544922, "learning_rate": 1.954260695836647e-06, "loss": 19.625, "step": 3727 }, { "epoch": 0.24759248190210534, "grad_norm": 269.1463928222656, "learning_rate": 1.9542285348860886e-06, "loss": 19.2031, "step": 3728 }, { "epoch": 0.24765889619446105, "grad_norm": 264.61553955078125, "learning_rate": 1.9541963628975652e-06, "loss": 19.1562, "step": 3729 }, { "epoch": 0.24772531048681676, "grad_norm": 177.30081176757812, "learning_rate": 1.9541641798714487e-06, "loss": 19.1562, "step": 3730 }, { "epoch": 0.24779172477917247, "grad_norm": 143.99609375, "learning_rate": 1.954131985808111e-06, "loss": 16.1875, "step": 3731 }, { "epoch": 0.2478581390715282, "grad_norm": 214.6641387939453, "learning_rate": 1.9540997807079244e-06, "loss": 22.2812, "step": 3732 }, { "epoch": 0.24792455336388391, "grad_norm": 245.0627899169922, "learning_rate": 1.954067564571262e-06, "loss": 21.4844, "step": 3733 }, { "epoch": 0.24799096765623962, "grad_norm": 442.5930480957031, "learning_rate": 1.954035337398496e-06, "loss": 25.9062, "step": 3734 }, { "epoch": 0.24805738194859533, "grad_norm": 398.0080261230469, "learning_rate": 1.9540030991899994e-06, "loss": 22.6094, "step": 3735 }, { "epoch": 0.24812379624095104, "grad_norm": 246.29318237304688, "learning_rate": 1.953970849946145e-06, "loss": 20.2656, "step": 3736 }, { "epoch": 0.24819021053330675, "grad_norm": 239.54881286621094, "learning_rate": 1.953938589667306e-06, "loss": 17.8438, "step": 3737 }, { "epoch": 0.2482566248256625, "grad_norm": 155.98497009277344, "learning_rate": 1.9539063183538555e-06, "loss": 19.1875, "step": 3738 }, { "epoch": 0.2483230391180182, "grad_norm": 169.6338348388672, "learning_rate": 1.9538740360061667e-06, "loss": 17.1875, "step": 3739 }, { "epoch": 0.2483894534103739, "grad_norm": 232.76055908203125, "learning_rate": 1.953841742624613e-06, "loss": 20.1875, "step": 3740 }, { "epoch": 0.24845586770272962, "grad_norm": 257.8534240722656, "learning_rate": 1.953809438209568e-06, "loss": 18.6562, "step": 3741 }, { "epoch": 0.24852228199508533, "grad_norm": 215.19839477539062, "learning_rate": 1.9537771227614056e-06, "loss": 17.9844, "step": 3742 }, { "epoch": 0.24858869628744107, "grad_norm": 179.47337341308594, "learning_rate": 1.9537447962804995e-06, "loss": 16.75, "step": 3743 }, { "epoch": 0.24865511057979678, "grad_norm": 154.42124938964844, "learning_rate": 1.9537124587672235e-06, "loss": 19.8281, "step": 3744 }, { "epoch": 0.2487215248721525, "grad_norm": 378.26263427734375, "learning_rate": 1.9536801102219515e-06, "loss": 25.2969, "step": 3745 }, { "epoch": 0.2487879391645082, "grad_norm": 235.12033081054688, "learning_rate": 1.953647750645058e-06, "loss": 14.625, "step": 3746 }, { "epoch": 0.2488543534568639, "grad_norm": 226.72067260742188, "learning_rate": 1.9536153800369176e-06, "loss": 18.1562, "step": 3747 }, { "epoch": 0.24892076774921965, "grad_norm": 389.8255310058594, "learning_rate": 1.9535829983979037e-06, "loss": 23.6719, "step": 3748 }, { "epoch": 0.24898718204157536, "grad_norm": 489.01629638671875, "learning_rate": 1.953550605728392e-06, "loss": 13.8438, "step": 3749 }, { "epoch": 0.24905359633393107, "grad_norm": 158.12588500976562, "learning_rate": 1.9535182020287565e-06, "loss": 18.8281, "step": 3750 }, { "epoch": 0.24912001062628678, "grad_norm": 276.8321228027344, "learning_rate": 1.9534857872993726e-06, "loss": 20.1875, "step": 3751 }, { "epoch": 0.24918642491864249, "grad_norm": 258.58770751953125, "learning_rate": 1.9534533615406147e-06, "loss": 14.8594, "step": 3752 }, { "epoch": 0.2492528392109982, "grad_norm": 277.2768249511719, "learning_rate": 1.953420924752858e-06, "loss": 21.1562, "step": 3753 }, { "epoch": 0.24931925350335393, "grad_norm": 162.92874145507812, "learning_rate": 1.9533884769364783e-06, "loss": 21.9062, "step": 3754 }, { "epoch": 0.24938566779570964, "grad_norm": 312.2860107421875, "learning_rate": 1.95335601809185e-06, "loss": 24.0625, "step": 3755 }, { "epoch": 0.24945208208806535, "grad_norm": 479.31536865234375, "learning_rate": 1.9533235482193492e-06, "loss": 24.4375, "step": 3756 }, { "epoch": 0.24951849638042106, "grad_norm": 428.53826904296875, "learning_rate": 1.9532910673193512e-06, "loss": 15.9062, "step": 3757 }, { "epoch": 0.24958491067277677, "grad_norm": 318.46624755859375, "learning_rate": 1.953258575392232e-06, "loss": 19.5312, "step": 3758 }, { "epoch": 0.2496513249651325, "grad_norm": 461.7298278808594, "learning_rate": 1.953226072438367e-06, "loss": 16.875, "step": 3759 }, { "epoch": 0.24971773925748822, "grad_norm": 195.3397216796875, "learning_rate": 1.9531935584581325e-06, "loss": 20.1953, "step": 3760 }, { "epoch": 0.24978415354984393, "grad_norm": 193.60382080078125, "learning_rate": 1.9531610334519046e-06, "loss": 15.2656, "step": 3761 }, { "epoch": 0.24985056784219964, "grad_norm": 2887.05126953125, "learning_rate": 1.9531284974200596e-06, "loss": 17.1562, "step": 3762 }, { "epoch": 0.24991698213455535, "grad_norm": 615.0501708984375, "learning_rate": 1.953095950362974e-06, "loss": 21.1406, "step": 3763 }, { "epoch": 0.24998339642691106, "grad_norm": 185.5062255859375, "learning_rate": 1.9530633922810233e-06, "loss": 17.875, "step": 3764 }, { "epoch": 0.2500498107192668, "grad_norm": 125.12425994873047, "learning_rate": 1.9530308231745852e-06, "loss": 13.3594, "step": 3765 }, { "epoch": 0.2501162250116225, "grad_norm": 173.27099609375, "learning_rate": 1.9529982430440362e-06, "loss": 16.8125, "step": 3766 }, { "epoch": 0.2501826393039782, "grad_norm": 229.51710510253906, "learning_rate": 1.952965651889753e-06, "loss": 18.1094, "step": 3767 }, { "epoch": 0.2502490535963339, "grad_norm": 459.843505859375, "learning_rate": 1.9529330497121128e-06, "loss": 17.8906, "step": 3768 }, { "epoch": 0.25031546788868964, "grad_norm": 390.5806884765625, "learning_rate": 1.9529004365114926e-06, "loss": 18.25, "step": 3769 }, { "epoch": 0.25038188218104535, "grad_norm": 114.09492492675781, "learning_rate": 1.9528678122882696e-06, "loss": 16.6875, "step": 3770 }, { "epoch": 0.25044829647340106, "grad_norm": 171.6638946533203, "learning_rate": 1.9528351770428207e-06, "loss": 17.9688, "step": 3771 }, { "epoch": 0.25051471076575677, "grad_norm": 193.51480102539062, "learning_rate": 1.9528025307755246e-06, "loss": 16.6875, "step": 3772 }, { "epoch": 0.25058112505811253, "grad_norm": 217.85658264160156, "learning_rate": 1.9527698734867577e-06, "loss": 21.75, "step": 3773 }, { "epoch": 0.25064753935046824, "grad_norm": 255.77732849121094, "learning_rate": 1.9527372051768983e-06, "loss": 15.8438, "step": 3774 }, { "epoch": 0.25071395364282395, "grad_norm": 214.61318969726562, "learning_rate": 1.952704525846325e-06, "loss": 23.5781, "step": 3775 }, { "epoch": 0.25078036793517966, "grad_norm": 296.7042541503906, "learning_rate": 1.9526718354954143e-06, "loss": 23.5, "step": 3776 }, { "epoch": 0.25084678222753537, "grad_norm": 580.1870727539062, "learning_rate": 1.9526391341245456e-06, "loss": 19.9375, "step": 3777 }, { "epoch": 0.2509131965198911, "grad_norm": 293.8099670410156, "learning_rate": 1.9526064217340964e-06, "loss": 21.25, "step": 3778 }, { "epoch": 0.2509796108122468, "grad_norm": 132.32803344726562, "learning_rate": 1.9525736983244456e-06, "loss": 15.1719, "step": 3779 }, { "epoch": 0.2510460251046025, "grad_norm": 127.28826904296875, "learning_rate": 1.952540963895972e-06, "loss": 19.9062, "step": 3780 }, { "epoch": 0.2511124393969582, "grad_norm": 206.70472717285156, "learning_rate": 1.9525082184490527e-06, "loss": 26.4688, "step": 3781 }, { "epoch": 0.2511788536893139, "grad_norm": 221.2439422607422, "learning_rate": 1.9524754619840682e-06, "loss": 25.0469, "step": 3782 }, { "epoch": 0.25124526798166963, "grad_norm": 936.6287231445312, "learning_rate": 1.952442694501397e-06, "loss": 22.7656, "step": 3783 }, { "epoch": 0.2513116822740254, "grad_norm": 349.09393310546875, "learning_rate": 1.9524099160014176e-06, "loss": 20.125, "step": 3784 }, { "epoch": 0.2513780965663811, "grad_norm": 413.9417419433594, "learning_rate": 1.9523771264845096e-06, "loss": 15.1328, "step": 3785 }, { "epoch": 0.2514445108587368, "grad_norm": 292.343017578125, "learning_rate": 1.9523443259510525e-06, "loss": 24.7031, "step": 3786 }, { "epoch": 0.2515109251510925, "grad_norm": 176.34158325195312, "learning_rate": 1.952311514401425e-06, "loss": 15.375, "step": 3787 }, { "epoch": 0.25157733944344823, "grad_norm": 299.0372619628906, "learning_rate": 1.952278691836007e-06, "loss": 24.3438, "step": 3788 }, { "epoch": 0.25164375373580394, "grad_norm": 156.59735107421875, "learning_rate": 1.9522458582551782e-06, "loss": 16.3906, "step": 3789 }, { "epoch": 0.25171016802815965, "grad_norm": 307.3919982910156, "learning_rate": 1.952213013659319e-06, "loss": 18.7812, "step": 3790 }, { "epoch": 0.25177658232051536, "grad_norm": 145.2198028564453, "learning_rate": 1.952180158048808e-06, "loss": 20.4688, "step": 3791 }, { "epoch": 0.2518429966128711, "grad_norm": 221.7451171875, "learning_rate": 1.9521472914240264e-06, "loss": 15.9219, "step": 3792 }, { "epoch": 0.2519094109052268, "grad_norm": 156.2723388671875, "learning_rate": 1.9521144137853537e-06, "loss": 17.6406, "step": 3793 }, { "epoch": 0.2519758251975825, "grad_norm": 117.4408950805664, "learning_rate": 1.9520815251331702e-06, "loss": 17.4531, "step": 3794 }, { "epoch": 0.25204223948993826, "grad_norm": 319.7724914550781, "learning_rate": 1.9520486254678576e-06, "loss": 20.9844, "step": 3795 }, { "epoch": 0.25210865378229397, "grad_norm": 231.2183380126953, "learning_rate": 1.952015714789795e-06, "loss": 23.1406, "step": 3796 }, { "epoch": 0.2521750680746497, "grad_norm": 458.6272888183594, "learning_rate": 1.951982793099363e-06, "loss": 13.9062, "step": 3797 }, { "epoch": 0.2522414823670054, "grad_norm": 253.17430114746094, "learning_rate": 1.9519498603969437e-06, "loss": 24.7812, "step": 3798 }, { "epoch": 0.2523078966593611, "grad_norm": 217.2003631591797, "learning_rate": 1.951916916682917e-06, "loss": 17.0781, "step": 3799 }, { "epoch": 0.2523743109517168, "grad_norm": 177.97314453125, "learning_rate": 1.9518839619576644e-06, "loss": 18.8594, "step": 3800 }, { "epoch": 0.2524407252440725, "grad_norm": 229.2642822265625, "learning_rate": 1.9518509962215673e-06, "loss": 14.8125, "step": 3801 }, { "epoch": 0.2525071395364282, "grad_norm": 145.6130828857422, "learning_rate": 1.9518180194750063e-06, "loss": 20.1562, "step": 3802 }, { "epoch": 0.25257355382878394, "grad_norm": 170.27288818359375, "learning_rate": 1.9517850317183637e-06, "loss": 19.1406, "step": 3803 }, { "epoch": 0.25263996812113965, "grad_norm": 369.8441467285156, "learning_rate": 1.9517520329520203e-06, "loss": 29.125, "step": 3804 }, { "epoch": 0.2527063824134954, "grad_norm": 238.33245849609375, "learning_rate": 1.951719023176358e-06, "loss": 16.2656, "step": 3805 }, { "epoch": 0.2527727967058511, "grad_norm": 256.4136962890625, "learning_rate": 1.9516860023917597e-06, "loss": 20.1719, "step": 3806 }, { "epoch": 0.25283921099820683, "grad_norm": 162.7617950439453, "learning_rate": 1.951652970598606e-06, "loss": 17.2031, "step": 3807 }, { "epoch": 0.25290562529056254, "grad_norm": 360.6598205566406, "learning_rate": 1.9516199277972796e-06, "loss": 32.8438, "step": 3808 }, { "epoch": 0.25297203958291825, "grad_norm": 176.60964965820312, "learning_rate": 1.951586873988162e-06, "loss": 19.3281, "step": 3809 }, { "epoch": 0.25303845387527396, "grad_norm": 155.77804565429688, "learning_rate": 1.951553809171637e-06, "loss": 16.5625, "step": 3810 }, { "epoch": 0.25310486816762967, "grad_norm": 240.3187713623047, "learning_rate": 1.9515207333480857e-06, "loss": 22.375, "step": 3811 }, { "epoch": 0.2531712824599854, "grad_norm": 810.7150268554688, "learning_rate": 1.9514876465178913e-06, "loss": 17.0312, "step": 3812 }, { "epoch": 0.2532376967523411, "grad_norm": 313.760986328125, "learning_rate": 1.9514545486814365e-06, "loss": 23.0312, "step": 3813 }, { "epoch": 0.2533041110446968, "grad_norm": 249.69822692871094, "learning_rate": 1.9514214398391043e-06, "loss": 20.9688, "step": 3814 }, { "epoch": 0.2533705253370525, "grad_norm": 222.42886352539062, "learning_rate": 1.9513883199912773e-06, "loss": 17.8438, "step": 3815 }, { "epoch": 0.2534369396294083, "grad_norm": 170.13772583007812, "learning_rate": 1.951355189138339e-06, "loss": 15.8281, "step": 3816 }, { "epoch": 0.253503353921764, "grad_norm": 175.42807006835938, "learning_rate": 1.9513220472806727e-06, "loss": 16.9219, "step": 3817 }, { "epoch": 0.2535697682141197, "grad_norm": 332.02593994140625, "learning_rate": 1.9512888944186613e-06, "loss": 20.9844, "step": 3818 }, { "epoch": 0.2536361825064754, "grad_norm": 199.69607543945312, "learning_rate": 1.951255730552688e-06, "loss": 21.9219, "step": 3819 }, { "epoch": 0.2537025967988311, "grad_norm": 214.29031372070312, "learning_rate": 1.951222555683138e-06, "loss": 14.8281, "step": 3820 }, { "epoch": 0.2537690110911868, "grad_norm": 316.25958251953125, "learning_rate": 1.9511893698103928e-06, "loss": 17.0781, "step": 3821 }, { "epoch": 0.25383542538354253, "grad_norm": 642.8858032226562, "learning_rate": 1.951156172934838e-06, "loss": 24.4531, "step": 3822 }, { "epoch": 0.25390183967589824, "grad_norm": 317.91290283203125, "learning_rate": 1.951122965056857e-06, "loss": 16.2969, "step": 3823 }, { "epoch": 0.25396825396825395, "grad_norm": 4759.24951171875, "learning_rate": 1.951089746176834e-06, "loss": 15.8594, "step": 3824 }, { "epoch": 0.25403466826060966, "grad_norm": 747.5789184570312, "learning_rate": 1.9510565162951534e-06, "loss": 23.125, "step": 3825 }, { "epoch": 0.2541010825529654, "grad_norm": 246.80079650878906, "learning_rate": 1.9510232754121996e-06, "loss": 31.3125, "step": 3826 }, { "epoch": 0.25416749684532114, "grad_norm": 121.49227905273438, "learning_rate": 1.9509900235283567e-06, "loss": 20.5469, "step": 3827 }, { "epoch": 0.25423391113767685, "grad_norm": 288.7403259277344, "learning_rate": 1.950956760644009e-06, "loss": 21.0312, "step": 3828 }, { "epoch": 0.25430032543003256, "grad_norm": 181.62530517578125, "learning_rate": 1.9509234867595426e-06, "loss": 19.0781, "step": 3829 }, { "epoch": 0.25436673972238827, "grad_norm": 263.7976379394531, "learning_rate": 1.9508902018753414e-06, "loss": 22.0469, "step": 3830 }, { "epoch": 0.254433154014744, "grad_norm": 268.5212707519531, "learning_rate": 1.950856905991791e-06, "loss": 26.125, "step": 3831 }, { "epoch": 0.2544995683070997, "grad_norm": 528.0631713867188, "learning_rate": 1.9508235991092755e-06, "loss": 19.625, "step": 3832 }, { "epoch": 0.2545659825994554, "grad_norm": 202.07150268554688, "learning_rate": 1.9507902812281816e-06, "loss": 17.625, "step": 3833 }, { "epoch": 0.2546323968918111, "grad_norm": 360.78021240234375, "learning_rate": 1.9507569523488934e-06, "loss": 21.7812, "step": 3834 }, { "epoch": 0.2546988111841668, "grad_norm": 525.5670166015625, "learning_rate": 1.9507236124717973e-06, "loss": 24.2656, "step": 3835 }, { "epoch": 0.2547652254765225, "grad_norm": 181.6602325439453, "learning_rate": 1.950690261597279e-06, "loss": 21.75, "step": 3836 }, { "epoch": 0.25483163976887824, "grad_norm": 441.9461364746094, "learning_rate": 1.9506568997257233e-06, "loss": 35.3281, "step": 3837 }, { "epoch": 0.254898054061234, "grad_norm": 150.09100341796875, "learning_rate": 1.950623526857517e-06, "loss": 17.9062, "step": 3838 }, { "epoch": 0.2549644683535897, "grad_norm": 184.45367431640625, "learning_rate": 1.9505901429930462e-06, "loss": 17.6719, "step": 3839 }, { "epoch": 0.2550308826459454, "grad_norm": 346.8046875, "learning_rate": 1.9505567481326964e-06, "loss": 23.6562, "step": 3840 }, { "epoch": 0.25509729693830113, "grad_norm": 152.1611785888672, "learning_rate": 1.9505233422768544e-06, "loss": 19.9844, "step": 3841 }, { "epoch": 0.25516371123065684, "grad_norm": 198.03346252441406, "learning_rate": 1.9504899254259068e-06, "loss": 15.0625, "step": 3842 }, { "epoch": 0.25523012552301255, "grad_norm": 414.6817932128906, "learning_rate": 1.950456497580239e-06, "loss": 24.0, "step": 3843 }, { "epoch": 0.25529653981536826, "grad_norm": 194.94430541992188, "learning_rate": 1.950423058740239e-06, "loss": 17.375, "step": 3844 }, { "epoch": 0.25536295410772397, "grad_norm": 297.0433044433594, "learning_rate": 1.950389608906293e-06, "loss": 18.9688, "step": 3845 }, { "epoch": 0.2554293684000797, "grad_norm": 116.33306121826172, "learning_rate": 1.9503561480787886e-06, "loss": 13.6875, "step": 3846 }, { "epoch": 0.2554957826924354, "grad_norm": 1283.2318115234375, "learning_rate": 1.9503226762581116e-06, "loss": 18.3906, "step": 3847 }, { "epoch": 0.2555621969847911, "grad_norm": 300.55291748046875, "learning_rate": 1.9502891934446502e-06, "loss": 19.7969, "step": 3848 }, { "epoch": 0.25562861127714687, "grad_norm": 269.93310546875, "learning_rate": 1.9502556996387913e-06, "loss": 27.5625, "step": 3849 }, { "epoch": 0.2556950255695026, "grad_norm": 184.49156188964844, "learning_rate": 1.9502221948409223e-06, "loss": 16.1406, "step": 3850 }, { "epoch": 0.2557614398618583, "grad_norm": 214.4910888671875, "learning_rate": 1.9501886790514307e-06, "loss": 20.0, "step": 3851 }, { "epoch": 0.255827854154214, "grad_norm": 232.2704315185547, "learning_rate": 1.950155152270705e-06, "loss": 27.875, "step": 3852 }, { "epoch": 0.2558942684465697, "grad_norm": 121.34141540527344, "learning_rate": 1.9501216144991316e-06, "loss": 15.2656, "step": 3853 }, { "epoch": 0.2559606827389254, "grad_norm": 333.7542419433594, "learning_rate": 1.9500880657371e-06, "loss": 25.125, "step": 3854 }, { "epoch": 0.2560270970312811, "grad_norm": 199.99813842773438, "learning_rate": 1.950054505984997e-06, "loss": 18.7188, "step": 3855 }, { "epoch": 0.25609351132363684, "grad_norm": 183.48651123046875, "learning_rate": 1.9500209352432117e-06, "loss": 21.2812, "step": 3856 }, { "epoch": 0.25615992561599255, "grad_norm": 216.1350860595703, "learning_rate": 1.9499873535121315e-06, "loss": 19.9219, "step": 3857 }, { "epoch": 0.25622633990834826, "grad_norm": 295.01177978515625, "learning_rate": 1.949953760792146e-06, "loss": 18.1094, "step": 3858 }, { "epoch": 0.25629275420070396, "grad_norm": 148.6490936279297, "learning_rate": 1.9499201570836427e-06, "loss": 17.3906, "step": 3859 }, { "epoch": 0.25635916849305973, "grad_norm": 964.5999755859375, "learning_rate": 1.949886542387011e-06, "loss": 18.2812, "step": 3860 }, { "epoch": 0.25642558278541544, "grad_norm": 234.24534606933594, "learning_rate": 1.9498529167026393e-06, "loss": 17.9844, "step": 3861 }, { "epoch": 0.25649199707777115, "grad_norm": 191.63516235351562, "learning_rate": 1.9498192800309167e-06, "loss": 20.625, "step": 3862 }, { "epoch": 0.25655841137012686, "grad_norm": 412.8174133300781, "learning_rate": 1.9497856323722328e-06, "loss": 18.3906, "step": 3863 }, { "epoch": 0.25662482566248257, "grad_norm": 149.34512329101562, "learning_rate": 1.9497519737269763e-06, "loss": 20.1719, "step": 3864 }, { "epoch": 0.2566912399548383, "grad_norm": 157.50860595703125, "learning_rate": 1.9497183040955365e-06, "loss": 21.9219, "step": 3865 }, { "epoch": 0.256757654247194, "grad_norm": 348.3340759277344, "learning_rate": 1.949684623478303e-06, "loss": 30.0312, "step": 3866 }, { "epoch": 0.2568240685395497, "grad_norm": 168.51072692871094, "learning_rate": 1.9496509318756653e-06, "loss": 15.0234, "step": 3867 }, { "epoch": 0.2568904828319054, "grad_norm": 271.2900085449219, "learning_rate": 1.9496172292880133e-06, "loss": 20.9375, "step": 3868 }, { "epoch": 0.2569568971242611, "grad_norm": 1043.2890625, "learning_rate": 1.949583515715737e-06, "loss": 18.9688, "step": 3869 }, { "epoch": 0.25702331141661683, "grad_norm": 162.91049194335938, "learning_rate": 1.9495497911592257e-06, "loss": 16.3594, "step": 3870 }, { "epoch": 0.2570897257089726, "grad_norm": 139.2553253173828, "learning_rate": 1.9495160556188703e-06, "loss": 17.5469, "step": 3871 }, { "epoch": 0.2571561400013283, "grad_norm": 287.4471435546875, "learning_rate": 1.9494823090950606e-06, "loss": 20.5312, "step": 3872 }, { "epoch": 0.257222554293684, "grad_norm": 224.99163818359375, "learning_rate": 1.949448551588187e-06, "loss": 18.4688, "step": 3873 }, { "epoch": 0.2572889685860397, "grad_norm": 282.8897399902344, "learning_rate": 1.9494147830986402e-06, "loss": 20.8438, "step": 3874 }, { "epoch": 0.25735538287839543, "grad_norm": 678.70654296875, "learning_rate": 1.9493810036268105e-06, "loss": 23.4375, "step": 3875 }, { "epoch": 0.25742179717075114, "grad_norm": 167.55101013183594, "learning_rate": 1.949347213173089e-06, "loss": 20.6094, "step": 3876 }, { "epoch": 0.25748821146310685, "grad_norm": 164.85977172851562, "learning_rate": 1.949313411737866e-06, "loss": 22.1094, "step": 3877 }, { "epoch": 0.25755462575546256, "grad_norm": 785.6422729492188, "learning_rate": 1.9492795993215328e-06, "loss": 16.2891, "step": 3878 }, { "epoch": 0.2576210400478183, "grad_norm": 305.9497375488281, "learning_rate": 1.949245775924481e-06, "loss": 18.2656, "step": 3879 }, { "epoch": 0.257687454340174, "grad_norm": 343.03619384765625, "learning_rate": 1.949211941547101e-06, "loss": 23.3125, "step": 3880 }, { "epoch": 0.25775386863252975, "grad_norm": 130.88699340820312, "learning_rate": 1.9491780961897854e-06, "loss": 16.4531, "step": 3881 }, { "epoch": 0.25782028292488546, "grad_norm": 2792.52294921875, "learning_rate": 1.9491442398529242e-06, "loss": 24.8125, "step": 3882 }, { "epoch": 0.25788669721724117, "grad_norm": 187.81033325195312, "learning_rate": 1.9491103725369097e-06, "loss": 22.875, "step": 3883 }, { "epoch": 0.2579531115095969, "grad_norm": 288.4322509765625, "learning_rate": 1.949076494242134e-06, "loss": 22.6562, "step": 3884 }, { "epoch": 0.2580195258019526, "grad_norm": 153.70614624023438, "learning_rate": 1.9490426049689886e-06, "loss": 17.9062, "step": 3885 }, { "epoch": 0.2580859400943083, "grad_norm": 482.2108154296875, "learning_rate": 1.9490087047178656e-06, "loss": 18.0625, "step": 3886 }, { "epoch": 0.258152354386664, "grad_norm": 252.2724151611328, "learning_rate": 1.948974793489157e-06, "loss": 17.5938, "step": 3887 }, { "epoch": 0.2582187686790197, "grad_norm": 347.33489990234375, "learning_rate": 1.9489408712832557e-06, "loss": 23.625, "step": 3888 }, { "epoch": 0.2582851829713754, "grad_norm": 586.1905517578125, "learning_rate": 1.948906938100553e-06, "loss": 34.375, "step": 3889 }, { "epoch": 0.25835159726373114, "grad_norm": 339.8514404296875, "learning_rate": 1.9488729939414422e-06, "loss": 18.4062, "step": 3890 }, { "epoch": 0.25841801155608685, "grad_norm": 390.07269287109375, "learning_rate": 1.948839038806316e-06, "loss": 30.5938, "step": 3891 }, { "epoch": 0.2584844258484426, "grad_norm": 365.5821228027344, "learning_rate": 1.9488050726955666e-06, "loss": 19.6094, "step": 3892 }, { "epoch": 0.2585508401407983, "grad_norm": 197.46847534179688, "learning_rate": 1.948771095609587e-06, "loss": 21.0625, "step": 3893 }, { "epoch": 0.25861725443315403, "grad_norm": 1717.883056640625, "learning_rate": 1.948737107548771e-06, "loss": 25.5781, "step": 3894 }, { "epoch": 0.25868366872550974, "grad_norm": 219.24905395507812, "learning_rate": 1.948703108513511e-06, "loss": 17.1094, "step": 3895 }, { "epoch": 0.25875008301786545, "grad_norm": 150.80271911621094, "learning_rate": 1.9486690985042006e-06, "loss": 16.3594, "step": 3896 }, { "epoch": 0.25881649731022116, "grad_norm": 215.88418579101562, "learning_rate": 1.948635077521233e-06, "loss": 19.2969, "step": 3897 }, { "epoch": 0.25888291160257687, "grad_norm": 399.9358825683594, "learning_rate": 1.948601045565002e-06, "loss": 20.2188, "step": 3898 }, { "epoch": 0.2589493258949326, "grad_norm": 136.05633544921875, "learning_rate": 1.948567002635901e-06, "loss": 17.5312, "step": 3899 }, { "epoch": 0.2590157401872883, "grad_norm": 208.09486389160156, "learning_rate": 1.9485329487343235e-06, "loss": 15.6406, "step": 3900 }, { "epoch": 0.259082154479644, "grad_norm": 155.74363708496094, "learning_rate": 1.9484988838606646e-06, "loss": 21.6719, "step": 3901 }, { "epoch": 0.2591485687719997, "grad_norm": 110.74466705322266, "learning_rate": 1.9484648080153168e-06, "loss": 17.0469, "step": 3902 }, { "epoch": 0.2592149830643555, "grad_norm": 918.4083251953125, "learning_rate": 1.9484307211986756e-06, "loss": 20.8906, "step": 3903 }, { "epoch": 0.2592813973567112, "grad_norm": 350.7803039550781, "learning_rate": 1.948396623411134e-06, "loss": 19.8594, "step": 3904 }, { "epoch": 0.2593478116490669, "grad_norm": 127.33989715576172, "learning_rate": 1.948362514653088e-06, "loss": 19.125, "step": 3905 }, { "epoch": 0.2594142259414226, "grad_norm": 184.64364624023438, "learning_rate": 1.9483283949249307e-06, "loss": 23.3281, "step": 3906 }, { "epoch": 0.2594806402337783, "grad_norm": 243.1719512939453, "learning_rate": 1.9482942642270573e-06, "loss": 16.8906, "step": 3907 }, { "epoch": 0.259547054526134, "grad_norm": 182.77081298828125, "learning_rate": 1.948260122559863e-06, "loss": 13.1406, "step": 3908 }, { "epoch": 0.25961346881848973, "grad_norm": 304.0369873046875, "learning_rate": 1.9482259699237423e-06, "loss": 21.6562, "step": 3909 }, { "epoch": 0.25967988311084544, "grad_norm": 314.88250732421875, "learning_rate": 1.9481918063190904e-06, "loss": 20.5312, "step": 3910 }, { "epoch": 0.25974629740320115, "grad_norm": 254.0806427001953, "learning_rate": 1.9481576317463025e-06, "loss": 21.375, "step": 3911 }, { "epoch": 0.25981271169555686, "grad_norm": 212.75717163085938, "learning_rate": 1.9481234462057733e-06, "loss": 17.2812, "step": 3912 }, { "epoch": 0.2598791259879126, "grad_norm": 350.6031494140625, "learning_rate": 1.9480892496978996e-06, "loss": 26.4688, "step": 3913 }, { "epoch": 0.25994554028026834, "grad_norm": 564.1569213867188, "learning_rate": 1.9480550422230757e-06, "loss": 24.4375, "step": 3914 }, { "epoch": 0.26001195457262405, "grad_norm": 208.9837646484375, "learning_rate": 1.948020823781698e-06, "loss": 15.7969, "step": 3915 }, { "epoch": 0.26007836886497976, "grad_norm": 147.92037963867188, "learning_rate": 1.9479865943741617e-06, "loss": 16.4375, "step": 3916 }, { "epoch": 0.26014478315733547, "grad_norm": 330.4004211425781, "learning_rate": 1.9479523540008635e-06, "loss": 23.5312, "step": 3917 }, { "epoch": 0.2602111974496912, "grad_norm": 456.1384582519531, "learning_rate": 1.9479181026621987e-06, "loss": 16.6094, "step": 3918 }, { "epoch": 0.2602776117420469, "grad_norm": 164.29150390625, "learning_rate": 1.947883840358564e-06, "loss": 16.8906, "step": 3919 }, { "epoch": 0.2603440260344026, "grad_norm": 210.80975341796875, "learning_rate": 1.947849567090356e-06, "loss": 16.1719, "step": 3920 }, { "epoch": 0.2604104403267583, "grad_norm": 351.14605712890625, "learning_rate": 1.9478152828579706e-06, "loss": 31.5312, "step": 3921 }, { "epoch": 0.260476854619114, "grad_norm": 183.3167266845703, "learning_rate": 1.947780987661804e-06, "loss": 22.3281, "step": 3922 }, { "epoch": 0.2605432689114697, "grad_norm": 197.65516662597656, "learning_rate": 1.947746681502254e-06, "loss": 23.2969, "step": 3923 }, { "epoch": 0.26060968320382544, "grad_norm": 286.9833068847656, "learning_rate": 1.947712364379717e-06, "loss": 24.8438, "step": 3924 }, { "epoch": 0.2606760974961812, "grad_norm": 208.48353576660156, "learning_rate": 1.9476780362945895e-06, "loss": 15.7812, "step": 3925 }, { "epoch": 0.2607425117885369, "grad_norm": 159.7201385498047, "learning_rate": 1.947643697247269e-06, "loss": 14.6562, "step": 3926 }, { "epoch": 0.2608089260808926, "grad_norm": 337.67108154296875, "learning_rate": 1.947609347238153e-06, "loss": 25.0469, "step": 3927 }, { "epoch": 0.26087534037324833, "grad_norm": 237.33290100097656, "learning_rate": 1.947574986267638e-06, "loss": 15.75, "step": 3928 }, { "epoch": 0.26094175466560404, "grad_norm": 564.3217163085938, "learning_rate": 1.9475406143361223e-06, "loss": 17.3594, "step": 3929 }, { "epoch": 0.26100816895795975, "grad_norm": 210.37171936035156, "learning_rate": 1.947506231444003e-06, "loss": 20.3438, "step": 3930 }, { "epoch": 0.26107458325031546, "grad_norm": 288.4942932128906, "learning_rate": 1.947471837591678e-06, "loss": 16.2031, "step": 3931 }, { "epoch": 0.26114099754267117, "grad_norm": 186.0314483642578, "learning_rate": 1.947437432779545e-06, "loss": 18.3125, "step": 3932 }, { "epoch": 0.2612074118350269, "grad_norm": 120.51468658447266, "learning_rate": 1.9474030170080024e-06, "loss": 18.3125, "step": 3933 }, { "epoch": 0.2612738261273826, "grad_norm": 223.15185546875, "learning_rate": 1.947368590277448e-06, "loss": 22.1406, "step": 3934 }, { "epoch": 0.2613402404197383, "grad_norm": 259.7105712890625, "learning_rate": 1.94733415258828e-06, "loss": 18.5781, "step": 3935 }, { "epoch": 0.26140665471209407, "grad_norm": 123.66973114013672, "learning_rate": 1.9472997039408968e-06, "loss": 18.4844, "step": 3936 }, { "epoch": 0.2614730690044498, "grad_norm": 186.63101196289062, "learning_rate": 1.9472652443356966e-06, "loss": 17.75, "step": 3937 }, { "epoch": 0.2615394832968055, "grad_norm": 199.25411987304688, "learning_rate": 1.9472307737730784e-06, "loss": 20.4219, "step": 3938 }, { "epoch": 0.2616058975891612, "grad_norm": 209.4795379638672, "learning_rate": 1.947196292253441e-06, "loss": 17.5, "step": 3939 }, { "epoch": 0.2616723118815169, "grad_norm": 202.72299194335938, "learning_rate": 1.9471617997771826e-06, "loss": 25.5312, "step": 3940 }, { "epoch": 0.2617387261738726, "grad_norm": 201.65512084960938, "learning_rate": 1.947127296344703e-06, "loss": 27.9688, "step": 3941 }, { "epoch": 0.2618051404662283, "grad_norm": 286.6324462890625, "learning_rate": 1.947092781956401e-06, "loss": 18.1094, "step": 3942 }, { "epoch": 0.26187155475858404, "grad_norm": 236.93621826171875, "learning_rate": 1.947058256612676e-06, "loss": 22.4531, "step": 3943 }, { "epoch": 0.26193796905093975, "grad_norm": 233.28616333007812, "learning_rate": 1.9470237203139267e-06, "loss": 21.375, "step": 3944 }, { "epoch": 0.26200438334329545, "grad_norm": 276.00140380859375, "learning_rate": 1.9469891730605533e-06, "loss": 15.4844, "step": 3945 }, { "epoch": 0.26207079763565116, "grad_norm": 207.84519958496094, "learning_rate": 1.9469546148529555e-06, "loss": 16.7969, "step": 3946 }, { "epoch": 0.26213721192800693, "grad_norm": 193.69818115234375, "learning_rate": 1.9469200456915327e-06, "loss": 20.3594, "step": 3947 }, { "epoch": 0.26220362622036264, "grad_norm": 234.20408630371094, "learning_rate": 1.9468854655766842e-06, "loss": 19.9375, "step": 3948 }, { "epoch": 0.26227004051271835, "grad_norm": 168.86740112304688, "learning_rate": 1.9468508745088117e-06, "loss": 18.6562, "step": 3949 }, { "epoch": 0.26233645480507406, "grad_norm": 121.33155822753906, "learning_rate": 1.946816272488313e-06, "loss": 16.9062, "step": 3950 }, { "epoch": 0.26240286909742977, "grad_norm": 224.7110137939453, "learning_rate": 1.9467816595155904e-06, "loss": 22.1719, "step": 3951 }, { "epoch": 0.2624692833897855, "grad_norm": 283.0148010253906, "learning_rate": 1.9467470355910435e-06, "loss": 17.5312, "step": 3952 }, { "epoch": 0.2625356976821412, "grad_norm": 273.3149108886719, "learning_rate": 1.946712400715073e-06, "loss": 17.9375, "step": 3953 }, { "epoch": 0.2626021119744969, "grad_norm": 194.5653839111328, "learning_rate": 1.9466777548880792e-06, "loss": 15.5938, "step": 3954 }, { "epoch": 0.2626685262668526, "grad_norm": 205.11427307128906, "learning_rate": 1.946643098110463e-06, "loss": 20.6875, "step": 3955 }, { "epoch": 0.2627349405592083, "grad_norm": 145.8584747314453, "learning_rate": 1.9466084303826247e-06, "loss": 20.6875, "step": 3956 }, { "epoch": 0.2628013548515641, "grad_norm": 518.2384643554688, "learning_rate": 1.9465737517049664e-06, "loss": 20.3438, "step": 3957 }, { "epoch": 0.2628677691439198, "grad_norm": 254.6880340576172, "learning_rate": 1.946539062077889e-06, "loss": 14.4062, "step": 3958 }, { "epoch": 0.2629341834362755, "grad_norm": 118.42717742919922, "learning_rate": 1.9465043615017933e-06, "loss": 14.5, "step": 3959 }, { "epoch": 0.2630005977286312, "grad_norm": 133.897216796875, "learning_rate": 1.946469649977081e-06, "loss": 18.4219, "step": 3960 }, { "epoch": 0.2630670120209869, "grad_norm": 154.17933654785156, "learning_rate": 1.9464349275041535e-06, "loss": 14.9219, "step": 3961 }, { "epoch": 0.26313342631334263, "grad_norm": 200.4166717529297, "learning_rate": 1.9464001940834126e-06, "loss": 22.8594, "step": 3962 }, { "epoch": 0.26319984060569834, "grad_norm": 296.0130310058594, "learning_rate": 1.94636544971526e-06, "loss": 17.2188, "step": 3963 }, { "epoch": 0.26326625489805405, "grad_norm": 124.3624038696289, "learning_rate": 1.9463306944000974e-06, "loss": 16.1719, "step": 3964 }, { "epoch": 0.26333266919040976, "grad_norm": 123.91768646240234, "learning_rate": 1.946295928138327e-06, "loss": 19.4062, "step": 3965 }, { "epoch": 0.2633990834827655, "grad_norm": 223.38143920898438, "learning_rate": 1.946261150930351e-06, "loss": 19.1094, "step": 3966 }, { "epoch": 0.2634654977751212, "grad_norm": 374.8646240234375, "learning_rate": 1.946226362776572e-06, "loss": 21.3906, "step": 3967 }, { "epoch": 0.26353191206747695, "grad_norm": 168.2753448486328, "learning_rate": 1.9461915636773914e-06, "loss": 20.3125, "step": 3968 }, { "epoch": 0.26359832635983266, "grad_norm": 290.8452453613281, "learning_rate": 1.9461567536332133e-06, "loss": 18.7969, "step": 3969 }, { "epoch": 0.26366474065218837, "grad_norm": 253.98768615722656, "learning_rate": 1.9461219326444386e-06, "loss": 19.9375, "step": 3970 }, { "epoch": 0.2637311549445441, "grad_norm": 181.5511932373047, "learning_rate": 1.9460871007114714e-06, "loss": 17.1562, "step": 3971 }, { "epoch": 0.2637975692368998, "grad_norm": 264.03887939453125, "learning_rate": 1.946052257834714e-06, "loss": 20.0, "step": 3972 }, { "epoch": 0.2638639835292555, "grad_norm": 262.34466552734375, "learning_rate": 1.9460174040145695e-06, "loss": 19.4062, "step": 3973 }, { "epoch": 0.2639303978216112, "grad_norm": 331.7105407714844, "learning_rate": 1.9459825392514417e-06, "loss": 23.0469, "step": 3974 }, { "epoch": 0.2639968121139669, "grad_norm": 362.3244934082031, "learning_rate": 1.945947663545733e-06, "loss": 21.5938, "step": 3975 }, { "epoch": 0.2640632264063226, "grad_norm": 136.14410400390625, "learning_rate": 1.9459127768978474e-06, "loss": 14.9531, "step": 3976 }, { "epoch": 0.26412964069867834, "grad_norm": 225.90597534179688, "learning_rate": 1.9458778793081876e-06, "loss": 19.0781, "step": 3977 }, { "epoch": 0.26419605499103405, "grad_norm": 140.7743682861328, "learning_rate": 1.9458429707771585e-06, "loss": 19.7188, "step": 3978 }, { "epoch": 0.2642624692833898, "grad_norm": 135.61033630371094, "learning_rate": 1.9458080513051633e-06, "loss": 21.9375, "step": 3979 }, { "epoch": 0.2643288835757455, "grad_norm": 265.8990173339844, "learning_rate": 1.9457731208926054e-06, "loss": 23.875, "step": 3980 }, { "epoch": 0.26439529786810123, "grad_norm": 290.0419006347656, "learning_rate": 1.9457381795398903e-06, "loss": 21.875, "step": 3981 }, { "epoch": 0.26446171216045694, "grad_norm": 362.03338623046875, "learning_rate": 1.9457032272474207e-06, "loss": 19.5469, "step": 3982 }, { "epoch": 0.26452812645281265, "grad_norm": 243.1973876953125, "learning_rate": 1.9456682640156014e-06, "loss": 15.6094, "step": 3983 }, { "epoch": 0.26459454074516836, "grad_norm": 138.6696319580078, "learning_rate": 1.9456332898448374e-06, "loss": 18.7188, "step": 3984 }, { "epoch": 0.26466095503752407, "grad_norm": 290.470458984375, "learning_rate": 1.9455983047355323e-06, "loss": 22.8438, "step": 3985 }, { "epoch": 0.2647273693298798, "grad_norm": 361.9315490722656, "learning_rate": 1.9455633086880914e-06, "loss": 27.4375, "step": 3986 }, { "epoch": 0.2647937836222355, "grad_norm": 255.06480407714844, "learning_rate": 1.9455283017029195e-06, "loss": 26.0312, "step": 3987 }, { "epoch": 0.2648601979145912, "grad_norm": 202.261962890625, "learning_rate": 1.9454932837804216e-06, "loss": 16.7969, "step": 3988 }, { "epoch": 0.2649266122069469, "grad_norm": 173.34698486328125, "learning_rate": 1.9454582549210025e-06, "loss": 19.0781, "step": 3989 }, { "epoch": 0.2649930264993027, "grad_norm": 190.41726684570312, "learning_rate": 1.9454232151250675e-06, "loss": 19.7969, "step": 3990 }, { "epoch": 0.2650594407916584, "grad_norm": 401.4130859375, "learning_rate": 1.9453881643930215e-06, "loss": 18.5625, "step": 3991 }, { "epoch": 0.2651258550840141, "grad_norm": 463.03765869140625, "learning_rate": 1.9453531027252707e-06, "loss": 21.1875, "step": 3992 }, { "epoch": 0.2651922693763698, "grad_norm": 153.0019073486328, "learning_rate": 1.94531803012222e-06, "loss": 13.0312, "step": 3993 }, { "epoch": 0.2652586836687255, "grad_norm": 198.67092895507812, "learning_rate": 1.945282946584276e-06, "loss": 20.4062, "step": 3994 }, { "epoch": 0.2653250979610812, "grad_norm": 411.51434326171875, "learning_rate": 1.9452478521118435e-06, "loss": 15.7031, "step": 3995 }, { "epoch": 0.26539151225343693, "grad_norm": 304.3321533203125, "learning_rate": 1.9452127467053293e-06, "loss": 16.8281, "step": 3996 }, { "epoch": 0.26545792654579264, "grad_norm": 284.98809814453125, "learning_rate": 1.945177630365139e-06, "loss": 21.6875, "step": 3997 }, { "epoch": 0.26552434083814835, "grad_norm": 275.4327087402344, "learning_rate": 1.9451425030916785e-06, "loss": 23.1562, "step": 3998 }, { "epoch": 0.26559075513050406, "grad_norm": 606.7813720703125, "learning_rate": 1.9451073648853547e-06, "loss": 19.7656, "step": 3999 }, { "epoch": 0.2656571694228598, "grad_norm": 241.91513061523438, "learning_rate": 1.945072215746574e-06, "loss": 18.0781, "step": 4000 }, { "epoch": 0.26572358371521554, "grad_norm": 267.5826110839844, "learning_rate": 1.945037055675743e-06, "loss": 19.7656, "step": 4001 }, { "epoch": 0.26578999800757125, "grad_norm": 362.6865539550781, "learning_rate": 1.945001884673268e-06, "loss": 20.7656, "step": 4002 }, { "epoch": 0.26585641229992696, "grad_norm": 195.980712890625, "learning_rate": 1.944966702739556e-06, "loss": 20.1875, "step": 4003 }, { "epoch": 0.26592282659228267, "grad_norm": 176.94932556152344, "learning_rate": 1.9449315098750147e-06, "loss": 18.3125, "step": 4004 }, { "epoch": 0.2659892408846384, "grad_norm": 643.0502319335938, "learning_rate": 1.94489630608005e-06, "loss": 34.2188, "step": 4005 }, { "epoch": 0.2660556551769941, "grad_norm": 252.14369201660156, "learning_rate": 1.94486109135507e-06, "loss": 17.9688, "step": 4006 }, { "epoch": 0.2661220694693498, "grad_norm": 224.71163940429688, "learning_rate": 1.9448258657004815e-06, "loss": 18.4531, "step": 4007 }, { "epoch": 0.2661884837617055, "grad_norm": 187.08847045898438, "learning_rate": 1.944790629116692e-06, "loss": 14.7344, "step": 4008 }, { "epoch": 0.2662548980540612, "grad_norm": 506.7032165527344, "learning_rate": 1.9447553816041096e-06, "loss": 23.4062, "step": 4009 }, { "epoch": 0.2663213123464169, "grad_norm": 201.51295471191406, "learning_rate": 1.944720123163142e-06, "loss": 17.8906, "step": 4010 }, { "epoch": 0.26638772663877264, "grad_norm": 255.6527862548828, "learning_rate": 1.9446848537941965e-06, "loss": 22.625, "step": 4011 }, { "epoch": 0.2664541409311284, "grad_norm": 196.00550842285156, "learning_rate": 1.9446495734976815e-06, "loss": 20.3594, "step": 4012 }, { "epoch": 0.2665205552234841, "grad_norm": 208.5948944091797, "learning_rate": 1.9446142822740045e-06, "loss": 24.5938, "step": 4013 }, { "epoch": 0.2665869695158398, "grad_norm": 389.49383544921875, "learning_rate": 1.9445789801235744e-06, "loss": 16.2969, "step": 4014 }, { "epoch": 0.26665338380819553, "grad_norm": 184.83383178710938, "learning_rate": 1.9445436670467997e-06, "loss": 17.2344, "step": 4015 }, { "epoch": 0.26671979810055124, "grad_norm": 663.9196166992188, "learning_rate": 1.944508343044088e-06, "loss": 25.5625, "step": 4016 }, { "epoch": 0.26678621239290695, "grad_norm": 262.79486083984375, "learning_rate": 1.944473008115849e-06, "loss": 22.0312, "step": 4017 }, { "epoch": 0.26685262668526266, "grad_norm": 182.22930908203125, "learning_rate": 1.9444376622624904e-06, "loss": 18.2812, "step": 4018 }, { "epoch": 0.26691904097761837, "grad_norm": 220.6510009765625, "learning_rate": 1.9444023054844213e-06, "loss": 26.6562, "step": 4019 }, { "epoch": 0.2669854552699741, "grad_norm": 139.86814880371094, "learning_rate": 1.9443669377820513e-06, "loss": 20.6875, "step": 4020 }, { "epoch": 0.2670518695623298, "grad_norm": 161.91128540039062, "learning_rate": 1.9443315591557892e-06, "loss": 18.7188, "step": 4021 }, { "epoch": 0.2671182838546855, "grad_norm": 316.98004150390625, "learning_rate": 1.944296169606044e-06, "loss": 25.2812, "step": 4022 }, { "epoch": 0.26718469814704127, "grad_norm": 221.43728637695312, "learning_rate": 1.944260769133225e-06, "loss": 25.25, "step": 4023 }, { "epoch": 0.267251112439397, "grad_norm": 148.5077362060547, "learning_rate": 1.9442253577377423e-06, "loss": 16.1719, "step": 4024 }, { "epoch": 0.2673175267317527, "grad_norm": 593.2849731445312, "learning_rate": 1.944189935420005e-06, "loss": 22.9219, "step": 4025 }, { "epoch": 0.2673839410241084, "grad_norm": 240.30540466308594, "learning_rate": 1.944154502180423e-06, "loss": 17.4844, "step": 4026 }, { "epoch": 0.2674503553164641, "grad_norm": 152.13441467285156, "learning_rate": 1.9441190580194064e-06, "loss": 17.6719, "step": 4027 }, { "epoch": 0.2675167696088198, "grad_norm": 514.5271606445312, "learning_rate": 1.9440836029373645e-06, "loss": 23.0, "step": 4028 }, { "epoch": 0.2675831839011755, "grad_norm": 180.55877685546875, "learning_rate": 1.944048136934708e-06, "loss": 15.25, "step": 4029 }, { "epoch": 0.26764959819353124, "grad_norm": 164.75758361816406, "learning_rate": 1.944012660011847e-06, "loss": 19.7812, "step": 4030 }, { "epoch": 0.26771601248588694, "grad_norm": 349.71923828125, "learning_rate": 1.943977172169192e-06, "loss": 23.8438, "step": 4031 }, { "epoch": 0.26778242677824265, "grad_norm": 305.13299560546875, "learning_rate": 1.943941673407153e-06, "loss": 20.2188, "step": 4032 }, { "epoch": 0.2678488410705984, "grad_norm": 343.6818542480469, "learning_rate": 1.9439061637261416e-06, "loss": 22.0, "step": 4033 }, { "epoch": 0.26791525536295413, "grad_norm": 457.65692138671875, "learning_rate": 1.9438706431265675e-06, "loss": 15.5625, "step": 4034 }, { "epoch": 0.26798166965530984, "grad_norm": 443.3158264160156, "learning_rate": 1.9438351116088425e-06, "loss": 30.25, "step": 4035 }, { "epoch": 0.26804808394766555, "grad_norm": 247.75148010253906, "learning_rate": 1.943799569173377e-06, "loss": 19.0, "step": 4036 }, { "epoch": 0.26811449824002126, "grad_norm": 178.10780334472656, "learning_rate": 1.943764015820582e-06, "loss": 25.2344, "step": 4037 }, { "epoch": 0.26818091253237697, "grad_norm": 146.71522521972656, "learning_rate": 1.9437284515508697e-06, "loss": 19.75, "step": 4038 }, { "epoch": 0.2682473268247327, "grad_norm": 215.03663635253906, "learning_rate": 1.9436928763646503e-06, "loss": 16.25, "step": 4039 }, { "epoch": 0.2683137411170884, "grad_norm": 344.42578125, "learning_rate": 1.9436572902623363e-06, "loss": 24.2969, "step": 4040 }, { "epoch": 0.2683801554094441, "grad_norm": 318.1109619140625, "learning_rate": 1.9436216932443387e-06, "loss": 20.2188, "step": 4041 }, { "epoch": 0.2684465697017998, "grad_norm": 176.84291076660156, "learning_rate": 1.9435860853110696e-06, "loss": 21.5781, "step": 4042 }, { "epoch": 0.2685129839941555, "grad_norm": 226.1915740966797, "learning_rate": 1.9435504664629408e-06, "loss": 21.0312, "step": 4043 }, { "epoch": 0.2685793982865113, "grad_norm": 184.0587921142578, "learning_rate": 1.943514836700364e-06, "loss": 21.2031, "step": 4044 }, { "epoch": 0.268645812578867, "grad_norm": 431.9822692871094, "learning_rate": 1.9434791960237517e-06, "loss": 26.0469, "step": 4045 }, { "epoch": 0.2687122268712227, "grad_norm": 212.56398010253906, "learning_rate": 1.9434435444335164e-06, "loss": 16.9219, "step": 4046 }, { "epoch": 0.2687786411635784, "grad_norm": 174.3817596435547, "learning_rate": 1.9434078819300697e-06, "loss": 19.4062, "step": 4047 }, { "epoch": 0.2688450554559341, "grad_norm": 437.592041015625, "learning_rate": 1.943372208513825e-06, "loss": 21.4688, "step": 4048 }, { "epoch": 0.26891146974828983, "grad_norm": 390.12945556640625, "learning_rate": 1.9433365241851945e-06, "loss": 24.0, "step": 4049 }, { "epoch": 0.26897788404064554, "grad_norm": 123.69310760498047, "learning_rate": 1.943300828944591e-06, "loss": 14.7812, "step": 4050 }, { "epoch": 0.26904429833300125, "grad_norm": 247.96929931640625, "learning_rate": 1.9432651227924277e-06, "loss": 31.0938, "step": 4051 }, { "epoch": 0.26911071262535696, "grad_norm": 217.61041259765625, "learning_rate": 1.9432294057291168e-06, "loss": 23.3438, "step": 4052 }, { "epoch": 0.2691771269177127, "grad_norm": 280.1221923828125, "learning_rate": 1.9431936777550724e-06, "loss": 20.6562, "step": 4053 }, { "epoch": 0.2692435412100684, "grad_norm": 162.8563690185547, "learning_rate": 1.9431579388707075e-06, "loss": 19.6406, "step": 4054 }, { "epoch": 0.26930995550242415, "grad_norm": 192.68150329589844, "learning_rate": 1.943122189076435e-06, "loss": 19.9844, "step": 4055 }, { "epoch": 0.26937636979477986, "grad_norm": 344.9088439941406, "learning_rate": 1.9430864283726695e-06, "loss": 26.4062, "step": 4056 }, { "epoch": 0.26944278408713557, "grad_norm": 107.1580581665039, "learning_rate": 1.9430506567598235e-06, "loss": 14.5625, "step": 4057 }, { "epoch": 0.2695091983794913, "grad_norm": 176.56922912597656, "learning_rate": 1.9430148742383113e-06, "loss": 22.2969, "step": 4058 }, { "epoch": 0.269575612671847, "grad_norm": 221.47056579589844, "learning_rate": 1.942979080808547e-06, "loss": 21.125, "step": 4059 }, { "epoch": 0.2696420269642027, "grad_norm": 180.85458374023438, "learning_rate": 1.9429432764709447e-06, "loss": 18.0625, "step": 4060 }, { "epoch": 0.2697084412565584, "grad_norm": 224.48916625976562, "learning_rate": 1.942907461225918e-06, "loss": 21.75, "step": 4061 }, { "epoch": 0.2697748555489141, "grad_norm": 178.44937133789062, "learning_rate": 1.9428716350738814e-06, "loss": 17.2812, "step": 4062 }, { "epoch": 0.2698412698412698, "grad_norm": 237.4675750732422, "learning_rate": 1.9428357980152495e-06, "loss": 27.7188, "step": 4063 }, { "epoch": 0.26990768413362554, "grad_norm": 154.94619750976562, "learning_rate": 1.942799950050437e-06, "loss": 18.6094, "step": 4064 }, { "epoch": 0.26997409842598125, "grad_norm": 205.70851135253906, "learning_rate": 1.9427640911798583e-06, "loss": 22.8438, "step": 4065 }, { "epoch": 0.270040512718337, "grad_norm": 125.87969970703125, "learning_rate": 1.942728221403928e-06, "loss": 17.1875, "step": 4066 }, { "epoch": 0.2701069270106927, "grad_norm": 212.370849609375, "learning_rate": 1.942692340723061e-06, "loss": 20.8125, "step": 4067 }, { "epoch": 0.27017334130304843, "grad_norm": 222.61050415039062, "learning_rate": 1.9426564491376733e-06, "loss": 17.9375, "step": 4068 }, { "epoch": 0.27023975559540414, "grad_norm": 245.7503204345703, "learning_rate": 1.9426205466481784e-06, "loss": 26.8438, "step": 4069 }, { "epoch": 0.27030616988775985, "grad_norm": 173.80592346191406, "learning_rate": 1.9425846332549932e-06, "loss": 20.7812, "step": 4070 }, { "epoch": 0.27037258418011556, "grad_norm": 313.8436279296875, "learning_rate": 1.9425487089585327e-06, "loss": 17.8906, "step": 4071 }, { "epoch": 0.27043899847247127, "grad_norm": 266.5832214355469, "learning_rate": 1.942512773759212e-06, "loss": 24.2969, "step": 4072 }, { "epoch": 0.270505412764827, "grad_norm": 405.74151611328125, "learning_rate": 1.942476827657447e-06, "loss": 25.7812, "step": 4073 }, { "epoch": 0.2705718270571827, "grad_norm": 211.98475646972656, "learning_rate": 1.9424408706536533e-06, "loss": 22.6562, "step": 4074 }, { "epoch": 0.2706382413495384, "grad_norm": 226.95733642578125, "learning_rate": 1.9424049027482473e-06, "loss": 23.4688, "step": 4075 }, { "epoch": 0.2707046556418941, "grad_norm": 121.80953216552734, "learning_rate": 1.9423689239416447e-06, "loss": 17.3438, "step": 4076 }, { "epoch": 0.2707710699342499, "grad_norm": 163.69471740722656, "learning_rate": 1.9423329342342616e-06, "loss": 18.3281, "step": 4077 }, { "epoch": 0.2708374842266056, "grad_norm": 383.5516357421875, "learning_rate": 1.942296933626515e-06, "loss": 13.0547, "step": 4078 }, { "epoch": 0.2709038985189613, "grad_norm": 2262.572021484375, "learning_rate": 1.9422609221188204e-06, "loss": 15.7812, "step": 4079 }, { "epoch": 0.270970312811317, "grad_norm": 563.567138671875, "learning_rate": 1.942224899711595e-06, "loss": 27.125, "step": 4080 }, { "epoch": 0.2710367271036727, "grad_norm": 179.4683074951172, "learning_rate": 1.942188866405255e-06, "loss": 19.0, "step": 4081 }, { "epoch": 0.2711031413960284, "grad_norm": 275.48516845703125, "learning_rate": 1.942152822200218e-06, "loss": 28.375, "step": 4082 }, { "epoch": 0.27116955568838413, "grad_norm": 176.14813232421875, "learning_rate": 1.9421167670969e-06, "loss": 23.9688, "step": 4083 }, { "epoch": 0.27123596998073984, "grad_norm": 281.3272399902344, "learning_rate": 1.9420807010957187e-06, "loss": 26.0781, "step": 4084 }, { "epoch": 0.27130238427309555, "grad_norm": 186.4254913330078, "learning_rate": 1.942044624197091e-06, "loss": 20.375, "step": 4085 }, { "epoch": 0.27136879856545126, "grad_norm": 325.2388916015625, "learning_rate": 1.942008536401434e-06, "loss": 23.3125, "step": 4086 }, { "epoch": 0.271435212857807, "grad_norm": 167.71817016601562, "learning_rate": 1.941972437709166e-06, "loss": 15.5156, "step": 4087 }, { "epoch": 0.27150162715016274, "grad_norm": 339.6752624511719, "learning_rate": 1.941936328120704e-06, "loss": 21.7344, "step": 4088 }, { "epoch": 0.27156804144251845, "grad_norm": 233.27748107910156, "learning_rate": 1.9419002076364652e-06, "loss": 20.0781, "step": 4089 }, { "epoch": 0.27163445573487416, "grad_norm": 162.30506896972656, "learning_rate": 1.9418640762568684e-06, "loss": 15.9375, "step": 4090 }, { "epoch": 0.27170087002722987, "grad_norm": 276.7576599121094, "learning_rate": 1.941827933982331e-06, "loss": 22.2344, "step": 4091 }, { "epoch": 0.2717672843195856, "grad_norm": 233.5619354248047, "learning_rate": 1.941791780813271e-06, "loss": 20.7969, "step": 4092 }, { "epoch": 0.2718336986119413, "grad_norm": 508.64520263671875, "learning_rate": 1.9417556167501067e-06, "loss": 20.1562, "step": 4093 }, { "epoch": 0.271900112904297, "grad_norm": 289.7105712890625, "learning_rate": 1.9417194417932566e-06, "loss": 16.1562, "step": 4094 }, { "epoch": 0.2719665271966527, "grad_norm": 519.2803955078125, "learning_rate": 1.941683255943139e-06, "loss": 19.8438, "step": 4095 }, { "epoch": 0.2720329414890084, "grad_norm": 166.5434112548828, "learning_rate": 1.9416470592001724e-06, "loss": 21.9688, "step": 4096 }, { "epoch": 0.2720993557813641, "grad_norm": 264.62890625, "learning_rate": 1.9416108515647756e-06, "loss": 17.3906, "step": 4097 }, { "epoch": 0.27216577007371984, "grad_norm": 208.22772216796875, "learning_rate": 1.9415746330373677e-06, "loss": 20.2656, "step": 4098 }, { "epoch": 0.2722321843660756, "grad_norm": 366.56365966796875, "learning_rate": 1.941538403618367e-06, "loss": 23.125, "step": 4099 }, { "epoch": 0.2722985986584313, "grad_norm": 176.52500915527344, "learning_rate": 1.9415021633081932e-06, "loss": 18.4844, "step": 4100 }, { "epoch": 0.272365012950787, "grad_norm": 542.1272583007812, "learning_rate": 1.9414659121072653e-06, "loss": 24.8906, "step": 4101 }, { "epoch": 0.27243142724314273, "grad_norm": 311.5672302246094, "learning_rate": 1.9414296500160025e-06, "loss": 20.5625, "step": 4102 }, { "epoch": 0.27249784153549844, "grad_norm": 259.9654541015625, "learning_rate": 1.9413933770348247e-06, "loss": 17.75, "step": 4103 }, { "epoch": 0.27256425582785415, "grad_norm": 475.9064636230469, "learning_rate": 1.941357093164151e-06, "loss": 22.125, "step": 4104 }, { "epoch": 0.27263067012020986, "grad_norm": 422.8355712890625, "learning_rate": 1.941320798404401e-06, "loss": 23.2188, "step": 4105 }, { "epoch": 0.27269708441256557, "grad_norm": 268.1438293457031, "learning_rate": 1.941284492755995e-06, "loss": 19.8281, "step": 4106 }, { "epoch": 0.2727634987049213, "grad_norm": 320.5318908691406, "learning_rate": 1.9412481762193527e-06, "loss": 30.0938, "step": 4107 }, { "epoch": 0.272829912997277, "grad_norm": 222.91134643554688, "learning_rate": 1.941211848794894e-06, "loss": 16.2656, "step": 4108 }, { "epoch": 0.27289632728963276, "grad_norm": 205.9908447265625, "learning_rate": 1.9411755104830396e-06, "loss": 22.0938, "step": 4109 }, { "epoch": 0.27296274158198847, "grad_norm": 256.75506591796875, "learning_rate": 1.94113916128421e-06, "loss": 24.0312, "step": 4110 }, { "epoch": 0.2730291558743442, "grad_norm": 467.18756103515625, "learning_rate": 1.9411028011988247e-06, "loss": 28.0625, "step": 4111 }, { "epoch": 0.2730955701666999, "grad_norm": 352.3834228515625, "learning_rate": 1.941066430227305e-06, "loss": 14.6719, "step": 4112 }, { "epoch": 0.2731619844590556, "grad_norm": 343.6704406738281, "learning_rate": 1.941030048370072e-06, "loss": 20.5469, "step": 4113 }, { "epoch": 0.2732283987514113, "grad_norm": 163.38645935058594, "learning_rate": 1.940993655627545e-06, "loss": 18.5312, "step": 4114 }, { "epoch": 0.273294813043767, "grad_norm": 242.47909545898438, "learning_rate": 1.940957252000147e-06, "loss": 26.0, "step": 4115 }, { "epoch": 0.2733612273361227, "grad_norm": 255.94393920898438, "learning_rate": 1.9409208374882976e-06, "loss": 14.4531, "step": 4116 }, { "epoch": 0.27342764162847844, "grad_norm": 206.2109832763672, "learning_rate": 1.9408844120924186e-06, "loss": 17.75, "step": 4117 }, { "epoch": 0.27349405592083414, "grad_norm": 333.0246887207031, "learning_rate": 1.9408479758129316e-06, "loss": 24.2812, "step": 4118 }, { "epoch": 0.27356047021318985, "grad_norm": 248.49026489257812, "learning_rate": 1.9408115286502575e-06, "loss": 17.3125, "step": 4119 }, { "epoch": 0.2736268845055456, "grad_norm": 233.9376983642578, "learning_rate": 1.9407750706048176e-06, "loss": 27.9062, "step": 4120 }, { "epoch": 0.27369329879790133, "grad_norm": 448.7729797363281, "learning_rate": 1.9407386016770348e-06, "loss": 21.0625, "step": 4121 }, { "epoch": 0.27375971309025704, "grad_norm": 322.43817138671875, "learning_rate": 1.9407021218673303e-06, "loss": 20.5, "step": 4122 }, { "epoch": 0.27382612738261275, "grad_norm": 239.8876953125, "learning_rate": 1.940665631176126e-06, "loss": 20.0625, "step": 4123 }, { "epoch": 0.27389254167496846, "grad_norm": 252.40518188476562, "learning_rate": 1.940629129603844e-06, "loss": 18.2656, "step": 4124 }, { "epoch": 0.27395895596732417, "grad_norm": 149.04434204101562, "learning_rate": 1.9405926171509064e-06, "loss": 17.125, "step": 4125 }, { "epoch": 0.2740253702596799, "grad_norm": 379.7170715332031, "learning_rate": 1.940556093817736e-06, "loss": 23.7812, "step": 4126 }, { "epoch": 0.2740917845520356, "grad_norm": 1074.2978515625, "learning_rate": 1.940519559604755e-06, "loss": 27.0156, "step": 4127 }, { "epoch": 0.2741581988443913, "grad_norm": 137.01222229003906, "learning_rate": 1.9404830145123862e-06, "loss": 15.0, "step": 4128 }, { "epoch": 0.274224613136747, "grad_norm": 311.8098449707031, "learning_rate": 1.9404464585410523e-06, "loss": 23.7188, "step": 4129 }, { "epoch": 0.2742910274291027, "grad_norm": 303.5673522949219, "learning_rate": 1.940409891691176e-06, "loss": 23.625, "step": 4130 }, { "epoch": 0.2743574417214585, "grad_norm": 510.1946105957031, "learning_rate": 1.9403733139631804e-06, "loss": 22.8438, "step": 4131 }, { "epoch": 0.2744238560138142, "grad_norm": 228.13751220703125, "learning_rate": 1.940336725357488e-06, "loss": 26.0938, "step": 4132 }, { "epoch": 0.2744902703061699, "grad_norm": 168.50741577148438, "learning_rate": 1.940300125874523e-06, "loss": 19.3906, "step": 4133 }, { "epoch": 0.2745566845985256, "grad_norm": 653.2823486328125, "learning_rate": 1.9402635155147087e-06, "loss": 14.2812, "step": 4134 }, { "epoch": 0.2746230988908813, "grad_norm": 127.75788116455078, "learning_rate": 1.940226894278468e-06, "loss": 17.9062, "step": 4135 }, { "epoch": 0.27468951318323703, "grad_norm": 230.1283721923828, "learning_rate": 1.9401902621662244e-06, "loss": 20.2031, "step": 4136 }, { "epoch": 0.27475592747559274, "grad_norm": 194.7140350341797, "learning_rate": 1.9401536191784026e-06, "loss": 18.8594, "step": 4137 }, { "epoch": 0.27482234176794845, "grad_norm": 392.91387939453125, "learning_rate": 1.9401169653154256e-06, "loss": 20.7031, "step": 4138 }, { "epoch": 0.27488875606030416, "grad_norm": 519.046142578125, "learning_rate": 1.9400803005777175e-06, "loss": 23.1719, "step": 4139 }, { "epoch": 0.2749551703526599, "grad_norm": 379.4604797363281, "learning_rate": 1.9400436249657026e-06, "loss": 19.7812, "step": 4140 }, { "epoch": 0.2750215846450156, "grad_norm": 142.56790161132812, "learning_rate": 1.940006938479805e-06, "loss": 21.8125, "step": 4141 }, { "epoch": 0.27508799893737135, "grad_norm": 192.60577392578125, "learning_rate": 1.9399702411204494e-06, "loss": 22.75, "step": 4142 }, { "epoch": 0.27515441322972706, "grad_norm": 169.54673767089844, "learning_rate": 1.9399335328880598e-06, "loss": 19.2031, "step": 4143 }, { "epoch": 0.27522082752208277, "grad_norm": 157.93560791015625, "learning_rate": 1.9398968137830612e-06, "loss": 20.6094, "step": 4144 }, { "epoch": 0.2752872418144385, "grad_norm": 371.83056640625, "learning_rate": 1.9398600838058786e-06, "loss": 28.6719, "step": 4145 }, { "epoch": 0.2753536561067942, "grad_norm": 222.9830322265625, "learning_rate": 1.939823342956936e-06, "loss": 19.0625, "step": 4146 }, { "epoch": 0.2754200703991499, "grad_norm": 324.4444885253906, "learning_rate": 1.939786591236659e-06, "loss": 21.5469, "step": 4147 }, { "epoch": 0.2754864846915056, "grad_norm": 251.63519287109375, "learning_rate": 1.939749828645473e-06, "loss": 19.4062, "step": 4148 }, { "epoch": 0.2755528989838613, "grad_norm": 299.7850341796875, "learning_rate": 1.9397130551838026e-06, "loss": 23.3438, "step": 4149 }, { "epoch": 0.275619313276217, "grad_norm": 422.51226806640625, "learning_rate": 1.939676270852073e-06, "loss": 21.4219, "step": 4150 }, { "epoch": 0.27568572756857274, "grad_norm": 401.6759948730469, "learning_rate": 1.939639475650711e-06, "loss": 21.9375, "step": 4151 }, { "epoch": 0.27575214186092845, "grad_norm": 780.8549194335938, "learning_rate": 1.9396026695801408e-06, "loss": 21.9062, "step": 4152 }, { "epoch": 0.2758185561532842, "grad_norm": 234.2027587890625, "learning_rate": 1.939565852640789e-06, "loss": 21.4844, "step": 4153 }, { "epoch": 0.2758849704456399, "grad_norm": 275.8407287597656, "learning_rate": 1.939529024833081e-06, "loss": 18.1719, "step": 4154 }, { "epoch": 0.27595138473799563, "grad_norm": 209.3868408203125, "learning_rate": 1.939492186157443e-06, "loss": 17.7344, "step": 4155 }, { "epoch": 0.27601779903035134, "grad_norm": 506.6939697265625, "learning_rate": 1.9394553366143013e-06, "loss": 22.8438, "step": 4156 }, { "epoch": 0.27608421332270705, "grad_norm": 190.98043823242188, "learning_rate": 1.939418476204082e-06, "loss": 19.0, "step": 4157 }, { "epoch": 0.27615062761506276, "grad_norm": 457.0316467285156, "learning_rate": 1.939381604927211e-06, "loss": 25.0625, "step": 4158 }, { "epoch": 0.27621704190741847, "grad_norm": 411.4357604980469, "learning_rate": 1.9393447227841162e-06, "loss": 18.75, "step": 4159 }, { "epoch": 0.2762834561997742, "grad_norm": 232.1797637939453, "learning_rate": 1.9393078297752225e-06, "loss": 21.8438, "step": 4160 }, { "epoch": 0.2763498704921299, "grad_norm": 310.42626953125, "learning_rate": 1.939270925900958e-06, "loss": 23.1875, "step": 4161 }, { "epoch": 0.2764162847844856, "grad_norm": 277.7898254394531, "learning_rate": 1.939234011161749e-06, "loss": 22.2812, "step": 4162 }, { "epoch": 0.2764826990768413, "grad_norm": 194.46514892578125, "learning_rate": 1.939197085558022e-06, "loss": 18.6562, "step": 4163 }, { "epoch": 0.2765491133691971, "grad_norm": 2784.439697265625, "learning_rate": 1.9391601490902054e-06, "loss": 23.1875, "step": 4164 }, { "epoch": 0.2766155276615528, "grad_norm": 220.41348266601562, "learning_rate": 1.939123201758725e-06, "loss": 22.875, "step": 4165 }, { "epoch": 0.2766819419539085, "grad_norm": 176.24839782714844, "learning_rate": 1.9390862435640096e-06, "loss": 16.4844, "step": 4166 }, { "epoch": 0.2767483562462642, "grad_norm": 324.63092041015625, "learning_rate": 1.9390492745064853e-06, "loss": 31.6562, "step": 4167 }, { "epoch": 0.2768147705386199, "grad_norm": 192.79786682128906, "learning_rate": 1.939012294586581e-06, "loss": 25.0938, "step": 4168 }, { "epoch": 0.2768811848309756, "grad_norm": 322.4548645019531, "learning_rate": 1.938975303804724e-06, "loss": 23.8438, "step": 4169 }, { "epoch": 0.27694759912333133, "grad_norm": 229.70445251464844, "learning_rate": 1.938938302161342e-06, "loss": 19.2031, "step": 4170 }, { "epoch": 0.27701401341568704, "grad_norm": 213.65171813964844, "learning_rate": 1.9389012896568627e-06, "loss": 15.2812, "step": 4171 }, { "epoch": 0.27708042770804275, "grad_norm": 239.2183074951172, "learning_rate": 1.9388642662917156e-06, "loss": 17.8438, "step": 4172 }, { "epoch": 0.27714684200039846, "grad_norm": 192.9434051513672, "learning_rate": 1.9388272320663274e-06, "loss": 15.8281, "step": 4173 }, { "epoch": 0.2772132562927542, "grad_norm": 201.60699462890625, "learning_rate": 1.9387901869811273e-06, "loss": 18.9375, "step": 4174 }, { "epoch": 0.27727967058510994, "grad_norm": 84.81400299072266, "learning_rate": 1.9387531310365434e-06, "loss": 11.2031, "step": 4175 }, { "epoch": 0.27734608487746565, "grad_norm": 393.40643310546875, "learning_rate": 1.938716064233005e-06, "loss": 24.6562, "step": 4176 }, { "epoch": 0.27741249916982136, "grad_norm": 277.86248779296875, "learning_rate": 1.9386789865709404e-06, "loss": 21.6094, "step": 4177 }, { "epoch": 0.27747891346217707, "grad_norm": 334.83660888671875, "learning_rate": 1.9386418980507785e-06, "loss": 17.7344, "step": 4178 }, { "epoch": 0.2775453277545328, "grad_norm": 216.3677520751953, "learning_rate": 1.9386047986729484e-06, "loss": 18.7812, "step": 4179 }, { "epoch": 0.2776117420468885, "grad_norm": 250.59930419921875, "learning_rate": 1.938567688437879e-06, "loss": 28.1719, "step": 4180 }, { "epoch": 0.2776781563392442, "grad_norm": 167.76446533203125, "learning_rate": 1.938530567346e-06, "loss": 17.375, "step": 4181 }, { "epoch": 0.2777445706315999, "grad_norm": 326.0567321777344, "learning_rate": 1.9384934353977406e-06, "loss": 22.4062, "step": 4182 }, { "epoch": 0.2778109849239556, "grad_norm": 826.0078735351562, "learning_rate": 1.93845629259353e-06, "loss": 23.4688, "step": 4183 }, { "epoch": 0.2778773992163113, "grad_norm": 306.980224609375, "learning_rate": 1.9384191389337985e-06, "loss": 19.875, "step": 4184 }, { "epoch": 0.2779438135086671, "grad_norm": 295.65814208984375, "learning_rate": 1.9383819744189755e-06, "loss": 24.7344, "step": 4185 }, { "epoch": 0.2780102278010228, "grad_norm": 268.14154052734375, "learning_rate": 1.9383447990494906e-06, "loss": 23.4375, "step": 4186 }, { "epoch": 0.2780766420933785, "grad_norm": 182.82537841796875, "learning_rate": 1.9383076128257743e-06, "loss": 17.6562, "step": 4187 }, { "epoch": 0.2781430563857342, "grad_norm": 262.52313232421875, "learning_rate": 1.9382704157482567e-06, "loss": 16.3828, "step": 4188 }, { "epoch": 0.27820947067808993, "grad_norm": 432.04632568359375, "learning_rate": 1.9382332078173684e-06, "loss": 28.6875, "step": 4189 }, { "epoch": 0.27827588497044564, "grad_norm": 226.2184600830078, "learning_rate": 1.9381959890335388e-06, "loss": 25.125, "step": 4190 }, { "epoch": 0.27834229926280135, "grad_norm": 580.134033203125, "learning_rate": 1.9381587593971987e-06, "loss": 17.5469, "step": 4191 }, { "epoch": 0.27840871355515706, "grad_norm": 253.95680236816406, "learning_rate": 1.9381215189087796e-06, "loss": 20.4062, "step": 4192 }, { "epoch": 0.27847512784751277, "grad_norm": 158.61802673339844, "learning_rate": 1.938084267568712e-06, "loss": 15.8594, "step": 4193 }, { "epoch": 0.2785415421398685, "grad_norm": 110.7625961303711, "learning_rate": 1.9380470053774257e-06, "loss": 13.4688, "step": 4194 }, { "epoch": 0.2786079564322242, "grad_norm": 255.6147003173828, "learning_rate": 1.938009732335353e-06, "loss": 24.3125, "step": 4195 }, { "epoch": 0.27867437072457996, "grad_norm": 178.99940490722656, "learning_rate": 1.937972448442925e-06, "loss": 15.9688, "step": 4196 }, { "epoch": 0.27874078501693567, "grad_norm": 131.2237548828125, "learning_rate": 1.9379351537005717e-06, "loss": 18.3438, "step": 4197 }, { "epoch": 0.2788071993092914, "grad_norm": 473.8890075683594, "learning_rate": 1.937897848108726e-06, "loss": 15.3125, "step": 4198 }, { "epoch": 0.2788736136016471, "grad_norm": 323.311767578125, "learning_rate": 1.9378605316678183e-06, "loss": 16.75, "step": 4199 }, { "epoch": 0.2789400278940028, "grad_norm": 230.05908203125, "learning_rate": 1.9378232043782813e-06, "loss": 22.375, "step": 4200 }, { "epoch": 0.2790064421863585, "grad_norm": 250.52870178222656, "learning_rate": 1.937785866240546e-06, "loss": 19.5156, "step": 4201 }, { "epoch": 0.2790728564787142, "grad_norm": 230.14894104003906, "learning_rate": 1.937748517255045e-06, "loss": 19.2344, "step": 4202 }, { "epoch": 0.2791392707710699, "grad_norm": 214.74620056152344, "learning_rate": 1.9377111574222093e-06, "loss": 18.0156, "step": 4203 }, { "epoch": 0.27920568506342563, "grad_norm": 174.3762969970703, "learning_rate": 1.937673786742472e-06, "loss": 19.3438, "step": 4204 }, { "epoch": 0.27927209935578134, "grad_norm": 229.27854919433594, "learning_rate": 1.9376364052162646e-06, "loss": 22.6719, "step": 4205 }, { "epoch": 0.27933851364813705, "grad_norm": 853.4232788085938, "learning_rate": 1.93759901284402e-06, "loss": 15.7812, "step": 4206 }, { "epoch": 0.2794049279404928, "grad_norm": 259.07025146484375, "learning_rate": 1.937561609626171e-06, "loss": 22.3125, "step": 4207 }, { "epoch": 0.27947134223284853, "grad_norm": 511.3079528808594, "learning_rate": 1.9375241955631497e-06, "loss": 21.1719, "step": 4208 }, { "epoch": 0.27953775652520424, "grad_norm": 266.44683837890625, "learning_rate": 1.9374867706553894e-06, "loss": 19.1094, "step": 4209 }, { "epoch": 0.27960417081755995, "grad_norm": 200.81101989746094, "learning_rate": 1.937449334903323e-06, "loss": 20.6094, "step": 4210 }, { "epoch": 0.27967058510991566, "grad_norm": 278.0892333984375, "learning_rate": 1.9374118883073825e-06, "loss": 23.75, "step": 4211 }, { "epoch": 0.27973699940227137, "grad_norm": 187.2794189453125, "learning_rate": 1.937374430868002e-06, "loss": 22.5781, "step": 4212 }, { "epoch": 0.2798034136946271, "grad_norm": 308.97662353515625, "learning_rate": 1.937336962585615e-06, "loss": 27.8438, "step": 4213 }, { "epoch": 0.2798698279869828, "grad_norm": 193.37905883789062, "learning_rate": 1.9372994834606538e-06, "loss": 22.9375, "step": 4214 }, { "epoch": 0.2799362422793385, "grad_norm": 248.58349609375, "learning_rate": 1.9372619934935536e-06, "loss": 21.2812, "step": 4215 }, { "epoch": 0.2800026565716942, "grad_norm": 214.45053100585938, "learning_rate": 1.9372244926847463e-06, "loss": 17.7344, "step": 4216 }, { "epoch": 0.2800690708640499, "grad_norm": 205.12489318847656, "learning_rate": 1.937186981034667e-06, "loss": 15.9062, "step": 4217 }, { "epoch": 0.2801354851564057, "grad_norm": 251.4149169921875, "learning_rate": 1.937149458543749e-06, "loss": 27.125, "step": 4218 }, { "epoch": 0.2802018994487614, "grad_norm": 227.63174438476562, "learning_rate": 1.9371119252124266e-06, "loss": 21.8438, "step": 4219 }, { "epoch": 0.2802683137411171, "grad_norm": 262.5974426269531, "learning_rate": 1.9370743810411334e-06, "loss": 23.0781, "step": 4220 }, { "epoch": 0.2803347280334728, "grad_norm": 161.7084197998047, "learning_rate": 1.937036826030304e-06, "loss": 20.7812, "step": 4221 }, { "epoch": 0.2804011423258285, "grad_norm": 264.29736328125, "learning_rate": 1.9369992601803732e-06, "loss": 28.9062, "step": 4222 }, { "epoch": 0.28046755661818423, "grad_norm": 281.618896484375, "learning_rate": 1.9369616834917753e-06, "loss": 16.1719, "step": 4223 }, { "epoch": 0.28053397091053994, "grad_norm": 265.0268249511719, "learning_rate": 1.936924095964945e-06, "loss": 25.5625, "step": 4224 }, { "epoch": 0.28060038520289565, "grad_norm": 289.00927734375, "learning_rate": 1.9368864976003173e-06, "loss": 15.0312, "step": 4225 }, { "epoch": 0.28066679949525136, "grad_norm": 257.1002197265625, "learning_rate": 1.9368488883983263e-06, "loss": 15.0469, "step": 4226 }, { "epoch": 0.28073321378760707, "grad_norm": 424.20458984375, "learning_rate": 1.936811268359408e-06, "loss": 25.75, "step": 4227 }, { "epoch": 0.2807996280799628, "grad_norm": 1133.4501953125, "learning_rate": 1.9367736374839965e-06, "loss": 23.5312, "step": 4228 }, { "epoch": 0.28086604237231855, "grad_norm": 258.6236572265625, "learning_rate": 1.9367359957725286e-06, "loss": 20.4375, "step": 4229 }, { "epoch": 0.28093245666467426, "grad_norm": 108.32262420654297, "learning_rate": 1.936698343225438e-06, "loss": 15.7031, "step": 4230 }, { "epoch": 0.28099887095702997, "grad_norm": 246.97793579101562, "learning_rate": 1.9366606798431614e-06, "loss": 16.8906, "step": 4231 }, { "epoch": 0.2810652852493857, "grad_norm": 168.98130798339844, "learning_rate": 1.9366230056261345e-06, "loss": 16.2344, "step": 4232 }, { "epoch": 0.2811316995417414, "grad_norm": 278.3323974609375, "learning_rate": 1.9365853205747922e-06, "loss": 28.7188, "step": 4233 }, { "epoch": 0.2811981138340971, "grad_norm": 193.08265686035156, "learning_rate": 1.9365476246895716e-06, "loss": 19.125, "step": 4234 }, { "epoch": 0.2812645281264528, "grad_norm": 163.5370330810547, "learning_rate": 1.9365099179709076e-06, "loss": 20.7812, "step": 4235 }, { "epoch": 0.2813309424188085, "grad_norm": 459.9504089355469, "learning_rate": 1.9364722004192372e-06, "loss": 26.7656, "step": 4236 }, { "epoch": 0.2813973567111642, "grad_norm": 217.1433563232422, "learning_rate": 1.9364344720349963e-06, "loss": 21.0156, "step": 4237 }, { "epoch": 0.28146377100351994, "grad_norm": 176.12551879882812, "learning_rate": 1.9363967328186216e-06, "loss": 22.0, "step": 4238 }, { "epoch": 0.28153018529587565, "grad_norm": 921.0579223632812, "learning_rate": 1.936358982770549e-06, "loss": 14.6875, "step": 4239 }, { "epoch": 0.2815965995882314, "grad_norm": 197.88194274902344, "learning_rate": 1.936321221891216e-06, "loss": 16.0938, "step": 4240 }, { "epoch": 0.2816630138805871, "grad_norm": 200.494384765625, "learning_rate": 1.936283450181059e-06, "loss": 16.5938, "step": 4241 }, { "epoch": 0.28172942817294283, "grad_norm": 419.49896240234375, "learning_rate": 1.936245667640515e-06, "loss": 19.2188, "step": 4242 }, { "epoch": 0.28179584246529854, "grad_norm": 422.0975036621094, "learning_rate": 1.9362078742700207e-06, "loss": 24.375, "step": 4243 }, { "epoch": 0.28186225675765425, "grad_norm": 294.9333190917969, "learning_rate": 1.9361700700700135e-06, "loss": 26.5469, "step": 4244 }, { "epoch": 0.28192867105000996, "grad_norm": 511.8170166015625, "learning_rate": 1.936132255040931e-06, "loss": 23.625, "step": 4245 }, { "epoch": 0.28199508534236567, "grad_norm": 295.9349060058594, "learning_rate": 1.9360944291832103e-06, "loss": 16.0, "step": 4246 }, { "epoch": 0.2820614996347214, "grad_norm": 727.2345581054688, "learning_rate": 1.9360565924972893e-06, "loss": 19.2344, "step": 4247 }, { "epoch": 0.2821279139270771, "grad_norm": 303.0041809082031, "learning_rate": 1.936018744983605e-06, "loss": 21.5, "step": 4248 }, { "epoch": 0.2821943282194328, "grad_norm": 148.31967163085938, "learning_rate": 1.9359808866425957e-06, "loss": 20.6875, "step": 4249 }, { "epoch": 0.2822607425117885, "grad_norm": 452.9697570800781, "learning_rate": 1.9359430174746993e-06, "loss": 26.5312, "step": 4250 }, { "epoch": 0.2823271568041443, "grad_norm": 196.37884521484375, "learning_rate": 1.9359051374803535e-06, "loss": 16.4531, "step": 4251 }, { "epoch": 0.2823935710965, "grad_norm": 288.536376953125, "learning_rate": 1.9358672466599974e-06, "loss": 20.4688, "step": 4252 }, { "epoch": 0.2824599853888557, "grad_norm": 296.84088134765625, "learning_rate": 1.9358293450140677e-06, "loss": 15.5312, "step": 4253 }, { "epoch": 0.2825263996812114, "grad_norm": 168.44064331054688, "learning_rate": 1.9357914325430045e-06, "loss": 17.75, "step": 4254 }, { "epoch": 0.2825928139735671, "grad_norm": 425.2766418457031, "learning_rate": 1.9357535092472455e-06, "loss": 15.6406, "step": 4255 }, { "epoch": 0.2826592282659228, "grad_norm": 225.5111541748047, "learning_rate": 1.9357155751272292e-06, "loss": 25.5, "step": 4256 }, { "epoch": 0.28272564255827853, "grad_norm": 454.2983703613281, "learning_rate": 1.935677630183395e-06, "loss": 17.5781, "step": 4257 }, { "epoch": 0.28279205685063424, "grad_norm": 222.65347290039062, "learning_rate": 1.9356396744161815e-06, "loss": 24.4375, "step": 4258 }, { "epoch": 0.28285847114298995, "grad_norm": 348.4278564453125, "learning_rate": 1.9356017078260275e-06, "loss": 18.4375, "step": 4259 }, { "epoch": 0.28292488543534566, "grad_norm": 148.51080322265625, "learning_rate": 1.935563730413373e-06, "loss": 12.8594, "step": 4260 }, { "epoch": 0.28299129972770143, "grad_norm": 672.133056640625, "learning_rate": 1.935525742178656e-06, "loss": 20.0625, "step": 4261 }, { "epoch": 0.28305771402005714, "grad_norm": 313.7603454589844, "learning_rate": 1.935487743122317e-06, "loss": 24.0, "step": 4262 }, { "epoch": 0.28312412831241285, "grad_norm": 166.85403442382812, "learning_rate": 1.935449733244795e-06, "loss": 16.0156, "step": 4263 }, { "epoch": 0.28319054260476856, "grad_norm": 349.3628234863281, "learning_rate": 1.9354117125465305e-06, "loss": 17.5625, "step": 4264 }, { "epoch": 0.28325695689712427, "grad_norm": 149.76242065429688, "learning_rate": 1.935373681027962e-06, "loss": 18.8125, "step": 4265 }, { "epoch": 0.28332337118948, "grad_norm": 232.35171508789062, "learning_rate": 1.935335638689531e-06, "loss": 18.4531, "step": 4266 }, { "epoch": 0.2833897854818357, "grad_norm": 214.11656188964844, "learning_rate": 1.935297585531676e-06, "loss": 15.875, "step": 4267 }, { "epoch": 0.2834561997741914, "grad_norm": 151.49588012695312, "learning_rate": 1.935259521554838e-06, "loss": 15.8281, "step": 4268 }, { "epoch": 0.2835226140665471, "grad_norm": 186.80413818359375, "learning_rate": 1.935221446759457e-06, "loss": 14.9844, "step": 4269 }, { "epoch": 0.2835890283589028, "grad_norm": 192.9459228515625, "learning_rate": 1.9351833611459736e-06, "loss": 22.5156, "step": 4270 }, { "epoch": 0.2836554426512585, "grad_norm": 243.8728485107422, "learning_rate": 1.9351452647148287e-06, "loss": 18.4844, "step": 4271 }, { "epoch": 0.2837218569436143, "grad_norm": 472.832763671875, "learning_rate": 1.9351071574664622e-06, "loss": 23.5938, "step": 4272 }, { "epoch": 0.28378827123597, "grad_norm": 335.7361755371094, "learning_rate": 1.9350690394013157e-06, "loss": 23.7031, "step": 4273 }, { "epoch": 0.2838546855283257, "grad_norm": 270.7243957519531, "learning_rate": 1.9350309105198294e-06, "loss": 14.9062, "step": 4274 }, { "epoch": 0.2839210998206814, "grad_norm": 551.8590087890625, "learning_rate": 1.9349927708224448e-06, "loss": 18.0, "step": 4275 }, { "epoch": 0.28398751411303713, "grad_norm": 247.10537719726562, "learning_rate": 1.934954620309603e-06, "loss": 20.0, "step": 4276 }, { "epoch": 0.28405392840539284, "grad_norm": 159.8889923095703, "learning_rate": 1.9349164589817456e-06, "loss": 15.9844, "step": 4277 }, { "epoch": 0.28412034269774855, "grad_norm": 409.1812744140625, "learning_rate": 1.9348782868393136e-06, "loss": 22.5625, "step": 4278 }, { "epoch": 0.28418675699010426, "grad_norm": 288.7698974609375, "learning_rate": 1.934840103882748e-06, "loss": 15.9844, "step": 4279 }, { "epoch": 0.28425317128245997, "grad_norm": 154.01837158203125, "learning_rate": 1.9348019101124915e-06, "loss": 16.9062, "step": 4280 }, { "epoch": 0.2843195855748157, "grad_norm": 281.8267822265625, "learning_rate": 1.934763705528986e-06, "loss": 24.4062, "step": 4281 }, { "epoch": 0.2843859998671714, "grad_norm": 124.7929458618164, "learning_rate": 1.9347254901326723e-06, "loss": 21.0625, "step": 4282 }, { "epoch": 0.28445241415952716, "grad_norm": 378.93963623046875, "learning_rate": 1.9346872639239935e-06, "loss": 20.125, "step": 4283 }, { "epoch": 0.28451882845188287, "grad_norm": 576.2761840820312, "learning_rate": 1.9346490269033914e-06, "loss": 18.7812, "step": 4284 }, { "epoch": 0.2845852427442386, "grad_norm": 195.24557495117188, "learning_rate": 1.9346107790713086e-06, "loss": 16.3438, "step": 4285 }, { "epoch": 0.2846516570365943, "grad_norm": 278.8121032714844, "learning_rate": 1.9345725204281865e-06, "loss": 21.3281, "step": 4286 }, { "epoch": 0.28471807132895, "grad_norm": 408.8148193359375, "learning_rate": 1.934534250974469e-06, "loss": 18.5312, "step": 4287 }, { "epoch": 0.2847844856213057, "grad_norm": 262.8782653808594, "learning_rate": 1.934495970710598e-06, "loss": 27.4375, "step": 4288 }, { "epoch": 0.2848508999136614, "grad_norm": 605.3619995117188, "learning_rate": 1.9344576796370164e-06, "loss": 21.9062, "step": 4289 }, { "epoch": 0.2849173142060171, "grad_norm": 170.61572265625, "learning_rate": 1.934419377754167e-06, "loss": 16.3438, "step": 4290 }, { "epoch": 0.28498372849837283, "grad_norm": 155.9854736328125, "learning_rate": 1.934381065062493e-06, "loss": 16.7969, "step": 4291 }, { "epoch": 0.28505014279072854, "grad_norm": 169.16835021972656, "learning_rate": 1.934342741562438e-06, "loss": 19.0469, "step": 4292 }, { "epoch": 0.28511655708308425, "grad_norm": 201.27745056152344, "learning_rate": 1.934304407254445e-06, "loss": 19.8438, "step": 4293 }, { "epoch": 0.28518297137544, "grad_norm": 341.589599609375, "learning_rate": 1.9342660621389572e-06, "loss": 21.125, "step": 4294 }, { "epoch": 0.28524938566779573, "grad_norm": 741.393310546875, "learning_rate": 1.9342277062164186e-06, "loss": 15.5781, "step": 4295 }, { "epoch": 0.28531579996015144, "grad_norm": 318.21136474609375, "learning_rate": 1.9341893394872722e-06, "loss": 19.9219, "step": 4296 }, { "epoch": 0.28538221425250715, "grad_norm": 1125.07666015625, "learning_rate": 1.934150961951962e-06, "loss": 17.2188, "step": 4297 }, { "epoch": 0.28544862854486286, "grad_norm": 169.478271484375, "learning_rate": 1.9341125736109327e-06, "loss": 20.2344, "step": 4298 }, { "epoch": 0.28551504283721857, "grad_norm": 248.76390075683594, "learning_rate": 1.9340741744646276e-06, "loss": 16.4688, "step": 4299 }, { "epoch": 0.2855814571295743, "grad_norm": 210.9119415283203, "learning_rate": 1.934035764513491e-06, "loss": 22.2344, "step": 4300 }, { "epoch": 0.28564787142193, "grad_norm": 689.0194702148438, "learning_rate": 1.9339973437579673e-06, "loss": 18.5312, "step": 4301 }, { "epoch": 0.2857142857142857, "grad_norm": 157.57574462890625, "learning_rate": 1.933958912198501e-06, "loss": 16.1094, "step": 4302 }, { "epoch": 0.2857807000066414, "grad_norm": 193.3306121826172, "learning_rate": 1.9339204698355364e-06, "loss": 20.1875, "step": 4303 }, { "epoch": 0.2858471142989971, "grad_norm": 239.48716735839844, "learning_rate": 1.9338820166695184e-06, "loss": 25.3906, "step": 4304 }, { "epoch": 0.2859135285913529, "grad_norm": 462.75628662109375, "learning_rate": 1.9338435527008914e-06, "loss": 19.9375, "step": 4305 }, { "epoch": 0.2859799428837086, "grad_norm": 401.1817626953125, "learning_rate": 1.9338050779301016e-06, "loss": 17.0156, "step": 4306 }, { "epoch": 0.2860463571760643, "grad_norm": 190.10476684570312, "learning_rate": 1.9337665923575923e-06, "loss": 18.625, "step": 4307 }, { "epoch": 0.28611277146842, "grad_norm": 339.18719482421875, "learning_rate": 1.9337280959838095e-06, "loss": 23.0781, "step": 4308 }, { "epoch": 0.2861791857607757, "grad_norm": 538.592041015625, "learning_rate": 1.9336895888091987e-06, "loss": 18.9062, "step": 4309 }, { "epoch": 0.28624560005313143, "grad_norm": 381.97003173828125, "learning_rate": 1.9336510708342055e-06, "loss": 18.0156, "step": 4310 }, { "epoch": 0.28631201434548714, "grad_norm": 198.49349975585938, "learning_rate": 1.9336125420592745e-06, "loss": 14.3125, "step": 4311 }, { "epoch": 0.28637842863784285, "grad_norm": 291.44171142578125, "learning_rate": 1.933574002484852e-06, "loss": 15.8906, "step": 4312 }, { "epoch": 0.28644484293019856, "grad_norm": 172.29794311523438, "learning_rate": 1.933535452111384e-06, "loss": 26.5625, "step": 4313 }, { "epoch": 0.28651125722255427, "grad_norm": 204.76930236816406, "learning_rate": 1.9334968909393165e-06, "loss": 17.2969, "step": 4314 }, { "epoch": 0.28657767151491, "grad_norm": 272.2844543457031, "learning_rate": 1.9334583189690946e-06, "loss": 19.4688, "step": 4315 }, { "epoch": 0.28664408580726575, "grad_norm": 194.80545043945312, "learning_rate": 1.9334197362011657e-06, "loss": 16.5, "step": 4316 }, { "epoch": 0.28671050009962146, "grad_norm": 200.5756072998047, "learning_rate": 1.933381142635975e-06, "loss": 18.7656, "step": 4317 }, { "epoch": 0.28677691439197717, "grad_norm": 319.8052673339844, "learning_rate": 1.93334253827397e-06, "loss": 22.7344, "step": 4318 }, { "epoch": 0.2868433286843329, "grad_norm": 634.3984375, "learning_rate": 1.9333039231155964e-06, "loss": 15.5234, "step": 4319 }, { "epoch": 0.2869097429766886, "grad_norm": 186.27586364746094, "learning_rate": 1.9332652971613013e-06, "loss": 17.3438, "step": 4320 }, { "epoch": 0.2869761572690443, "grad_norm": 431.32611083984375, "learning_rate": 1.933226660411531e-06, "loss": 23.5781, "step": 4321 }, { "epoch": 0.2870425715614, "grad_norm": 819.3129272460938, "learning_rate": 1.9331880128667333e-06, "loss": 15.4844, "step": 4322 }, { "epoch": 0.2871089858537557, "grad_norm": 180.16819763183594, "learning_rate": 1.9331493545273545e-06, "loss": 14.875, "step": 4323 }, { "epoch": 0.2871754001461114, "grad_norm": 338.8387145996094, "learning_rate": 1.9331106853938427e-06, "loss": 21.7188, "step": 4324 }, { "epoch": 0.28724181443846714, "grad_norm": 200.72845458984375, "learning_rate": 1.9330720054666437e-06, "loss": 25.3438, "step": 4325 }, { "epoch": 0.28730822873082285, "grad_norm": 282.881103515625, "learning_rate": 1.9330333147462063e-06, "loss": 20.8125, "step": 4326 }, { "epoch": 0.2873746430231786, "grad_norm": 400.4983215332031, "learning_rate": 1.9329946132329775e-06, "loss": 22.0938, "step": 4327 }, { "epoch": 0.2874410573155343, "grad_norm": 295.89373779296875, "learning_rate": 1.9329559009274046e-06, "loss": 22.7969, "step": 4328 }, { "epoch": 0.28750747160789003, "grad_norm": 135.64402770996094, "learning_rate": 1.932917177829936e-06, "loss": 18.375, "step": 4329 }, { "epoch": 0.28757388590024574, "grad_norm": 222.9623565673828, "learning_rate": 1.9328784439410197e-06, "loss": 28.9375, "step": 4330 }, { "epoch": 0.28764030019260145, "grad_norm": 190.5368194580078, "learning_rate": 1.932839699261103e-06, "loss": 26.8438, "step": 4331 }, { "epoch": 0.28770671448495716, "grad_norm": 182.9170684814453, "learning_rate": 1.932800943790635e-06, "loss": 20.7969, "step": 4332 }, { "epoch": 0.28777312877731287, "grad_norm": 173.085205078125, "learning_rate": 1.9327621775300633e-06, "loss": 18.5938, "step": 4333 }, { "epoch": 0.2878395430696686, "grad_norm": 346.011962890625, "learning_rate": 1.932723400479837e-06, "loss": 18.5938, "step": 4334 }, { "epoch": 0.2879059573620243, "grad_norm": 161.74807739257812, "learning_rate": 1.932684612640404e-06, "loss": 19.2969, "step": 4335 }, { "epoch": 0.28797237165438, "grad_norm": 169.63221740722656, "learning_rate": 1.9326458140122133e-06, "loss": 19.2656, "step": 4336 }, { "epoch": 0.28803878594673576, "grad_norm": 303.5545654296875, "learning_rate": 1.9326070045957135e-06, "loss": 16.7969, "step": 4337 }, { "epoch": 0.2881052002390915, "grad_norm": 803.3317260742188, "learning_rate": 1.9325681843913535e-06, "loss": 20.7188, "step": 4338 }, { "epoch": 0.2881716145314472, "grad_norm": 271.4066467285156, "learning_rate": 1.9325293533995825e-06, "loss": 19.625, "step": 4339 }, { "epoch": 0.2882380288238029, "grad_norm": 357.91534423828125, "learning_rate": 1.9324905116208503e-06, "loss": 23.5312, "step": 4340 }, { "epoch": 0.2883044431161586, "grad_norm": 182.51136779785156, "learning_rate": 1.9324516590556045e-06, "loss": 17.2031, "step": 4341 }, { "epoch": 0.2883708574085143, "grad_norm": 208.41085815429688, "learning_rate": 1.9324127957042963e-06, "loss": 16.0156, "step": 4342 }, { "epoch": 0.28843727170087, "grad_norm": 209.81344604492188, "learning_rate": 1.9323739215673745e-06, "loss": 21.6406, "step": 4343 }, { "epoch": 0.28850368599322573, "grad_norm": 177.98912048339844, "learning_rate": 1.9323350366452885e-06, "loss": 25.0625, "step": 4344 }, { "epoch": 0.28857010028558144, "grad_norm": 132.6653594970703, "learning_rate": 1.932296140938489e-06, "loss": 17.6562, "step": 4345 }, { "epoch": 0.28863651457793715, "grad_norm": 205.28858947753906, "learning_rate": 1.932257234447425e-06, "loss": 17.0469, "step": 4346 }, { "epoch": 0.28870292887029286, "grad_norm": 345.3501892089844, "learning_rate": 1.9322183171725467e-06, "loss": 23.7812, "step": 4347 }, { "epoch": 0.28876934316264863, "grad_norm": 133.60983276367188, "learning_rate": 1.9321793891143045e-06, "loss": 16.4375, "step": 4348 }, { "epoch": 0.28883575745500434, "grad_norm": 303.7685852050781, "learning_rate": 1.9321404502731487e-06, "loss": 23.0312, "step": 4349 }, { "epoch": 0.28890217174736005, "grad_norm": 478.2383117675781, "learning_rate": 1.93210150064953e-06, "loss": 22.8906, "step": 4350 }, { "epoch": 0.28896858603971576, "grad_norm": 173.18643188476562, "learning_rate": 1.9320625402438982e-06, "loss": 20.6562, "step": 4351 }, { "epoch": 0.28903500033207147, "grad_norm": 113.82952880859375, "learning_rate": 1.9320235690567043e-06, "loss": 17.8438, "step": 4352 }, { "epoch": 0.2891014146244272, "grad_norm": 134.3385772705078, "learning_rate": 1.9319845870883993e-06, "loss": 21.6406, "step": 4353 }, { "epoch": 0.2891678289167829, "grad_norm": 344.5055236816406, "learning_rate": 1.9319455943394346e-06, "loss": 22.7031, "step": 4354 }, { "epoch": 0.2892342432091386, "grad_norm": 375.61541748046875, "learning_rate": 1.93190659081026e-06, "loss": 20.0781, "step": 4355 }, { "epoch": 0.2893006575014943, "grad_norm": 292.48114013671875, "learning_rate": 1.9318675765013277e-06, "loss": 21.4688, "step": 4356 }, { "epoch": 0.28936707179385, "grad_norm": 160.0304412841797, "learning_rate": 1.9318285514130884e-06, "loss": 18.9531, "step": 4357 }, { "epoch": 0.2894334860862057, "grad_norm": 151.2272186279297, "learning_rate": 1.9317895155459937e-06, "loss": 19.6875, "step": 4358 }, { "epoch": 0.2894999003785615, "grad_norm": 164.5230255126953, "learning_rate": 1.9317504689004953e-06, "loss": 13.8906, "step": 4359 }, { "epoch": 0.2895663146709172, "grad_norm": 189.36756896972656, "learning_rate": 1.9317114114770448e-06, "loss": 24.2812, "step": 4360 }, { "epoch": 0.2896327289632729, "grad_norm": 141.56182861328125, "learning_rate": 1.9316723432760936e-06, "loss": 14.8125, "step": 4361 }, { "epoch": 0.2896991432556286, "grad_norm": 170.52407836914062, "learning_rate": 1.9316332642980944e-06, "loss": 18.8438, "step": 4362 }, { "epoch": 0.28976555754798433, "grad_norm": 316.26104736328125, "learning_rate": 1.931594174543499e-06, "loss": 16.9531, "step": 4363 }, { "epoch": 0.28983197184034004, "grad_norm": 320.0516662597656, "learning_rate": 1.9315550740127586e-06, "loss": 14.9219, "step": 4364 }, { "epoch": 0.28989838613269575, "grad_norm": 268.61273193359375, "learning_rate": 1.9315159627063265e-06, "loss": 19.8594, "step": 4365 }, { "epoch": 0.28996480042505146, "grad_norm": 345.4055480957031, "learning_rate": 1.9314768406246554e-06, "loss": 29.4062, "step": 4366 }, { "epoch": 0.29003121471740717, "grad_norm": 283.64251708984375, "learning_rate": 1.931437707768197e-06, "loss": 19.3125, "step": 4367 }, { "epoch": 0.2900976290097629, "grad_norm": 385.9671630859375, "learning_rate": 1.931398564137404e-06, "loss": 16.0469, "step": 4368 }, { "epoch": 0.2901640433021186, "grad_norm": 157.49212646484375, "learning_rate": 1.93135940973273e-06, "loss": 17.4062, "step": 4369 }, { "epoch": 0.29023045759447436, "grad_norm": 404.06903076171875, "learning_rate": 1.9313202445546272e-06, "loss": 13.4062, "step": 4370 }, { "epoch": 0.29029687188683007, "grad_norm": 516.7296752929688, "learning_rate": 1.9312810686035486e-06, "loss": 19.5938, "step": 4371 }, { "epoch": 0.2903632861791858, "grad_norm": 425.3343505859375, "learning_rate": 1.9312418818799482e-06, "loss": 24.0312, "step": 4372 }, { "epoch": 0.2904297004715415, "grad_norm": 245.4818115234375, "learning_rate": 1.9312026843842783e-06, "loss": 20.6875, "step": 4373 }, { "epoch": 0.2904961147638972, "grad_norm": 348.7431335449219, "learning_rate": 1.9311634761169925e-06, "loss": 16.3594, "step": 4374 }, { "epoch": 0.2905625290562529, "grad_norm": 286.72381591796875, "learning_rate": 1.9311242570785448e-06, "loss": 21.75, "step": 4375 }, { "epoch": 0.2906289433486086, "grad_norm": 161.1536865234375, "learning_rate": 1.9310850272693887e-06, "loss": 17.9844, "step": 4376 }, { "epoch": 0.2906953576409643, "grad_norm": 383.540771484375, "learning_rate": 1.9310457866899775e-06, "loss": 25.75, "step": 4377 }, { "epoch": 0.29076177193332003, "grad_norm": 243.4317169189453, "learning_rate": 1.931006535340766e-06, "loss": 24.5312, "step": 4378 }, { "epoch": 0.29082818622567574, "grad_norm": 801.9254150390625, "learning_rate": 1.9309672732222077e-06, "loss": 35.3281, "step": 4379 }, { "epoch": 0.29089460051803145, "grad_norm": 479.87689208984375, "learning_rate": 1.9309280003347567e-06, "loss": 15.8906, "step": 4380 }, { "epoch": 0.2909610148103872, "grad_norm": 171.29588317871094, "learning_rate": 1.9308887166788674e-06, "loss": 19.9375, "step": 4381 }, { "epoch": 0.29102742910274293, "grad_norm": 517.8677978515625, "learning_rate": 1.930849422254994e-06, "loss": 26.9219, "step": 4382 }, { "epoch": 0.29109384339509864, "grad_norm": 376.439697265625, "learning_rate": 1.9308101170635914e-06, "loss": 18.7031, "step": 4383 }, { "epoch": 0.29116025768745435, "grad_norm": 306.8897399902344, "learning_rate": 1.930770801105114e-06, "loss": 17.1719, "step": 4384 }, { "epoch": 0.29122667197981006, "grad_norm": 179.57125854492188, "learning_rate": 1.930731474380017e-06, "loss": 15.125, "step": 4385 }, { "epoch": 0.29129308627216577, "grad_norm": 296.40728759765625, "learning_rate": 1.9306921368887547e-06, "loss": 20.6562, "step": 4386 }, { "epoch": 0.2913595005645215, "grad_norm": 204.76400756835938, "learning_rate": 1.9306527886317827e-06, "loss": 19.0312, "step": 4387 }, { "epoch": 0.2914259148568772, "grad_norm": 165.34005737304688, "learning_rate": 1.9306134296095555e-06, "loss": 21.125, "step": 4388 }, { "epoch": 0.2914923291492329, "grad_norm": 221.2344512939453, "learning_rate": 1.930574059822529e-06, "loss": 15.5625, "step": 4389 }, { "epoch": 0.2915587434415886, "grad_norm": 295.6076354980469, "learning_rate": 1.9305346792711586e-06, "loss": 24.9219, "step": 4390 }, { "epoch": 0.2916251577339443, "grad_norm": 238.8423309326172, "learning_rate": 1.930495287955899e-06, "loss": 20.6562, "step": 4391 }, { "epoch": 0.2916915720263001, "grad_norm": 485.73980712890625, "learning_rate": 1.9304558858772067e-06, "loss": 26.1406, "step": 4392 }, { "epoch": 0.2917579863186558, "grad_norm": 125.0356216430664, "learning_rate": 1.9304164730355374e-06, "loss": 17.375, "step": 4393 }, { "epoch": 0.2918244006110115, "grad_norm": 323.5140686035156, "learning_rate": 1.930377049431347e-06, "loss": 20.9688, "step": 4394 }, { "epoch": 0.2918908149033672, "grad_norm": 166.59622192382812, "learning_rate": 1.9303376150650912e-06, "loss": 15.5938, "step": 4395 }, { "epoch": 0.2919572291957229, "grad_norm": 368.14642333984375, "learning_rate": 1.9302981699372263e-06, "loss": 26.1875, "step": 4396 }, { "epoch": 0.29202364348807863, "grad_norm": 283.3049621582031, "learning_rate": 1.930258714048209e-06, "loss": 20.5, "step": 4397 }, { "epoch": 0.29209005778043434, "grad_norm": 116.37454223632812, "learning_rate": 1.9302192473984945e-06, "loss": 14.9219, "step": 4398 }, { "epoch": 0.29215647207279005, "grad_norm": 166.13540649414062, "learning_rate": 1.9301797699885408e-06, "loss": 22.375, "step": 4399 }, { "epoch": 0.29222288636514576, "grad_norm": 486.8370056152344, "learning_rate": 1.9301402818188033e-06, "loss": 28.1875, "step": 4400 }, { "epoch": 0.29228930065750147, "grad_norm": 169.36595153808594, "learning_rate": 1.93010078288974e-06, "loss": 20.3594, "step": 4401 }, { "epoch": 0.2923557149498572, "grad_norm": 249.15567016601562, "learning_rate": 1.9300612732018073e-06, "loss": 20.125, "step": 4402 }, { "epoch": 0.29242212924221295, "grad_norm": 344.5645751953125, "learning_rate": 1.9300217527554613e-06, "loss": 28.1562, "step": 4403 }, { "epoch": 0.29248854353456866, "grad_norm": 187.19236755371094, "learning_rate": 1.9299822215511605e-06, "loss": 20.2031, "step": 4404 }, { "epoch": 0.29255495782692437, "grad_norm": 631.9411010742188, "learning_rate": 1.9299426795893616e-06, "loss": 29.0625, "step": 4405 }, { "epoch": 0.2926213721192801, "grad_norm": 224.24436950683594, "learning_rate": 1.9299031268705216e-06, "loss": 14.875, "step": 4406 }, { "epoch": 0.2926877864116358, "grad_norm": 236.88330078125, "learning_rate": 1.929863563395099e-06, "loss": 16.9688, "step": 4407 }, { "epoch": 0.2927542007039915, "grad_norm": 146.5513916015625, "learning_rate": 1.9298239891635504e-06, "loss": 20.2188, "step": 4408 }, { "epoch": 0.2928206149963472, "grad_norm": 303.27117919921875, "learning_rate": 1.9297844041763344e-06, "loss": 19.6875, "step": 4409 }, { "epoch": 0.2928870292887029, "grad_norm": 549.6415405273438, "learning_rate": 1.9297448084339086e-06, "loss": 27.8438, "step": 4410 }, { "epoch": 0.2929534435810586, "grad_norm": 412.7436828613281, "learning_rate": 1.9297052019367304e-06, "loss": 17.4531, "step": 4411 }, { "epoch": 0.29301985787341434, "grad_norm": 207.0309295654297, "learning_rate": 1.929665584685259e-06, "loss": 17.6875, "step": 4412 }, { "epoch": 0.2930862721657701, "grad_norm": 162.6040802001953, "learning_rate": 1.929625956679952e-06, "loss": 21.875, "step": 4413 }, { "epoch": 0.2931526864581258, "grad_norm": 195.44570922851562, "learning_rate": 1.9295863179212685e-06, "loss": 20.2344, "step": 4414 }, { "epoch": 0.2932191007504815, "grad_norm": 168.65841674804688, "learning_rate": 1.929546668409666e-06, "loss": 15.5938, "step": 4415 }, { "epoch": 0.29328551504283723, "grad_norm": 726.178466796875, "learning_rate": 1.9295070081456036e-06, "loss": 36.3438, "step": 4416 }, { "epoch": 0.29335192933519294, "grad_norm": 262.28338623046875, "learning_rate": 1.9294673371295405e-06, "loss": 18.5, "step": 4417 }, { "epoch": 0.29341834362754865, "grad_norm": 177.4783935546875, "learning_rate": 1.9294276553619352e-06, "loss": 15.6562, "step": 4418 }, { "epoch": 0.29348475791990436, "grad_norm": 275.8658447265625, "learning_rate": 1.929387962843246e-06, "loss": 18.6406, "step": 4419 }, { "epoch": 0.29355117221226007, "grad_norm": 166.89303588867188, "learning_rate": 1.9293482595739336e-06, "loss": 20.4844, "step": 4420 }, { "epoch": 0.2936175865046158, "grad_norm": 160.12811279296875, "learning_rate": 1.929308545554456e-06, "loss": 17.7188, "step": 4421 }, { "epoch": 0.2936840007969715, "grad_norm": 164.7672119140625, "learning_rate": 1.9292688207852735e-06, "loss": 22.5781, "step": 4422 }, { "epoch": 0.2937504150893272, "grad_norm": 154.67457580566406, "learning_rate": 1.9292290852668446e-06, "loss": 17.1562, "step": 4423 }, { "epoch": 0.29381682938168296, "grad_norm": 197.71743774414062, "learning_rate": 1.9291893389996295e-06, "loss": 15.5469, "step": 4424 }, { "epoch": 0.2938832436740387, "grad_norm": 206.1866455078125, "learning_rate": 1.929149581984088e-06, "loss": 17.5156, "step": 4425 }, { "epoch": 0.2939496579663944, "grad_norm": 164.23289489746094, "learning_rate": 1.92910981422068e-06, "loss": 19.2031, "step": 4426 }, { "epoch": 0.2940160722587501, "grad_norm": 185.23704528808594, "learning_rate": 1.9290700357098654e-06, "loss": 24.75, "step": 4427 }, { "epoch": 0.2940824865511058, "grad_norm": 203.204345703125, "learning_rate": 1.9290302464521046e-06, "loss": 15.25, "step": 4428 }, { "epoch": 0.2941489008434615, "grad_norm": 169.3656005859375, "learning_rate": 1.928990446447857e-06, "loss": 22.875, "step": 4429 }, { "epoch": 0.2942153151358172, "grad_norm": 151.93138122558594, "learning_rate": 1.9289506356975844e-06, "loss": 16.3281, "step": 4430 }, { "epoch": 0.29428172942817293, "grad_norm": 397.73187255859375, "learning_rate": 1.928910814201746e-06, "loss": 14.2188, "step": 4431 }, { "epoch": 0.29434814372052864, "grad_norm": 205.22854614257812, "learning_rate": 1.9288709819608025e-06, "loss": 21.8438, "step": 4432 }, { "epoch": 0.29441455801288435, "grad_norm": 313.89178466796875, "learning_rate": 1.928831138975216e-06, "loss": 15.5312, "step": 4433 }, { "epoch": 0.29448097230524006, "grad_norm": 240.7315216064453, "learning_rate": 1.928791285245446e-06, "loss": 29.5625, "step": 4434 }, { "epoch": 0.29454738659759583, "grad_norm": 705.2031860351562, "learning_rate": 1.928751420771954e-06, "loss": 20.9375, "step": 4435 }, { "epoch": 0.29461380088995154, "grad_norm": 159.23353576660156, "learning_rate": 1.9287115455552014e-06, "loss": 13.625, "step": 4436 }, { "epoch": 0.29468021518230725, "grad_norm": 145.82614135742188, "learning_rate": 1.928671659595649e-06, "loss": 19.2656, "step": 4437 }, { "epoch": 0.29474662947466296, "grad_norm": 230.32797241210938, "learning_rate": 1.928631762893758e-06, "loss": 17.3281, "step": 4438 }, { "epoch": 0.29481304376701867, "grad_norm": 457.24896240234375, "learning_rate": 1.9285918554499907e-06, "loss": 24.25, "step": 4439 }, { "epoch": 0.2948794580593744, "grad_norm": 287.6599426269531, "learning_rate": 1.928551937264808e-06, "loss": 22.5938, "step": 4440 }, { "epoch": 0.2949458723517301, "grad_norm": 145.160400390625, "learning_rate": 1.928512008338672e-06, "loss": 17.1406, "step": 4441 }, { "epoch": 0.2950122866440858, "grad_norm": 237.55735778808594, "learning_rate": 1.9284720686720446e-06, "loss": 21.9062, "step": 4442 }, { "epoch": 0.2950787009364415, "grad_norm": 182.57640075683594, "learning_rate": 1.9284321182653876e-06, "loss": 18.9219, "step": 4443 }, { "epoch": 0.2951451152287972, "grad_norm": 213.38180541992188, "learning_rate": 1.9283921571191634e-06, "loss": 24.2188, "step": 4444 }, { "epoch": 0.2952115295211529, "grad_norm": 262.27606201171875, "learning_rate": 1.9283521852338337e-06, "loss": 26.25, "step": 4445 }, { "epoch": 0.2952779438135087, "grad_norm": 203.00457763671875, "learning_rate": 1.928312202609862e-06, "loss": 32.9844, "step": 4446 }, { "epoch": 0.2953443581058644, "grad_norm": 415.41790771484375, "learning_rate": 1.928272209247709e-06, "loss": 18.1719, "step": 4447 }, { "epoch": 0.2954107723982201, "grad_norm": 393.4632568359375, "learning_rate": 1.928232205147839e-06, "loss": 19.7656, "step": 4448 }, { "epoch": 0.2954771866905758, "grad_norm": 684.1854858398438, "learning_rate": 1.928192190310714e-06, "loss": 25.8125, "step": 4449 }, { "epoch": 0.29554360098293153, "grad_norm": 246.64442443847656, "learning_rate": 1.928152164736797e-06, "loss": 16.5156, "step": 4450 }, { "epoch": 0.29561001527528724, "grad_norm": 218.57659912109375, "learning_rate": 1.9281121284265507e-06, "loss": 17.1406, "step": 4451 }, { "epoch": 0.29567642956764295, "grad_norm": 296.4888916015625, "learning_rate": 1.9280720813804386e-06, "loss": 19.4531, "step": 4452 }, { "epoch": 0.29574284385999866, "grad_norm": 213.97129821777344, "learning_rate": 1.928032023598924e-06, "loss": 16.4844, "step": 4453 }, { "epoch": 0.29580925815235437, "grad_norm": 231.20458984375, "learning_rate": 1.92799195508247e-06, "loss": 22.25, "step": 4454 }, { "epoch": 0.2958756724447101, "grad_norm": 246.19583129882812, "learning_rate": 1.9279518758315403e-06, "loss": 17.3438, "step": 4455 }, { "epoch": 0.2959420867370658, "grad_norm": 296.67144775390625, "learning_rate": 1.9279117858465985e-06, "loss": 22.7656, "step": 4456 }, { "epoch": 0.29600850102942156, "grad_norm": 526.5921020507812, "learning_rate": 1.9278716851281077e-06, "loss": 18.4219, "step": 4457 }, { "epoch": 0.29607491532177727, "grad_norm": 155.1764373779297, "learning_rate": 1.9278315736765325e-06, "loss": 21.2812, "step": 4458 }, { "epoch": 0.296141329614133, "grad_norm": 229.98765563964844, "learning_rate": 1.9277914514923366e-06, "loss": 16.2031, "step": 4459 }, { "epoch": 0.2962077439064887, "grad_norm": 238.33456420898438, "learning_rate": 1.9277513185759843e-06, "loss": 23.2969, "step": 4460 }, { "epoch": 0.2962741581988444, "grad_norm": 221.6703338623047, "learning_rate": 1.9277111749279397e-06, "loss": 15.9844, "step": 4461 }, { "epoch": 0.2963405724912001, "grad_norm": 410.059326171875, "learning_rate": 1.9276710205486674e-06, "loss": 24.25, "step": 4462 }, { "epoch": 0.2964069867835558, "grad_norm": 281.82574462890625, "learning_rate": 1.9276308554386316e-06, "loss": 23.4219, "step": 4463 }, { "epoch": 0.2964734010759115, "grad_norm": 126.17001342773438, "learning_rate": 1.9275906795982964e-06, "loss": 13.9688, "step": 4464 }, { "epoch": 0.29653981536826723, "grad_norm": 389.0433349609375, "learning_rate": 1.9275504930281277e-06, "loss": 26.6875, "step": 4465 }, { "epoch": 0.29660622966062294, "grad_norm": 308.2444763183594, "learning_rate": 1.9275102957285897e-06, "loss": 21.3281, "step": 4466 }, { "epoch": 0.29667264395297865, "grad_norm": 277.8482360839844, "learning_rate": 1.9274700877001475e-06, "loss": 14.9375, "step": 4467 }, { "epoch": 0.2967390582453344, "grad_norm": 313.34222412109375, "learning_rate": 1.927429868943266e-06, "loss": 17.0, "step": 4468 }, { "epoch": 0.29680547253769013, "grad_norm": 173.26791381835938, "learning_rate": 1.92738963945841e-06, "loss": 22.3906, "step": 4469 }, { "epoch": 0.29687188683004584, "grad_norm": 237.44851684570312, "learning_rate": 1.927349399246046e-06, "loss": 18.9844, "step": 4470 }, { "epoch": 0.29693830112240155, "grad_norm": 255.66506958007812, "learning_rate": 1.927309148306639e-06, "loss": 16.8906, "step": 4471 }, { "epoch": 0.29700471541475726, "grad_norm": 743.155517578125, "learning_rate": 1.9272688866406544e-06, "loss": 24.8438, "step": 4472 }, { "epoch": 0.29707112970711297, "grad_norm": 157.4265899658203, "learning_rate": 1.9272286142485578e-06, "loss": 19.1406, "step": 4473 }, { "epoch": 0.2971375439994687, "grad_norm": 283.31719970703125, "learning_rate": 1.9271883311308154e-06, "loss": 20.2344, "step": 4474 }, { "epoch": 0.2972039582918244, "grad_norm": 258.934326171875, "learning_rate": 1.927148037287893e-06, "loss": 21.4219, "step": 4475 }, { "epoch": 0.2972703725841801, "grad_norm": 159.34735107421875, "learning_rate": 1.927107732720257e-06, "loss": 17.4609, "step": 4476 }, { "epoch": 0.2973367868765358, "grad_norm": 234.39761352539062, "learning_rate": 1.927067417428373e-06, "loss": 16.875, "step": 4477 }, { "epoch": 0.2974032011688915, "grad_norm": 340.9475402832031, "learning_rate": 1.9270270914127075e-06, "loss": 18.1562, "step": 4478 }, { "epoch": 0.2974696154612473, "grad_norm": 349.5927429199219, "learning_rate": 1.9269867546737274e-06, "loss": 20.2812, "step": 4479 }, { "epoch": 0.297536029753603, "grad_norm": 215.83236694335938, "learning_rate": 1.9269464072118993e-06, "loss": 15.5625, "step": 4480 }, { "epoch": 0.2976024440459587, "grad_norm": 260.985595703125, "learning_rate": 1.9269060490276895e-06, "loss": 22.375, "step": 4481 }, { "epoch": 0.2976688583383144, "grad_norm": 213.33238220214844, "learning_rate": 1.926865680121565e-06, "loss": 12.5938, "step": 4482 }, { "epoch": 0.2977352726306701, "grad_norm": 247.42892456054688, "learning_rate": 1.926825300493993e-06, "loss": 19.5156, "step": 4483 }, { "epoch": 0.29780168692302583, "grad_norm": 266.1911315917969, "learning_rate": 1.92678491014544e-06, "loss": 18.1328, "step": 4484 }, { "epoch": 0.29786810121538154, "grad_norm": 257.41400146484375, "learning_rate": 1.9267445090763733e-06, "loss": 18.7031, "step": 4485 }, { "epoch": 0.29793451550773725, "grad_norm": 276.1507873535156, "learning_rate": 1.926704097287261e-06, "loss": 16.6406, "step": 4486 }, { "epoch": 0.29800092980009296, "grad_norm": 154.7196502685547, "learning_rate": 1.92666367477857e-06, "loss": 18.125, "step": 4487 }, { "epoch": 0.29806734409244867, "grad_norm": 212.80934143066406, "learning_rate": 1.9266232415507677e-06, "loss": 14.875, "step": 4488 }, { "epoch": 0.29813375838480444, "grad_norm": 176.6780548095703, "learning_rate": 1.9265827976043225e-06, "loss": 24.125, "step": 4489 }, { "epoch": 0.29820017267716015, "grad_norm": 752.6212768554688, "learning_rate": 1.9265423429397015e-06, "loss": 17.7188, "step": 4490 }, { "epoch": 0.29826658696951586, "grad_norm": 212.23226928710938, "learning_rate": 1.926501877557373e-06, "loss": 17.5938, "step": 4491 }, { "epoch": 0.29833300126187157, "grad_norm": 203.9532928466797, "learning_rate": 1.9264614014578054e-06, "loss": 14.5625, "step": 4492 }, { "epoch": 0.2983994155542273, "grad_norm": 334.5288391113281, "learning_rate": 1.9264209146414663e-06, "loss": 20.875, "step": 4493 }, { "epoch": 0.298465829846583, "grad_norm": 179.83457946777344, "learning_rate": 1.9263804171088237e-06, "loss": 16.7188, "step": 4494 }, { "epoch": 0.2985322441389387, "grad_norm": 281.2572021484375, "learning_rate": 1.9263399088603475e-06, "loss": 18.5, "step": 4495 }, { "epoch": 0.2985986584312944, "grad_norm": 116.93670654296875, "learning_rate": 1.9262993898965046e-06, "loss": 13.4688, "step": 4496 }, { "epoch": 0.2986650727236501, "grad_norm": 646.3325805664062, "learning_rate": 1.926258860217765e-06, "loss": 19.1875, "step": 4497 }, { "epoch": 0.2987314870160058, "grad_norm": 172.82696533203125, "learning_rate": 1.926218319824597e-06, "loss": 20.6406, "step": 4498 }, { "epoch": 0.29879790130836154, "grad_norm": 322.52178955078125, "learning_rate": 1.926177768717469e-06, "loss": 26.3438, "step": 4499 }, { "epoch": 0.2988643156007173, "grad_norm": 459.1292419433594, "learning_rate": 1.9261372068968513e-06, "loss": 21.2188, "step": 4500 }, { "epoch": 0.298930729893073, "grad_norm": 335.8359680175781, "learning_rate": 1.9260966343632123e-06, "loss": 12.6484, "step": 4501 }, { "epoch": 0.2989971441854287, "grad_norm": 156.59886169433594, "learning_rate": 1.9260560511170214e-06, "loss": 19.2656, "step": 4502 }, { "epoch": 0.29906355847778443, "grad_norm": 258.9165344238281, "learning_rate": 1.9260154571587483e-06, "loss": 22.6562, "step": 4503 }, { "epoch": 0.29912997277014014, "grad_norm": 167.74766540527344, "learning_rate": 1.925974852488862e-06, "loss": 21.0625, "step": 4504 }, { "epoch": 0.29919638706249585, "grad_norm": 253.6872100830078, "learning_rate": 1.925934237107833e-06, "loss": 15.9844, "step": 4505 }, { "epoch": 0.29926280135485156, "grad_norm": 191.94309997558594, "learning_rate": 1.92589361101613e-06, "loss": 16.1406, "step": 4506 }, { "epoch": 0.29932921564720727, "grad_norm": 286.5640563964844, "learning_rate": 1.925852974214224e-06, "loss": 15.75, "step": 4507 }, { "epoch": 0.299395629939563, "grad_norm": 259.3753662109375, "learning_rate": 1.9258123267025845e-06, "loss": 15.9688, "step": 4508 }, { "epoch": 0.2994620442319187, "grad_norm": 321.6258544921875, "learning_rate": 1.925771668481682e-06, "loss": 26.5312, "step": 4509 }, { "epoch": 0.2995284585242744, "grad_norm": 114.77167510986328, "learning_rate": 1.9257309995519868e-06, "loss": 14.1797, "step": 4510 }, { "epoch": 0.29959487281663016, "grad_norm": 410.6161804199219, "learning_rate": 1.925690319913969e-06, "loss": 13.4688, "step": 4511 }, { "epoch": 0.2996612871089859, "grad_norm": 348.26361083984375, "learning_rate": 1.9256496295680992e-06, "loss": 18.9062, "step": 4512 }, { "epoch": 0.2997277014013416, "grad_norm": 217.016845703125, "learning_rate": 1.9256089285148487e-06, "loss": 19.0312, "step": 4513 }, { "epoch": 0.2997941156936973, "grad_norm": 161.81887817382812, "learning_rate": 1.9255682167546874e-06, "loss": 15.4844, "step": 4514 }, { "epoch": 0.299860529986053, "grad_norm": 126.26803588867188, "learning_rate": 1.925527494288087e-06, "loss": 17.2812, "step": 4515 }, { "epoch": 0.2999269442784087, "grad_norm": 109.61760711669922, "learning_rate": 1.9254867611155185e-06, "loss": 16.6406, "step": 4516 }, { "epoch": 0.2999933585707644, "grad_norm": 202.4349365234375, "learning_rate": 1.9254460172374523e-06, "loss": 30.2188, "step": 4517 }, { "epoch": 0.30005977286312013, "grad_norm": 1117.354736328125, "learning_rate": 1.92540526265436e-06, "loss": 14.1562, "step": 4518 }, { "epoch": 0.30012618715547584, "grad_norm": 269.803955078125, "learning_rate": 1.9253644973667137e-06, "loss": 18.7344, "step": 4519 }, { "epoch": 0.30019260144783155, "grad_norm": 334.58892822265625, "learning_rate": 1.925323721374985e-06, "loss": 25.25, "step": 4520 }, { "epoch": 0.30025901574018726, "grad_norm": 329.7898864746094, "learning_rate": 1.925282934679644e-06, "loss": 23.0312, "step": 4521 }, { "epoch": 0.30032543003254303, "grad_norm": 365.62152099609375, "learning_rate": 1.925242137281164e-06, "loss": 21.7188, "step": 4522 }, { "epoch": 0.30039184432489874, "grad_norm": 168.18479919433594, "learning_rate": 1.9252013291800164e-06, "loss": 15.8281, "step": 4523 }, { "epoch": 0.30045825861725445, "grad_norm": 157.0980224609375, "learning_rate": 1.9251605103766738e-06, "loss": 13.0469, "step": 4524 }, { "epoch": 0.30052467290961016, "grad_norm": 243.12542724609375, "learning_rate": 1.9251196808716073e-06, "loss": 15.1562, "step": 4525 }, { "epoch": 0.30059108720196587, "grad_norm": 145.21461486816406, "learning_rate": 1.92507884066529e-06, "loss": 16.4688, "step": 4526 }, { "epoch": 0.3006575014943216, "grad_norm": 455.8394775390625, "learning_rate": 1.9250379897581943e-06, "loss": 20.8125, "step": 4527 }, { "epoch": 0.3007239157866773, "grad_norm": 265.53289794921875, "learning_rate": 1.924997128150792e-06, "loss": 21.9219, "step": 4528 }, { "epoch": 0.300790330079033, "grad_norm": 191.0762939453125, "learning_rate": 1.9249562558435567e-06, "loss": 16.75, "step": 4529 }, { "epoch": 0.3008567443713887, "grad_norm": 221.532470703125, "learning_rate": 1.9249153728369607e-06, "loss": 26.375, "step": 4530 }, { "epoch": 0.3009231586637444, "grad_norm": 207.87838745117188, "learning_rate": 1.924874479131477e-06, "loss": 26.1406, "step": 4531 }, { "epoch": 0.3009895729561001, "grad_norm": 381.752197265625, "learning_rate": 1.9248335747275786e-06, "loss": 15.8281, "step": 4532 }, { "epoch": 0.3010559872484559, "grad_norm": 258.04876708984375, "learning_rate": 1.924792659625739e-06, "loss": 18.8281, "step": 4533 }, { "epoch": 0.3011224015408116, "grad_norm": 191.76715087890625, "learning_rate": 1.9247517338264313e-06, "loss": 21.2812, "step": 4534 }, { "epoch": 0.3011888158331673, "grad_norm": 250.84336853027344, "learning_rate": 1.924710797330128e-06, "loss": 21.3438, "step": 4535 }, { "epoch": 0.301255230125523, "grad_norm": 336.1944580078125, "learning_rate": 1.924669850137304e-06, "loss": 26.2188, "step": 4536 }, { "epoch": 0.30132164441787873, "grad_norm": 280.0196228027344, "learning_rate": 1.9246288922484326e-06, "loss": 23.5469, "step": 4537 }, { "epoch": 0.30138805871023444, "grad_norm": 191.51597595214844, "learning_rate": 1.9245879236639867e-06, "loss": 19.4375, "step": 4538 }, { "epoch": 0.30145447300259015, "grad_norm": 345.29376220703125, "learning_rate": 1.924546944384441e-06, "loss": 25.0938, "step": 4539 }, { "epoch": 0.30152088729494586, "grad_norm": 502.3965148925781, "learning_rate": 1.92450595441027e-06, "loss": 21.1719, "step": 4540 }, { "epoch": 0.30158730158730157, "grad_norm": 296.7649841308594, "learning_rate": 1.9244649537419464e-06, "loss": 28.0, "step": 4541 }, { "epoch": 0.3016537158796573, "grad_norm": 143.17828369140625, "learning_rate": 1.9244239423799455e-06, "loss": 13.2031, "step": 4542 }, { "epoch": 0.301720130172013, "grad_norm": 173.00099182128906, "learning_rate": 1.924382920324742e-06, "loss": 20.5312, "step": 4543 }, { "epoch": 0.30178654446436876, "grad_norm": 285.7655944824219, "learning_rate": 1.9243418875768095e-06, "loss": 19.8906, "step": 4544 }, { "epoch": 0.30185295875672447, "grad_norm": 159.16012573242188, "learning_rate": 1.924300844136623e-06, "loss": 19.2031, "step": 4545 }, { "epoch": 0.3019193730490802, "grad_norm": 370.85107421875, "learning_rate": 1.9242597900046574e-06, "loss": 17.4062, "step": 4546 }, { "epoch": 0.3019857873414359, "grad_norm": 207.25318908691406, "learning_rate": 1.9242187251813872e-06, "loss": 18.8125, "step": 4547 }, { "epoch": 0.3020522016337916, "grad_norm": 193.85531616210938, "learning_rate": 1.924177649667288e-06, "loss": 16.5469, "step": 4548 }, { "epoch": 0.3021186159261473, "grad_norm": 172.96676635742188, "learning_rate": 1.924136563462835e-06, "loss": 20.4062, "step": 4549 }, { "epoch": 0.302185030218503, "grad_norm": 269.9049987792969, "learning_rate": 1.924095466568503e-06, "loss": 27.2656, "step": 4550 }, { "epoch": 0.3022514445108587, "grad_norm": 349.8396911621094, "learning_rate": 1.924054358984767e-06, "loss": 17.3906, "step": 4551 }, { "epoch": 0.30231785880321443, "grad_norm": 213.10116577148438, "learning_rate": 1.9240132407121034e-06, "loss": 31.1719, "step": 4552 }, { "epoch": 0.30238427309557014, "grad_norm": 262.7695617675781, "learning_rate": 1.923972111750987e-06, "loss": 22.1562, "step": 4553 }, { "epoch": 0.30245068738792585, "grad_norm": 225.12574768066406, "learning_rate": 1.9239309721018947e-06, "loss": 20.5625, "step": 4554 }, { "epoch": 0.3025171016802816, "grad_norm": 263.093017578125, "learning_rate": 1.923889821765301e-06, "loss": 19.7812, "step": 4555 }, { "epoch": 0.30258351597263733, "grad_norm": 165.20468139648438, "learning_rate": 1.9238486607416832e-06, "loss": 15.3906, "step": 4556 }, { "epoch": 0.30264993026499304, "grad_norm": 219.7615966796875, "learning_rate": 1.9238074890315165e-06, "loss": 18.6406, "step": 4557 }, { "epoch": 0.30271634455734875, "grad_norm": 415.5739440917969, "learning_rate": 1.9237663066352777e-06, "loss": 22.9375, "step": 4558 }, { "epoch": 0.30278275884970446, "grad_norm": 167.41891479492188, "learning_rate": 1.9237251135534427e-06, "loss": 15.9375, "step": 4559 }, { "epoch": 0.30284917314206017, "grad_norm": 216.2928466796875, "learning_rate": 1.923683909786488e-06, "loss": 14.8906, "step": 4560 }, { "epoch": 0.3029155874344159, "grad_norm": 729.11669921875, "learning_rate": 1.923642695334891e-06, "loss": 20.125, "step": 4561 }, { "epoch": 0.3029820017267716, "grad_norm": 143.78929138183594, "learning_rate": 1.923601470199128e-06, "loss": 14.7969, "step": 4562 }, { "epoch": 0.3030484160191273, "grad_norm": 218.9643096923828, "learning_rate": 1.923560234379675e-06, "loss": 18.3906, "step": 4563 }, { "epoch": 0.303114830311483, "grad_norm": 184.5767822265625, "learning_rate": 1.9235189878770105e-06, "loss": 14.4531, "step": 4564 }, { "epoch": 0.3031812446038388, "grad_norm": 294.2859802246094, "learning_rate": 1.9234777306916102e-06, "loss": 24.7188, "step": 4565 }, { "epoch": 0.3032476588961945, "grad_norm": 238.1768798828125, "learning_rate": 1.9234364628239526e-06, "loss": 21.6562, "step": 4566 }, { "epoch": 0.3033140731885502, "grad_norm": 208.27511596679688, "learning_rate": 1.923395184274514e-06, "loss": 16.0938, "step": 4567 }, { "epoch": 0.3033804874809059, "grad_norm": 208.47991943359375, "learning_rate": 1.9233538950437728e-06, "loss": 20.7969, "step": 4568 }, { "epoch": 0.3034469017732616, "grad_norm": 193.78721618652344, "learning_rate": 1.923312595132206e-06, "loss": 17.0781, "step": 4569 }, { "epoch": 0.3035133160656173, "grad_norm": 203.6952362060547, "learning_rate": 1.9232712845402913e-06, "loss": 19.7656, "step": 4570 }, { "epoch": 0.30357973035797303, "grad_norm": 384.9455261230469, "learning_rate": 1.9232299632685073e-06, "loss": 21.4531, "step": 4571 }, { "epoch": 0.30364614465032874, "grad_norm": 144.99267578125, "learning_rate": 1.923188631317331e-06, "loss": 15.5938, "step": 4572 }, { "epoch": 0.30371255894268445, "grad_norm": 200.8853302001953, "learning_rate": 1.9231472886872413e-06, "loss": 23.25, "step": 4573 }, { "epoch": 0.30377897323504016, "grad_norm": 466.859130859375, "learning_rate": 1.923105935378716e-06, "loss": 15.2812, "step": 4574 }, { "epoch": 0.30384538752739587, "grad_norm": 185.19284057617188, "learning_rate": 1.923064571392233e-06, "loss": 19.1719, "step": 4575 }, { "epoch": 0.30391180181975164, "grad_norm": 134.0297088623047, "learning_rate": 1.923023196728272e-06, "loss": 20.125, "step": 4576 }, { "epoch": 0.30397821611210735, "grad_norm": 129.8243408203125, "learning_rate": 1.9229818113873107e-06, "loss": 15.1406, "step": 4577 }, { "epoch": 0.30404463040446306, "grad_norm": 250.47662353515625, "learning_rate": 1.9229404153698277e-06, "loss": 19.7031, "step": 4578 }, { "epoch": 0.30411104469681877, "grad_norm": 340.07147216796875, "learning_rate": 1.9228990086763025e-06, "loss": 19.1406, "step": 4579 }, { "epoch": 0.3041774589891745, "grad_norm": 183.0340576171875, "learning_rate": 1.9228575913072136e-06, "loss": 16.0781, "step": 4580 }, { "epoch": 0.3042438732815302, "grad_norm": 274.01043701171875, "learning_rate": 1.9228161632630405e-06, "loss": 22.2656, "step": 4581 }, { "epoch": 0.3043102875738859, "grad_norm": 301.8050537109375, "learning_rate": 1.9227747245442617e-06, "loss": 14.3125, "step": 4582 }, { "epoch": 0.3043767018662416, "grad_norm": 255.8999786376953, "learning_rate": 1.9227332751513575e-06, "loss": 17.5391, "step": 4583 }, { "epoch": 0.3044431161585973, "grad_norm": 202.24862670898438, "learning_rate": 1.9226918150848065e-06, "loss": 20.0312, "step": 4584 }, { "epoch": 0.304509530450953, "grad_norm": 1057.6605224609375, "learning_rate": 1.922650344345089e-06, "loss": 18.2344, "step": 4585 }, { "epoch": 0.30457594474330874, "grad_norm": 477.4320983886719, "learning_rate": 1.922608862932684e-06, "loss": 25.3906, "step": 4586 }, { "epoch": 0.3046423590356645, "grad_norm": 273.9984436035156, "learning_rate": 1.9225673708480714e-06, "loss": 20.4688, "step": 4587 }, { "epoch": 0.3047087733280202, "grad_norm": 203.64657592773438, "learning_rate": 1.922525868091732e-06, "loss": 17.3438, "step": 4588 }, { "epoch": 0.3047751876203759, "grad_norm": 265.9499816894531, "learning_rate": 1.9224843546641454e-06, "loss": 24.4688, "step": 4589 }, { "epoch": 0.30484160191273163, "grad_norm": 140.8968963623047, "learning_rate": 1.9224428305657915e-06, "loss": 17.7344, "step": 4590 }, { "epoch": 0.30490801620508734, "grad_norm": 122.9827651977539, "learning_rate": 1.9224012957971505e-06, "loss": 19.1875, "step": 4591 }, { "epoch": 0.30497443049744305, "grad_norm": 434.18572998046875, "learning_rate": 1.9223597503587035e-06, "loss": 21.2188, "step": 4592 }, { "epoch": 0.30504084478979876, "grad_norm": 149.38511657714844, "learning_rate": 1.922318194250931e-06, "loss": 20.5938, "step": 4593 }, { "epoch": 0.30510725908215447, "grad_norm": 188.09976196289062, "learning_rate": 1.922276627474313e-06, "loss": 20.8125, "step": 4594 }, { "epoch": 0.3051736733745102, "grad_norm": 401.205078125, "learning_rate": 1.922235050029331e-06, "loss": 21.2188, "step": 4595 }, { "epoch": 0.3052400876668659, "grad_norm": 171.69384765625, "learning_rate": 1.922193461916466e-06, "loss": 19.3594, "step": 4596 }, { "epoch": 0.3053065019592216, "grad_norm": 126.02193450927734, "learning_rate": 1.922151863136198e-06, "loss": 16.4531, "step": 4597 }, { "epoch": 0.30537291625157736, "grad_norm": 329.0714111328125, "learning_rate": 1.9221102536890096e-06, "loss": 20.5, "step": 4598 }, { "epoch": 0.3054393305439331, "grad_norm": 177.9868927001953, "learning_rate": 1.9220686335753817e-06, "loss": 20.2031, "step": 4599 }, { "epoch": 0.3055057448362888, "grad_norm": 179.4183349609375, "learning_rate": 1.9220270027957954e-06, "loss": 15.9844, "step": 4600 }, { "epoch": 0.3055721591286445, "grad_norm": 906.097900390625, "learning_rate": 1.9219853613507323e-06, "loss": 22.2188, "step": 4601 }, { "epoch": 0.3056385734210002, "grad_norm": 301.0881652832031, "learning_rate": 1.921943709240674e-06, "loss": 19.1875, "step": 4602 }, { "epoch": 0.3057049877133559, "grad_norm": 747.1529541015625, "learning_rate": 1.9219020464661025e-06, "loss": 25.5, "step": 4603 }, { "epoch": 0.3057714020057116, "grad_norm": 194.04832458496094, "learning_rate": 1.9218603730275e-06, "loss": 17.7656, "step": 4604 }, { "epoch": 0.30583781629806733, "grad_norm": 240.0554656982422, "learning_rate": 1.9218186889253483e-06, "loss": 20.0938, "step": 4605 }, { "epoch": 0.30590423059042304, "grad_norm": 620.154052734375, "learning_rate": 1.921776994160129e-06, "loss": 22.4844, "step": 4606 }, { "epoch": 0.30597064488277875, "grad_norm": 371.1567077636719, "learning_rate": 1.921735288732326e-06, "loss": 39.8438, "step": 4607 }, { "epoch": 0.30603705917513446, "grad_norm": 466.77410888671875, "learning_rate": 1.92169357264242e-06, "loss": 18.1719, "step": 4608 }, { "epoch": 0.30610347346749023, "grad_norm": 166.49072265625, "learning_rate": 1.9216518458908943e-06, "loss": 14.8438, "step": 4609 }, { "epoch": 0.30616988775984594, "grad_norm": 253.23361206054688, "learning_rate": 1.9216101084782316e-06, "loss": 12.9922, "step": 4610 }, { "epoch": 0.30623630205220165, "grad_norm": 154.92860412597656, "learning_rate": 1.9215683604049143e-06, "loss": 19.5625, "step": 4611 }, { "epoch": 0.30630271634455736, "grad_norm": 1048.61865234375, "learning_rate": 1.9215266016714256e-06, "loss": 29.0312, "step": 4612 }, { "epoch": 0.30636913063691307, "grad_norm": 214.6060333251953, "learning_rate": 1.921484832278249e-06, "loss": 22.8438, "step": 4613 }, { "epoch": 0.3064355449292688, "grad_norm": 318.82415771484375, "learning_rate": 1.921443052225867e-06, "loss": 16.5781, "step": 4614 }, { "epoch": 0.3065019592216245, "grad_norm": 688.0708618164062, "learning_rate": 1.921401261514763e-06, "loss": 31.4688, "step": 4615 }, { "epoch": 0.3065683735139802, "grad_norm": 347.01361083984375, "learning_rate": 1.9213594601454206e-06, "loss": 21.6562, "step": 4616 }, { "epoch": 0.3066347878063359, "grad_norm": 185.03875732421875, "learning_rate": 1.9213176481183235e-06, "loss": 15.1875, "step": 4617 }, { "epoch": 0.3067012020986916, "grad_norm": 140.76597595214844, "learning_rate": 1.9212758254339545e-06, "loss": 17.0625, "step": 4618 }, { "epoch": 0.3067676163910473, "grad_norm": 226.69618225097656, "learning_rate": 1.9212339920927984e-06, "loss": 18.3281, "step": 4619 }, { "epoch": 0.3068340306834031, "grad_norm": 195.58407592773438, "learning_rate": 1.921192148095339e-06, "loss": 16.2969, "step": 4620 }, { "epoch": 0.3069004449757588, "grad_norm": 327.4925231933594, "learning_rate": 1.9211502934420592e-06, "loss": 30.3438, "step": 4621 }, { "epoch": 0.3069668592681145, "grad_norm": 338.7969665527344, "learning_rate": 1.9211084281334447e-06, "loss": 23.8438, "step": 4622 }, { "epoch": 0.3070332735604702, "grad_norm": 414.6040954589844, "learning_rate": 1.9210665521699787e-06, "loss": 17.5156, "step": 4623 }, { "epoch": 0.30709968785282593, "grad_norm": 243.76905822753906, "learning_rate": 1.9210246655521464e-06, "loss": 18.0938, "step": 4624 }, { "epoch": 0.30716610214518164, "grad_norm": 190.7751007080078, "learning_rate": 1.920982768280431e-06, "loss": 15.0312, "step": 4625 }, { "epoch": 0.30723251643753735, "grad_norm": 384.4833679199219, "learning_rate": 1.9209408603553186e-06, "loss": 23.5156, "step": 4626 }, { "epoch": 0.30729893072989306, "grad_norm": 415.7736511230469, "learning_rate": 1.9208989417772933e-06, "loss": 21.5312, "step": 4627 }, { "epoch": 0.30736534502224877, "grad_norm": 205.85247802734375, "learning_rate": 1.9208570125468403e-06, "loss": 28.5625, "step": 4628 }, { "epoch": 0.3074317593146045, "grad_norm": 460.7137451171875, "learning_rate": 1.920815072664444e-06, "loss": 38.7812, "step": 4629 }, { "epoch": 0.3074981736069602, "grad_norm": 174.153564453125, "learning_rate": 1.92077312213059e-06, "loss": 20.0, "step": 4630 }, { "epoch": 0.30756458789931596, "grad_norm": 134.31565856933594, "learning_rate": 1.920731160945763e-06, "loss": 16.7344, "step": 4631 }, { "epoch": 0.30763100219167167, "grad_norm": 419.52105712890625, "learning_rate": 1.92068918911045e-06, "loss": 17.75, "step": 4632 }, { "epoch": 0.3076974164840274, "grad_norm": 250.3206024169922, "learning_rate": 1.9206472066251343e-06, "loss": 19.125, "step": 4633 }, { "epoch": 0.3077638307763831, "grad_norm": 208.95083618164062, "learning_rate": 1.9206052134903034e-06, "loss": 22.5781, "step": 4634 }, { "epoch": 0.3078302450687388, "grad_norm": 179.43865966796875, "learning_rate": 1.9205632097064413e-06, "loss": 17.6328, "step": 4635 }, { "epoch": 0.3078966593610945, "grad_norm": 311.176025390625, "learning_rate": 1.9205211952740355e-06, "loss": 23.0938, "step": 4636 }, { "epoch": 0.3079630736534502, "grad_norm": 276.2876892089844, "learning_rate": 1.9204791701935713e-06, "loss": 21.4844, "step": 4637 }, { "epoch": 0.3080294879458059, "grad_norm": 124.9112319946289, "learning_rate": 1.920437134465535e-06, "loss": 16.2188, "step": 4638 }, { "epoch": 0.30809590223816163, "grad_norm": 360.1357727050781, "learning_rate": 1.920395088090412e-06, "loss": 16.1875, "step": 4639 }, { "epoch": 0.30816231653051734, "grad_norm": 291.98388671875, "learning_rate": 1.92035303106869e-06, "loss": 22.25, "step": 4640 }, { "epoch": 0.3082287308228731, "grad_norm": 163.00144958496094, "learning_rate": 1.9203109634008547e-06, "loss": 16.2812, "step": 4641 }, { "epoch": 0.3082951451152288, "grad_norm": 306.4732971191406, "learning_rate": 1.9202688850873924e-06, "loss": 19.6562, "step": 4642 }, { "epoch": 0.30836155940758453, "grad_norm": 145.40199279785156, "learning_rate": 1.920226796128791e-06, "loss": 18.7812, "step": 4643 }, { "epoch": 0.30842797369994024, "grad_norm": 230.60858154296875, "learning_rate": 1.9201846965255363e-06, "loss": 28.4844, "step": 4644 }, { "epoch": 0.30849438799229595, "grad_norm": 255.15597534179688, "learning_rate": 1.9201425862781154e-06, "loss": 17.6406, "step": 4645 }, { "epoch": 0.30856080228465166, "grad_norm": 303.800048828125, "learning_rate": 1.920100465387016e-06, "loss": 20.875, "step": 4646 }, { "epoch": 0.30862721657700737, "grad_norm": 208.758544921875, "learning_rate": 1.9200583338527246e-06, "loss": 16.1719, "step": 4647 }, { "epoch": 0.3086936308693631, "grad_norm": 330.4095458984375, "learning_rate": 1.9200161916757292e-06, "loss": 24.9688, "step": 4648 }, { "epoch": 0.3087600451617188, "grad_norm": 171.65414428710938, "learning_rate": 1.919974038856517e-06, "loss": 20.1875, "step": 4649 }, { "epoch": 0.3088264594540745, "grad_norm": 268.9966125488281, "learning_rate": 1.9199318753955755e-06, "loss": 17.1719, "step": 4650 }, { "epoch": 0.3088928737464302, "grad_norm": 449.3890075683594, "learning_rate": 1.919889701293393e-06, "loss": 17.7812, "step": 4651 }, { "epoch": 0.308959288038786, "grad_norm": 236.59915161132812, "learning_rate": 1.9198475165504564e-06, "loss": 29.1875, "step": 4652 }, { "epoch": 0.3090257023311417, "grad_norm": 246.89137268066406, "learning_rate": 1.9198053211672542e-06, "loss": 24.7188, "step": 4653 }, { "epoch": 0.3090921166234974, "grad_norm": 327.1888732910156, "learning_rate": 1.9197631151442746e-06, "loss": 18.0625, "step": 4654 }, { "epoch": 0.3091585309158531, "grad_norm": 201.65216064453125, "learning_rate": 1.9197208984820056e-06, "loss": 21.0312, "step": 4655 }, { "epoch": 0.3092249452082088, "grad_norm": 259.5658264160156, "learning_rate": 1.9196786711809354e-06, "loss": 25.2812, "step": 4656 }, { "epoch": 0.3092913595005645, "grad_norm": 341.7415466308594, "learning_rate": 1.9196364332415528e-06, "loss": 21.5781, "step": 4657 }, { "epoch": 0.30935777379292023, "grad_norm": 128.6463623046875, "learning_rate": 1.919594184664346e-06, "loss": 14.2969, "step": 4658 }, { "epoch": 0.30942418808527594, "grad_norm": 148.4701385498047, "learning_rate": 1.9195519254498044e-06, "loss": 21.0312, "step": 4659 }, { "epoch": 0.30949060237763165, "grad_norm": 442.3111267089844, "learning_rate": 1.919509655598416e-06, "loss": 22.9062, "step": 4660 }, { "epoch": 0.30955701666998736, "grad_norm": 209.86488342285156, "learning_rate": 1.9194673751106703e-06, "loss": 14.9844, "step": 4661 }, { "epoch": 0.30962343096234307, "grad_norm": 143.42491149902344, "learning_rate": 1.919425083987056e-06, "loss": 17.0312, "step": 4662 }, { "epoch": 0.30968984525469884, "grad_norm": 170.38174438476562, "learning_rate": 1.919382782228063e-06, "loss": 18.2188, "step": 4663 }, { "epoch": 0.30975625954705455, "grad_norm": 213.26153564453125, "learning_rate": 1.9193404698341794e-06, "loss": 19.4688, "step": 4664 }, { "epoch": 0.30982267383941026, "grad_norm": 191.0855712890625, "learning_rate": 1.9192981468058958e-06, "loss": 15.7344, "step": 4665 }, { "epoch": 0.30988908813176597, "grad_norm": 194.50523376464844, "learning_rate": 1.919255813143701e-06, "loss": 13.9297, "step": 4666 }, { "epoch": 0.3099555024241217, "grad_norm": 497.5166320800781, "learning_rate": 1.9192134688480853e-06, "loss": 17.7344, "step": 4667 }, { "epoch": 0.3100219167164774, "grad_norm": 208.01373291015625, "learning_rate": 1.9191711139195385e-06, "loss": 23.7188, "step": 4668 }, { "epoch": 0.3100883310088331, "grad_norm": 123.72017669677734, "learning_rate": 1.91912874835855e-06, "loss": 13.5, "step": 4669 }, { "epoch": 0.3101547453011888, "grad_norm": 216.8933563232422, "learning_rate": 1.9190863721656103e-06, "loss": 25.6562, "step": 4670 }, { "epoch": 0.3102211595935445, "grad_norm": 212.86415100097656, "learning_rate": 1.919043985341209e-06, "loss": 17.8438, "step": 4671 }, { "epoch": 0.3102875738859002, "grad_norm": 161.9327850341797, "learning_rate": 1.919001587885837e-06, "loss": 19.4062, "step": 4672 }, { "epoch": 0.31035398817825594, "grad_norm": 186.1427459716797, "learning_rate": 1.9189591797999845e-06, "loss": 20.1406, "step": 4673 }, { "epoch": 0.3104204024706117, "grad_norm": 200.69027709960938, "learning_rate": 1.9189167610841426e-06, "loss": 20.875, "step": 4674 }, { "epoch": 0.3104868167629674, "grad_norm": 245.31626892089844, "learning_rate": 1.918874331738801e-06, "loss": 18.3438, "step": 4675 }, { "epoch": 0.3105532310553231, "grad_norm": 129.65328979492188, "learning_rate": 1.918831891764451e-06, "loss": 16.1562, "step": 4676 }, { "epoch": 0.31061964534767883, "grad_norm": 216.31224060058594, "learning_rate": 1.918789441161584e-06, "loss": 11.9844, "step": 4677 }, { "epoch": 0.31068605964003454, "grad_norm": 165.9893798828125, "learning_rate": 1.91874697993069e-06, "loss": 17.625, "step": 4678 }, { "epoch": 0.31075247393239025, "grad_norm": 144.5074920654297, "learning_rate": 1.918704508072261e-06, "loss": 21.7969, "step": 4679 }, { "epoch": 0.31081888822474596, "grad_norm": 129.22650146484375, "learning_rate": 1.9186620255867877e-06, "loss": 16.4375, "step": 4680 }, { "epoch": 0.31088530251710167, "grad_norm": 172.66604614257812, "learning_rate": 1.9186195324747622e-06, "loss": 20.1875, "step": 4681 }, { "epoch": 0.3109517168094574, "grad_norm": 435.3284912109375, "learning_rate": 1.9185770287366754e-06, "loss": 22.1562, "step": 4682 }, { "epoch": 0.3110181311018131, "grad_norm": 559.0940551757812, "learning_rate": 1.9185345143730193e-06, "loss": 22.6719, "step": 4683 }, { "epoch": 0.3110845453941688, "grad_norm": 1483.7149658203125, "learning_rate": 1.9184919893842857e-06, "loss": 16.2656, "step": 4684 }, { "epoch": 0.31115095968652456, "grad_norm": 183.45419311523438, "learning_rate": 1.9184494537709663e-06, "loss": 19.7344, "step": 4685 }, { "epoch": 0.3112173739788803, "grad_norm": 196.03543090820312, "learning_rate": 1.918406907533553e-06, "loss": 23.4062, "step": 4686 }, { "epoch": 0.311283788271236, "grad_norm": 134.6534423828125, "learning_rate": 1.9183643506725384e-06, "loss": 13.5, "step": 4687 }, { "epoch": 0.3113502025635917, "grad_norm": 412.4930419921875, "learning_rate": 1.9183217831884147e-06, "loss": 22.0312, "step": 4688 }, { "epoch": 0.3114166168559474, "grad_norm": 213.8853759765625, "learning_rate": 1.9182792050816736e-06, "loss": 18.75, "step": 4689 }, { "epoch": 0.3114830311483031, "grad_norm": 304.7738037109375, "learning_rate": 1.918236616352809e-06, "loss": 23.6094, "step": 4690 }, { "epoch": 0.3115494454406588, "grad_norm": 486.496337890625, "learning_rate": 1.918194017002312e-06, "loss": 20.8281, "step": 4691 }, { "epoch": 0.31161585973301453, "grad_norm": 153.6033477783203, "learning_rate": 1.9181514070306767e-06, "loss": 16.9062, "step": 4692 }, { "epoch": 0.31168227402537024, "grad_norm": 170.11814880371094, "learning_rate": 1.918108786438395e-06, "loss": 18.5, "step": 4693 }, { "epoch": 0.31174868831772595, "grad_norm": 251.7075653076172, "learning_rate": 1.91806615522596e-06, "loss": 24.1562, "step": 4694 }, { "epoch": 0.31181510261008166, "grad_norm": 507.8155822753906, "learning_rate": 1.9180235133938654e-06, "loss": 16.5781, "step": 4695 }, { "epoch": 0.31188151690243743, "grad_norm": 295.9973449707031, "learning_rate": 1.9179808609426042e-06, "loss": 22.4688, "step": 4696 }, { "epoch": 0.31194793119479314, "grad_norm": 168.8004150390625, "learning_rate": 1.9179381978726695e-06, "loss": 21.3125, "step": 4697 }, { "epoch": 0.31201434548714885, "grad_norm": 212.54879760742188, "learning_rate": 1.9178955241845555e-06, "loss": 13.25, "step": 4698 }, { "epoch": 0.31208075977950456, "grad_norm": 270.2230529785156, "learning_rate": 1.917852839878755e-06, "loss": 28.7812, "step": 4699 }, { "epoch": 0.31214717407186027, "grad_norm": 167.292724609375, "learning_rate": 1.9178101449557624e-06, "loss": 22.0312, "step": 4700 }, { "epoch": 0.312213588364216, "grad_norm": 118.26651763916016, "learning_rate": 1.917767439416071e-06, "loss": 17.6875, "step": 4701 }, { "epoch": 0.3122800026565717, "grad_norm": 506.0906677246094, "learning_rate": 1.917724723260175e-06, "loss": 25.1719, "step": 4702 }, { "epoch": 0.3123464169489274, "grad_norm": 138.2994842529297, "learning_rate": 1.917681996488569e-06, "loss": 20.8438, "step": 4703 }, { "epoch": 0.3124128312412831, "grad_norm": 247.1670379638672, "learning_rate": 1.9176392591017463e-06, "loss": 23.625, "step": 4704 }, { "epoch": 0.3124792455336388, "grad_norm": 172.30612182617188, "learning_rate": 1.917596511100202e-06, "loss": 14.3594, "step": 4705 }, { "epoch": 0.3125456598259946, "grad_norm": 251.8610076904297, "learning_rate": 1.9175537524844304e-06, "loss": 23.7969, "step": 4706 }, { "epoch": 0.3126120741183503, "grad_norm": 198.01455688476562, "learning_rate": 1.9175109832549264e-06, "loss": 22.9844, "step": 4707 }, { "epoch": 0.312678488410706, "grad_norm": 566.948486328125, "learning_rate": 1.917468203412184e-06, "loss": 29.3125, "step": 4708 }, { "epoch": 0.3127449027030617, "grad_norm": 127.47454071044922, "learning_rate": 1.9174254129566986e-06, "loss": 17.4688, "step": 4709 }, { "epoch": 0.3128113169954174, "grad_norm": 320.92388916015625, "learning_rate": 1.917382611888965e-06, "loss": 17.2344, "step": 4710 }, { "epoch": 0.31287773128777313, "grad_norm": 119.5317153930664, "learning_rate": 1.9173398002094782e-06, "loss": 21.5781, "step": 4711 }, { "epoch": 0.31294414558012884, "grad_norm": 310.4053649902344, "learning_rate": 1.9172969779187336e-06, "loss": 18.9688, "step": 4712 }, { "epoch": 0.31301055987248455, "grad_norm": 180.197265625, "learning_rate": 1.917254145017227e-06, "loss": 21.4062, "step": 4713 }, { "epoch": 0.31307697416484026, "grad_norm": 213.19398498535156, "learning_rate": 1.9172113015054528e-06, "loss": 19.5312, "step": 4714 }, { "epoch": 0.31314338845719597, "grad_norm": 182.5383758544922, "learning_rate": 1.9171684473839076e-06, "loss": 19.7812, "step": 4715 }, { "epoch": 0.3132098027495517, "grad_norm": 172.127197265625, "learning_rate": 1.917125582653086e-06, "loss": 18.8438, "step": 4716 }, { "epoch": 0.31327621704190745, "grad_norm": 164.27481079101562, "learning_rate": 1.917082707313485e-06, "loss": 17.25, "step": 4717 }, { "epoch": 0.31334263133426316, "grad_norm": 789.7186279296875, "learning_rate": 1.9170398213656e-06, "loss": 20.7656, "step": 4718 }, { "epoch": 0.31340904562661887, "grad_norm": 221.8877716064453, "learning_rate": 1.9169969248099273e-06, "loss": 17.0469, "step": 4719 }, { "epoch": 0.3134754599189746, "grad_norm": 421.4948425292969, "learning_rate": 1.916954017646963e-06, "loss": 24.875, "step": 4720 }, { "epoch": 0.3135418742113303, "grad_norm": 497.34716796875, "learning_rate": 1.916911099877203e-06, "loss": 23.3125, "step": 4721 }, { "epoch": 0.313608288503686, "grad_norm": 150.34255981445312, "learning_rate": 1.916868171501144e-06, "loss": 16.2656, "step": 4722 }, { "epoch": 0.3136747027960417, "grad_norm": 140.77706909179688, "learning_rate": 1.9168252325192834e-06, "loss": 17.0312, "step": 4723 }, { "epoch": 0.3137411170883974, "grad_norm": 339.68377685546875, "learning_rate": 1.916782282932117e-06, "loss": 21.2812, "step": 4724 }, { "epoch": 0.3138075313807531, "grad_norm": 192.33763122558594, "learning_rate": 1.9167393227401415e-06, "loss": 19.4844, "step": 4725 }, { "epoch": 0.31387394567310883, "grad_norm": 238.8980255126953, "learning_rate": 1.916696351943854e-06, "loss": 19.7031, "step": 4726 }, { "epoch": 0.31394035996546454, "grad_norm": 348.4104919433594, "learning_rate": 1.916653370543752e-06, "loss": 13.3594, "step": 4727 }, { "epoch": 0.3140067742578203, "grad_norm": 280.309326171875, "learning_rate": 1.9166103785403322e-06, "loss": 18.4844, "step": 4728 }, { "epoch": 0.314073188550176, "grad_norm": 224.1030731201172, "learning_rate": 1.9165673759340918e-06, "loss": 19.1094, "step": 4729 }, { "epoch": 0.31413960284253173, "grad_norm": 231.16754150390625, "learning_rate": 1.916524362725529e-06, "loss": 16.5312, "step": 4730 }, { "epoch": 0.31420601713488744, "grad_norm": 148.51321411132812, "learning_rate": 1.916481338915141e-06, "loss": 17.2188, "step": 4731 }, { "epoch": 0.31427243142724315, "grad_norm": 168.43630981445312, "learning_rate": 1.9164383045034247e-06, "loss": 16.5, "step": 4732 }, { "epoch": 0.31433884571959886, "grad_norm": 291.8957824707031, "learning_rate": 1.916395259490879e-06, "loss": 17.2031, "step": 4733 }, { "epoch": 0.31440526001195457, "grad_norm": 2596.15234375, "learning_rate": 1.9163522038780012e-06, "loss": 14.7812, "step": 4734 }, { "epoch": 0.3144716743043103, "grad_norm": 195.10910034179688, "learning_rate": 1.91630913766529e-06, "loss": 15.7188, "step": 4735 }, { "epoch": 0.314538088596666, "grad_norm": 238.45521545410156, "learning_rate": 1.9162660608532424e-06, "loss": 18.0938, "step": 4736 }, { "epoch": 0.3146045028890217, "grad_norm": 176.69642639160156, "learning_rate": 1.9162229734423576e-06, "loss": 23.625, "step": 4737 }, { "epoch": 0.3146709171813774, "grad_norm": 358.9574279785156, "learning_rate": 1.916179875433134e-06, "loss": 27.2188, "step": 4738 }, { "epoch": 0.3147373314737332, "grad_norm": 215.08718872070312, "learning_rate": 1.9161367668260696e-06, "loss": 15.4219, "step": 4739 }, { "epoch": 0.3148037457660889, "grad_norm": 240.48731994628906, "learning_rate": 1.9160936476216634e-06, "loss": 18.7969, "step": 4740 }, { "epoch": 0.3148701600584446, "grad_norm": 164.49444580078125, "learning_rate": 1.9160505178204142e-06, "loss": 20.375, "step": 4741 }, { "epoch": 0.3149365743508003, "grad_norm": 192.4124298095703, "learning_rate": 1.916007377422821e-06, "loss": 20.1094, "step": 4742 }, { "epoch": 0.315002988643156, "grad_norm": 149.1371612548828, "learning_rate": 1.9159642264293826e-06, "loss": 17.3906, "step": 4743 }, { "epoch": 0.3150694029355117, "grad_norm": 219.1986541748047, "learning_rate": 1.9159210648405975e-06, "loss": 16.0312, "step": 4744 }, { "epoch": 0.31513581722786743, "grad_norm": 154.1059112548828, "learning_rate": 1.9158778926569664e-06, "loss": 16.6719, "step": 4745 }, { "epoch": 0.31520223152022314, "grad_norm": 474.2122802734375, "learning_rate": 1.915834709878988e-06, "loss": 22.7344, "step": 4746 }, { "epoch": 0.31526864581257885, "grad_norm": 150.88128662109375, "learning_rate": 1.9157915165071615e-06, "loss": 15.6406, "step": 4747 }, { "epoch": 0.31533506010493456, "grad_norm": 108.48863220214844, "learning_rate": 1.9157483125419872e-06, "loss": 15.625, "step": 4748 }, { "epoch": 0.31540147439729027, "grad_norm": 144.1134490966797, "learning_rate": 1.9157050979839638e-06, "loss": 19.6875, "step": 4749 }, { "epoch": 0.31546788868964604, "grad_norm": 151.71376037597656, "learning_rate": 1.9156618728335925e-06, "loss": 15.375, "step": 4750 }, { "epoch": 0.31553430298200175, "grad_norm": 268.82666015625, "learning_rate": 1.9156186370913725e-06, "loss": 13.1406, "step": 4751 }, { "epoch": 0.31560071727435746, "grad_norm": 245.1149139404297, "learning_rate": 1.9155753907578038e-06, "loss": 19.2188, "step": 4752 }, { "epoch": 0.31566713156671317, "grad_norm": 223.29066467285156, "learning_rate": 1.9155321338333874e-06, "loss": 25.4062, "step": 4753 }, { "epoch": 0.3157335458590689, "grad_norm": 315.67498779296875, "learning_rate": 1.9154888663186226e-06, "loss": 19.1875, "step": 4754 }, { "epoch": 0.3157999601514246, "grad_norm": 280.3020935058594, "learning_rate": 1.9154455882140106e-06, "loss": 15.6562, "step": 4755 }, { "epoch": 0.3158663744437803, "grad_norm": 294.7988586425781, "learning_rate": 1.9154022995200523e-06, "loss": 21.9688, "step": 4756 }, { "epoch": 0.315932788736136, "grad_norm": 183.40023803710938, "learning_rate": 1.915359000237248e-06, "loss": 20.6406, "step": 4757 }, { "epoch": 0.3159992030284917, "grad_norm": 180.9327392578125, "learning_rate": 1.9153156903660983e-06, "loss": 18.5625, "step": 4758 }, { "epoch": 0.3160656173208474, "grad_norm": 204.7761993408203, "learning_rate": 1.9152723699071047e-06, "loss": 23.9375, "step": 4759 }, { "epoch": 0.31613203161320313, "grad_norm": 275.20111083984375, "learning_rate": 1.915229038860768e-06, "loss": 20.5938, "step": 4760 }, { "epoch": 0.3161984459055589, "grad_norm": 163.95668029785156, "learning_rate": 1.9151856972275897e-06, "loss": 17.0469, "step": 4761 }, { "epoch": 0.3162648601979146, "grad_norm": 290.8008728027344, "learning_rate": 1.915142345008071e-06, "loss": 22.125, "step": 4762 }, { "epoch": 0.3163312744902703, "grad_norm": 191.53814697265625, "learning_rate": 1.915098982202713e-06, "loss": 16.5469, "step": 4763 }, { "epoch": 0.31639768878262603, "grad_norm": 331.2476501464844, "learning_rate": 1.915055608812018e-06, "loss": 15.0781, "step": 4764 }, { "epoch": 0.31646410307498174, "grad_norm": 392.47076416015625, "learning_rate": 1.915012224836487e-06, "loss": 23.1875, "step": 4765 }, { "epoch": 0.31653051736733745, "grad_norm": 220.55189514160156, "learning_rate": 1.9149688302766224e-06, "loss": 24.5, "step": 4766 }, { "epoch": 0.31659693165969316, "grad_norm": 114.7201919555664, "learning_rate": 1.9149254251329265e-06, "loss": 14.6406, "step": 4767 }, { "epoch": 0.31666334595204887, "grad_norm": 319.3370666503906, "learning_rate": 1.9148820094059002e-06, "loss": 20.0625, "step": 4768 }, { "epoch": 0.3167297602444046, "grad_norm": 132.68687438964844, "learning_rate": 1.9148385830960465e-06, "loss": 14.7188, "step": 4769 }, { "epoch": 0.3167961745367603, "grad_norm": 423.4405212402344, "learning_rate": 1.9147951462038675e-06, "loss": 23.4688, "step": 4770 }, { "epoch": 0.316862588829116, "grad_norm": 132.80694580078125, "learning_rate": 1.914751698729866e-06, "loss": 22.0, "step": 4771 }, { "epoch": 0.31692900312147176, "grad_norm": 395.23468017578125, "learning_rate": 1.9147082406745443e-06, "loss": 17.7188, "step": 4772 }, { "epoch": 0.3169954174138275, "grad_norm": 113.59385681152344, "learning_rate": 1.914664772038405e-06, "loss": 17.6875, "step": 4773 }, { "epoch": 0.3170618317061832, "grad_norm": 209.88169860839844, "learning_rate": 1.914621292821951e-06, "loss": 25.3125, "step": 4774 }, { "epoch": 0.3171282459985389, "grad_norm": 189.00628662109375, "learning_rate": 1.9145778030256853e-06, "loss": 17.3594, "step": 4775 }, { "epoch": 0.3171946602908946, "grad_norm": 176.63229370117188, "learning_rate": 1.914534302650111e-06, "loss": 19.5, "step": 4776 }, { "epoch": 0.3172610745832503, "grad_norm": 245.61790466308594, "learning_rate": 1.9144907916957315e-06, "loss": 20.4531, "step": 4777 }, { "epoch": 0.317327488875606, "grad_norm": 125.66594696044922, "learning_rate": 1.9144472701630496e-06, "loss": 18.8281, "step": 4778 }, { "epoch": 0.31739390316796173, "grad_norm": 436.8943176269531, "learning_rate": 1.914403738052569e-06, "loss": 30.2344, "step": 4779 }, { "epoch": 0.31746031746031744, "grad_norm": 163.275634765625, "learning_rate": 1.9143601953647933e-06, "loss": 14.0312, "step": 4780 }, { "epoch": 0.31752673175267315, "grad_norm": 264.2025146484375, "learning_rate": 1.914316642100226e-06, "loss": 18.4688, "step": 4781 }, { "epoch": 0.3175931460450289, "grad_norm": 196.13433837890625, "learning_rate": 1.914273078259371e-06, "loss": 16.9688, "step": 4782 }, { "epoch": 0.3176595603373846, "grad_norm": 163.0671844482422, "learning_rate": 1.9142295038427324e-06, "loss": 18.0, "step": 4783 }, { "epoch": 0.31772597462974034, "grad_norm": 145.943359375, "learning_rate": 1.914185918850814e-06, "loss": 20.3125, "step": 4784 }, { "epoch": 0.31779238892209605, "grad_norm": 230.71875, "learning_rate": 1.9141423232841198e-06, "loss": 15.6094, "step": 4785 }, { "epoch": 0.31785880321445176, "grad_norm": 327.1335144042969, "learning_rate": 1.914098717143155e-06, "loss": 23.5312, "step": 4786 }, { "epoch": 0.31792521750680747, "grad_norm": 379.560302734375, "learning_rate": 1.9140551004284223e-06, "loss": 26.4375, "step": 4787 }, { "epoch": 0.3179916317991632, "grad_norm": 494.07830810546875, "learning_rate": 1.914011473140428e-06, "loss": 20.4219, "step": 4788 }, { "epoch": 0.3180580460915189, "grad_norm": 142.09043884277344, "learning_rate": 1.9139678352796756e-06, "loss": 16.9219, "step": 4789 }, { "epoch": 0.3181244603838746, "grad_norm": 236.77450561523438, "learning_rate": 1.9139241868466707e-06, "loss": 16.2656, "step": 4790 }, { "epoch": 0.3181908746762303, "grad_norm": 138.76109313964844, "learning_rate": 1.9138805278419175e-06, "loss": 15.5781, "step": 4791 }, { "epoch": 0.318257288968586, "grad_norm": 134.76025390625, "learning_rate": 1.9138368582659216e-06, "loss": 17.3438, "step": 4792 }, { "epoch": 0.3183237032609418, "grad_norm": 559.5239868164062, "learning_rate": 1.913793178119188e-06, "loss": 25.0781, "step": 4793 }, { "epoch": 0.3183901175532975, "grad_norm": 357.04388427734375, "learning_rate": 1.9137494874022217e-06, "loss": 23.2344, "step": 4794 }, { "epoch": 0.3184565318456532, "grad_norm": 283.7655029296875, "learning_rate": 1.913705786115528e-06, "loss": 27.5312, "step": 4795 }, { "epoch": 0.3185229461380089, "grad_norm": 192.0396270751953, "learning_rate": 1.9136620742596125e-06, "loss": 22.125, "step": 4796 }, { "epoch": 0.3185893604303646, "grad_norm": 287.6543884277344, "learning_rate": 1.9136183518349817e-06, "loss": 25.2188, "step": 4797 }, { "epoch": 0.31865577472272033, "grad_norm": 175.61419677734375, "learning_rate": 1.91357461884214e-06, "loss": 15.1719, "step": 4798 }, { "epoch": 0.31872218901507604, "grad_norm": 293.25531005859375, "learning_rate": 1.913530875281594e-06, "loss": 16.75, "step": 4799 }, { "epoch": 0.31878860330743175, "grad_norm": 119.90150451660156, "learning_rate": 1.9134871211538497e-06, "loss": 14.2656, "step": 4800 }, { "epoch": 0.31885501759978746, "grad_norm": 473.0416259765625, "learning_rate": 1.913443356459413e-06, "loss": 17.6562, "step": 4801 }, { "epoch": 0.31892143189214317, "grad_norm": 126.42410278320312, "learning_rate": 1.9133995811987904e-06, "loss": 23.0312, "step": 4802 }, { "epoch": 0.3189878461844989, "grad_norm": 193.38055419921875, "learning_rate": 1.9133557953724883e-06, "loss": 15.875, "step": 4803 }, { "epoch": 0.31905426047685465, "grad_norm": 248.35411071777344, "learning_rate": 1.913311998981013e-06, "loss": 15.0625, "step": 4804 }, { "epoch": 0.31912067476921036, "grad_norm": 449.1672668457031, "learning_rate": 1.913268192024871e-06, "loss": 17.0938, "step": 4805 }, { "epoch": 0.31918708906156606, "grad_norm": 305.46160888671875, "learning_rate": 1.913224374504569e-06, "loss": 18.1719, "step": 4806 }, { "epoch": 0.3192535033539218, "grad_norm": 205.84515380859375, "learning_rate": 1.9131805464206142e-06, "loss": 14.0625, "step": 4807 }, { "epoch": 0.3193199176462775, "grad_norm": 169.16867065429688, "learning_rate": 1.9131367077735137e-06, "loss": 21.9688, "step": 4808 }, { "epoch": 0.3193863319386332, "grad_norm": 364.8723449707031, "learning_rate": 1.9130928585637738e-06, "loss": 22.8594, "step": 4809 }, { "epoch": 0.3194527462309889, "grad_norm": 170.90977478027344, "learning_rate": 1.9130489987919023e-06, "loss": 17.7656, "step": 4810 }, { "epoch": 0.3195191605233446, "grad_norm": 231.6357879638672, "learning_rate": 1.913005128458407e-06, "loss": 19.0469, "step": 4811 }, { "epoch": 0.3195855748157003, "grad_norm": 307.23431396484375, "learning_rate": 1.9129612475637943e-06, "loss": 23.125, "step": 4812 }, { "epoch": 0.31965198910805603, "grad_norm": 189.07315063476562, "learning_rate": 1.9129173561085725e-06, "loss": 14.4688, "step": 4813 }, { "epoch": 0.31971840340041174, "grad_norm": 216.86648559570312, "learning_rate": 1.912873454093249e-06, "loss": 21.5312, "step": 4814 }, { "epoch": 0.3197848176927675, "grad_norm": 238.3151092529297, "learning_rate": 1.912829541518332e-06, "loss": 19.6562, "step": 4815 }, { "epoch": 0.3198512319851232, "grad_norm": 138.3454132080078, "learning_rate": 1.912785618384329e-06, "loss": 20.875, "step": 4816 }, { "epoch": 0.31991764627747893, "grad_norm": 352.1679382324219, "learning_rate": 1.912741684691749e-06, "loss": 18.5, "step": 4817 }, { "epoch": 0.31998406056983464, "grad_norm": 241.34036254882812, "learning_rate": 1.9126977404410985e-06, "loss": 24.3438, "step": 4818 }, { "epoch": 0.32005047486219035, "grad_norm": 266.64404296875, "learning_rate": 1.9126537856328876e-06, "loss": 18.375, "step": 4819 }, { "epoch": 0.32011688915454606, "grad_norm": 205.37245178222656, "learning_rate": 1.9126098202676236e-06, "loss": 18.2969, "step": 4820 }, { "epoch": 0.32018330344690177, "grad_norm": 599.8533935546875, "learning_rate": 1.9125658443458155e-06, "loss": 22.5312, "step": 4821 }, { "epoch": 0.3202497177392575, "grad_norm": 249.3965606689453, "learning_rate": 1.912521857867972e-06, "loss": 19.5469, "step": 4822 }, { "epoch": 0.3203161320316132, "grad_norm": 161.4990997314453, "learning_rate": 1.9124778608346015e-06, "loss": 17.1719, "step": 4823 }, { "epoch": 0.3203825463239689, "grad_norm": 352.7081604003906, "learning_rate": 1.912433853246214e-06, "loss": 20.2656, "step": 4824 }, { "epoch": 0.3204489606163246, "grad_norm": 260.95892333984375, "learning_rate": 1.912389835103317e-06, "loss": 25.3438, "step": 4825 }, { "epoch": 0.3205153749086804, "grad_norm": 320.2734375, "learning_rate": 1.9123458064064215e-06, "loss": 24.4062, "step": 4826 }, { "epoch": 0.3205817892010361, "grad_norm": 342.0199279785156, "learning_rate": 1.912301767156035e-06, "loss": 22.1719, "step": 4827 }, { "epoch": 0.3206482034933918, "grad_norm": 196.17332458496094, "learning_rate": 1.912257717352668e-06, "loss": 16.8594, "step": 4828 }, { "epoch": 0.3207146177857475, "grad_norm": 263.7685241699219, "learning_rate": 1.9122136569968295e-06, "loss": 15.8594, "step": 4829 }, { "epoch": 0.3207810320781032, "grad_norm": 186.6596221923828, "learning_rate": 1.9121695860890296e-06, "loss": 25.375, "step": 4830 }, { "epoch": 0.3208474463704589, "grad_norm": 1252.01611328125, "learning_rate": 1.912125504629778e-06, "loss": 17.7031, "step": 4831 }, { "epoch": 0.32091386066281463, "grad_norm": 145.0529327392578, "learning_rate": 1.9120814126195845e-06, "loss": 22.4062, "step": 4832 }, { "epoch": 0.32098027495517034, "grad_norm": 201.97909545898438, "learning_rate": 1.912037310058959e-06, "loss": 19.1094, "step": 4833 }, { "epoch": 0.32104668924752605, "grad_norm": 189.76333618164062, "learning_rate": 1.911993196948412e-06, "loss": 17.0312, "step": 4834 }, { "epoch": 0.32111310353988176, "grad_norm": 255.16241455078125, "learning_rate": 1.911949073288453e-06, "loss": 24.3125, "step": 4835 }, { "epoch": 0.32117951783223747, "grad_norm": 163.56680297851562, "learning_rate": 1.9119049390795937e-06, "loss": 19.75, "step": 4836 }, { "epoch": 0.32124593212459324, "grad_norm": 200.183837890625, "learning_rate": 1.9118607943223436e-06, "loss": 17.0, "step": 4837 }, { "epoch": 0.32131234641694895, "grad_norm": 547.3995361328125, "learning_rate": 1.9118166390172138e-06, "loss": 26.5938, "step": 4838 }, { "epoch": 0.32137876070930466, "grad_norm": 403.46661376953125, "learning_rate": 1.911772473164715e-06, "loss": 29.5938, "step": 4839 }, { "epoch": 0.32144517500166037, "grad_norm": 237.74728393554688, "learning_rate": 1.9117282967653578e-06, "loss": 26.1875, "step": 4840 }, { "epoch": 0.3215115892940161, "grad_norm": 272.4279479980469, "learning_rate": 1.9116841098196536e-06, "loss": 24.9531, "step": 4841 }, { "epoch": 0.3215780035863718, "grad_norm": 123.50252532958984, "learning_rate": 1.911639912328113e-06, "loss": 15.9375, "step": 4842 }, { "epoch": 0.3216444178787275, "grad_norm": 390.36468505859375, "learning_rate": 1.9115957042912478e-06, "loss": 20.0, "step": 4843 }, { "epoch": 0.3217108321710832, "grad_norm": 143.92706298828125, "learning_rate": 1.9115514857095685e-06, "loss": 15.3438, "step": 4844 }, { "epoch": 0.3217772464634389, "grad_norm": 222.61126708984375, "learning_rate": 1.9115072565835882e-06, "loss": 17.1406, "step": 4845 }, { "epoch": 0.3218436607557946, "grad_norm": 175.71900939941406, "learning_rate": 1.911463016913817e-06, "loss": 16.3125, "step": 4846 }, { "epoch": 0.32191007504815033, "grad_norm": 173.58628845214844, "learning_rate": 1.9114187667007674e-06, "loss": 21.7812, "step": 4847 }, { "epoch": 0.3219764893405061, "grad_norm": 220.92300415039062, "learning_rate": 1.911374505944951e-06, "loss": 15.6094, "step": 4848 }, { "epoch": 0.3220429036328618, "grad_norm": 274.2783508300781, "learning_rate": 1.91133023464688e-06, "loss": 18.5, "step": 4849 }, { "epoch": 0.3221093179252175, "grad_norm": 157.3183135986328, "learning_rate": 1.911285952807066e-06, "loss": 18.2031, "step": 4850 }, { "epoch": 0.32217573221757323, "grad_norm": 903.0653076171875, "learning_rate": 1.911241660426022e-06, "loss": 27.0781, "step": 4851 }, { "epoch": 0.32224214650992894, "grad_norm": 111.4048843383789, "learning_rate": 1.9111973575042595e-06, "loss": 13.7031, "step": 4852 }, { "epoch": 0.32230856080228465, "grad_norm": 155.8892059326172, "learning_rate": 1.9111530440422914e-06, "loss": 20.0, "step": 4853 }, { "epoch": 0.32237497509464036, "grad_norm": 275.56915283203125, "learning_rate": 1.9111087200406306e-06, "loss": 22.2656, "step": 4854 }, { "epoch": 0.32244138938699607, "grad_norm": 201.16311645507812, "learning_rate": 1.9110643854997893e-06, "loss": 16.3125, "step": 4855 }, { "epoch": 0.3225078036793518, "grad_norm": 171.1566619873047, "learning_rate": 1.911020040420281e-06, "loss": 16.8438, "step": 4856 }, { "epoch": 0.3225742179717075, "grad_norm": 315.89227294921875, "learning_rate": 1.910975684802618e-06, "loss": 14.6094, "step": 4857 }, { "epoch": 0.32264063226406325, "grad_norm": 229.05789184570312, "learning_rate": 1.9109313186473133e-06, "loss": 21.8438, "step": 4858 }, { "epoch": 0.32270704655641896, "grad_norm": 170.01242065429688, "learning_rate": 1.9108869419548803e-06, "loss": 18.7656, "step": 4859 }, { "epoch": 0.3227734608487747, "grad_norm": 178.23141479492188, "learning_rate": 1.9108425547258323e-06, "loss": 17.5, "step": 4860 }, { "epoch": 0.3228398751411304, "grad_norm": 97.86172485351562, "learning_rate": 1.9107981569606837e-06, "loss": 12.6016, "step": 4861 }, { "epoch": 0.3229062894334861, "grad_norm": 181.2405548095703, "learning_rate": 1.910753748659946e-06, "loss": 18.4531, "step": 4862 }, { "epoch": 0.3229727037258418, "grad_norm": 264.6392517089844, "learning_rate": 1.910709329824135e-06, "loss": 18.9375, "step": 4863 }, { "epoch": 0.3230391180181975, "grad_norm": 306.0068664550781, "learning_rate": 1.9106649004537635e-06, "loss": 21.1094, "step": 4864 }, { "epoch": 0.3231055323105532, "grad_norm": 428.7276306152344, "learning_rate": 1.9106204605493454e-06, "loss": 14.4219, "step": 4865 }, { "epoch": 0.32317194660290893, "grad_norm": 479.46832275390625, "learning_rate": 1.910576010111395e-06, "loss": 20.7188, "step": 4866 }, { "epoch": 0.32323836089526464, "grad_norm": 382.0353088378906, "learning_rate": 1.9105315491404264e-06, "loss": 25.0312, "step": 4867 }, { "epoch": 0.32330477518762035, "grad_norm": 962.8792724609375, "learning_rate": 1.9104870776369533e-06, "loss": 20.7031, "step": 4868 }, { "epoch": 0.3233711894799761, "grad_norm": 372.38873291015625, "learning_rate": 1.9104425956014916e-06, "loss": 20.625, "step": 4869 }, { "epoch": 0.3234376037723318, "grad_norm": 211.46168518066406, "learning_rate": 1.9103981030345542e-06, "loss": 13.75, "step": 4870 }, { "epoch": 0.32350401806468754, "grad_norm": 206.74807739257812, "learning_rate": 1.910353599936657e-06, "loss": 17.4688, "step": 4871 }, { "epoch": 0.32357043235704325, "grad_norm": 210.5111846923828, "learning_rate": 1.9103090863083144e-06, "loss": 15.9219, "step": 4872 }, { "epoch": 0.32363684664939896, "grad_norm": 1061.3642578125, "learning_rate": 1.910264562150041e-06, "loss": 30.9375, "step": 4873 }, { "epoch": 0.32370326094175467, "grad_norm": 404.5947570800781, "learning_rate": 1.910220027462352e-06, "loss": 22.375, "step": 4874 }, { "epoch": 0.3237696752341104, "grad_norm": 178.54360961914062, "learning_rate": 1.9101754822457625e-06, "loss": 15.4844, "step": 4875 }, { "epoch": 0.3238360895264661, "grad_norm": 180.43011474609375, "learning_rate": 1.9101309265007878e-06, "loss": 15.7188, "step": 4876 }, { "epoch": 0.3239025038188218, "grad_norm": 254.39808654785156, "learning_rate": 1.9100863602279436e-06, "loss": 17.6875, "step": 4877 }, { "epoch": 0.3239689181111775, "grad_norm": 136.23577880859375, "learning_rate": 1.910041783427745e-06, "loss": 22.7344, "step": 4878 }, { "epoch": 0.3240353324035332, "grad_norm": 280.255615234375, "learning_rate": 1.909997196100708e-06, "loss": 24.7344, "step": 4879 }, { "epoch": 0.324101746695889, "grad_norm": 132.66073608398438, "learning_rate": 1.9099525982473477e-06, "loss": 15.2344, "step": 4880 }, { "epoch": 0.3241681609882447, "grad_norm": 258.3058776855469, "learning_rate": 1.909907989868181e-06, "loss": 21.2344, "step": 4881 }, { "epoch": 0.3242345752806004, "grad_norm": 387.2864685058594, "learning_rate": 1.909863370963723e-06, "loss": 18.8281, "step": 4882 }, { "epoch": 0.3243009895729561, "grad_norm": 240.40457153320312, "learning_rate": 1.9098187415344905e-06, "loss": 22.4062, "step": 4883 }, { "epoch": 0.3243674038653118, "grad_norm": 283.9216613769531, "learning_rate": 1.9097741015809993e-06, "loss": 18.9375, "step": 4884 }, { "epoch": 0.32443381815766753, "grad_norm": 351.9640197753906, "learning_rate": 1.9097294511037657e-06, "loss": 23.1562, "step": 4885 }, { "epoch": 0.32450023245002324, "grad_norm": 124.10597229003906, "learning_rate": 1.9096847901033065e-06, "loss": 15.2812, "step": 4886 }, { "epoch": 0.32456664674237895, "grad_norm": 330.81976318359375, "learning_rate": 1.9096401185801383e-06, "loss": 18.2031, "step": 4887 }, { "epoch": 0.32463306103473466, "grad_norm": 220.99063110351562, "learning_rate": 1.909595436534778e-06, "loss": 17.4219, "step": 4888 }, { "epoch": 0.32469947532709037, "grad_norm": 258.6479187011719, "learning_rate": 1.9095507439677415e-06, "loss": 19.2188, "step": 4889 }, { "epoch": 0.3247658896194461, "grad_norm": 327.8658142089844, "learning_rate": 1.909506040879547e-06, "loss": 16.7656, "step": 4890 }, { "epoch": 0.32483230391180185, "grad_norm": 345.6263122558594, "learning_rate": 1.909461327270711e-06, "loss": 21.5, "step": 4891 }, { "epoch": 0.32489871820415755, "grad_norm": 173.76736450195312, "learning_rate": 1.9094166031417506e-06, "loss": 18.8125, "step": 4892 }, { "epoch": 0.32496513249651326, "grad_norm": 285.6661376953125, "learning_rate": 1.9093718684931836e-06, "loss": 15.25, "step": 4893 }, { "epoch": 0.325031546788869, "grad_norm": 154.59295654296875, "learning_rate": 1.909327123325527e-06, "loss": 20.9531, "step": 4894 }, { "epoch": 0.3250979610812247, "grad_norm": 226.71046447753906, "learning_rate": 1.9092823676392987e-06, "loss": 20.6875, "step": 4895 }, { "epoch": 0.3251643753735804, "grad_norm": 336.2048034667969, "learning_rate": 1.909237601435016e-06, "loss": 18.5469, "step": 4896 }, { "epoch": 0.3252307896659361, "grad_norm": 194.0179901123047, "learning_rate": 1.909192824713197e-06, "loss": 19.4062, "step": 4897 }, { "epoch": 0.3252972039582918, "grad_norm": 306.28680419921875, "learning_rate": 1.90914803747436e-06, "loss": 17.0156, "step": 4898 }, { "epoch": 0.3253636182506475, "grad_norm": 344.67877197265625, "learning_rate": 1.909103239719023e-06, "loss": 16.5469, "step": 4899 }, { "epoch": 0.32543003254300323, "grad_norm": 241.58924865722656, "learning_rate": 1.9090584314477037e-06, "loss": 15.8438, "step": 4900 }, { "epoch": 0.32549644683535894, "grad_norm": 339.422607421875, "learning_rate": 1.9090136126609204e-06, "loss": 16.0156, "step": 4901 }, { "epoch": 0.3255628611277147, "grad_norm": 303.6455078125, "learning_rate": 1.9089687833591917e-06, "loss": 21.7812, "step": 4902 }, { "epoch": 0.3256292754200704, "grad_norm": 544.5542602539062, "learning_rate": 1.908923943543037e-06, "loss": 24.4531, "step": 4903 }, { "epoch": 0.32569568971242613, "grad_norm": 292.99420166015625, "learning_rate": 1.9088790932129736e-06, "loss": 22.4688, "step": 4904 }, { "epoch": 0.32576210400478184, "grad_norm": 274.3505554199219, "learning_rate": 1.908834232369521e-06, "loss": 31.7656, "step": 4905 }, { "epoch": 0.32582851829713755, "grad_norm": 292.5877685546875, "learning_rate": 1.908789361013198e-06, "loss": 24.5312, "step": 4906 }, { "epoch": 0.32589493258949326, "grad_norm": 311.9947204589844, "learning_rate": 1.908744479144524e-06, "loss": 20.7031, "step": 4907 }, { "epoch": 0.32596134688184897, "grad_norm": 139.67578125, "learning_rate": 1.908699586764018e-06, "loss": 20.625, "step": 4908 }, { "epoch": 0.3260277611742047, "grad_norm": 184.50588989257812, "learning_rate": 1.908654683872199e-06, "loss": 19.2344, "step": 4909 }, { "epoch": 0.3260941754665604, "grad_norm": 178.52085876464844, "learning_rate": 1.908609770469586e-06, "loss": 18.1094, "step": 4910 }, { "epoch": 0.3261605897589161, "grad_norm": 261.9573974609375, "learning_rate": 1.9085648465567e-06, "loss": 18.1562, "step": 4911 }, { "epoch": 0.3262270040512718, "grad_norm": 155.4453887939453, "learning_rate": 1.9085199121340593e-06, "loss": 20.9062, "step": 4912 }, { "epoch": 0.3262934183436276, "grad_norm": 346.8903503417969, "learning_rate": 1.908474967202184e-06, "loss": 21.2188, "step": 4913 }, { "epoch": 0.3263598326359833, "grad_norm": 184.27996826171875, "learning_rate": 1.9084300117615944e-06, "loss": 24.0625, "step": 4914 }, { "epoch": 0.326426246928339, "grad_norm": 156.5650634765625, "learning_rate": 1.9083850458128104e-06, "loss": 16.2969, "step": 4915 }, { "epoch": 0.3264926612206947, "grad_norm": 221.4414520263672, "learning_rate": 1.9083400693563514e-06, "loss": 22.5, "step": 4916 }, { "epoch": 0.3265590755130504, "grad_norm": 1138.208740234375, "learning_rate": 1.908295082392739e-06, "loss": 21.3281, "step": 4917 }, { "epoch": 0.3266254898054061, "grad_norm": 130.74302673339844, "learning_rate": 1.9082500849224924e-06, "loss": 17.7656, "step": 4918 }, { "epoch": 0.32669190409776183, "grad_norm": 171.11610412597656, "learning_rate": 1.908205076946133e-06, "loss": 17.7031, "step": 4919 }, { "epoch": 0.32675831839011754, "grad_norm": 183.47787475585938, "learning_rate": 1.9081600584641804e-06, "loss": 14.8281, "step": 4920 }, { "epoch": 0.32682473268247325, "grad_norm": 310.06146240234375, "learning_rate": 1.908115029477156e-06, "loss": 18.8438, "step": 4921 }, { "epoch": 0.32689114697482896, "grad_norm": 400.6324157714844, "learning_rate": 1.9080699899855807e-06, "loss": 20.25, "step": 4922 }, { "epoch": 0.32695756126718467, "grad_norm": 196.4838409423828, "learning_rate": 1.9080249399899754e-06, "loss": 18.3438, "step": 4923 }, { "epoch": 0.32702397555954044, "grad_norm": 350.60406494140625, "learning_rate": 1.907979879490861e-06, "loss": 19.9844, "step": 4924 }, { "epoch": 0.32709038985189615, "grad_norm": 167.9541473388672, "learning_rate": 1.9079348084887592e-06, "loss": 17.9688, "step": 4925 }, { "epoch": 0.32715680414425186, "grad_norm": 258.20648193359375, "learning_rate": 1.907889726984191e-06, "loss": 14.6719, "step": 4926 }, { "epoch": 0.32722321843660757, "grad_norm": 239.5193634033203, "learning_rate": 1.9078446349776777e-06, "loss": 20.1562, "step": 4927 }, { "epoch": 0.3272896327289633, "grad_norm": 133.59146118164062, "learning_rate": 1.9077995324697414e-06, "loss": 21.9062, "step": 4928 }, { "epoch": 0.327356047021319, "grad_norm": 155.50164794921875, "learning_rate": 1.9077544194609038e-06, "loss": 18.6875, "step": 4929 }, { "epoch": 0.3274224613136747, "grad_norm": 566.3213500976562, "learning_rate": 1.9077092959516863e-06, "loss": 21.4062, "step": 4930 }, { "epoch": 0.3274888756060304, "grad_norm": 150.56600952148438, "learning_rate": 1.907664161942611e-06, "loss": 18.9062, "step": 4931 }, { "epoch": 0.3275552898983861, "grad_norm": 316.01776123046875, "learning_rate": 1.9076190174342e-06, "loss": 22.0625, "step": 4932 }, { "epoch": 0.3276217041907418, "grad_norm": 236.78109741210938, "learning_rate": 1.9075738624269758e-06, "loss": 16.1719, "step": 4933 }, { "epoch": 0.3276881184830976, "grad_norm": 137.37547302246094, "learning_rate": 1.9075286969214607e-06, "loss": 19.9688, "step": 4934 }, { "epoch": 0.3277545327754533, "grad_norm": 174.16732788085938, "learning_rate": 1.9074835209181764e-06, "loss": 17.3125, "step": 4935 }, { "epoch": 0.327820947067809, "grad_norm": 241.36354064941406, "learning_rate": 1.907438334417646e-06, "loss": 19.0625, "step": 4936 }, { "epoch": 0.3278873613601647, "grad_norm": 241.54916381835938, "learning_rate": 1.9073931374203928e-06, "loss": 16.9844, "step": 4937 }, { "epoch": 0.32795377565252043, "grad_norm": 216.98997497558594, "learning_rate": 1.9073479299269387e-06, "loss": 20.5938, "step": 4938 }, { "epoch": 0.32802018994487614, "grad_norm": 205.9191131591797, "learning_rate": 1.9073027119378068e-06, "loss": 16.625, "step": 4939 }, { "epoch": 0.32808660423723185, "grad_norm": 250.8570556640625, "learning_rate": 1.9072574834535207e-06, "loss": 26.0781, "step": 4940 }, { "epoch": 0.32815301852958756, "grad_norm": 256.72344970703125, "learning_rate": 1.9072122444746028e-06, "loss": 25.5312, "step": 4941 }, { "epoch": 0.32821943282194327, "grad_norm": 304.3464660644531, "learning_rate": 1.9071669950015767e-06, "loss": 15.0312, "step": 4942 }, { "epoch": 0.328285847114299, "grad_norm": 330.9139709472656, "learning_rate": 1.9071217350349662e-06, "loss": 24.25, "step": 4943 }, { "epoch": 0.3283522614066547, "grad_norm": 280.24151611328125, "learning_rate": 1.9070764645752945e-06, "loss": 15.4375, "step": 4944 }, { "epoch": 0.32841867569901045, "grad_norm": 1156.567138671875, "learning_rate": 1.9070311836230855e-06, "loss": 25.0312, "step": 4945 }, { "epoch": 0.32848508999136616, "grad_norm": 229.7521209716797, "learning_rate": 1.9069858921788624e-06, "loss": 21.75, "step": 4946 }, { "epoch": 0.3285515042837219, "grad_norm": 512.5308227539062, "learning_rate": 1.9069405902431497e-06, "loss": 19.8281, "step": 4947 }, { "epoch": 0.3286179185760776, "grad_norm": 245.5117950439453, "learning_rate": 1.9068952778164712e-06, "loss": 18.6094, "step": 4948 }, { "epoch": 0.3286843328684333, "grad_norm": 293.9793701171875, "learning_rate": 1.906849954899351e-06, "loss": 18.9531, "step": 4949 }, { "epoch": 0.328750747160789, "grad_norm": 259.74420166015625, "learning_rate": 1.9068046214923134e-06, "loss": 19.0, "step": 4950 }, { "epoch": 0.3288171614531447, "grad_norm": 345.2640380859375, "learning_rate": 1.9067592775958831e-06, "loss": 21.5469, "step": 4951 }, { "epoch": 0.3288835757455004, "grad_norm": 309.8458251953125, "learning_rate": 1.9067139232105841e-06, "loss": 20.4531, "step": 4952 }, { "epoch": 0.32894999003785613, "grad_norm": 159.14080810546875, "learning_rate": 1.9066685583369412e-06, "loss": 22.7656, "step": 4953 }, { "epoch": 0.32901640433021184, "grad_norm": 148.79518127441406, "learning_rate": 1.9066231829754797e-06, "loss": 14.8281, "step": 4954 }, { "epoch": 0.32908281862256755, "grad_norm": 522.01513671875, "learning_rate": 1.9065777971267237e-06, "loss": 25.5, "step": 4955 }, { "epoch": 0.3291492329149233, "grad_norm": 399.9921875, "learning_rate": 1.9065324007911983e-06, "loss": 23.2812, "step": 4956 }, { "epoch": 0.329215647207279, "grad_norm": 157.01344299316406, "learning_rate": 1.906486993969429e-06, "loss": 16.8125, "step": 4957 }, { "epoch": 0.32928206149963474, "grad_norm": 134.80332946777344, "learning_rate": 1.9064415766619411e-06, "loss": 14.6406, "step": 4958 }, { "epoch": 0.32934847579199045, "grad_norm": 182.08047485351562, "learning_rate": 1.9063961488692594e-06, "loss": 21.3281, "step": 4959 }, { "epoch": 0.32941489008434616, "grad_norm": 368.33538818359375, "learning_rate": 1.9063507105919098e-06, "loss": 14.2656, "step": 4960 }, { "epoch": 0.32948130437670187, "grad_norm": 509.9245910644531, "learning_rate": 1.9063052618304178e-06, "loss": 18.1875, "step": 4961 }, { "epoch": 0.3295477186690576, "grad_norm": 198.96249389648438, "learning_rate": 1.9062598025853092e-06, "loss": 16.5938, "step": 4962 }, { "epoch": 0.3296141329614133, "grad_norm": 183.99185180664062, "learning_rate": 1.9062143328571096e-06, "loss": 13.6094, "step": 4963 }, { "epoch": 0.329680547253769, "grad_norm": 294.3269348144531, "learning_rate": 1.9061688526463456e-06, "loss": 26.6094, "step": 4964 }, { "epoch": 0.3297469615461247, "grad_norm": 146.5398712158203, "learning_rate": 1.9061233619535427e-06, "loss": 18.9062, "step": 4965 }, { "epoch": 0.3298133758384804, "grad_norm": 308.6536560058594, "learning_rate": 1.9060778607792267e-06, "loss": 14.8594, "step": 4966 }, { "epoch": 0.3298797901308362, "grad_norm": 427.0195007324219, "learning_rate": 1.906032349123925e-06, "loss": 23.7812, "step": 4967 }, { "epoch": 0.3299462044231919, "grad_norm": 227.77354431152344, "learning_rate": 1.9059868269881636e-06, "loss": 14.5625, "step": 4968 }, { "epoch": 0.3300126187155476, "grad_norm": 204.9041748046875, "learning_rate": 1.9059412943724687e-06, "loss": 18.1094, "step": 4969 }, { "epoch": 0.3300790330079033, "grad_norm": 195.7932891845703, "learning_rate": 1.9058957512773675e-06, "loss": 22.5625, "step": 4970 }, { "epoch": 0.330145447300259, "grad_norm": 353.415283203125, "learning_rate": 1.9058501977033864e-06, "loss": 23.0781, "step": 4971 }, { "epoch": 0.33021186159261473, "grad_norm": 179.8820037841797, "learning_rate": 1.9058046336510526e-06, "loss": 25.2188, "step": 4972 }, { "epoch": 0.33027827588497044, "grad_norm": 296.9322204589844, "learning_rate": 1.9057590591208933e-06, "loss": 20.5469, "step": 4973 }, { "epoch": 0.33034469017732615, "grad_norm": 401.08770751953125, "learning_rate": 1.9057134741134355e-06, "loss": 24.4375, "step": 4974 }, { "epoch": 0.33041110446968186, "grad_norm": 272.2723693847656, "learning_rate": 1.9056678786292063e-06, "loss": 17.9062, "step": 4975 }, { "epoch": 0.33047751876203757, "grad_norm": 410.9441223144531, "learning_rate": 1.9056222726687335e-06, "loss": 30.4062, "step": 4976 }, { "epoch": 0.3305439330543933, "grad_norm": 330.5665588378906, "learning_rate": 1.9055766562325445e-06, "loss": 24.8281, "step": 4977 }, { "epoch": 0.33061034734674905, "grad_norm": 157.1499786376953, "learning_rate": 1.9055310293211665e-06, "loss": 18.3438, "step": 4978 }, { "epoch": 0.33067676163910475, "grad_norm": 372.7688293457031, "learning_rate": 1.905485391935128e-06, "loss": 16.5625, "step": 4979 }, { "epoch": 0.33074317593146046, "grad_norm": 317.14959716796875, "learning_rate": 1.9054397440749569e-06, "loss": 26.875, "step": 4980 }, { "epoch": 0.3308095902238162, "grad_norm": 196.23509216308594, "learning_rate": 1.9053940857411805e-06, "loss": 20.4062, "step": 4981 }, { "epoch": 0.3308760045161719, "grad_norm": 265.86279296875, "learning_rate": 1.9053484169343276e-06, "loss": 13.5156, "step": 4982 }, { "epoch": 0.3309424188085276, "grad_norm": 474.18853759765625, "learning_rate": 1.9053027376549262e-06, "loss": 18.25, "step": 4983 }, { "epoch": 0.3310088331008833, "grad_norm": 183.2490234375, "learning_rate": 1.9052570479035048e-06, "loss": 18.2969, "step": 4984 }, { "epoch": 0.331075247393239, "grad_norm": 165.11366271972656, "learning_rate": 1.905211347680592e-06, "loss": 20.0, "step": 4985 }, { "epoch": 0.3311416616855947, "grad_norm": 334.2666931152344, "learning_rate": 1.9051656369867161e-06, "loss": 20.7812, "step": 4986 }, { "epoch": 0.33120807597795043, "grad_norm": 503.959716796875, "learning_rate": 1.9051199158224062e-06, "loss": 19.1406, "step": 4987 }, { "epoch": 0.33127449027030614, "grad_norm": 523.8827514648438, "learning_rate": 1.905074184188191e-06, "loss": 16.0625, "step": 4988 }, { "epoch": 0.3313409045626619, "grad_norm": 193.9732666015625, "learning_rate": 1.9050284420845996e-06, "loss": 15.875, "step": 4989 }, { "epoch": 0.3314073188550176, "grad_norm": 298.0467529296875, "learning_rate": 1.904982689512161e-06, "loss": 17.9844, "step": 4990 }, { "epoch": 0.33147373314737333, "grad_norm": 103.84493255615234, "learning_rate": 1.9049369264714042e-06, "loss": 19.0781, "step": 4991 }, { "epoch": 0.33154014743972904, "grad_norm": 152.6759033203125, "learning_rate": 1.9048911529628593e-06, "loss": 15.5469, "step": 4992 }, { "epoch": 0.33160656173208475, "grad_norm": 234.98439025878906, "learning_rate": 1.904845368987055e-06, "loss": 18.1562, "step": 4993 }, { "epoch": 0.33167297602444046, "grad_norm": 483.47625732421875, "learning_rate": 1.9047995745445214e-06, "loss": 20.875, "step": 4994 }, { "epoch": 0.33173939031679617, "grad_norm": 209.48805236816406, "learning_rate": 1.904753769635788e-06, "loss": 18.4688, "step": 4995 }, { "epoch": 0.3318058046091519, "grad_norm": 186.10086059570312, "learning_rate": 1.9047079542613846e-06, "loss": 17.3281, "step": 4996 }, { "epoch": 0.3318722189015076, "grad_norm": 232.5984649658203, "learning_rate": 1.9046621284218417e-06, "loss": 18.9062, "step": 4997 }, { "epoch": 0.3319386331938633, "grad_norm": 1125.8616943359375, "learning_rate": 1.9046162921176884e-06, "loss": 18.7812, "step": 4998 }, { "epoch": 0.332005047486219, "grad_norm": 163.97535705566406, "learning_rate": 1.904570445349456e-06, "loss": 19.5312, "step": 4999 }, { "epoch": 0.3320714617785748, "grad_norm": 243.82510375976562, "learning_rate": 1.9045245881176738e-06, "loss": 16.6562, "step": 5000 }, { "epoch": 0.3321378760709305, "grad_norm": 595.900634765625, "learning_rate": 1.904478720422873e-06, "loss": 22.25, "step": 5001 }, { "epoch": 0.3322042903632862, "grad_norm": 215.06517028808594, "learning_rate": 1.9044328422655838e-06, "loss": 16.6875, "step": 5002 }, { "epoch": 0.3322707046556419, "grad_norm": 247.02957153320312, "learning_rate": 1.904386953646337e-06, "loss": 16.3125, "step": 5003 }, { "epoch": 0.3323371189479976, "grad_norm": 194.35421752929688, "learning_rate": 1.9043410545656638e-06, "loss": 21.25, "step": 5004 }, { "epoch": 0.3324035332403533, "grad_norm": 245.18687438964844, "learning_rate": 1.9042951450240943e-06, "loss": 22.8125, "step": 5005 }, { "epoch": 0.33246994753270903, "grad_norm": 150.2875213623047, "learning_rate": 1.9042492250221601e-06, "loss": 15.4844, "step": 5006 }, { "epoch": 0.33253636182506474, "grad_norm": 211.74630737304688, "learning_rate": 1.9042032945603923e-06, "loss": 21.1094, "step": 5007 }, { "epoch": 0.33260277611742045, "grad_norm": 327.9364318847656, "learning_rate": 1.9041573536393224e-06, "loss": 18.3125, "step": 5008 }, { "epoch": 0.33266919040977616, "grad_norm": 162.57025146484375, "learning_rate": 1.9041114022594815e-06, "loss": 18.6406, "step": 5009 }, { "epoch": 0.3327356047021319, "grad_norm": 203.23150634765625, "learning_rate": 1.9040654404214011e-06, "loss": 22.3281, "step": 5010 }, { "epoch": 0.33280201899448764, "grad_norm": 168.6050262451172, "learning_rate": 1.9040194681256133e-06, "loss": 18.3438, "step": 5011 }, { "epoch": 0.33286843328684335, "grad_norm": 128.6827850341797, "learning_rate": 1.9039734853726491e-06, "loss": 17.0469, "step": 5012 }, { "epoch": 0.33293484757919906, "grad_norm": 409.35260009765625, "learning_rate": 1.9039274921630412e-06, "loss": 19.5, "step": 5013 }, { "epoch": 0.33300126187155477, "grad_norm": 143.78733825683594, "learning_rate": 1.9038814884973213e-06, "loss": 17.5625, "step": 5014 }, { "epoch": 0.3330676761639105, "grad_norm": 141.83807373046875, "learning_rate": 1.9038354743760215e-06, "loss": 18.6562, "step": 5015 }, { "epoch": 0.3331340904562662, "grad_norm": 303.70458984375, "learning_rate": 1.9037894497996741e-06, "loss": 16.5781, "step": 5016 }, { "epoch": 0.3332005047486219, "grad_norm": 287.1881408691406, "learning_rate": 1.9037434147688112e-06, "loss": 25.8281, "step": 5017 }, { "epoch": 0.3332669190409776, "grad_norm": 145.9509735107422, "learning_rate": 1.9036973692839663e-06, "loss": 13.5781, "step": 5018 }, { "epoch": 0.3333333333333333, "grad_norm": 142.51568603515625, "learning_rate": 1.9036513133456707e-06, "loss": 21.1875, "step": 5019 }, { "epoch": 0.333399747625689, "grad_norm": 1318.3681640625, "learning_rate": 1.903605246954458e-06, "loss": 16.4922, "step": 5020 }, { "epoch": 0.3334661619180448, "grad_norm": 193.8049774169922, "learning_rate": 1.903559170110861e-06, "loss": 15.7812, "step": 5021 }, { "epoch": 0.3335325762104005, "grad_norm": 128.80738830566406, "learning_rate": 1.9035130828154125e-06, "loss": 12.4062, "step": 5022 }, { "epoch": 0.3335989905027562, "grad_norm": 179.702880859375, "learning_rate": 1.9034669850686453e-06, "loss": 15.4062, "step": 5023 }, { "epoch": 0.3336654047951119, "grad_norm": 608.0245971679688, "learning_rate": 1.9034208768710933e-06, "loss": 36.3125, "step": 5024 }, { "epoch": 0.33373181908746763, "grad_norm": 1393.529541015625, "learning_rate": 1.9033747582232895e-06, "loss": 23.4531, "step": 5025 }, { "epoch": 0.33379823337982334, "grad_norm": 245.36207580566406, "learning_rate": 1.9033286291257675e-06, "loss": 15.8438, "step": 5026 }, { "epoch": 0.33386464767217905, "grad_norm": 213.7081298828125, "learning_rate": 1.9032824895790607e-06, "loss": 21.6719, "step": 5027 }, { "epoch": 0.33393106196453476, "grad_norm": 221.01739501953125, "learning_rate": 1.903236339583703e-06, "loss": 21.4688, "step": 5028 }, { "epoch": 0.33399747625689047, "grad_norm": 275.5252685546875, "learning_rate": 1.9031901791402279e-06, "loss": 19.3594, "step": 5029 }, { "epoch": 0.3340638905492462, "grad_norm": 143.8323516845703, "learning_rate": 1.9031440082491698e-06, "loss": 22.0781, "step": 5030 }, { "epoch": 0.3341303048416019, "grad_norm": 324.3638610839844, "learning_rate": 1.9030978269110627e-06, "loss": 16.0, "step": 5031 }, { "epoch": 0.33419671913395765, "grad_norm": 345.6374206542969, "learning_rate": 1.9030516351264407e-06, "loss": 22.7656, "step": 5032 }, { "epoch": 0.33426313342631336, "grad_norm": 183.15505981445312, "learning_rate": 1.903005432895838e-06, "loss": 16.4375, "step": 5033 }, { "epoch": 0.3343295477186691, "grad_norm": 249.8694610595703, "learning_rate": 1.902959220219789e-06, "loss": 15.9844, "step": 5034 }, { "epoch": 0.3343959620110248, "grad_norm": 148.13365173339844, "learning_rate": 1.9029129970988287e-06, "loss": 17.5859, "step": 5035 }, { "epoch": 0.3344623763033805, "grad_norm": 169.41358947753906, "learning_rate": 1.9028667635334915e-06, "loss": 19.6406, "step": 5036 }, { "epoch": 0.3345287905957362, "grad_norm": 176.3766632080078, "learning_rate": 1.9028205195243121e-06, "loss": 23.3438, "step": 5037 }, { "epoch": 0.3345952048880919, "grad_norm": 310.24603271484375, "learning_rate": 1.9027742650718252e-06, "loss": 19.2656, "step": 5038 }, { "epoch": 0.3346616191804476, "grad_norm": 233.17376708984375, "learning_rate": 1.9027280001765667e-06, "loss": 27.0469, "step": 5039 }, { "epoch": 0.33472803347280333, "grad_norm": 333.0440368652344, "learning_rate": 1.902681724839071e-06, "loss": 17.5781, "step": 5040 }, { "epoch": 0.33479444776515904, "grad_norm": 299.42657470703125, "learning_rate": 1.9026354390598737e-06, "loss": 28.5938, "step": 5041 }, { "epoch": 0.33486086205751475, "grad_norm": 171.66038513183594, "learning_rate": 1.90258914283951e-06, "loss": 16.4062, "step": 5042 }, { "epoch": 0.3349272763498705, "grad_norm": 154.34498596191406, "learning_rate": 1.9025428361785155e-06, "loss": 15.8281, "step": 5043 }, { "epoch": 0.3349936906422262, "grad_norm": 231.9405975341797, "learning_rate": 1.902496519077426e-06, "loss": 16.5781, "step": 5044 }, { "epoch": 0.33506010493458194, "grad_norm": 247.61968994140625, "learning_rate": 1.9024501915367775e-06, "loss": 19.1406, "step": 5045 }, { "epoch": 0.33512651922693765, "grad_norm": 280.1313781738281, "learning_rate": 1.902403853557105e-06, "loss": 19.375, "step": 5046 }, { "epoch": 0.33519293351929336, "grad_norm": 377.5955505371094, "learning_rate": 1.9023575051389455e-06, "loss": 24.9062, "step": 5047 }, { "epoch": 0.33525934781164907, "grad_norm": 180.21336364746094, "learning_rate": 1.9023111462828346e-06, "loss": 18.3438, "step": 5048 }, { "epoch": 0.3353257621040048, "grad_norm": 430.83367919921875, "learning_rate": 1.9022647769893085e-06, "loss": 21.9219, "step": 5049 }, { "epoch": 0.3353921763963605, "grad_norm": 375.26312255859375, "learning_rate": 1.902218397258904e-06, "loss": 17.9219, "step": 5050 }, { "epoch": 0.3354585906887162, "grad_norm": 151.02499389648438, "learning_rate": 1.9021720070921572e-06, "loss": 14.7812, "step": 5051 }, { "epoch": 0.3355250049810719, "grad_norm": 197.27175903320312, "learning_rate": 1.902125606489605e-06, "loss": 17.7344, "step": 5052 }, { "epoch": 0.3355914192734276, "grad_norm": 190.14321899414062, "learning_rate": 1.9020791954517836e-06, "loss": 18.9062, "step": 5053 }, { "epoch": 0.3356578335657834, "grad_norm": 214.87570190429688, "learning_rate": 1.9020327739792306e-06, "loss": 19.4531, "step": 5054 }, { "epoch": 0.3357242478581391, "grad_norm": 167.4700164794922, "learning_rate": 1.9019863420724825e-06, "loss": 20.3125, "step": 5055 }, { "epoch": 0.3357906621504948, "grad_norm": 291.4346618652344, "learning_rate": 1.9019398997320763e-06, "loss": 21.0156, "step": 5056 }, { "epoch": 0.3358570764428505, "grad_norm": 243.88848876953125, "learning_rate": 1.90189344695855e-06, "loss": 27.5391, "step": 5057 }, { "epoch": 0.3359234907352062, "grad_norm": 405.5487060546875, "learning_rate": 1.9018469837524397e-06, "loss": 23.7188, "step": 5058 }, { "epoch": 0.33598990502756193, "grad_norm": 297.0545959472656, "learning_rate": 1.9018005101142837e-06, "loss": 15.3438, "step": 5059 }, { "epoch": 0.33605631931991764, "grad_norm": 281.85711669921875, "learning_rate": 1.9017540260446196e-06, "loss": 15.1406, "step": 5060 }, { "epoch": 0.33612273361227335, "grad_norm": 151.0530548095703, "learning_rate": 1.9017075315439847e-06, "loss": 21.1562, "step": 5061 }, { "epoch": 0.33618914790462906, "grad_norm": 255.79916381835938, "learning_rate": 1.9016610266129173e-06, "loss": 16.625, "step": 5062 }, { "epoch": 0.33625556219698477, "grad_norm": 224.61228942871094, "learning_rate": 1.9016145112519548e-06, "loss": 14.2188, "step": 5063 }, { "epoch": 0.3363219764893405, "grad_norm": 189.6581573486328, "learning_rate": 1.9015679854616359e-06, "loss": 34.3906, "step": 5064 }, { "epoch": 0.33638839078169624, "grad_norm": 1194.5936279296875, "learning_rate": 1.901521449242498e-06, "loss": 21.2812, "step": 5065 }, { "epoch": 0.33645480507405195, "grad_norm": 263.6720886230469, "learning_rate": 1.9014749025950802e-06, "loss": 23.7344, "step": 5066 }, { "epoch": 0.33652121936640766, "grad_norm": 252.31761169433594, "learning_rate": 1.9014283455199202e-06, "loss": 18.6562, "step": 5067 }, { "epoch": 0.3365876336587634, "grad_norm": 213.29490661621094, "learning_rate": 1.9013817780175572e-06, "loss": 17.2969, "step": 5068 }, { "epoch": 0.3366540479511191, "grad_norm": 239.89559936523438, "learning_rate": 1.9013352000885292e-06, "loss": 19.9375, "step": 5069 }, { "epoch": 0.3367204622434748, "grad_norm": 200.50408935546875, "learning_rate": 1.9012886117333755e-06, "loss": 21.8906, "step": 5070 }, { "epoch": 0.3367868765358305, "grad_norm": 428.399169921875, "learning_rate": 1.901242012952635e-06, "loss": 23.2656, "step": 5071 }, { "epoch": 0.3368532908281862, "grad_norm": 185.30355834960938, "learning_rate": 1.9011954037468466e-06, "loss": 18.7969, "step": 5072 }, { "epoch": 0.3369197051205419, "grad_norm": 289.5688781738281, "learning_rate": 1.901148784116549e-06, "loss": 17.9844, "step": 5073 }, { "epoch": 0.33698611941289763, "grad_norm": 347.0054626464844, "learning_rate": 1.9011021540622822e-06, "loss": 20.7812, "step": 5074 }, { "epoch": 0.33705253370525334, "grad_norm": 356.58477783203125, "learning_rate": 1.9010555135845853e-06, "loss": 21.0312, "step": 5075 }, { "epoch": 0.3371189479976091, "grad_norm": 482.9906311035156, "learning_rate": 1.901008862683998e-06, "loss": 16.6719, "step": 5076 }, { "epoch": 0.3371853622899648, "grad_norm": 206.965087890625, "learning_rate": 1.9009622013610595e-06, "loss": 18.6406, "step": 5077 }, { "epoch": 0.33725177658232053, "grad_norm": 215.03451538085938, "learning_rate": 1.9009155296163098e-06, "loss": 23.3438, "step": 5078 }, { "epoch": 0.33731819087467624, "grad_norm": 298.402099609375, "learning_rate": 1.9008688474502887e-06, "loss": 21.4531, "step": 5079 }, { "epoch": 0.33738460516703195, "grad_norm": 253.48207092285156, "learning_rate": 1.9008221548635363e-06, "loss": 24.8438, "step": 5080 }, { "epoch": 0.33745101945938766, "grad_norm": 202.08078002929688, "learning_rate": 1.9007754518565926e-06, "loss": 25.5625, "step": 5081 }, { "epoch": 0.33751743375174337, "grad_norm": 270.42626953125, "learning_rate": 1.9007287384299979e-06, "loss": 15.5, "step": 5082 }, { "epoch": 0.3375838480440991, "grad_norm": 1481.425048828125, "learning_rate": 1.9006820145842925e-06, "loss": 26.9531, "step": 5083 }, { "epoch": 0.3376502623364548, "grad_norm": 220.4654541015625, "learning_rate": 1.9006352803200172e-06, "loss": 19.25, "step": 5084 }, { "epoch": 0.3377166766288105, "grad_norm": 582.1527099609375, "learning_rate": 1.9005885356377121e-06, "loss": 21.75, "step": 5085 }, { "epoch": 0.33778309092116626, "grad_norm": 243.38876342773438, "learning_rate": 1.900541780537918e-06, "loss": 21.0156, "step": 5086 }, { "epoch": 0.337849505213522, "grad_norm": 311.7442321777344, "learning_rate": 1.9004950150211761e-06, "loss": 19.7812, "step": 5087 }, { "epoch": 0.3379159195058777, "grad_norm": 528.296630859375, "learning_rate": 1.900448239088027e-06, "loss": 20.9688, "step": 5088 }, { "epoch": 0.3379823337982334, "grad_norm": 154.8186492919922, "learning_rate": 1.900401452739012e-06, "loss": 17.6406, "step": 5089 }, { "epoch": 0.3380487480905891, "grad_norm": 234.3552703857422, "learning_rate": 1.900354655974672e-06, "loss": 24.4062, "step": 5090 }, { "epoch": 0.3381151623829448, "grad_norm": 138.9029998779297, "learning_rate": 1.9003078487955485e-06, "loss": 15.3125, "step": 5091 }, { "epoch": 0.3381815766753005, "grad_norm": 280.88970947265625, "learning_rate": 1.9002610312021832e-06, "loss": 17.9062, "step": 5092 }, { "epoch": 0.33824799096765623, "grad_norm": 301.6233825683594, "learning_rate": 1.9002142031951172e-06, "loss": 19.4062, "step": 5093 }, { "epoch": 0.33831440526001194, "grad_norm": 134.2750244140625, "learning_rate": 1.9001673647748927e-06, "loss": 20.4219, "step": 5094 }, { "epoch": 0.33838081955236765, "grad_norm": 343.37481689453125, "learning_rate": 1.900120515942051e-06, "loss": 34.2812, "step": 5095 }, { "epoch": 0.33844723384472336, "grad_norm": 309.0272216796875, "learning_rate": 1.9000736566971341e-06, "loss": 19.8125, "step": 5096 }, { "epoch": 0.3385136481370791, "grad_norm": 936.8888549804688, "learning_rate": 1.9000267870406841e-06, "loss": 21.5938, "step": 5097 }, { "epoch": 0.33858006242943484, "grad_norm": 149.2958221435547, "learning_rate": 1.8999799069732436e-06, "loss": 18.3906, "step": 5098 }, { "epoch": 0.33864647672179055, "grad_norm": 193.92054748535156, "learning_rate": 1.8999330164953541e-06, "loss": 13.9062, "step": 5099 }, { "epoch": 0.33871289101414626, "grad_norm": 443.540771484375, "learning_rate": 1.8998861156075585e-06, "loss": 24.0938, "step": 5100 }, { "epoch": 0.33877930530650197, "grad_norm": 291.7806701660156, "learning_rate": 1.8998392043103992e-06, "loss": 18.4531, "step": 5101 }, { "epoch": 0.3388457195988577, "grad_norm": 195.11158752441406, "learning_rate": 1.899792282604419e-06, "loss": 18.1094, "step": 5102 }, { "epoch": 0.3389121338912134, "grad_norm": 139.29017639160156, "learning_rate": 1.8997453504901606e-06, "loss": 20.3594, "step": 5103 }, { "epoch": 0.3389785481835691, "grad_norm": 188.79417419433594, "learning_rate": 1.8996984079681665e-06, "loss": 18.0156, "step": 5104 }, { "epoch": 0.3390449624759248, "grad_norm": 712.6280517578125, "learning_rate": 1.8996514550389802e-06, "loss": 27.0781, "step": 5105 }, { "epoch": 0.3391113767682805, "grad_norm": 136.60690307617188, "learning_rate": 1.8996044917031446e-06, "loss": 19.2031, "step": 5106 }, { "epoch": 0.3391777910606362, "grad_norm": 169.33934020996094, "learning_rate": 1.899557517961203e-06, "loss": 20.375, "step": 5107 }, { "epoch": 0.339244205352992, "grad_norm": 291.47637939453125, "learning_rate": 1.8995105338136988e-06, "loss": 24.9375, "step": 5108 }, { "epoch": 0.3393106196453477, "grad_norm": 311.8502502441406, "learning_rate": 1.8994635392611754e-06, "loss": 14.3281, "step": 5109 }, { "epoch": 0.3393770339377034, "grad_norm": 361.0697326660156, "learning_rate": 1.8994165343041763e-06, "loss": 27.3125, "step": 5110 }, { "epoch": 0.3394434482300591, "grad_norm": 483.21990966796875, "learning_rate": 1.8993695189432455e-06, "loss": 24.4375, "step": 5111 }, { "epoch": 0.33950986252241483, "grad_norm": 158.5640869140625, "learning_rate": 1.8993224931789267e-06, "loss": 17.7188, "step": 5112 }, { "epoch": 0.33957627681477054, "grad_norm": 185.01715087890625, "learning_rate": 1.8992754570117638e-06, "loss": 17.1562, "step": 5113 }, { "epoch": 0.33964269110712625, "grad_norm": 272.4220275878906, "learning_rate": 1.8992284104423008e-06, "loss": 22.25, "step": 5114 }, { "epoch": 0.33970910539948196, "grad_norm": 181.23414611816406, "learning_rate": 1.8991813534710826e-06, "loss": 18.0, "step": 5115 }, { "epoch": 0.33977551969183767, "grad_norm": 151.05661010742188, "learning_rate": 1.8991342860986525e-06, "loss": 21.0625, "step": 5116 }, { "epoch": 0.3398419339841934, "grad_norm": 142.41571044921875, "learning_rate": 1.8990872083255556e-06, "loss": 18.4062, "step": 5117 }, { "epoch": 0.3399083482765491, "grad_norm": 170.9159698486328, "learning_rate": 1.8990401201523365e-06, "loss": 16.4375, "step": 5118 }, { "epoch": 0.33997476256890485, "grad_norm": 161.13653564453125, "learning_rate": 1.8989930215795395e-06, "loss": 21.7031, "step": 5119 }, { "epoch": 0.34004117686126056, "grad_norm": 431.56884765625, "learning_rate": 1.8989459126077095e-06, "loss": 20.9531, "step": 5120 }, { "epoch": 0.3401075911536163, "grad_norm": 376.9411315917969, "learning_rate": 1.8988987932373921e-06, "loss": 18.3906, "step": 5121 }, { "epoch": 0.340174005445972, "grad_norm": 304.2427978515625, "learning_rate": 1.8988516634691311e-06, "loss": 18.0312, "step": 5122 }, { "epoch": 0.3402404197383277, "grad_norm": 210.04893493652344, "learning_rate": 1.8988045233034728e-06, "loss": 15.9375, "step": 5123 }, { "epoch": 0.3403068340306834, "grad_norm": 255.05091857910156, "learning_rate": 1.898757372740962e-06, "loss": 19.0781, "step": 5124 }, { "epoch": 0.3403732483230391, "grad_norm": 181.0811004638672, "learning_rate": 1.898710211782144e-06, "loss": 16.9688, "step": 5125 }, { "epoch": 0.3404396626153948, "grad_norm": 189.54103088378906, "learning_rate": 1.8986630404275648e-06, "loss": 26.1562, "step": 5126 }, { "epoch": 0.34050607690775053, "grad_norm": 670.0617065429688, "learning_rate": 1.8986158586777696e-06, "loss": 19.7188, "step": 5127 }, { "epoch": 0.34057249120010624, "grad_norm": 194.67518615722656, "learning_rate": 1.898568666533304e-06, "loss": 17.375, "step": 5128 }, { "epoch": 0.34063890549246195, "grad_norm": 237.61463928222656, "learning_rate": 1.8985214639947148e-06, "loss": 17.1562, "step": 5129 }, { "epoch": 0.3407053197848177, "grad_norm": 360.5954284667969, "learning_rate": 1.8984742510625468e-06, "loss": 18.2031, "step": 5130 }, { "epoch": 0.3407717340771734, "grad_norm": 205.32974243164062, "learning_rate": 1.8984270277373473e-06, "loss": 20.5625, "step": 5131 }, { "epoch": 0.34083814836952914, "grad_norm": 202.47329711914062, "learning_rate": 1.8983797940196618e-06, "loss": 19.3125, "step": 5132 }, { "epoch": 0.34090456266188485, "grad_norm": 780.2909545898438, "learning_rate": 1.8983325499100366e-06, "loss": 16.0469, "step": 5133 }, { "epoch": 0.34097097695424056, "grad_norm": 223.79074096679688, "learning_rate": 1.8982852954090189e-06, "loss": 19.0078, "step": 5134 }, { "epoch": 0.34103739124659627, "grad_norm": 162.81455993652344, "learning_rate": 1.8982380305171547e-06, "loss": 16.0, "step": 5135 }, { "epoch": 0.341103805538952, "grad_norm": 243.82601928710938, "learning_rate": 1.898190755234991e-06, "loss": 24.375, "step": 5136 }, { "epoch": 0.3411702198313077, "grad_norm": 254.68238830566406, "learning_rate": 1.8981434695630742e-06, "loss": 16.8125, "step": 5137 }, { "epoch": 0.3412366341236634, "grad_norm": 176.01344299316406, "learning_rate": 1.898096173501952e-06, "loss": 17.9062, "step": 5138 }, { "epoch": 0.3413030484160191, "grad_norm": 360.3665771484375, "learning_rate": 1.898048867052171e-06, "loss": 17.3281, "step": 5139 }, { "epoch": 0.3413694627083748, "grad_norm": 411.78997802734375, "learning_rate": 1.8980015502142788e-06, "loss": 26.3438, "step": 5140 }, { "epoch": 0.3414358770007306, "grad_norm": 2015.6365966796875, "learning_rate": 1.8979542229888223e-06, "loss": 20.4062, "step": 5141 }, { "epoch": 0.3415022912930863, "grad_norm": 247.03805541992188, "learning_rate": 1.897906885376349e-06, "loss": 14.2344, "step": 5142 }, { "epoch": 0.341568705585442, "grad_norm": 398.0884094238281, "learning_rate": 1.897859537377407e-06, "loss": 11.125, "step": 5143 }, { "epoch": 0.3416351198777977, "grad_norm": 135.1695098876953, "learning_rate": 1.897812178992543e-06, "loss": 18.3594, "step": 5144 }, { "epoch": 0.3417015341701534, "grad_norm": 177.17088317871094, "learning_rate": 1.8977648102223058e-06, "loss": 18.2344, "step": 5145 }, { "epoch": 0.34176794846250913, "grad_norm": 316.38079833984375, "learning_rate": 1.897717431067243e-06, "loss": 22.5469, "step": 5146 }, { "epoch": 0.34183436275486484, "grad_norm": 176.78025817871094, "learning_rate": 1.8976700415279026e-06, "loss": 18.3281, "step": 5147 }, { "epoch": 0.34190077704722055, "grad_norm": 1291.8668212890625, "learning_rate": 1.8976226416048327e-06, "loss": 23.3125, "step": 5148 }, { "epoch": 0.34196719133957626, "grad_norm": 175.19786071777344, "learning_rate": 1.8975752312985817e-06, "loss": 16.0938, "step": 5149 }, { "epoch": 0.34203360563193197, "grad_norm": 162.1467742919922, "learning_rate": 1.897527810609698e-06, "loss": 23.9688, "step": 5150 }, { "epoch": 0.3421000199242877, "grad_norm": 163.42874145507812, "learning_rate": 1.8974803795387302e-06, "loss": 18.6406, "step": 5151 }, { "epoch": 0.34216643421664344, "grad_norm": 172.73255920410156, "learning_rate": 1.8974329380862266e-06, "loss": 17.8125, "step": 5152 }, { "epoch": 0.34223284850899915, "grad_norm": 212.24539184570312, "learning_rate": 1.8973854862527365e-06, "loss": 17.4531, "step": 5153 }, { "epoch": 0.34229926280135486, "grad_norm": 306.5047912597656, "learning_rate": 1.8973380240388086e-06, "loss": 23.1719, "step": 5154 }, { "epoch": 0.3423656770937106, "grad_norm": 424.7279052734375, "learning_rate": 1.8972905514449919e-06, "loss": 21.0625, "step": 5155 }, { "epoch": 0.3424320913860663, "grad_norm": 340.2978515625, "learning_rate": 1.8972430684718354e-06, "loss": 25.625, "step": 5156 }, { "epoch": 0.342498505678422, "grad_norm": 330.4277038574219, "learning_rate": 1.8971955751198886e-06, "loss": 15.9219, "step": 5157 }, { "epoch": 0.3425649199707777, "grad_norm": 256.6654052734375, "learning_rate": 1.8971480713897006e-06, "loss": 18.1719, "step": 5158 }, { "epoch": 0.3426313342631334, "grad_norm": 146.12135314941406, "learning_rate": 1.897100557281821e-06, "loss": 15.375, "step": 5159 }, { "epoch": 0.3426977485554891, "grad_norm": 358.1485290527344, "learning_rate": 1.8970530327967995e-06, "loss": 24.3281, "step": 5160 }, { "epoch": 0.34276416284784483, "grad_norm": 452.593505859375, "learning_rate": 1.897005497935186e-06, "loss": 21.5938, "step": 5161 }, { "epoch": 0.3428305771402006, "grad_norm": 198.82632446289062, "learning_rate": 1.8969579526975297e-06, "loss": 17.5781, "step": 5162 }, { "epoch": 0.3428969914325563, "grad_norm": 191.56826782226562, "learning_rate": 1.8969103970843813e-06, "loss": 19.625, "step": 5163 }, { "epoch": 0.342963405724912, "grad_norm": 144.2544708251953, "learning_rate": 1.8968628310962904e-06, "loss": 18.2969, "step": 5164 }, { "epoch": 0.34302982001726773, "grad_norm": 202.7700958251953, "learning_rate": 1.8968152547338078e-06, "loss": 14.4375, "step": 5165 }, { "epoch": 0.34309623430962344, "grad_norm": 214.4324188232422, "learning_rate": 1.896767667997483e-06, "loss": 20.25, "step": 5166 }, { "epoch": 0.34316264860197915, "grad_norm": 427.5494079589844, "learning_rate": 1.8967200708878675e-06, "loss": 26.5938, "step": 5167 }, { "epoch": 0.34322906289433486, "grad_norm": 272.24554443359375, "learning_rate": 1.8966724634055108e-06, "loss": 17.0234, "step": 5168 }, { "epoch": 0.34329547718669057, "grad_norm": 290.54931640625, "learning_rate": 1.8966248455509644e-06, "loss": 18.5, "step": 5169 }, { "epoch": 0.3433618914790463, "grad_norm": 188.17991638183594, "learning_rate": 1.8965772173247787e-06, "loss": 18.5781, "step": 5170 }, { "epoch": 0.343428305771402, "grad_norm": 210.57786560058594, "learning_rate": 1.8965295787275046e-06, "loss": 18.6562, "step": 5171 }, { "epoch": 0.3434947200637577, "grad_norm": 194.83590698242188, "learning_rate": 1.8964819297596936e-06, "loss": 13.8125, "step": 5172 }, { "epoch": 0.34356113435611346, "grad_norm": 827.25439453125, "learning_rate": 1.8964342704218965e-06, "loss": 19.4062, "step": 5173 }, { "epoch": 0.34362754864846917, "grad_norm": 303.0469055175781, "learning_rate": 1.8963866007146646e-06, "loss": 20.2969, "step": 5174 }, { "epoch": 0.3436939629408249, "grad_norm": 248.71939086914062, "learning_rate": 1.8963389206385494e-06, "loss": 27.8281, "step": 5175 }, { "epoch": 0.3437603772331806, "grad_norm": 189.59336853027344, "learning_rate": 1.8962912301941024e-06, "loss": 19.8906, "step": 5176 }, { "epoch": 0.3438267915255363, "grad_norm": 266.0353698730469, "learning_rate": 1.8962435293818753e-06, "loss": 21.1562, "step": 5177 }, { "epoch": 0.343893205817892, "grad_norm": 357.052490234375, "learning_rate": 1.8961958182024197e-06, "loss": 20.8438, "step": 5178 }, { "epoch": 0.3439596201102477, "grad_norm": 475.0566101074219, "learning_rate": 1.896148096656288e-06, "loss": 17.5312, "step": 5179 }, { "epoch": 0.34402603440260343, "grad_norm": 213.99732971191406, "learning_rate": 1.8961003647440314e-06, "loss": 20.375, "step": 5180 }, { "epoch": 0.34409244869495914, "grad_norm": 249.47735595703125, "learning_rate": 1.896052622466203e-06, "loss": 20.5625, "step": 5181 }, { "epoch": 0.34415886298731485, "grad_norm": 397.11236572265625, "learning_rate": 1.8960048698233543e-06, "loss": 16.125, "step": 5182 }, { "epoch": 0.34422527727967056, "grad_norm": 173.79664611816406, "learning_rate": 1.8959571068160379e-06, "loss": 18.8906, "step": 5183 }, { "epoch": 0.3442916915720263, "grad_norm": 1977.263916015625, "learning_rate": 1.8959093334448064e-06, "loss": 17.5625, "step": 5184 }, { "epoch": 0.34435810586438204, "grad_norm": 206.29310607910156, "learning_rate": 1.8958615497102124e-06, "loss": 17.7969, "step": 5185 }, { "epoch": 0.34442452015673775, "grad_norm": 202.75466918945312, "learning_rate": 1.8958137556128084e-06, "loss": 25.2188, "step": 5186 }, { "epoch": 0.34449093444909346, "grad_norm": 221.06893920898438, "learning_rate": 1.8957659511531475e-06, "loss": 15.7812, "step": 5187 }, { "epoch": 0.34455734874144917, "grad_norm": 172.47007751464844, "learning_rate": 1.8957181363317826e-06, "loss": 20.625, "step": 5188 }, { "epoch": 0.3446237630338049, "grad_norm": 430.2976379394531, "learning_rate": 1.895670311149267e-06, "loss": 20.0938, "step": 5189 }, { "epoch": 0.3446901773261606, "grad_norm": 291.6751708984375, "learning_rate": 1.8956224756061533e-06, "loss": 17.0469, "step": 5190 }, { "epoch": 0.3447565916185163, "grad_norm": 319.97149658203125, "learning_rate": 1.8955746297029957e-06, "loss": 17.9219, "step": 5191 }, { "epoch": 0.344823005910872, "grad_norm": 332.00360107421875, "learning_rate": 1.8955267734403472e-06, "loss": 17.2969, "step": 5192 }, { "epoch": 0.3448894202032277, "grad_norm": 279.0722961425781, "learning_rate": 1.8954789068187611e-06, "loss": 19.9062, "step": 5193 }, { "epoch": 0.3449558344955834, "grad_norm": 245.96981811523438, "learning_rate": 1.8954310298387912e-06, "loss": 22.9688, "step": 5194 }, { "epoch": 0.3450222487879392, "grad_norm": 296.6825866699219, "learning_rate": 1.8953831425009918e-06, "loss": 23.1875, "step": 5195 }, { "epoch": 0.3450886630802949, "grad_norm": 211.81365966796875, "learning_rate": 1.8953352448059165e-06, "loss": 18.8281, "step": 5196 }, { "epoch": 0.3451550773726506, "grad_norm": 214.07371520996094, "learning_rate": 1.8952873367541193e-06, "loss": 17.5938, "step": 5197 }, { "epoch": 0.3452214916650063, "grad_norm": 197.72535705566406, "learning_rate": 1.8952394183461543e-06, "loss": 14.8906, "step": 5198 }, { "epoch": 0.34528790595736203, "grad_norm": 188.57505798339844, "learning_rate": 1.8951914895825762e-06, "loss": 16.6719, "step": 5199 }, { "epoch": 0.34535432024971774, "grad_norm": 141.6361083984375, "learning_rate": 1.895143550463939e-06, "loss": 23.6406, "step": 5200 }, { "epoch": 0.34542073454207345, "grad_norm": 1051.5081787109375, "learning_rate": 1.8950956009907975e-06, "loss": 18.7969, "step": 5201 }, { "epoch": 0.34548714883442916, "grad_norm": 122.99577331542969, "learning_rate": 1.895047641163706e-06, "loss": 11.625, "step": 5202 }, { "epoch": 0.34555356312678487, "grad_norm": 371.67657470703125, "learning_rate": 1.8949996709832197e-06, "loss": 22.3594, "step": 5203 }, { "epoch": 0.3456199774191406, "grad_norm": 226.17791748046875, "learning_rate": 1.8949516904498932e-06, "loss": 15.0312, "step": 5204 }, { "epoch": 0.3456863917114963, "grad_norm": 498.5865783691406, "learning_rate": 1.8949036995642817e-06, "loss": 22.8125, "step": 5205 }, { "epoch": 0.34575280600385205, "grad_norm": 248.96490478515625, "learning_rate": 1.8948556983269402e-06, "loss": 20.4531, "step": 5206 }, { "epoch": 0.34581922029620776, "grad_norm": 123.9862060546875, "learning_rate": 1.894807686738424e-06, "loss": 14.9062, "step": 5207 }, { "epoch": 0.3458856345885635, "grad_norm": 215.07542419433594, "learning_rate": 1.8947596647992883e-06, "loss": 22.1562, "step": 5208 }, { "epoch": 0.3459520488809192, "grad_norm": 482.1416931152344, "learning_rate": 1.8947116325100886e-06, "loss": 20.0469, "step": 5209 }, { "epoch": 0.3460184631732749, "grad_norm": 242.15890502929688, "learning_rate": 1.8946635898713811e-06, "loss": 13.7656, "step": 5210 }, { "epoch": 0.3460848774656306, "grad_norm": 169.68753051757812, "learning_rate": 1.894615536883721e-06, "loss": 22.8125, "step": 5211 }, { "epoch": 0.3461512917579863, "grad_norm": 188.85971069335938, "learning_rate": 1.894567473547664e-06, "loss": 18.4844, "step": 5212 }, { "epoch": 0.346217706050342, "grad_norm": 163.41139221191406, "learning_rate": 1.8945193998637665e-06, "loss": 23.2812, "step": 5213 }, { "epoch": 0.34628412034269773, "grad_norm": 306.08892822265625, "learning_rate": 1.8944713158325843e-06, "loss": 25.5312, "step": 5214 }, { "epoch": 0.34635053463505344, "grad_norm": 117.90058135986328, "learning_rate": 1.8944232214546738e-06, "loss": 17.8281, "step": 5215 }, { "epoch": 0.34641694892740915, "grad_norm": 206.4654998779297, "learning_rate": 1.894375116730591e-06, "loss": 19.0, "step": 5216 }, { "epoch": 0.3464833632197649, "grad_norm": 541.4750366210938, "learning_rate": 1.8943270016608928e-06, "loss": 20.7188, "step": 5217 }, { "epoch": 0.3465497775121206, "grad_norm": 443.55224609375, "learning_rate": 1.8942788762461355e-06, "loss": 22.2031, "step": 5218 }, { "epoch": 0.34661619180447634, "grad_norm": 424.7906799316406, "learning_rate": 1.8942307404868757e-06, "loss": 22.4062, "step": 5219 }, { "epoch": 0.34668260609683205, "grad_norm": 212.99522399902344, "learning_rate": 1.8941825943836705e-06, "loss": 15.0625, "step": 5220 }, { "epoch": 0.34674902038918776, "grad_norm": 160.63198852539062, "learning_rate": 1.8941344379370764e-06, "loss": 17.9688, "step": 5221 }, { "epoch": 0.34681543468154347, "grad_norm": 333.0956726074219, "learning_rate": 1.894086271147651e-06, "loss": 18.625, "step": 5222 }, { "epoch": 0.3468818489738992, "grad_norm": 178.76580810546875, "learning_rate": 1.894038094015951e-06, "loss": 21.0312, "step": 5223 }, { "epoch": 0.3469482632662549, "grad_norm": 382.5750732421875, "learning_rate": 1.8939899065425341e-06, "loss": 17.7969, "step": 5224 }, { "epoch": 0.3470146775586106, "grad_norm": 411.8917236328125, "learning_rate": 1.8939417087279574e-06, "loss": 17.0625, "step": 5225 }, { "epoch": 0.3470810918509663, "grad_norm": 517.2420654296875, "learning_rate": 1.8938935005727786e-06, "loss": 22.4062, "step": 5226 }, { "epoch": 0.347147506143322, "grad_norm": 212.0997772216797, "learning_rate": 1.8938452820775549e-06, "loss": 21.5625, "step": 5227 }, { "epoch": 0.3472139204356778, "grad_norm": 157.72805786132812, "learning_rate": 1.8937970532428445e-06, "loss": 17.2344, "step": 5228 }, { "epoch": 0.3472803347280335, "grad_norm": 295.4676818847656, "learning_rate": 1.8937488140692054e-06, "loss": 14.375, "step": 5229 }, { "epoch": 0.3473467490203892, "grad_norm": 148.5985870361328, "learning_rate": 1.8937005645571951e-06, "loss": 14.4688, "step": 5230 }, { "epoch": 0.3474131633127449, "grad_norm": 281.07135009765625, "learning_rate": 1.893652304707372e-06, "loss": 21.0781, "step": 5231 }, { "epoch": 0.3474795776051006, "grad_norm": 160.479248046875, "learning_rate": 1.8936040345202944e-06, "loss": 16.0625, "step": 5232 }, { "epoch": 0.34754599189745633, "grad_norm": 216.49180603027344, "learning_rate": 1.8935557539965208e-06, "loss": 26.5625, "step": 5233 }, { "epoch": 0.34761240618981204, "grad_norm": 338.9996643066406, "learning_rate": 1.8935074631366093e-06, "loss": 21.0625, "step": 5234 }, { "epoch": 0.34767882048216775, "grad_norm": 153.17269897460938, "learning_rate": 1.8934591619411186e-06, "loss": 17.6562, "step": 5235 }, { "epoch": 0.34774523477452346, "grad_norm": 363.87799072265625, "learning_rate": 1.8934108504106076e-06, "loss": 33.0938, "step": 5236 }, { "epoch": 0.34781164906687917, "grad_norm": 596.611083984375, "learning_rate": 1.893362528545635e-06, "loss": 25.8438, "step": 5237 }, { "epoch": 0.34787806335923493, "grad_norm": 411.7811279296875, "learning_rate": 1.8933141963467602e-06, "loss": 38.0312, "step": 5238 }, { "epoch": 0.34794447765159064, "grad_norm": 286.2321472167969, "learning_rate": 1.8932658538145417e-06, "loss": 21.7188, "step": 5239 }, { "epoch": 0.34801089194394635, "grad_norm": 224.59423828125, "learning_rate": 1.8932175009495386e-06, "loss": 20.7812, "step": 5240 }, { "epoch": 0.34807730623630206, "grad_norm": 189.69479370117188, "learning_rate": 1.8931691377523106e-06, "loss": 20.0156, "step": 5241 }, { "epoch": 0.3481437205286578, "grad_norm": 132.80931091308594, "learning_rate": 1.8931207642234175e-06, "loss": 17.6562, "step": 5242 }, { "epoch": 0.3482101348210135, "grad_norm": 281.17181396484375, "learning_rate": 1.893072380363418e-06, "loss": 21.25, "step": 5243 }, { "epoch": 0.3482765491133692, "grad_norm": 169.1405029296875, "learning_rate": 1.8930239861728721e-06, "loss": 17.1562, "step": 5244 }, { "epoch": 0.3483429634057249, "grad_norm": 175.0796356201172, "learning_rate": 1.8929755816523398e-06, "loss": 19.8438, "step": 5245 }, { "epoch": 0.3484093776980806, "grad_norm": 187.84471130371094, "learning_rate": 1.892927166802381e-06, "loss": 22.2188, "step": 5246 }, { "epoch": 0.3484757919904363, "grad_norm": 208.2425079345703, "learning_rate": 1.8928787416235554e-06, "loss": 16.9062, "step": 5247 }, { "epoch": 0.34854220628279203, "grad_norm": 145.82681274414062, "learning_rate": 1.8928303061164238e-06, "loss": 15.5312, "step": 5248 }, { "epoch": 0.3486086205751478, "grad_norm": 232.32540893554688, "learning_rate": 1.8927818602815457e-06, "loss": 23.7188, "step": 5249 }, { "epoch": 0.3486750348675035, "grad_norm": 176.30337524414062, "learning_rate": 1.8927334041194818e-06, "loss": 21.1094, "step": 5250 }, { "epoch": 0.3487414491598592, "grad_norm": 109.65581512451172, "learning_rate": 1.892684937630793e-06, "loss": 13.5156, "step": 5251 }, { "epoch": 0.34880786345221493, "grad_norm": 144.17823791503906, "learning_rate": 1.892636460816039e-06, "loss": 19.9688, "step": 5252 }, { "epoch": 0.34887427774457064, "grad_norm": 1198.0999755859375, "learning_rate": 1.8925879736757817e-06, "loss": 16.4375, "step": 5253 }, { "epoch": 0.34894069203692635, "grad_norm": 295.8039245605469, "learning_rate": 1.8925394762105812e-06, "loss": 16.8594, "step": 5254 }, { "epoch": 0.34900710632928206, "grad_norm": 217.1896514892578, "learning_rate": 1.8924909684209986e-06, "loss": 17.1562, "step": 5255 }, { "epoch": 0.34907352062163777, "grad_norm": 198.3386993408203, "learning_rate": 1.8924424503075953e-06, "loss": 19.0156, "step": 5256 }, { "epoch": 0.3491399349139935, "grad_norm": 212.60931396484375, "learning_rate": 1.8923939218709324e-06, "loss": 19.6719, "step": 5257 }, { "epoch": 0.3492063492063492, "grad_norm": 193.3759765625, "learning_rate": 1.892345383111571e-06, "loss": 19.4531, "step": 5258 }, { "epoch": 0.3492727634987049, "grad_norm": 283.3250427246094, "learning_rate": 1.8922968340300729e-06, "loss": 17.1719, "step": 5259 }, { "epoch": 0.34933917779106066, "grad_norm": 210.85618591308594, "learning_rate": 1.8922482746269997e-06, "loss": 20.1875, "step": 5260 }, { "epoch": 0.34940559208341637, "grad_norm": 412.6726989746094, "learning_rate": 1.8921997049029126e-06, "loss": 18.1719, "step": 5261 }, { "epoch": 0.3494720063757721, "grad_norm": 344.6054382324219, "learning_rate": 1.8921511248583739e-06, "loss": 15.5938, "step": 5262 }, { "epoch": 0.3495384206681278, "grad_norm": 202.4056854248047, "learning_rate": 1.8921025344939455e-06, "loss": 20.2812, "step": 5263 }, { "epoch": 0.3496048349604835, "grad_norm": 264.6114807128906, "learning_rate": 1.8920539338101893e-06, "loss": 15.4219, "step": 5264 }, { "epoch": 0.3496712492528392, "grad_norm": 414.69268798828125, "learning_rate": 1.8920053228076678e-06, "loss": 17.9844, "step": 5265 }, { "epoch": 0.3497376635451949, "grad_norm": 219.589599609375, "learning_rate": 1.8919567014869432e-06, "loss": 20.6719, "step": 5266 }, { "epoch": 0.34980407783755063, "grad_norm": 221.3588409423828, "learning_rate": 1.8919080698485775e-06, "loss": 18.6094, "step": 5267 }, { "epoch": 0.34987049212990634, "grad_norm": 614.732177734375, "learning_rate": 1.8918594278931335e-06, "loss": 23.125, "step": 5268 }, { "epoch": 0.34993690642226205, "grad_norm": 217.98941040039062, "learning_rate": 1.8918107756211738e-06, "loss": 22.0469, "step": 5269 }, { "epoch": 0.35000332071461776, "grad_norm": 169.30409240722656, "learning_rate": 1.8917621130332616e-06, "loss": 18.3281, "step": 5270 }, { "epoch": 0.3500697350069735, "grad_norm": 307.3839111328125, "learning_rate": 1.8917134401299595e-06, "loss": 28.7031, "step": 5271 }, { "epoch": 0.35013614929932924, "grad_norm": 201.8002166748047, "learning_rate": 1.8916647569118304e-06, "loss": 18.3125, "step": 5272 }, { "epoch": 0.35020256359168495, "grad_norm": 154.9840087890625, "learning_rate": 1.8916160633794377e-06, "loss": 16.0312, "step": 5273 }, { "epoch": 0.35026897788404066, "grad_norm": 228.3055419921875, "learning_rate": 1.8915673595333441e-06, "loss": 16.5156, "step": 5274 }, { "epoch": 0.35033539217639637, "grad_norm": 116.66073608398438, "learning_rate": 1.891518645374114e-06, "loss": 16.5, "step": 5275 }, { "epoch": 0.3504018064687521, "grad_norm": 406.0074157714844, "learning_rate": 1.8914699209023096e-06, "loss": 16.2656, "step": 5276 }, { "epoch": 0.3504682207611078, "grad_norm": 162.46620178222656, "learning_rate": 1.8914211861184959e-06, "loss": 17.2969, "step": 5277 }, { "epoch": 0.3505346350534635, "grad_norm": 549.8341674804688, "learning_rate": 1.8913724410232357e-06, "loss": 27.9375, "step": 5278 }, { "epoch": 0.3506010493458192, "grad_norm": 497.32440185546875, "learning_rate": 1.8913236856170928e-06, "loss": 27.2031, "step": 5279 }, { "epoch": 0.3506674636381749, "grad_norm": 195.19544982910156, "learning_rate": 1.8912749199006317e-06, "loss": 22.4531, "step": 5280 }, { "epoch": 0.3507338779305306, "grad_norm": 247.22142028808594, "learning_rate": 1.8912261438744163e-06, "loss": 24.875, "step": 5281 }, { "epoch": 0.3508002922228864, "grad_norm": 464.4497375488281, "learning_rate": 1.8911773575390108e-06, "loss": 23.8906, "step": 5282 }, { "epoch": 0.3508667065152421, "grad_norm": 124.13130187988281, "learning_rate": 1.8911285608949795e-06, "loss": 13.6797, "step": 5283 }, { "epoch": 0.3509331208075978, "grad_norm": 400.08905029296875, "learning_rate": 1.8910797539428867e-06, "loss": 23.5312, "step": 5284 }, { "epoch": 0.3509995350999535, "grad_norm": 333.20684814453125, "learning_rate": 1.8910309366832972e-06, "loss": 24.6562, "step": 5285 }, { "epoch": 0.35106594939230923, "grad_norm": 332.4573669433594, "learning_rate": 1.8909821091167758e-06, "loss": 22.375, "step": 5286 }, { "epoch": 0.35113236368466494, "grad_norm": 140.02479553222656, "learning_rate": 1.8909332712438871e-06, "loss": 18.6875, "step": 5287 }, { "epoch": 0.35119877797702065, "grad_norm": 185.2825164794922, "learning_rate": 1.8908844230651958e-06, "loss": 19.1094, "step": 5288 }, { "epoch": 0.35126519226937636, "grad_norm": 261.5325622558594, "learning_rate": 1.8908355645812675e-06, "loss": 19.6406, "step": 5289 }, { "epoch": 0.35133160656173207, "grad_norm": 497.20501708984375, "learning_rate": 1.8907866957926669e-06, "loss": 13.8438, "step": 5290 }, { "epoch": 0.3513980208540878, "grad_norm": 244.08364868164062, "learning_rate": 1.8907378166999596e-06, "loss": 19.7188, "step": 5291 }, { "epoch": 0.3514644351464435, "grad_norm": 295.82958984375, "learning_rate": 1.8906889273037106e-06, "loss": 17.8438, "step": 5292 }, { "epoch": 0.35153084943879925, "grad_norm": 228.3473358154297, "learning_rate": 1.8906400276044861e-06, "loss": 16.8281, "step": 5293 }, { "epoch": 0.35159726373115496, "grad_norm": 170.49374389648438, "learning_rate": 1.890591117602851e-06, "loss": 26.2188, "step": 5294 }, { "epoch": 0.3516636780235107, "grad_norm": 257.0765075683594, "learning_rate": 1.8905421972993713e-06, "loss": 26.75, "step": 5295 }, { "epoch": 0.3517300923158664, "grad_norm": 225.56808471679688, "learning_rate": 1.8904932666946133e-06, "loss": 17.8594, "step": 5296 }, { "epoch": 0.3517965066082221, "grad_norm": 198.94918823242188, "learning_rate": 1.8904443257891426e-06, "loss": 16.4219, "step": 5297 }, { "epoch": 0.3518629209005778, "grad_norm": 190.8137664794922, "learning_rate": 1.8903953745835253e-06, "loss": 20.3438, "step": 5298 }, { "epoch": 0.3519293351929335, "grad_norm": 205.39114379882812, "learning_rate": 1.8903464130783277e-06, "loss": 23.0781, "step": 5299 }, { "epoch": 0.3519957494852892, "grad_norm": 156.44522094726562, "learning_rate": 1.8902974412741162e-06, "loss": 16.625, "step": 5300 }, { "epoch": 0.35206216377764493, "grad_norm": 201.8564910888672, "learning_rate": 1.8902484591714573e-06, "loss": 18.1875, "step": 5301 }, { "epoch": 0.35212857807000064, "grad_norm": 542.6024780273438, "learning_rate": 1.8901994667709178e-06, "loss": 18.625, "step": 5302 }, { "epoch": 0.35219499236235635, "grad_norm": 266.9232482910156, "learning_rate": 1.890150464073064e-06, "loss": 20.6094, "step": 5303 }, { "epoch": 0.3522614066547121, "grad_norm": 193.3056182861328, "learning_rate": 1.890101451078463e-06, "loss": 13.8672, "step": 5304 }, { "epoch": 0.3523278209470678, "grad_norm": 181.14344787597656, "learning_rate": 1.8900524277876814e-06, "loss": 18.4531, "step": 5305 }, { "epoch": 0.35239423523942354, "grad_norm": 374.70745849609375, "learning_rate": 1.8900033942012867e-06, "loss": 16.8281, "step": 5306 }, { "epoch": 0.35246064953177925, "grad_norm": 96.97093200683594, "learning_rate": 1.8899543503198463e-06, "loss": 17.0781, "step": 5307 }, { "epoch": 0.35252706382413496, "grad_norm": 294.58685302734375, "learning_rate": 1.8899052961439268e-06, "loss": 15.25, "step": 5308 }, { "epoch": 0.35259347811649067, "grad_norm": 263.17242431640625, "learning_rate": 1.889856231674096e-06, "loss": 21.3125, "step": 5309 }, { "epoch": 0.3526598924088464, "grad_norm": 9451.93359375, "learning_rate": 1.8898071569109213e-06, "loss": 15.3906, "step": 5310 }, { "epoch": 0.3527263067012021, "grad_norm": 234.35491943359375, "learning_rate": 1.8897580718549709e-06, "loss": 16.8438, "step": 5311 }, { "epoch": 0.3527927209935578, "grad_norm": 628.5899658203125, "learning_rate": 1.8897089765068118e-06, "loss": 28.0312, "step": 5312 }, { "epoch": 0.3528591352859135, "grad_norm": 445.0852355957031, "learning_rate": 1.8896598708670125e-06, "loss": 24.7812, "step": 5313 }, { "epoch": 0.35292554957826927, "grad_norm": 151.1016387939453, "learning_rate": 1.889610754936141e-06, "loss": 16.0938, "step": 5314 }, { "epoch": 0.352991963870625, "grad_norm": 410.7518310546875, "learning_rate": 1.8895616287147647e-06, "loss": 25.6875, "step": 5315 }, { "epoch": 0.3530583781629807, "grad_norm": 610.127197265625, "learning_rate": 1.889512492203453e-06, "loss": 36.25, "step": 5316 }, { "epoch": 0.3531247924553364, "grad_norm": 228.4720458984375, "learning_rate": 1.8894633454027733e-06, "loss": 20.2188, "step": 5317 }, { "epoch": 0.3531912067476921, "grad_norm": 319.2766418457031, "learning_rate": 1.8894141883132945e-06, "loss": 20.1406, "step": 5318 }, { "epoch": 0.3532576210400478, "grad_norm": 205.88807678222656, "learning_rate": 1.8893650209355858e-06, "loss": 12.5625, "step": 5319 }, { "epoch": 0.35332403533240353, "grad_norm": 210.3908233642578, "learning_rate": 1.8893158432702148e-06, "loss": 16.5312, "step": 5320 }, { "epoch": 0.35339044962475924, "grad_norm": 266.9187316894531, "learning_rate": 1.8892666553177512e-06, "loss": 24.25, "step": 5321 }, { "epoch": 0.35345686391711495, "grad_norm": 670.1309204101562, "learning_rate": 1.8892174570787636e-06, "loss": 23.5, "step": 5322 }, { "epoch": 0.35352327820947066, "grad_norm": 279.7653503417969, "learning_rate": 1.8891682485538211e-06, "loss": 16.4688, "step": 5323 }, { "epoch": 0.35358969250182637, "grad_norm": 218.04730224609375, "learning_rate": 1.8891190297434933e-06, "loss": 22.4219, "step": 5324 }, { "epoch": 0.35365610679418213, "grad_norm": 400.3608093261719, "learning_rate": 1.8890698006483495e-06, "loss": 21.6406, "step": 5325 }, { "epoch": 0.35372252108653784, "grad_norm": 261.12628173828125, "learning_rate": 1.8890205612689584e-06, "loss": 17.0312, "step": 5326 }, { "epoch": 0.35378893537889355, "grad_norm": 253.05828857421875, "learning_rate": 1.8889713116058901e-06, "loss": 18.7188, "step": 5327 }, { "epoch": 0.35385534967124926, "grad_norm": 1449.938232421875, "learning_rate": 1.8889220516597144e-06, "loss": 20.75, "step": 5328 }, { "epoch": 0.353921763963605, "grad_norm": 154.61634826660156, "learning_rate": 1.888872781431001e-06, "loss": 18.3438, "step": 5329 }, { "epoch": 0.3539881782559607, "grad_norm": 130.05357360839844, "learning_rate": 1.8888235009203197e-06, "loss": 16.4531, "step": 5330 }, { "epoch": 0.3540545925483164, "grad_norm": 413.1751403808594, "learning_rate": 1.888774210128241e-06, "loss": 26.1875, "step": 5331 }, { "epoch": 0.3541210068406721, "grad_norm": 347.8968811035156, "learning_rate": 1.8887249090553346e-06, "loss": 27.1562, "step": 5332 }, { "epoch": 0.3541874211330278, "grad_norm": 139.3249053955078, "learning_rate": 1.888675597702171e-06, "loss": 18.0312, "step": 5333 }, { "epoch": 0.3542538354253835, "grad_norm": 245.5228729248047, "learning_rate": 1.8886262760693202e-06, "loss": 26.4375, "step": 5334 }, { "epoch": 0.35432024971773923, "grad_norm": 430.68280029296875, "learning_rate": 1.8885769441573531e-06, "loss": 22.9688, "step": 5335 }, { "epoch": 0.354386664010095, "grad_norm": 110.72111511230469, "learning_rate": 1.8885276019668405e-06, "loss": 15.3281, "step": 5336 }, { "epoch": 0.3544530783024507, "grad_norm": 154.93531799316406, "learning_rate": 1.8884782494983527e-06, "loss": 17.6875, "step": 5337 }, { "epoch": 0.3545194925948064, "grad_norm": 291.7940979003906, "learning_rate": 1.888428886752461e-06, "loss": 13.8828, "step": 5338 }, { "epoch": 0.35458590688716213, "grad_norm": 167.83645629882812, "learning_rate": 1.8883795137297366e-06, "loss": 16.2656, "step": 5339 }, { "epoch": 0.35465232117951784, "grad_norm": 234.778076171875, "learning_rate": 1.8883301304307495e-06, "loss": 22.4062, "step": 5340 }, { "epoch": 0.35471873547187355, "grad_norm": 168.89556884765625, "learning_rate": 1.8882807368560723e-06, "loss": 17.5156, "step": 5341 }, { "epoch": 0.35478514976422926, "grad_norm": 951.6866455078125, "learning_rate": 1.8882313330062755e-06, "loss": 18.6875, "step": 5342 }, { "epoch": 0.35485156405658497, "grad_norm": 434.7548828125, "learning_rate": 1.8881819188819307e-06, "loss": 23.2188, "step": 5343 }, { "epoch": 0.3549179783489407, "grad_norm": 127.88360595703125, "learning_rate": 1.88813249448361e-06, "loss": 15.6406, "step": 5344 }, { "epoch": 0.3549843926412964, "grad_norm": 456.0146789550781, "learning_rate": 1.8880830598118845e-06, "loss": 25.5312, "step": 5345 }, { "epoch": 0.3550508069336521, "grad_norm": 138.40943908691406, "learning_rate": 1.888033614867326e-06, "loss": 18.2969, "step": 5346 }, { "epoch": 0.35511722122600786, "grad_norm": 206.79388427734375, "learning_rate": 1.8879841596505073e-06, "loss": 18.9062, "step": 5347 }, { "epoch": 0.35518363551836357, "grad_norm": 228.47378540039062, "learning_rate": 1.8879346941619993e-06, "loss": 20.6562, "step": 5348 }, { "epoch": 0.3552500498107193, "grad_norm": 146.14996337890625, "learning_rate": 1.887885218402375e-06, "loss": 18.4844, "step": 5349 }, { "epoch": 0.355316464103075, "grad_norm": 294.0769958496094, "learning_rate": 1.8878357323722068e-06, "loss": 20.25, "step": 5350 }, { "epoch": 0.3553828783954307, "grad_norm": 237.08067321777344, "learning_rate": 1.8877862360720664e-06, "loss": 15.2344, "step": 5351 }, { "epoch": 0.3554492926877864, "grad_norm": 170.70751953125, "learning_rate": 1.8877367295025267e-06, "loss": 15.1875, "step": 5352 }, { "epoch": 0.3555157069801421, "grad_norm": 286.47821044921875, "learning_rate": 1.8876872126641608e-06, "loss": 21.0156, "step": 5353 }, { "epoch": 0.35558212127249783, "grad_norm": 234.31277465820312, "learning_rate": 1.8876376855575408e-06, "loss": 23.4844, "step": 5354 }, { "epoch": 0.35564853556485354, "grad_norm": 646.0176391601562, "learning_rate": 1.8875881481832398e-06, "loss": 23.4688, "step": 5355 }, { "epoch": 0.35571494985720925, "grad_norm": 157.0883026123047, "learning_rate": 1.8875386005418313e-06, "loss": 17.4844, "step": 5356 }, { "epoch": 0.35578136414956496, "grad_norm": 203.50274658203125, "learning_rate": 1.8874890426338879e-06, "loss": 13.75, "step": 5357 }, { "epoch": 0.3558477784419207, "grad_norm": 404.58392333984375, "learning_rate": 1.887439474459983e-06, "loss": 22.5312, "step": 5358 }, { "epoch": 0.35591419273427644, "grad_norm": 160.6056365966797, "learning_rate": 1.8873898960206897e-06, "loss": 15.9375, "step": 5359 }, { "epoch": 0.35598060702663215, "grad_norm": 218.24148559570312, "learning_rate": 1.8873403073165819e-06, "loss": 22.375, "step": 5360 }, { "epoch": 0.35604702131898786, "grad_norm": 516.9058837890625, "learning_rate": 1.8872907083482335e-06, "loss": 25.6719, "step": 5361 }, { "epoch": 0.35611343561134357, "grad_norm": 442.7703552246094, "learning_rate": 1.8872410991162174e-06, "loss": 26.4844, "step": 5362 }, { "epoch": 0.3561798499036993, "grad_norm": 203.88360595703125, "learning_rate": 1.8871914796211079e-06, "loss": 20.2812, "step": 5363 }, { "epoch": 0.356246264196055, "grad_norm": 220.009765625, "learning_rate": 1.8871418498634793e-06, "loss": 20.3906, "step": 5364 }, { "epoch": 0.3563126784884107, "grad_norm": 310.6955261230469, "learning_rate": 1.8870922098439048e-06, "loss": 17.9375, "step": 5365 }, { "epoch": 0.3563790927807664, "grad_norm": 207.2978973388672, "learning_rate": 1.8870425595629595e-06, "loss": 19.0781, "step": 5366 }, { "epoch": 0.3564455070731221, "grad_norm": 277.4207763671875, "learning_rate": 1.8869928990212172e-06, "loss": 15.625, "step": 5367 }, { "epoch": 0.3565119213654778, "grad_norm": 223.4432830810547, "learning_rate": 1.8869432282192528e-06, "loss": 14.0781, "step": 5368 }, { "epoch": 0.3565783356578336, "grad_norm": 269.57275390625, "learning_rate": 1.8868935471576403e-06, "loss": 16.3594, "step": 5369 }, { "epoch": 0.3566447499501893, "grad_norm": 157.57984924316406, "learning_rate": 1.8868438558369548e-06, "loss": 19.2188, "step": 5370 }, { "epoch": 0.356711164242545, "grad_norm": 192.28155517578125, "learning_rate": 1.886794154257771e-06, "loss": 18.4375, "step": 5371 }, { "epoch": 0.3567775785349007, "grad_norm": 146.262939453125, "learning_rate": 1.8867444424206636e-06, "loss": 18.6094, "step": 5372 }, { "epoch": 0.35684399282725643, "grad_norm": 385.3585205078125, "learning_rate": 1.8866947203262078e-06, "loss": 14.625, "step": 5373 }, { "epoch": 0.35691040711961214, "grad_norm": 256.5374755859375, "learning_rate": 1.886644987974979e-06, "loss": 17.6719, "step": 5374 }, { "epoch": 0.35697682141196785, "grad_norm": 140.63540649414062, "learning_rate": 1.8865952453675522e-06, "loss": 13.4219, "step": 5375 }, { "epoch": 0.35704323570432356, "grad_norm": 565.3684692382812, "learning_rate": 1.8865454925045027e-06, "loss": 33.0312, "step": 5376 }, { "epoch": 0.35710964999667927, "grad_norm": 218.39190673828125, "learning_rate": 1.8864957293864064e-06, "loss": 17.875, "step": 5377 }, { "epoch": 0.357176064289035, "grad_norm": 523.650390625, "learning_rate": 1.8864459560138383e-06, "loss": 18.4375, "step": 5378 }, { "epoch": 0.3572424785813907, "grad_norm": 1519.618896484375, "learning_rate": 1.8863961723873748e-06, "loss": 23.8125, "step": 5379 }, { "epoch": 0.35730889287374645, "grad_norm": 154.21426391601562, "learning_rate": 1.8863463785075914e-06, "loss": 16.75, "step": 5380 }, { "epoch": 0.35737530716610216, "grad_norm": 159.25392150878906, "learning_rate": 1.886296574375064e-06, "loss": 20.4219, "step": 5381 }, { "epoch": 0.3574417214584579, "grad_norm": 256.55120849609375, "learning_rate": 1.8862467599903693e-06, "loss": 19.7031, "step": 5382 }, { "epoch": 0.3575081357508136, "grad_norm": 279.41412353515625, "learning_rate": 1.886196935354083e-06, "loss": 19.3594, "step": 5383 }, { "epoch": 0.3575745500431693, "grad_norm": 317.9841613769531, "learning_rate": 1.8861471004667814e-06, "loss": 24.75, "step": 5384 }, { "epoch": 0.357640964335525, "grad_norm": 278.6704406738281, "learning_rate": 1.8860972553290413e-06, "loss": 15.0781, "step": 5385 }, { "epoch": 0.3577073786278807, "grad_norm": 337.8457336425781, "learning_rate": 1.886047399941439e-06, "loss": 29.6719, "step": 5386 }, { "epoch": 0.3577737929202364, "grad_norm": 212.39064025878906, "learning_rate": 1.8859975343045513e-06, "loss": 33.9531, "step": 5387 }, { "epoch": 0.35784020721259213, "grad_norm": 205.97120666503906, "learning_rate": 1.885947658418955e-06, "loss": 16.2344, "step": 5388 }, { "epoch": 0.35790662150494784, "grad_norm": 704.1098022460938, "learning_rate": 1.8858977722852271e-06, "loss": 29.8281, "step": 5389 }, { "epoch": 0.3579730357973036, "grad_norm": 380.6082763671875, "learning_rate": 1.8858478759039448e-06, "loss": 20.1719, "step": 5390 }, { "epoch": 0.3580394500896593, "grad_norm": 187.71316528320312, "learning_rate": 1.8857979692756848e-06, "loss": 17.75, "step": 5391 }, { "epoch": 0.358105864382015, "grad_norm": 352.6417541503906, "learning_rate": 1.8857480524010248e-06, "loss": 28.3125, "step": 5392 }, { "epoch": 0.35817227867437074, "grad_norm": 226.36671447753906, "learning_rate": 1.8856981252805423e-06, "loss": 18.9062, "step": 5393 }, { "epoch": 0.35823869296672645, "grad_norm": 340.7677917480469, "learning_rate": 1.885648187914814e-06, "loss": 13.625, "step": 5394 }, { "epoch": 0.35830510725908216, "grad_norm": 275.611328125, "learning_rate": 1.8855982403044187e-06, "loss": 21.7656, "step": 5395 }, { "epoch": 0.35837152155143787, "grad_norm": 466.6712951660156, "learning_rate": 1.8855482824499335e-06, "loss": 20.4062, "step": 5396 }, { "epoch": 0.3584379358437936, "grad_norm": 362.2442321777344, "learning_rate": 1.8854983143519363e-06, "loss": 21.0, "step": 5397 }, { "epoch": 0.3585043501361493, "grad_norm": 250.8347625732422, "learning_rate": 1.8854483360110055e-06, "loss": 16.8438, "step": 5398 }, { "epoch": 0.358570764428505, "grad_norm": 221.36175537109375, "learning_rate": 1.8853983474277184e-06, "loss": 20.3281, "step": 5399 }, { "epoch": 0.3586371787208607, "grad_norm": 320.0205383300781, "learning_rate": 1.8853483486026542e-06, "loss": 28.375, "step": 5400 }, { "epoch": 0.35870359301321647, "grad_norm": 573.4700927734375, "learning_rate": 1.8852983395363906e-06, "loss": 20.7812, "step": 5401 }, { "epoch": 0.3587700073055722, "grad_norm": 271.4530029296875, "learning_rate": 1.8852483202295062e-06, "loss": 22.9375, "step": 5402 }, { "epoch": 0.3588364215979279, "grad_norm": 497.76739501953125, "learning_rate": 1.8851982906825801e-06, "loss": 19.5312, "step": 5403 }, { "epoch": 0.3589028358902836, "grad_norm": 230.53305053710938, "learning_rate": 1.8851482508961904e-06, "loss": 19.8125, "step": 5404 }, { "epoch": 0.3589692501826393, "grad_norm": 219.67979431152344, "learning_rate": 1.8850982008709159e-06, "loss": 18.4609, "step": 5405 }, { "epoch": 0.359035664474995, "grad_norm": 253.16683959960938, "learning_rate": 1.8850481406073363e-06, "loss": 19.125, "step": 5406 }, { "epoch": 0.35910207876735073, "grad_norm": 144.75448608398438, "learning_rate": 1.8849980701060298e-06, "loss": 17.9219, "step": 5407 }, { "epoch": 0.35916849305970644, "grad_norm": 331.4661865234375, "learning_rate": 1.884947989367576e-06, "loss": 14.9219, "step": 5408 }, { "epoch": 0.35923490735206215, "grad_norm": 188.79644775390625, "learning_rate": 1.8848978983925542e-06, "loss": 16.4219, "step": 5409 }, { "epoch": 0.35930132164441786, "grad_norm": 220.08413696289062, "learning_rate": 1.8848477971815435e-06, "loss": 19.4062, "step": 5410 }, { "epoch": 0.35936773593677357, "grad_norm": 142.45518493652344, "learning_rate": 1.884797685735124e-06, "loss": 16.3438, "step": 5411 }, { "epoch": 0.35943415022912933, "grad_norm": 209.5081329345703, "learning_rate": 1.884747564053875e-06, "loss": 19.4219, "step": 5412 }, { "epoch": 0.35950056452148504, "grad_norm": 194.9868621826172, "learning_rate": 1.8846974321383763e-06, "loss": 22.3438, "step": 5413 }, { "epoch": 0.35956697881384075, "grad_norm": 122.9707260131836, "learning_rate": 1.8846472899892081e-06, "loss": 21.0781, "step": 5414 }, { "epoch": 0.35963339310619646, "grad_norm": 167.86180114746094, "learning_rate": 1.88459713760695e-06, "loss": 21.5938, "step": 5415 }, { "epoch": 0.3596998073985522, "grad_norm": 377.8747253417969, "learning_rate": 1.8845469749921823e-06, "loss": 18.7812, "step": 5416 }, { "epoch": 0.3597662216909079, "grad_norm": 1950.9334716796875, "learning_rate": 1.8844968021454853e-06, "loss": 26.375, "step": 5417 }, { "epoch": 0.3598326359832636, "grad_norm": 254.85617065429688, "learning_rate": 1.884446619067439e-06, "loss": 18.125, "step": 5418 }, { "epoch": 0.3598990502756193, "grad_norm": 522.989501953125, "learning_rate": 1.8843964257586247e-06, "loss": 16.1719, "step": 5419 }, { "epoch": 0.359965464567975, "grad_norm": 281.8819274902344, "learning_rate": 1.8843462222196222e-06, "loss": 19.8906, "step": 5420 }, { "epoch": 0.3600318788603307, "grad_norm": 243.37384033203125, "learning_rate": 1.8842960084510127e-06, "loss": 21.4219, "step": 5421 }, { "epoch": 0.36009829315268643, "grad_norm": 230.4527130126953, "learning_rate": 1.8842457844533769e-06, "loss": 19.2656, "step": 5422 }, { "epoch": 0.3601647074450422, "grad_norm": 169.4595489501953, "learning_rate": 1.8841955502272956e-06, "loss": 16.2188, "step": 5423 }, { "epoch": 0.3602311217373979, "grad_norm": 214.8555908203125, "learning_rate": 1.88414530577335e-06, "loss": 17.5469, "step": 5424 }, { "epoch": 0.3602975360297536, "grad_norm": 186.8874053955078, "learning_rate": 1.8840950510921214e-06, "loss": 17.3438, "step": 5425 }, { "epoch": 0.3603639503221093, "grad_norm": 276.35455322265625, "learning_rate": 1.8840447861841913e-06, "loss": 22.5625, "step": 5426 }, { "epoch": 0.36043036461446504, "grad_norm": 321.0789794921875, "learning_rate": 1.8839945110501407e-06, "loss": 28.4062, "step": 5427 }, { "epoch": 0.36049677890682075, "grad_norm": 182.17388916015625, "learning_rate": 1.8839442256905511e-06, "loss": 20.7031, "step": 5428 }, { "epoch": 0.36056319319917646, "grad_norm": 373.7344055175781, "learning_rate": 1.8838939301060046e-06, "loss": 24.3594, "step": 5429 }, { "epoch": 0.36062960749153217, "grad_norm": 261.0743713378906, "learning_rate": 1.883843624297083e-06, "loss": 25.75, "step": 5430 }, { "epoch": 0.3606960217838879, "grad_norm": 131.4191436767578, "learning_rate": 1.8837933082643678e-06, "loss": 13.4375, "step": 5431 }, { "epoch": 0.3607624360762436, "grad_norm": 235.1500701904297, "learning_rate": 1.8837429820084414e-06, "loss": 17.625, "step": 5432 }, { "epoch": 0.3608288503685993, "grad_norm": 215.1317596435547, "learning_rate": 1.8836926455298854e-06, "loss": 18.0625, "step": 5433 }, { "epoch": 0.36089526466095506, "grad_norm": 218.76126098632812, "learning_rate": 1.8836422988292827e-06, "loss": 22.3906, "step": 5434 }, { "epoch": 0.36096167895331077, "grad_norm": 258.60589599609375, "learning_rate": 1.8835919419072154e-06, "loss": 16.625, "step": 5435 }, { "epoch": 0.3610280932456665, "grad_norm": 181.24429321289062, "learning_rate": 1.883541574764266e-06, "loss": 19.2812, "step": 5436 }, { "epoch": 0.3610945075380222, "grad_norm": 204.00445556640625, "learning_rate": 1.8834911974010171e-06, "loss": 16.25, "step": 5437 }, { "epoch": 0.3611609218303779, "grad_norm": 388.5028381347656, "learning_rate": 1.8834408098180519e-06, "loss": 19.0, "step": 5438 }, { "epoch": 0.3612273361227336, "grad_norm": 371.2745666503906, "learning_rate": 1.8833904120159524e-06, "loss": 25.4375, "step": 5439 }, { "epoch": 0.3612937504150893, "grad_norm": 210.4973602294922, "learning_rate": 1.8833400039953018e-06, "loss": 18.7188, "step": 5440 }, { "epoch": 0.36136016470744503, "grad_norm": 263.3134765625, "learning_rate": 1.883289585756684e-06, "loss": 23.8906, "step": 5441 }, { "epoch": 0.36142657899980074, "grad_norm": 573.3753051757812, "learning_rate": 1.8832391573006811e-06, "loss": 23.4062, "step": 5442 }, { "epoch": 0.36149299329215645, "grad_norm": 835.5424194335938, "learning_rate": 1.8831887186278771e-06, "loss": 12.9375, "step": 5443 }, { "epoch": 0.36155940758451216, "grad_norm": 261.22113037109375, "learning_rate": 1.8831382697388554e-06, "loss": 21.0469, "step": 5444 }, { "epoch": 0.3616258218768679, "grad_norm": 202.4844970703125, "learning_rate": 1.8830878106341995e-06, "loss": 15.2031, "step": 5445 }, { "epoch": 0.36169223616922364, "grad_norm": 220.96438598632812, "learning_rate": 1.8830373413144928e-06, "loss": 20.9688, "step": 5446 }, { "epoch": 0.36175865046157935, "grad_norm": 163.26205444335938, "learning_rate": 1.8829868617803193e-06, "loss": 15.1719, "step": 5447 }, { "epoch": 0.36182506475393506, "grad_norm": 190.7628173828125, "learning_rate": 1.882936372032263e-06, "loss": 18.9531, "step": 5448 }, { "epoch": 0.36189147904629076, "grad_norm": 170.6713104248047, "learning_rate": 1.8828858720709079e-06, "loss": 16.9062, "step": 5449 }, { "epoch": 0.3619578933386465, "grad_norm": 332.9981384277344, "learning_rate": 1.8828353618968383e-06, "loss": 18.5625, "step": 5450 }, { "epoch": 0.3620243076310022, "grad_norm": 226.0600128173828, "learning_rate": 1.882784841510638e-06, "loss": 17.0938, "step": 5451 }, { "epoch": 0.3620907219233579, "grad_norm": 303.2410888671875, "learning_rate": 1.882734310912892e-06, "loss": 16.5938, "step": 5452 }, { "epoch": 0.3621571362157136, "grad_norm": 201.1939239501953, "learning_rate": 1.8826837701041842e-06, "loss": 16.0312, "step": 5453 }, { "epoch": 0.3622235505080693, "grad_norm": 235.4423370361328, "learning_rate": 1.8826332190850998e-06, "loss": 18.6562, "step": 5454 }, { "epoch": 0.362289964800425, "grad_norm": 137.06687927246094, "learning_rate": 1.8825826578562231e-06, "loss": 13.9688, "step": 5455 }, { "epoch": 0.3623563790927808, "grad_norm": 283.6361389160156, "learning_rate": 1.8825320864181392e-06, "loss": 21.6562, "step": 5456 }, { "epoch": 0.3624227933851365, "grad_norm": 271.8161926269531, "learning_rate": 1.8824815047714331e-06, "loss": 34.5469, "step": 5457 }, { "epoch": 0.3624892076774922, "grad_norm": 114.46171569824219, "learning_rate": 1.8824309129166896e-06, "loss": 18.375, "step": 5458 }, { "epoch": 0.3625556219698479, "grad_norm": 151.13275146484375, "learning_rate": 1.8823803108544944e-06, "loss": 17.2188, "step": 5459 }, { "epoch": 0.36262203626220363, "grad_norm": 1042.1851806640625, "learning_rate": 1.8823296985854324e-06, "loss": 17.2031, "step": 5460 }, { "epoch": 0.36268845055455934, "grad_norm": 603.718994140625, "learning_rate": 1.8822790761100894e-06, "loss": 23.375, "step": 5461 }, { "epoch": 0.36275486484691505, "grad_norm": 265.0626220703125, "learning_rate": 1.8822284434290507e-06, "loss": 27.3125, "step": 5462 }, { "epoch": 0.36282127913927076, "grad_norm": 1289.574951171875, "learning_rate": 1.882177800542902e-06, "loss": 26.7188, "step": 5463 }, { "epoch": 0.36288769343162647, "grad_norm": 356.70501708984375, "learning_rate": 1.882127147452229e-06, "loss": 21.8125, "step": 5464 }, { "epoch": 0.3629541077239822, "grad_norm": 231.2398223876953, "learning_rate": 1.8820764841576184e-06, "loss": 20.0156, "step": 5465 }, { "epoch": 0.36302052201633794, "grad_norm": 102.63298034667969, "learning_rate": 1.8820258106596555e-06, "loss": 15.9062, "step": 5466 }, { "epoch": 0.36308693630869365, "grad_norm": 341.77130126953125, "learning_rate": 1.8819751269589263e-06, "loss": 19.3438, "step": 5467 }, { "epoch": 0.36315335060104936, "grad_norm": 227.854736328125, "learning_rate": 1.8819244330560176e-06, "loss": 14.7969, "step": 5468 }, { "epoch": 0.3632197648934051, "grad_norm": 208.03550720214844, "learning_rate": 1.8818737289515157e-06, "loss": 18.3594, "step": 5469 }, { "epoch": 0.3632861791857608, "grad_norm": 245.05914306640625, "learning_rate": 1.8818230146460068e-06, "loss": 21.5938, "step": 5470 }, { "epoch": 0.3633525934781165, "grad_norm": 241.4337158203125, "learning_rate": 1.8817722901400779e-06, "loss": 17.1562, "step": 5471 }, { "epoch": 0.3634190077704722, "grad_norm": 279.4306335449219, "learning_rate": 1.8817215554343156e-06, "loss": 16.4375, "step": 5472 }, { "epoch": 0.3634854220628279, "grad_norm": 207.11631774902344, "learning_rate": 1.8816708105293068e-06, "loss": 19.4375, "step": 5473 }, { "epoch": 0.3635518363551836, "grad_norm": 179.10446166992188, "learning_rate": 1.8816200554256384e-06, "loss": 19.2656, "step": 5474 }, { "epoch": 0.36361825064753933, "grad_norm": 273.72918701171875, "learning_rate": 1.8815692901238975e-06, "loss": 14.2344, "step": 5475 }, { "epoch": 0.36368466493989504, "grad_norm": 361.7187194824219, "learning_rate": 1.8815185146246715e-06, "loss": 31.5938, "step": 5476 }, { "epoch": 0.3637510792322508, "grad_norm": 185.47691345214844, "learning_rate": 1.8814677289285477e-06, "loss": 18.0469, "step": 5477 }, { "epoch": 0.3638174935246065, "grad_norm": 220.09130859375, "learning_rate": 1.8814169330361135e-06, "loss": 19.2188, "step": 5478 }, { "epoch": 0.3638839078169622, "grad_norm": 362.3970031738281, "learning_rate": 1.8813661269479563e-06, "loss": 26.0938, "step": 5479 }, { "epoch": 0.36395032210931794, "grad_norm": 725.3422241210938, "learning_rate": 1.881315310664664e-06, "loss": 21.625, "step": 5480 }, { "epoch": 0.36401673640167365, "grad_norm": 177.58151245117188, "learning_rate": 1.8812644841868246e-06, "loss": 14.25, "step": 5481 }, { "epoch": 0.36408315069402936, "grad_norm": 745.6128540039062, "learning_rate": 1.8812136475150256e-06, "loss": 18.9375, "step": 5482 }, { "epoch": 0.36414956498638507, "grad_norm": 186.64353942871094, "learning_rate": 1.8811628006498551e-06, "loss": 16.6719, "step": 5483 }, { "epoch": 0.3642159792787408, "grad_norm": 205.10433959960938, "learning_rate": 1.8811119435919019e-06, "loss": 14.8438, "step": 5484 }, { "epoch": 0.3642823935710965, "grad_norm": 136.329345703125, "learning_rate": 1.8810610763417536e-06, "loss": 13.7188, "step": 5485 }, { "epoch": 0.3643488078634522, "grad_norm": 190.19146728515625, "learning_rate": 1.8810101988999988e-06, "loss": 16.3125, "step": 5486 }, { "epoch": 0.3644152221558079, "grad_norm": 261.0939025878906, "learning_rate": 1.880959311267226e-06, "loss": 20.6875, "step": 5487 }, { "epoch": 0.36448163644816367, "grad_norm": 133.94322204589844, "learning_rate": 1.8809084134440236e-06, "loss": 13.9844, "step": 5488 }, { "epoch": 0.3645480507405194, "grad_norm": 191.06871032714844, "learning_rate": 1.8808575054309811e-06, "loss": 22.4375, "step": 5489 }, { "epoch": 0.3646144650328751, "grad_norm": 230.6933135986328, "learning_rate": 1.8808065872286867e-06, "loss": 21.6562, "step": 5490 }, { "epoch": 0.3646808793252308, "grad_norm": 219.45982360839844, "learning_rate": 1.8807556588377297e-06, "loss": 13.1172, "step": 5491 }, { "epoch": 0.3647472936175865, "grad_norm": 172.70236206054688, "learning_rate": 1.880704720258699e-06, "loss": 19.7812, "step": 5492 }, { "epoch": 0.3648137079099422, "grad_norm": 188.86373901367188, "learning_rate": 1.880653771492184e-06, "loss": 19.1406, "step": 5493 }, { "epoch": 0.36488012220229793, "grad_norm": 123.69873046875, "learning_rate": 1.880602812538774e-06, "loss": 18.0156, "step": 5494 }, { "epoch": 0.36494653649465364, "grad_norm": 198.65902709960938, "learning_rate": 1.8805518433990583e-06, "loss": 21.7812, "step": 5495 }, { "epoch": 0.36501295078700935, "grad_norm": 175.73887634277344, "learning_rate": 1.8805008640736269e-06, "loss": 17.9688, "step": 5496 }, { "epoch": 0.36507936507936506, "grad_norm": 209.1062774658203, "learning_rate": 1.880449874563069e-06, "loss": 16.75, "step": 5497 }, { "epoch": 0.36514577937172077, "grad_norm": 263.01324462890625, "learning_rate": 1.880398874867975e-06, "loss": 17.5, "step": 5498 }, { "epoch": 0.36521219366407653, "grad_norm": 169.0430145263672, "learning_rate": 1.8803478649889338e-06, "loss": 18.9219, "step": 5499 }, { "epoch": 0.36527860795643224, "grad_norm": 369.35980224609375, "learning_rate": 1.8802968449265365e-06, "loss": 32.3125, "step": 5500 }, { "epoch": 0.36534502224878795, "grad_norm": 126.89667510986328, "learning_rate": 1.8802458146813734e-06, "loss": 19.3438, "step": 5501 }, { "epoch": 0.36541143654114366, "grad_norm": 213.552001953125, "learning_rate": 1.8801947742540336e-06, "loss": 17.2344, "step": 5502 }, { "epoch": 0.3654778508334994, "grad_norm": 434.46136474609375, "learning_rate": 1.8801437236451085e-06, "loss": 21.3438, "step": 5503 }, { "epoch": 0.3655442651258551, "grad_norm": 270.7042541503906, "learning_rate": 1.8800926628551883e-06, "loss": 22.4062, "step": 5504 }, { "epoch": 0.3656106794182108, "grad_norm": 222.0203399658203, "learning_rate": 1.8800415918848635e-06, "loss": 19.0156, "step": 5505 }, { "epoch": 0.3656770937105665, "grad_norm": 253.80682373046875, "learning_rate": 1.8799905107347254e-06, "loss": 15.9375, "step": 5506 }, { "epoch": 0.3657435080029222, "grad_norm": 689.64892578125, "learning_rate": 1.879939419405364e-06, "loss": 18.8281, "step": 5507 }, { "epoch": 0.3658099222952779, "grad_norm": 288.5389099121094, "learning_rate": 1.8798883178973713e-06, "loss": 20.2812, "step": 5508 }, { "epoch": 0.36587633658763363, "grad_norm": 320.1141052246094, "learning_rate": 1.8798372062113377e-06, "loss": 26.5938, "step": 5509 }, { "epoch": 0.3659427508799894, "grad_norm": 278.3707580566406, "learning_rate": 1.8797860843478545e-06, "loss": 25.9688, "step": 5510 }, { "epoch": 0.3660091651723451, "grad_norm": 281.2412414550781, "learning_rate": 1.8797349523075133e-06, "loss": 18.3906, "step": 5511 }, { "epoch": 0.3660755794647008, "grad_norm": 2497.743408203125, "learning_rate": 1.8796838100909054e-06, "loss": 19.2656, "step": 5512 }, { "epoch": 0.3661419937570565, "grad_norm": 246.19102478027344, "learning_rate": 1.8796326576986226e-06, "loss": 24.6875, "step": 5513 }, { "epoch": 0.36620840804941224, "grad_norm": 357.2362976074219, "learning_rate": 1.8795814951312563e-06, "loss": 21.375, "step": 5514 }, { "epoch": 0.36627482234176795, "grad_norm": 226.73773193359375, "learning_rate": 1.8795303223893987e-06, "loss": 17.2344, "step": 5515 }, { "epoch": 0.36634123663412366, "grad_norm": 245.78802490234375, "learning_rate": 1.8794791394736413e-06, "loss": 25.625, "step": 5516 }, { "epoch": 0.36640765092647937, "grad_norm": 169.12950134277344, "learning_rate": 1.8794279463845761e-06, "loss": 18.9375, "step": 5517 }, { "epoch": 0.3664740652188351, "grad_norm": 242.71112060546875, "learning_rate": 1.8793767431227958e-06, "loss": 21.6406, "step": 5518 }, { "epoch": 0.3665404795111908, "grad_norm": 276.5848083496094, "learning_rate": 1.8793255296888927e-06, "loss": 25.8906, "step": 5519 }, { "epoch": 0.3666068938035465, "grad_norm": 195.49729919433594, "learning_rate": 1.8792743060834587e-06, "loss": 11.7656, "step": 5520 }, { "epoch": 0.36667330809590226, "grad_norm": 183.5813446044922, "learning_rate": 1.879223072307086e-06, "loss": 20.5938, "step": 5521 }, { "epoch": 0.36673972238825797, "grad_norm": 273.5304870605469, "learning_rate": 1.8791718283603685e-06, "loss": 15.9062, "step": 5522 }, { "epoch": 0.3668061366806137, "grad_norm": 264.1814880371094, "learning_rate": 1.879120574243898e-06, "loss": 20.9688, "step": 5523 }, { "epoch": 0.3668725509729694, "grad_norm": 693.2741088867188, "learning_rate": 1.8790693099582677e-06, "loss": 17.9062, "step": 5524 }, { "epoch": 0.3669389652653251, "grad_norm": 233.8858184814453, "learning_rate": 1.8790180355040704e-06, "loss": 21.9688, "step": 5525 }, { "epoch": 0.3670053795576808, "grad_norm": 292.467529296875, "learning_rate": 1.8789667508818994e-06, "loss": 19.2188, "step": 5526 }, { "epoch": 0.3670717938500365, "grad_norm": 334.991455078125, "learning_rate": 1.8789154560923478e-06, "loss": 19.1562, "step": 5527 }, { "epoch": 0.36713820814239223, "grad_norm": 452.78167724609375, "learning_rate": 1.8788641511360091e-06, "loss": 37.75, "step": 5528 }, { "epoch": 0.36720462243474794, "grad_norm": 206.51210021972656, "learning_rate": 1.8788128360134766e-06, "loss": 21.8438, "step": 5529 }, { "epoch": 0.36727103672710365, "grad_norm": 251.4765625, "learning_rate": 1.878761510725344e-06, "loss": 18.2188, "step": 5530 }, { "epoch": 0.36733745101945936, "grad_norm": 269.53558349609375, "learning_rate": 1.878710175272205e-06, "loss": 16.5781, "step": 5531 }, { "epoch": 0.3674038653118151, "grad_norm": 218.12789916992188, "learning_rate": 1.8786588296546535e-06, "loss": 27.3125, "step": 5532 }, { "epoch": 0.36747027960417084, "grad_norm": 242.89358520507812, "learning_rate": 1.8786074738732831e-06, "loss": 16.6875, "step": 5533 }, { "epoch": 0.36753669389652655, "grad_norm": 204.9150390625, "learning_rate": 1.8785561079286882e-06, "loss": 15.8281, "step": 5534 }, { "epoch": 0.36760310818888225, "grad_norm": 205.33880615234375, "learning_rate": 1.878504731821463e-06, "loss": 17.1094, "step": 5535 }, { "epoch": 0.36766952248123796, "grad_norm": 164.6007537841797, "learning_rate": 1.8784533455522013e-06, "loss": 22.5625, "step": 5536 }, { "epoch": 0.3677359367735937, "grad_norm": 276.6316223144531, "learning_rate": 1.878401949121498e-06, "loss": 19.875, "step": 5537 }, { "epoch": 0.3678023510659494, "grad_norm": 158.534912109375, "learning_rate": 1.8783505425299473e-06, "loss": 17.6406, "step": 5538 }, { "epoch": 0.3678687653583051, "grad_norm": 351.7865295410156, "learning_rate": 1.8782991257781444e-06, "loss": 25.9375, "step": 5539 }, { "epoch": 0.3679351796506608, "grad_norm": 281.8865051269531, "learning_rate": 1.8782476988666834e-06, "loss": 16.8594, "step": 5540 }, { "epoch": 0.3680015939430165, "grad_norm": 226.73388671875, "learning_rate": 1.8781962617961593e-06, "loss": 27.25, "step": 5541 }, { "epoch": 0.3680680082353723, "grad_norm": 161.74566650390625, "learning_rate": 1.8781448145671678e-06, "loss": 19.3125, "step": 5542 }, { "epoch": 0.368134422527728, "grad_norm": 312.46209716796875, "learning_rate": 1.8780933571803028e-06, "loss": 22.1875, "step": 5543 }, { "epoch": 0.3682008368200837, "grad_norm": 177.01321411132812, "learning_rate": 1.8780418896361606e-06, "loss": 26.375, "step": 5544 }, { "epoch": 0.3682672511124394, "grad_norm": 401.4348449707031, "learning_rate": 1.877990411935336e-06, "loss": 23.4375, "step": 5545 }, { "epoch": 0.3683336654047951, "grad_norm": 187.33033752441406, "learning_rate": 1.8779389240784245e-06, "loss": 22.5469, "step": 5546 }, { "epoch": 0.36840007969715083, "grad_norm": 440.7157287597656, "learning_rate": 1.8778874260660217e-06, "loss": 27.0938, "step": 5547 }, { "epoch": 0.36846649398950654, "grad_norm": 185.0688934326172, "learning_rate": 1.8778359178987238e-06, "loss": 19.8281, "step": 5548 }, { "epoch": 0.36853290828186225, "grad_norm": 216.3392333984375, "learning_rate": 1.8777843995771256e-06, "loss": 15.0, "step": 5549 }, { "epoch": 0.36859932257421796, "grad_norm": 459.1590881347656, "learning_rate": 1.8777328711018241e-06, "loss": 14.4688, "step": 5550 }, { "epoch": 0.36866573686657367, "grad_norm": 374.67132568359375, "learning_rate": 1.8776813324734148e-06, "loss": 16.9531, "step": 5551 }, { "epoch": 0.3687321511589294, "grad_norm": 110.931396484375, "learning_rate": 1.8776297836924937e-06, "loss": 14.5, "step": 5552 }, { "epoch": 0.36879856545128514, "grad_norm": 531.4107055664062, "learning_rate": 1.8775782247596577e-06, "loss": 26.0625, "step": 5553 }, { "epoch": 0.36886497974364085, "grad_norm": 201.64097595214844, "learning_rate": 1.8775266556755027e-06, "loss": 32.1094, "step": 5554 }, { "epoch": 0.36893139403599656, "grad_norm": 216.16143798828125, "learning_rate": 1.8774750764406253e-06, "loss": 19.0312, "step": 5555 }, { "epoch": 0.3689978083283523, "grad_norm": 265.10235595703125, "learning_rate": 1.8774234870556223e-06, "loss": 22.9062, "step": 5556 }, { "epoch": 0.369064222620708, "grad_norm": 235.20272827148438, "learning_rate": 1.87737188752109e-06, "loss": 23.0938, "step": 5557 }, { "epoch": 0.3691306369130637, "grad_norm": 200.4820098876953, "learning_rate": 1.8773202778376258e-06, "loss": 21.1562, "step": 5558 }, { "epoch": 0.3691970512054194, "grad_norm": 939.966796875, "learning_rate": 1.8772686580058268e-06, "loss": 19.4219, "step": 5559 }, { "epoch": 0.3692634654977751, "grad_norm": 400.4028625488281, "learning_rate": 1.8772170280262895e-06, "loss": 19.25, "step": 5560 }, { "epoch": 0.3693298797901308, "grad_norm": 262.4936218261719, "learning_rate": 1.8771653878996116e-06, "loss": 20.6562, "step": 5561 }, { "epoch": 0.36939629408248653, "grad_norm": 156.4698028564453, "learning_rate": 1.8771137376263903e-06, "loss": 18.4062, "step": 5562 }, { "epoch": 0.36946270837484224, "grad_norm": 218.1472930908203, "learning_rate": 1.8770620772072232e-06, "loss": 24.7188, "step": 5563 }, { "epoch": 0.369529122667198, "grad_norm": 276.5989074707031, "learning_rate": 1.8770104066427073e-06, "loss": 25.9062, "step": 5564 }, { "epoch": 0.3695955369595537, "grad_norm": 155.20130920410156, "learning_rate": 1.876958725933441e-06, "loss": 17.6406, "step": 5565 }, { "epoch": 0.3696619512519094, "grad_norm": 651.1063842773438, "learning_rate": 1.876907035080022e-06, "loss": 19.0, "step": 5566 }, { "epoch": 0.36972836554426514, "grad_norm": 224.3536834716797, "learning_rate": 1.8768553340830477e-06, "loss": 16.6875, "step": 5567 }, { "epoch": 0.36979477983662085, "grad_norm": 235.11436462402344, "learning_rate": 1.8768036229431167e-06, "loss": 20.5781, "step": 5568 }, { "epoch": 0.36986119412897656, "grad_norm": 238.5742950439453, "learning_rate": 1.876751901660827e-06, "loss": 19.7031, "step": 5569 }, { "epoch": 0.36992760842133227, "grad_norm": 244.23301696777344, "learning_rate": 1.876700170236777e-06, "loss": 21.0312, "step": 5570 }, { "epoch": 0.369994022713688, "grad_norm": 138.64556884765625, "learning_rate": 1.8766484286715646e-06, "loss": 17.9062, "step": 5571 }, { "epoch": 0.3700604370060437, "grad_norm": 131.6220703125, "learning_rate": 1.8765966769657893e-06, "loss": 15.0312, "step": 5572 }, { "epoch": 0.3701268512983994, "grad_norm": 274.262939453125, "learning_rate": 1.8765449151200486e-06, "loss": 25.0, "step": 5573 }, { "epoch": 0.3701932655907551, "grad_norm": 176.01153564453125, "learning_rate": 1.876493143134942e-06, "loss": 14.7812, "step": 5574 }, { "epoch": 0.37025967988311087, "grad_norm": 421.4930114746094, "learning_rate": 1.876441361011068e-06, "loss": 27.8438, "step": 5575 }, { "epoch": 0.3703260941754666, "grad_norm": 307.82037353515625, "learning_rate": 1.8763895687490257e-06, "loss": 21.0938, "step": 5576 }, { "epoch": 0.3703925084678223, "grad_norm": 425.03924560546875, "learning_rate": 1.8763377663494144e-06, "loss": 16.8438, "step": 5577 }, { "epoch": 0.370458922760178, "grad_norm": 280.1966247558594, "learning_rate": 1.8762859538128328e-06, "loss": 15.9062, "step": 5578 }, { "epoch": 0.3705253370525337, "grad_norm": 414.8734436035156, "learning_rate": 1.8762341311398809e-06, "loss": 28.4688, "step": 5579 }, { "epoch": 0.3705917513448894, "grad_norm": 221.17478942871094, "learning_rate": 1.8761822983311578e-06, "loss": 16.7656, "step": 5580 }, { "epoch": 0.37065816563724513, "grad_norm": 270.5797119140625, "learning_rate": 1.876130455387263e-06, "loss": 23.1562, "step": 5581 }, { "epoch": 0.37072457992960084, "grad_norm": 149.60888671875, "learning_rate": 1.8760786023087962e-06, "loss": 16.1719, "step": 5582 }, { "epoch": 0.37079099422195655, "grad_norm": 210.2247772216797, "learning_rate": 1.8760267390963579e-06, "loss": 19.9062, "step": 5583 }, { "epoch": 0.37085740851431226, "grad_norm": 205.47848510742188, "learning_rate": 1.8759748657505467e-06, "loss": 16.2812, "step": 5584 }, { "epoch": 0.37092382280666797, "grad_norm": 253.6920166015625, "learning_rate": 1.8759229822719637e-06, "loss": 17.6094, "step": 5585 }, { "epoch": 0.37099023709902373, "grad_norm": 159.74591064453125, "learning_rate": 1.8758710886612086e-06, "loss": 13.9219, "step": 5586 }, { "epoch": 0.37105665139137944, "grad_norm": 237.4954833984375, "learning_rate": 1.8758191849188817e-06, "loss": 23.0625, "step": 5587 }, { "epoch": 0.37112306568373515, "grad_norm": 295.4000244140625, "learning_rate": 1.8757672710455838e-06, "loss": 18.4375, "step": 5588 }, { "epoch": 0.37118947997609086, "grad_norm": 102.89604187011719, "learning_rate": 1.8757153470419152e-06, "loss": 20.6719, "step": 5589 }, { "epoch": 0.3712558942684466, "grad_norm": 105.17203521728516, "learning_rate": 1.875663412908476e-06, "loss": 16.5312, "step": 5590 }, { "epoch": 0.3713223085608023, "grad_norm": 134.8030242919922, "learning_rate": 1.8756114686458677e-06, "loss": 18.8906, "step": 5591 }, { "epoch": 0.371388722853158, "grad_norm": 530.4029541015625, "learning_rate": 1.8755595142546907e-06, "loss": 22.375, "step": 5592 }, { "epoch": 0.3714551371455137, "grad_norm": 170.2458953857422, "learning_rate": 1.875507549735546e-06, "loss": 16.3594, "step": 5593 }, { "epoch": 0.3715215514378694, "grad_norm": 278.7489013671875, "learning_rate": 1.875455575089035e-06, "loss": 25.0, "step": 5594 }, { "epoch": 0.3715879657302251, "grad_norm": 419.5923156738281, "learning_rate": 1.8754035903157588e-06, "loss": 26.75, "step": 5595 }, { "epoch": 0.37165438002258083, "grad_norm": 217.4539794921875, "learning_rate": 1.8753515954163185e-06, "loss": 17.0156, "step": 5596 }, { "epoch": 0.3717207943149366, "grad_norm": 252.40383911132812, "learning_rate": 1.8752995903913157e-06, "loss": 23.7969, "step": 5597 }, { "epoch": 0.3717872086072923, "grad_norm": 257.29571533203125, "learning_rate": 1.875247575241352e-06, "loss": 17.6406, "step": 5598 }, { "epoch": 0.371853622899648, "grad_norm": 206.24276733398438, "learning_rate": 1.875195549967029e-06, "loss": 21.0469, "step": 5599 }, { "epoch": 0.3719200371920037, "grad_norm": 642.0662841796875, "learning_rate": 1.8751435145689488e-06, "loss": 21.4375, "step": 5600 }, { "epoch": 0.37198645148435944, "grad_norm": 448.98138427734375, "learning_rate": 1.8750914690477129e-06, "loss": 17.4219, "step": 5601 }, { "epoch": 0.37205286577671515, "grad_norm": 275.8693542480469, "learning_rate": 1.8750394134039233e-06, "loss": 27.8281, "step": 5602 }, { "epoch": 0.37211928006907086, "grad_norm": 203.70472717285156, "learning_rate": 1.8749873476381826e-06, "loss": 19.9531, "step": 5603 }, { "epoch": 0.37218569436142657, "grad_norm": 144.31626892089844, "learning_rate": 1.8749352717510925e-06, "loss": 17.125, "step": 5604 }, { "epoch": 0.3722521086537823, "grad_norm": 154.00486755371094, "learning_rate": 1.8748831857432559e-06, "loss": 18.25, "step": 5605 }, { "epoch": 0.372318522946138, "grad_norm": 303.9126281738281, "learning_rate": 1.874831089615275e-06, "loss": 27.6875, "step": 5606 }, { "epoch": 0.3723849372384937, "grad_norm": 149.06771850585938, "learning_rate": 1.8747789833677528e-06, "loss": 19.5156, "step": 5607 }, { "epoch": 0.37245135153084946, "grad_norm": 312.55377197265625, "learning_rate": 1.8747268670012915e-06, "loss": 16.7031, "step": 5608 }, { "epoch": 0.37251776582320517, "grad_norm": 218.2406463623047, "learning_rate": 1.8746747405164943e-06, "loss": 18.4531, "step": 5609 }, { "epoch": 0.3725841801155609, "grad_norm": 153.1117401123047, "learning_rate": 1.874622603913964e-06, "loss": 13.5, "step": 5610 }, { "epoch": 0.3726505944079166, "grad_norm": 180.0237579345703, "learning_rate": 1.8745704571943035e-06, "loss": 16.6875, "step": 5611 }, { "epoch": 0.3727170087002723, "grad_norm": 463.3542785644531, "learning_rate": 1.8745183003581167e-06, "loss": 14.6406, "step": 5612 }, { "epoch": 0.372783422992628, "grad_norm": 170.79852294921875, "learning_rate": 1.874466133406006e-06, "loss": 15.75, "step": 5613 }, { "epoch": 0.3728498372849837, "grad_norm": 251.26194763183594, "learning_rate": 1.8744139563385758e-06, "loss": 22.3125, "step": 5614 }, { "epoch": 0.37291625157733943, "grad_norm": 361.3244934082031, "learning_rate": 1.8743617691564291e-06, "loss": 25.2656, "step": 5615 }, { "epoch": 0.37298266586969514, "grad_norm": 250.0970916748047, "learning_rate": 1.8743095718601697e-06, "loss": 18.9062, "step": 5616 }, { "epoch": 0.37304908016205085, "grad_norm": 241.33843994140625, "learning_rate": 1.874257364450401e-06, "loss": 17.3281, "step": 5617 }, { "epoch": 0.3731154944544066, "grad_norm": 293.2330627441406, "learning_rate": 1.8742051469277273e-06, "loss": 27.5625, "step": 5618 }, { "epoch": 0.3731819087467623, "grad_norm": 305.05401611328125, "learning_rate": 1.8741529192927526e-06, "loss": 18.6719, "step": 5619 }, { "epoch": 0.37324832303911804, "grad_norm": 274.7047119140625, "learning_rate": 1.874100681546081e-06, "loss": 21.6875, "step": 5620 }, { "epoch": 0.37331473733147374, "grad_norm": 170.02459716796875, "learning_rate": 1.874048433688317e-06, "loss": 16.4844, "step": 5621 }, { "epoch": 0.37338115162382945, "grad_norm": 645.9522705078125, "learning_rate": 1.8739961757200644e-06, "loss": 19.1719, "step": 5622 }, { "epoch": 0.37344756591618516, "grad_norm": 324.1239013671875, "learning_rate": 1.873943907641928e-06, "loss": 16.6562, "step": 5623 }, { "epoch": 0.3735139802085409, "grad_norm": 302.42779541015625, "learning_rate": 1.8738916294545126e-06, "loss": 18.5156, "step": 5624 }, { "epoch": 0.3735803945008966, "grad_norm": 368.8487548828125, "learning_rate": 1.8738393411584225e-06, "loss": 21.4688, "step": 5625 }, { "epoch": 0.3736468087932523, "grad_norm": 443.3140563964844, "learning_rate": 1.8737870427542633e-06, "loss": 18.8281, "step": 5626 }, { "epoch": 0.373713223085608, "grad_norm": 273.8814697265625, "learning_rate": 1.8737347342426389e-06, "loss": 16.25, "step": 5627 }, { "epoch": 0.3737796373779637, "grad_norm": 241.9080810546875, "learning_rate": 1.873682415624155e-06, "loss": 22.0156, "step": 5628 }, { "epoch": 0.3738460516703195, "grad_norm": 247.151123046875, "learning_rate": 1.873630086899417e-06, "loss": 15.625, "step": 5629 }, { "epoch": 0.3739124659626752, "grad_norm": 165.75320434570312, "learning_rate": 1.8735777480690297e-06, "loss": 20.3438, "step": 5630 }, { "epoch": 0.3739788802550309, "grad_norm": 533.9954223632812, "learning_rate": 1.8735253991335988e-06, "loss": 20.0781, "step": 5631 }, { "epoch": 0.3740452945473866, "grad_norm": 297.5655517578125, "learning_rate": 1.8734730400937299e-06, "loss": 22.7656, "step": 5632 }, { "epoch": 0.3741117088397423, "grad_norm": 511.9851379394531, "learning_rate": 1.8734206709500287e-06, "loss": 20.9688, "step": 5633 }, { "epoch": 0.37417812313209803, "grad_norm": 380.6510925292969, "learning_rate": 1.8733682917031004e-06, "loss": 22.8125, "step": 5634 }, { "epoch": 0.37424453742445374, "grad_norm": 253.1698760986328, "learning_rate": 1.8733159023535513e-06, "loss": 27.875, "step": 5635 }, { "epoch": 0.37431095171680945, "grad_norm": 213.7993621826172, "learning_rate": 1.8732635029019877e-06, "loss": 20.4219, "step": 5636 }, { "epoch": 0.37437736600916516, "grad_norm": 200.3103790283203, "learning_rate": 1.8732110933490155e-06, "loss": 15.2812, "step": 5637 }, { "epoch": 0.37444378030152087, "grad_norm": 159.25180053710938, "learning_rate": 1.8731586736952406e-06, "loss": 13.8516, "step": 5638 }, { "epoch": 0.3745101945938766, "grad_norm": 149.78204345703125, "learning_rate": 1.8731062439412698e-06, "loss": 16.75, "step": 5639 }, { "epoch": 0.37457660888623234, "grad_norm": 199.35980224609375, "learning_rate": 1.8730538040877095e-06, "loss": 20.25, "step": 5640 }, { "epoch": 0.37464302317858805, "grad_norm": 210.48207092285156, "learning_rate": 1.873001354135166e-06, "loss": 19.2812, "step": 5641 }, { "epoch": 0.37470943747094376, "grad_norm": 771.6799926757812, "learning_rate": 1.8729488940842465e-06, "loss": 24.625, "step": 5642 }, { "epoch": 0.3747758517632995, "grad_norm": 372.46514892578125, "learning_rate": 1.8728964239355576e-06, "loss": 22.75, "step": 5643 }, { "epoch": 0.3748422660556552, "grad_norm": 140.42784118652344, "learning_rate": 1.8728439436897058e-06, "loss": 17.0781, "step": 5644 }, { "epoch": 0.3749086803480109, "grad_norm": 111.21466827392578, "learning_rate": 1.872791453347299e-06, "loss": 15.0938, "step": 5645 }, { "epoch": 0.3749750946403666, "grad_norm": 207.01947021484375, "learning_rate": 1.8727389529089438e-06, "loss": 17.4219, "step": 5646 }, { "epoch": 0.3750415089327223, "grad_norm": 163.34048461914062, "learning_rate": 1.8726864423752477e-06, "loss": 16.4219, "step": 5647 }, { "epoch": 0.375107923225078, "grad_norm": 441.7923583984375, "learning_rate": 1.872633921746818e-06, "loss": 19.1406, "step": 5648 }, { "epoch": 0.37517433751743373, "grad_norm": 339.7774963378906, "learning_rate": 1.872581391024262e-06, "loss": 18.2812, "step": 5649 }, { "epoch": 0.37524075180978944, "grad_norm": 145.30406188964844, "learning_rate": 1.8725288502081881e-06, "loss": 17.4844, "step": 5650 }, { "epoch": 0.3753071661021452, "grad_norm": 167.7664794921875, "learning_rate": 1.8724762992992033e-06, "loss": 16.6406, "step": 5651 }, { "epoch": 0.3753735803945009, "grad_norm": 460.77740478515625, "learning_rate": 1.872423738297916e-06, "loss": 18.5312, "step": 5652 }, { "epoch": 0.3754399946868566, "grad_norm": 248.86041259765625, "learning_rate": 1.8723711672049337e-06, "loss": 17.6406, "step": 5653 }, { "epoch": 0.37550640897921234, "grad_norm": 257.5184020996094, "learning_rate": 1.872318586020865e-06, "loss": 18.7031, "step": 5654 }, { "epoch": 0.37557282327156805, "grad_norm": 399.1786804199219, "learning_rate": 1.8722659947463177e-06, "loss": 22.6562, "step": 5655 }, { "epoch": 0.37563923756392376, "grad_norm": 188.0661163330078, "learning_rate": 1.8722133933819007e-06, "loss": 18.1719, "step": 5656 }, { "epoch": 0.37570565185627947, "grad_norm": 208.2655487060547, "learning_rate": 1.8721607819282217e-06, "loss": 21.5469, "step": 5657 }, { "epoch": 0.3757720661486352, "grad_norm": 198.4376220703125, "learning_rate": 1.8721081603858898e-06, "loss": 22.1875, "step": 5658 }, { "epoch": 0.3758384804409909, "grad_norm": 227.1267547607422, "learning_rate": 1.8720555287555134e-06, "loss": 25.6875, "step": 5659 }, { "epoch": 0.3759048947333466, "grad_norm": 429.9517517089844, "learning_rate": 1.872002887037702e-06, "loss": 23.7969, "step": 5660 }, { "epoch": 0.3759713090257023, "grad_norm": 222.09429931640625, "learning_rate": 1.8719502352330637e-06, "loss": 20.875, "step": 5661 }, { "epoch": 0.37603772331805807, "grad_norm": 203.5375213623047, "learning_rate": 1.871897573342208e-06, "loss": 19.8594, "step": 5662 }, { "epoch": 0.3761041376104138, "grad_norm": 375.3986511230469, "learning_rate": 1.8718449013657441e-06, "loss": 20.5, "step": 5663 }, { "epoch": 0.3761705519027695, "grad_norm": 593.90380859375, "learning_rate": 1.8717922193042806e-06, "loss": 26.2812, "step": 5664 }, { "epoch": 0.3762369661951252, "grad_norm": 200.50045776367188, "learning_rate": 1.8717395271584277e-06, "loss": 19.1562, "step": 5665 }, { "epoch": 0.3763033804874809, "grad_norm": 548.4298095703125, "learning_rate": 1.8716868249287948e-06, "loss": 23.7188, "step": 5666 }, { "epoch": 0.3763697947798366, "grad_norm": 208.67999267578125, "learning_rate": 1.8716341126159912e-06, "loss": 20.625, "step": 5667 }, { "epoch": 0.37643620907219233, "grad_norm": 201.6431427001953, "learning_rate": 1.8715813902206267e-06, "loss": 16.8438, "step": 5668 }, { "epoch": 0.37650262336454804, "grad_norm": 305.76226806640625, "learning_rate": 1.8715286577433113e-06, "loss": 15.5156, "step": 5669 }, { "epoch": 0.37656903765690375, "grad_norm": 263.42681884765625, "learning_rate": 1.871475915184655e-06, "loss": 16.4609, "step": 5670 }, { "epoch": 0.37663545194925946, "grad_norm": 235.06912231445312, "learning_rate": 1.8714231625452679e-06, "loss": 19.9062, "step": 5671 }, { "epoch": 0.37670186624161517, "grad_norm": 399.00537109375, "learning_rate": 1.8713703998257602e-06, "loss": 16.4062, "step": 5672 }, { "epoch": 0.37676828053397093, "grad_norm": 130.7521209716797, "learning_rate": 1.871317627026742e-06, "loss": 17.4688, "step": 5673 }, { "epoch": 0.37683469482632664, "grad_norm": 176.51824951171875, "learning_rate": 1.8712648441488236e-06, "loss": 16.4375, "step": 5674 }, { "epoch": 0.37690110911868235, "grad_norm": 921.605712890625, "learning_rate": 1.8712120511926165e-06, "loss": 19.25, "step": 5675 }, { "epoch": 0.37696752341103806, "grad_norm": 388.07977294921875, "learning_rate": 1.8711592481587303e-06, "loss": 17.75, "step": 5676 }, { "epoch": 0.3770339377033938, "grad_norm": 174.31422424316406, "learning_rate": 1.8711064350477766e-06, "loss": 14.4688, "step": 5677 }, { "epoch": 0.3771003519957495, "grad_norm": 125.87647247314453, "learning_rate": 1.8710536118603656e-06, "loss": 16.0156, "step": 5678 }, { "epoch": 0.3771667662881052, "grad_norm": 161.117919921875, "learning_rate": 1.8710007785971091e-06, "loss": 13.2969, "step": 5679 }, { "epoch": 0.3772331805804609, "grad_norm": 214.3628387451172, "learning_rate": 1.8709479352586177e-06, "loss": 22.1562, "step": 5680 }, { "epoch": 0.3772995948728166, "grad_norm": 565.9727783203125, "learning_rate": 1.8708950818455028e-06, "loss": 19.7969, "step": 5681 }, { "epoch": 0.3773660091651723, "grad_norm": 155.03639221191406, "learning_rate": 1.8708422183583758e-06, "loss": 17.0625, "step": 5682 }, { "epoch": 0.37743242345752803, "grad_norm": 378.55718994140625, "learning_rate": 1.8707893447978483e-06, "loss": 27.9062, "step": 5683 }, { "epoch": 0.3774988377498838, "grad_norm": 296.1816101074219, "learning_rate": 1.8707364611645312e-06, "loss": 13.9375, "step": 5684 }, { "epoch": 0.3775652520422395, "grad_norm": 273.3768005371094, "learning_rate": 1.8706835674590376e-06, "loss": 15.4375, "step": 5685 }, { "epoch": 0.3776316663345952, "grad_norm": 219.91348266601562, "learning_rate": 1.8706306636819779e-06, "loss": 18.1094, "step": 5686 }, { "epoch": 0.3776980806269509, "grad_norm": 215.60018920898438, "learning_rate": 1.870577749833965e-06, "loss": 16.4688, "step": 5687 }, { "epoch": 0.37776449491930664, "grad_norm": 246.9284210205078, "learning_rate": 1.8705248259156107e-06, "loss": 18.0938, "step": 5688 }, { "epoch": 0.37783090921166235, "grad_norm": 348.1913757324219, "learning_rate": 1.8704718919275273e-06, "loss": 15.7656, "step": 5689 }, { "epoch": 0.37789732350401806, "grad_norm": 234.87059020996094, "learning_rate": 1.870418947870327e-06, "loss": 16.5, "step": 5690 }, { "epoch": 0.37796373779637377, "grad_norm": 352.807373046875, "learning_rate": 1.8703659937446221e-06, "loss": 18.0938, "step": 5691 }, { "epoch": 0.3780301520887295, "grad_norm": 157.84735107421875, "learning_rate": 1.870313029551025e-06, "loss": 11.6406, "step": 5692 }, { "epoch": 0.3780965663810852, "grad_norm": 262.4818420410156, "learning_rate": 1.8702600552901488e-06, "loss": 21.3438, "step": 5693 }, { "epoch": 0.37816298067344095, "grad_norm": 226.92245483398438, "learning_rate": 1.8702070709626063e-06, "loss": 16.5938, "step": 5694 }, { "epoch": 0.37822939496579666, "grad_norm": 255.8184051513672, "learning_rate": 1.87015407656901e-06, "loss": 19.0781, "step": 5695 }, { "epoch": 0.37829580925815237, "grad_norm": 139.56275939941406, "learning_rate": 1.8701010721099734e-06, "loss": 18.125, "step": 5696 }, { "epoch": 0.3783622235505081, "grad_norm": 209.61647033691406, "learning_rate": 1.8700480575861091e-06, "loss": 23.9219, "step": 5697 }, { "epoch": 0.3784286378428638, "grad_norm": 756.902587890625, "learning_rate": 1.8699950329980306e-06, "loss": 19.5469, "step": 5698 }, { "epoch": 0.3784950521352195, "grad_norm": 235.37770080566406, "learning_rate": 1.8699419983463511e-06, "loss": 18.1562, "step": 5699 }, { "epoch": 0.3785614664275752, "grad_norm": 265.496826171875, "learning_rate": 1.8698889536316843e-06, "loss": 20.9062, "step": 5700 }, { "epoch": 0.3786278807199309, "grad_norm": 210.1861572265625, "learning_rate": 1.8698358988546436e-06, "loss": 18.0625, "step": 5701 }, { "epoch": 0.37869429501228663, "grad_norm": 977.2465209960938, "learning_rate": 1.869782834015843e-06, "loss": 17.2812, "step": 5702 }, { "epoch": 0.37876070930464234, "grad_norm": 239.08544921875, "learning_rate": 1.869729759115896e-06, "loss": 23.8594, "step": 5703 }, { "epoch": 0.37882712359699805, "grad_norm": 311.4081726074219, "learning_rate": 1.869676674155417e-06, "loss": 11.5938, "step": 5704 }, { "epoch": 0.3788935378893538, "grad_norm": 659.669189453125, "learning_rate": 1.8696235791350193e-06, "loss": 21.9375, "step": 5705 }, { "epoch": 0.3789599521817095, "grad_norm": 246.49920654296875, "learning_rate": 1.8695704740553175e-06, "loss": 20.5156, "step": 5706 }, { "epoch": 0.37902636647406523, "grad_norm": 227.01773071289062, "learning_rate": 1.869517358916926e-06, "loss": 19.0469, "step": 5707 }, { "epoch": 0.37909278076642094, "grad_norm": 575.3306884765625, "learning_rate": 1.869464233720459e-06, "loss": 20.4219, "step": 5708 }, { "epoch": 0.37915919505877665, "grad_norm": 1564.4959716796875, "learning_rate": 1.8694110984665312e-06, "loss": 30.9531, "step": 5709 }, { "epoch": 0.37922560935113236, "grad_norm": 342.1094665527344, "learning_rate": 1.8693579531557568e-06, "loss": 18.875, "step": 5710 }, { "epoch": 0.3792920236434881, "grad_norm": 284.8885192871094, "learning_rate": 1.8693047977887515e-06, "loss": 26.8125, "step": 5711 }, { "epoch": 0.3793584379358438, "grad_norm": 146.24940490722656, "learning_rate": 1.869251632366129e-06, "loss": 17.3984, "step": 5712 }, { "epoch": 0.3794248522281995, "grad_norm": 303.12591552734375, "learning_rate": 1.8691984568885053e-06, "loss": 22.3281, "step": 5713 }, { "epoch": 0.3794912665205552, "grad_norm": 510.6625671386719, "learning_rate": 1.8691452713564948e-06, "loss": 30.1406, "step": 5714 }, { "epoch": 0.3795576808129109, "grad_norm": 131.02696228027344, "learning_rate": 1.869092075770713e-06, "loss": 18.9375, "step": 5715 }, { "epoch": 0.3796240951052667, "grad_norm": 208.10879516601562, "learning_rate": 1.8690388701317754e-06, "loss": 20.5312, "step": 5716 }, { "epoch": 0.3796905093976224, "grad_norm": 183.66690063476562, "learning_rate": 1.8689856544402971e-06, "loss": 18.8906, "step": 5717 }, { "epoch": 0.3797569236899781, "grad_norm": 170.72476196289062, "learning_rate": 1.868932428696894e-06, "loss": 19.2344, "step": 5718 }, { "epoch": 0.3798233379823338, "grad_norm": 194.34454345703125, "learning_rate": 1.8688791929021815e-06, "loss": 14.3906, "step": 5719 }, { "epoch": 0.3798897522746895, "grad_norm": 373.7431945800781, "learning_rate": 1.8688259470567753e-06, "loss": 26.5781, "step": 5720 }, { "epoch": 0.37995616656704523, "grad_norm": 392.5791931152344, "learning_rate": 1.868772691161292e-06, "loss": 20.5938, "step": 5721 }, { "epoch": 0.38002258085940094, "grad_norm": 233.4799346923828, "learning_rate": 1.8687194252163469e-06, "loss": 22.4688, "step": 5722 }, { "epoch": 0.38008899515175665, "grad_norm": 192.37734985351562, "learning_rate": 1.8686661492225563e-06, "loss": 15.1406, "step": 5723 }, { "epoch": 0.38015540944411236, "grad_norm": 289.06793212890625, "learning_rate": 1.868612863180537e-06, "loss": 20.1016, "step": 5724 }, { "epoch": 0.38022182373646807, "grad_norm": 118.43611145019531, "learning_rate": 1.8685595670909046e-06, "loss": 15.7812, "step": 5725 }, { "epoch": 0.3802882380288238, "grad_norm": 241.46401977539062, "learning_rate": 1.8685062609542758e-06, "loss": 18.2031, "step": 5726 }, { "epoch": 0.38035465232117954, "grad_norm": 167.9512176513672, "learning_rate": 1.8684529447712677e-06, "loss": 15.7812, "step": 5727 }, { "epoch": 0.38042106661353525, "grad_norm": 443.62139892578125, "learning_rate": 1.868399618542497e-06, "loss": 29.2656, "step": 5728 }, { "epoch": 0.38048748090589096, "grad_norm": 211.35714721679688, "learning_rate": 1.8683462822685799e-06, "loss": 19.5156, "step": 5729 }, { "epoch": 0.38055389519824667, "grad_norm": 177.22703552246094, "learning_rate": 1.8682929359501337e-06, "loss": 21.4688, "step": 5730 }, { "epoch": 0.3806203094906024, "grad_norm": 155.4625244140625, "learning_rate": 1.8682395795877755e-06, "loss": 16.8438, "step": 5731 }, { "epoch": 0.3806867237829581, "grad_norm": 279.6610107421875, "learning_rate": 1.8681862131821227e-06, "loss": 21.0, "step": 5732 }, { "epoch": 0.3807531380753138, "grad_norm": 575.1123046875, "learning_rate": 1.8681328367337921e-06, "loss": 18.1719, "step": 5733 }, { "epoch": 0.3808195523676695, "grad_norm": 151.7352294921875, "learning_rate": 1.8680794502434016e-06, "loss": 19.7031, "step": 5734 }, { "epoch": 0.3808859666600252, "grad_norm": 320.58349609375, "learning_rate": 1.8680260537115686e-06, "loss": 23.875, "step": 5735 }, { "epoch": 0.38095238095238093, "grad_norm": 176.218017578125, "learning_rate": 1.8679726471389108e-06, "loss": 25.1562, "step": 5736 }, { "epoch": 0.38101879524473664, "grad_norm": 191.78321838378906, "learning_rate": 1.8679192305260458e-06, "loss": 21.375, "step": 5737 }, { "epoch": 0.3810852095370924, "grad_norm": 203.33836364746094, "learning_rate": 1.8678658038735917e-06, "loss": 17.7031, "step": 5738 }, { "epoch": 0.3811516238294481, "grad_norm": 236.57203674316406, "learning_rate": 1.8678123671821663e-06, "loss": 20.8594, "step": 5739 }, { "epoch": 0.3812180381218038, "grad_norm": 284.88299560546875, "learning_rate": 1.867758920452388e-06, "loss": 19.8438, "step": 5740 }, { "epoch": 0.38128445241415954, "grad_norm": 248.5437469482422, "learning_rate": 1.8677054636848748e-06, "loss": 20.0469, "step": 5741 }, { "epoch": 0.38135086670651525, "grad_norm": 228.06346130371094, "learning_rate": 1.8676519968802454e-06, "loss": 25.25, "step": 5742 }, { "epoch": 0.38141728099887096, "grad_norm": 119.7900161743164, "learning_rate": 1.8675985200391175e-06, "loss": 13.5938, "step": 5743 }, { "epoch": 0.38148369529122667, "grad_norm": 269.2790832519531, "learning_rate": 1.8675450331621106e-06, "loss": 18.0781, "step": 5744 }, { "epoch": 0.3815501095835824, "grad_norm": 173.42286682128906, "learning_rate": 1.8674915362498429e-06, "loss": 22.7969, "step": 5745 }, { "epoch": 0.3816165238759381, "grad_norm": 185.5259552001953, "learning_rate": 1.8674380293029332e-06, "loss": 20.9062, "step": 5746 }, { "epoch": 0.3816829381682938, "grad_norm": 202.31117248535156, "learning_rate": 1.8673845123220007e-06, "loss": 18.2969, "step": 5747 }, { "epoch": 0.3817493524606495, "grad_norm": 250.26063537597656, "learning_rate": 1.8673309853076643e-06, "loss": 18.0781, "step": 5748 }, { "epoch": 0.38181576675300527, "grad_norm": 177.88937377929688, "learning_rate": 1.8672774482605433e-06, "loss": 16.7734, "step": 5749 }, { "epoch": 0.381882181045361, "grad_norm": 300.3489990234375, "learning_rate": 1.8672239011812567e-06, "loss": 19.6094, "step": 5750 }, { "epoch": 0.3819485953377167, "grad_norm": 221.31552124023438, "learning_rate": 1.8671703440704243e-06, "loss": 16.1094, "step": 5751 }, { "epoch": 0.3820150096300724, "grad_norm": 391.375244140625, "learning_rate": 1.8671167769286653e-06, "loss": 22.5938, "step": 5752 }, { "epoch": 0.3820814239224281, "grad_norm": 95.11914825439453, "learning_rate": 1.8670631997565996e-06, "loss": 16.0469, "step": 5753 }, { "epoch": 0.3821478382147838, "grad_norm": 165.54339599609375, "learning_rate": 1.8670096125548462e-06, "loss": 14.6406, "step": 5754 }, { "epoch": 0.38221425250713953, "grad_norm": 169.48394775390625, "learning_rate": 1.866956015324026e-06, "loss": 22.125, "step": 5755 }, { "epoch": 0.38228066679949524, "grad_norm": 186.8816375732422, "learning_rate": 1.8669024080647585e-06, "loss": 22.5, "step": 5756 }, { "epoch": 0.38234708109185095, "grad_norm": 144.84722900390625, "learning_rate": 1.8668487907776637e-06, "loss": 16.5781, "step": 5757 }, { "epoch": 0.38241349538420666, "grad_norm": 243.7522735595703, "learning_rate": 1.8667951634633624e-06, "loss": 21.4062, "step": 5758 }, { "epoch": 0.38247990967656237, "grad_norm": 171.09194946289062, "learning_rate": 1.8667415261224741e-06, "loss": 16.8438, "step": 5759 }, { "epoch": 0.38254632396891813, "grad_norm": 241.3153533935547, "learning_rate": 1.8666878787556196e-06, "loss": 20.5156, "step": 5760 }, { "epoch": 0.38261273826127384, "grad_norm": 161.87001037597656, "learning_rate": 1.8666342213634195e-06, "loss": 22.75, "step": 5761 }, { "epoch": 0.38267915255362955, "grad_norm": 212.66258239746094, "learning_rate": 1.8665805539464942e-06, "loss": 24.4062, "step": 5762 }, { "epoch": 0.38274556684598526, "grad_norm": 323.786865234375, "learning_rate": 1.8665268765054653e-06, "loss": 19.0938, "step": 5763 }, { "epoch": 0.382811981138341, "grad_norm": 274.6207580566406, "learning_rate": 1.8664731890409529e-06, "loss": 25.5625, "step": 5764 }, { "epoch": 0.3828783954306967, "grad_norm": 214.65855407714844, "learning_rate": 1.8664194915535783e-06, "loss": 19.6875, "step": 5765 }, { "epoch": 0.3829448097230524, "grad_norm": 1871.8765869140625, "learning_rate": 1.8663657840439627e-06, "loss": 23.8438, "step": 5766 }, { "epoch": 0.3830112240154081, "grad_norm": 246.28366088867188, "learning_rate": 1.866312066512727e-06, "loss": 18.6875, "step": 5767 }, { "epoch": 0.3830776383077638, "grad_norm": 842.454345703125, "learning_rate": 1.8662583389604931e-06, "loss": 24.3281, "step": 5768 }, { "epoch": 0.3831440526001195, "grad_norm": 231.7724151611328, "learning_rate": 1.866204601387882e-06, "loss": 22.0625, "step": 5769 }, { "epoch": 0.3832104668924753, "grad_norm": 297.1000061035156, "learning_rate": 1.8661508537955157e-06, "loss": 16.0859, "step": 5770 }, { "epoch": 0.383276881184831, "grad_norm": 190.89016723632812, "learning_rate": 1.8660970961840159e-06, "loss": 16.3125, "step": 5771 }, { "epoch": 0.3833432954771867, "grad_norm": 116.94609069824219, "learning_rate": 1.8660433285540043e-06, "loss": 18.0781, "step": 5772 }, { "epoch": 0.3834097097695424, "grad_norm": 130.82470703125, "learning_rate": 1.8659895509061027e-06, "loss": 15.625, "step": 5773 }, { "epoch": 0.3834761240618981, "grad_norm": 216.99386596679688, "learning_rate": 1.8659357632409335e-06, "loss": 18.5469, "step": 5774 }, { "epoch": 0.38354253835425384, "grad_norm": 277.43280029296875, "learning_rate": 1.8658819655591187e-06, "loss": 16.5156, "step": 5775 }, { "epoch": 0.38360895264660955, "grad_norm": 122.0398178100586, "learning_rate": 1.8658281578612804e-06, "loss": 21.7969, "step": 5776 }, { "epoch": 0.38367536693896526, "grad_norm": 139.89923095703125, "learning_rate": 1.8657743401480415e-06, "loss": 17.3281, "step": 5777 }, { "epoch": 0.38374178123132097, "grad_norm": 186.24581909179688, "learning_rate": 1.8657205124200242e-06, "loss": 21.8438, "step": 5778 }, { "epoch": 0.3838081955236767, "grad_norm": 204.96107482910156, "learning_rate": 1.865666674677851e-06, "loss": 18.4375, "step": 5779 }, { "epoch": 0.3838746098160324, "grad_norm": 210.67710876464844, "learning_rate": 1.8656128269221452e-06, "loss": 23.8438, "step": 5780 }, { "epoch": 0.38394102410838815, "grad_norm": 116.17755889892578, "learning_rate": 1.8655589691535293e-06, "loss": 15.1406, "step": 5781 }, { "epoch": 0.38400743840074386, "grad_norm": 126.04057312011719, "learning_rate": 1.8655051013726263e-06, "loss": 17.0312, "step": 5782 }, { "epoch": 0.38407385269309957, "grad_norm": 211.6429901123047, "learning_rate": 1.8654512235800593e-06, "loss": 16.8594, "step": 5783 }, { "epoch": 0.3841402669854553, "grad_norm": 167.2516632080078, "learning_rate": 1.8653973357764517e-06, "loss": 16.6875, "step": 5784 }, { "epoch": 0.384206681277811, "grad_norm": 168.94610595703125, "learning_rate": 1.8653434379624264e-06, "loss": 18.2812, "step": 5785 }, { "epoch": 0.3842730955701667, "grad_norm": 735.50732421875, "learning_rate": 1.8652895301386075e-06, "loss": 18.1719, "step": 5786 }, { "epoch": 0.3843395098625224, "grad_norm": 123.20053100585938, "learning_rate": 1.8652356123056183e-06, "loss": 16.0781, "step": 5787 }, { "epoch": 0.3844059241548781, "grad_norm": 242.89402770996094, "learning_rate": 1.8651816844640824e-06, "loss": 15.1562, "step": 5788 }, { "epoch": 0.38447233844723383, "grad_norm": 126.65758514404297, "learning_rate": 1.8651277466146236e-06, "loss": 14.8438, "step": 5789 }, { "epoch": 0.38453875273958954, "grad_norm": 365.6190490722656, "learning_rate": 1.865073798757866e-06, "loss": 20.1406, "step": 5790 }, { "epoch": 0.38460516703194525, "grad_norm": 268.875732421875, "learning_rate": 1.8650198408944332e-06, "loss": 20.0312, "step": 5791 }, { "epoch": 0.384671581324301, "grad_norm": 273.1282653808594, "learning_rate": 1.8649658730249503e-06, "loss": 14.2188, "step": 5792 }, { "epoch": 0.3847379956166567, "grad_norm": 269.08453369140625, "learning_rate": 1.8649118951500404e-06, "loss": 15.0312, "step": 5793 }, { "epoch": 0.38480440990901243, "grad_norm": 148.70706176757812, "learning_rate": 1.8648579072703287e-06, "loss": 20.25, "step": 5794 }, { "epoch": 0.38487082420136814, "grad_norm": 504.0433044433594, "learning_rate": 1.8648039093864393e-06, "loss": 18.5781, "step": 5795 }, { "epoch": 0.38493723849372385, "grad_norm": 193.73744201660156, "learning_rate": 1.864749901498997e-06, "loss": 18.4688, "step": 5796 }, { "epoch": 0.38500365278607956, "grad_norm": 296.1859130859375, "learning_rate": 1.8646958836086265e-06, "loss": 27.0938, "step": 5797 }, { "epoch": 0.3850700670784353, "grad_norm": 160.29544067382812, "learning_rate": 1.864641855715953e-06, "loss": 21.5625, "step": 5798 }, { "epoch": 0.385136481370791, "grad_norm": 180.7581787109375, "learning_rate": 1.8645878178216006e-06, "loss": 17.8906, "step": 5799 }, { "epoch": 0.3852028956631467, "grad_norm": 283.91632080078125, "learning_rate": 1.8645337699261948e-06, "loss": 16.3281, "step": 5800 }, { "epoch": 0.3852693099555024, "grad_norm": 378.3922119140625, "learning_rate": 1.864479712030361e-06, "loss": 17.5625, "step": 5801 }, { "epoch": 0.3853357242478581, "grad_norm": 176.47801208496094, "learning_rate": 1.8644256441347248e-06, "loss": 15.0469, "step": 5802 }, { "epoch": 0.3854021385402139, "grad_norm": 193.09426879882812, "learning_rate": 1.8643715662399107e-06, "loss": 24.1094, "step": 5803 }, { "epoch": 0.3854685528325696, "grad_norm": 258.10076904296875, "learning_rate": 1.8643174783465449e-06, "loss": 20.4219, "step": 5804 }, { "epoch": 0.3855349671249253, "grad_norm": 180.0631866455078, "learning_rate": 1.864263380455253e-06, "loss": 18.6406, "step": 5805 }, { "epoch": 0.385601381417281, "grad_norm": 126.41539001464844, "learning_rate": 1.8642092725666604e-06, "loss": 13.6406, "step": 5806 }, { "epoch": 0.3856677957096367, "grad_norm": 179.7842559814453, "learning_rate": 1.8641551546813936e-06, "loss": 15.2656, "step": 5807 }, { "epoch": 0.38573421000199243, "grad_norm": 535.0476684570312, "learning_rate": 1.864101026800078e-06, "loss": 16.0469, "step": 5808 }, { "epoch": 0.38580062429434814, "grad_norm": 932.7711791992188, "learning_rate": 1.8640468889233404e-06, "loss": 17.4375, "step": 5809 }, { "epoch": 0.38586703858670385, "grad_norm": 192.09963989257812, "learning_rate": 1.8639927410518061e-06, "loss": 18.7031, "step": 5810 }, { "epoch": 0.38593345287905956, "grad_norm": 153.01837158203125, "learning_rate": 1.8639385831861022e-06, "loss": 14.8438, "step": 5811 }, { "epoch": 0.38599986717141527, "grad_norm": 274.7090759277344, "learning_rate": 1.8638844153268552e-06, "loss": 20.1719, "step": 5812 }, { "epoch": 0.386066281463771, "grad_norm": 149.22836303710938, "learning_rate": 1.863830237474691e-06, "loss": 15.0469, "step": 5813 }, { "epoch": 0.38613269575612674, "grad_norm": 260.2269287109375, "learning_rate": 1.8637760496302369e-06, "loss": 37.9375, "step": 5814 }, { "epoch": 0.38619911004848245, "grad_norm": 253.24668884277344, "learning_rate": 1.8637218517941197e-06, "loss": 23.1094, "step": 5815 }, { "epoch": 0.38626552434083816, "grad_norm": 341.1067199707031, "learning_rate": 1.863667643966966e-06, "loss": 23.875, "step": 5816 }, { "epoch": 0.38633193863319387, "grad_norm": 250.42306518554688, "learning_rate": 1.8636134261494029e-06, "loss": 19.0312, "step": 5817 }, { "epoch": 0.3863983529255496, "grad_norm": 941.2593383789062, "learning_rate": 1.8635591983420578e-06, "loss": 19.5156, "step": 5818 }, { "epoch": 0.3864647672179053, "grad_norm": 221.21780395507812, "learning_rate": 1.8635049605455579e-06, "loss": 14.75, "step": 5819 }, { "epoch": 0.386531181510261, "grad_norm": 289.9900207519531, "learning_rate": 1.8634507127605305e-06, "loss": 15.0469, "step": 5820 }, { "epoch": 0.3865975958026167, "grad_norm": 300.03680419921875, "learning_rate": 1.8633964549876034e-06, "loss": 20.75, "step": 5821 }, { "epoch": 0.3866640100949724, "grad_norm": 316.02825927734375, "learning_rate": 1.8633421872274036e-06, "loss": 16.7344, "step": 5822 }, { "epoch": 0.38673042438732813, "grad_norm": 245.9867706298828, "learning_rate": 1.8632879094805594e-06, "loss": 18.1719, "step": 5823 }, { "epoch": 0.38679683867968384, "grad_norm": 258.586181640625, "learning_rate": 1.8632336217476983e-06, "loss": 17.1562, "step": 5824 }, { "epoch": 0.3868632529720396, "grad_norm": 142.6648406982422, "learning_rate": 1.8631793240294483e-06, "loss": 15.875, "step": 5825 }, { "epoch": 0.3869296672643953, "grad_norm": 344.2962646484375, "learning_rate": 1.8631250163264381e-06, "loss": 16.5, "step": 5826 }, { "epoch": 0.386996081556751, "grad_norm": 229.6949005126953, "learning_rate": 1.8630706986392952e-06, "loss": 14.8906, "step": 5827 }, { "epoch": 0.38706249584910674, "grad_norm": 171.19053649902344, "learning_rate": 1.8630163709686481e-06, "loss": 20.9062, "step": 5828 }, { "epoch": 0.38712891014146245, "grad_norm": 322.6485595703125, "learning_rate": 1.8629620333151253e-06, "loss": 17.1094, "step": 5829 }, { "epoch": 0.38719532443381816, "grad_norm": 130.30357360839844, "learning_rate": 1.8629076856793554e-06, "loss": 18.1094, "step": 5830 }, { "epoch": 0.38726173872617387, "grad_norm": 126.57633209228516, "learning_rate": 1.8628533280619667e-06, "loss": 13.9531, "step": 5831 }, { "epoch": 0.3873281530185296, "grad_norm": 135.20054626464844, "learning_rate": 1.8627989604635883e-06, "loss": 15.2812, "step": 5832 }, { "epoch": 0.3873945673108853, "grad_norm": 344.1851806640625, "learning_rate": 1.862744582884849e-06, "loss": 25.4375, "step": 5833 }, { "epoch": 0.387460981603241, "grad_norm": 132.4739532470703, "learning_rate": 1.8626901953263782e-06, "loss": 14.3906, "step": 5834 }, { "epoch": 0.38752739589559676, "grad_norm": 256.1094665527344, "learning_rate": 1.8626357977888042e-06, "loss": 20.5781, "step": 5835 }, { "epoch": 0.38759381018795247, "grad_norm": 432.5609436035156, "learning_rate": 1.8625813902727571e-06, "loss": 18.3438, "step": 5836 }, { "epoch": 0.3876602244803082, "grad_norm": 125.223388671875, "learning_rate": 1.8625269727788658e-06, "loss": 18.3281, "step": 5837 }, { "epoch": 0.3877266387726639, "grad_norm": 162.00018310546875, "learning_rate": 1.8624725453077599e-06, "loss": 14.25, "step": 5838 }, { "epoch": 0.3877930530650196, "grad_norm": 216.1399688720703, "learning_rate": 1.8624181078600687e-06, "loss": 18.8281, "step": 5839 }, { "epoch": 0.3878594673573753, "grad_norm": 300.4720764160156, "learning_rate": 1.8623636604364223e-06, "loss": 21.9375, "step": 5840 }, { "epoch": 0.387925881649731, "grad_norm": 182.33067321777344, "learning_rate": 1.8623092030374503e-06, "loss": 20.2188, "step": 5841 }, { "epoch": 0.38799229594208673, "grad_norm": 178.1045379638672, "learning_rate": 1.8622547356637828e-06, "loss": 17.0938, "step": 5842 }, { "epoch": 0.38805871023444244, "grad_norm": 239.1427459716797, "learning_rate": 1.8622002583160494e-06, "loss": 23.2188, "step": 5843 }, { "epoch": 0.38812512452679815, "grad_norm": 253.67494201660156, "learning_rate": 1.862145770994881e-06, "loss": 16.0156, "step": 5844 }, { "epoch": 0.38819153881915386, "grad_norm": 257.6563720703125, "learning_rate": 1.8620912737009072e-06, "loss": 20.8906, "step": 5845 }, { "epoch": 0.3882579531115096, "grad_norm": 388.6095886230469, "learning_rate": 1.8620367664347584e-06, "loss": 20.9062, "step": 5846 }, { "epoch": 0.38832436740386533, "grad_norm": 317.19097900390625, "learning_rate": 1.8619822491970658e-06, "loss": 29.0625, "step": 5847 }, { "epoch": 0.38839078169622104, "grad_norm": 280.1557312011719, "learning_rate": 1.8619277219884595e-06, "loss": 21.1875, "step": 5848 }, { "epoch": 0.38845719598857675, "grad_norm": 185.92335510253906, "learning_rate": 1.8618731848095704e-06, "loss": 17.4844, "step": 5849 }, { "epoch": 0.38852361028093246, "grad_norm": 222.0376434326172, "learning_rate": 1.8618186376610292e-06, "loss": 20.0156, "step": 5850 }, { "epoch": 0.3885900245732882, "grad_norm": 138.7915496826172, "learning_rate": 1.861764080543467e-06, "loss": 17.9688, "step": 5851 }, { "epoch": 0.3886564388656439, "grad_norm": 295.87322998046875, "learning_rate": 1.8617095134575145e-06, "loss": 16.3438, "step": 5852 }, { "epoch": 0.3887228531579996, "grad_norm": 256.9582214355469, "learning_rate": 1.8616549364038034e-06, "loss": 18.2188, "step": 5853 }, { "epoch": 0.3887892674503553, "grad_norm": 269.74493408203125, "learning_rate": 1.861600349382965e-06, "loss": 20.5625, "step": 5854 }, { "epoch": 0.388855681742711, "grad_norm": 198.44288635253906, "learning_rate": 1.8615457523956307e-06, "loss": 16.2031, "step": 5855 }, { "epoch": 0.3889220960350667, "grad_norm": 169.95787048339844, "learning_rate": 1.8614911454424318e-06, "loss": 21.2031, "step": 5856 }, { "epoch": 0.3889885103274225, "grad_norm": 430.3637390136719, "learning_rate": 1.8614365285239998e-06, "loss": 26.2188, "step": 5857 }, { "epoch": 0.3890549246197782, "grad_norm": 177.77085876464844, "learning_rate": 1.8613819016409672e-06, "loss": 16.875, "step": 5858 }, { "epoch": 0.3891213389121339, "grad_norm": 490.2720031738281, "learning_rate": 1.8613272647939654e-06, "loss": 22.875, "step": 5859 }, { "epoch": 0.3891877532044896, "grad_norm": 396.934326171875, "learning_rate": 1.8612726179836263e-06, "loss": 20.6406, "step": 5860 }, { "epoch": 0.3892541674968453, "grad_norm": 300.135498046875, "learning_rate": 1.861217961210582e-06, "loss": 17.3438, "step": 5861 }, { "epoch": 0.38932058178920104, "grad_norm": 247.84742736816406, "learning_rate": 1.8611632944754653e-06, "loss": 22.7812, "step": 5862 }, { "epoch": 0.38938699608155675, "grad_norm": 142.1334686279297, "learning_rate": 1.861108617778908e-06, "loss": 13.6094, "step": 5863 }, { "epoch": 0.38945341037391246, "grad_norm": 171.36216735839844, "learning_rate": 1.861053931121543e-06, "loss": 17.125, "step": 5864 }, { "epoch": 0.38951982466626817, "grad_norm": 115.14202117919922, "learning_rate": 1.8609992345040024e-06, "loss": 16.8281, "step": 5865 }, { "epoch": 0.3895862389586239, "grad_norm": 191.4394989013672, "learning_rate": 1.860944527926919e-06, "loss": 16.0156, "step": 5866 }, { "epoch": 0.3896526532509796, "grad_norm": 249.5181427001953, "learning_rate": 1.860889811390926e-06, "loss": 21.0, "step": 5867 }, { "epoch": 0.38971906754333535, "grad_norm": 1747.583740234375, "learning_rate": 1.860835084896656e-06, "loss": 20.7812, "step": 5868 }, { "epoch": 0.38978548183569106, "grad_norm": 230.10720825195312, "learning_rate": 1.8607803484447424e-06, "loss": 17.0, "step": 5869 }, { "epoch": 0.38985189612804677, "grad_norm": 96.87943267822266, "learning_rate": 1.8607256020358178e-06, "loss": 16.1875, "step": 5870 }, { "epoch": 0.3899183104204025, "grad_norm": 295.31402587890625, "learning_rate": 1.8606708456705158e-06, "loss": 16.3906, "step": 5871 }, { "epoch": 0.3899847247127582, "grad_norm": 356.9703674316406, "learning_rate": 1.8606160793494697e-06, "loss": 17.0625, "step": 5872 }, { "epoch": 0.3900511390051139, "grad_norm": 457.8150634765625, "learning_rate": 1.8605613030733132e-06, "loss": 18.7969, "step": 5873 }, { "epoch": 0.3901175532974696, "grad_norm": 199.62994384765625, "learning_rate": 1.8605065168426798e-06, "loss": 21.7812, "step": 5874 }, { "epoch": 0.3901839675898253, "grad_norm": 284.8045654296875, "learning_rate": 1.8604517206582028e-06, "loss": 16.5, "step": 5875 }, { "epoch": 0.39025038188218103, "grad_norm": 323.3586730957031, "learning_rate": 1.8603969145205168e-06, "loss": 17.1562, "step": 5876 }, { "epoch": 0.39031679617453674, "grad_norm": 228.05960083007812, "learning_rate": 1.8603420984302553e-06, "loss": 19.5469, "step": 5877 }, { "epoch": 0.39038321046689245, "grad_norm": 255.79029846191406, "learning_rate": 1.8602872723880525e-06, "loss": 24.5625, "step": 5878 }, { "epoch": 0.3904496247592482, "grad_norm": 323.2156066894531, "learning_rate": 1.8602324363945428e-06, "loss": 21.0469, "step": 5879 }, { "epoch": 0.3905160390516039, "grad_norm": 188.73403930664062, "learning_rate": 1.8601775904503602e-06, "loss": 17.7812, "step": 5880 }, { "epoch": 0.39058245334395963, "grad_norm": 837.9345703125, "learning_rate": 1.860122734556139e-06, "loss": 15.1719, "step": 5881 }, { "epoch": 0.39064886763631534, "grad_norm": 222.60279846191406, "learning_rate": 1.8600678687125145e-06, "loss": 20.8594, "step": 5882 }, { "epoch": 0.39071528192867105, "grad_norm": 163.47906494140625, "learning_rate": 1.8600129929201206e-06, "loss": 17.9062, "step": 5883 }, { "epoch": 0.39078169622102676, "grad_norm": 298.6398010253906, "learning_rate": 1.8599581071795921e-06, "loss": 14.0156, "step": 5884 }, { "epoch": 0.3908481105133825, "grad_norm": 218.5222625732422, "learning_rate": 1.8599032114915642e-06, "loss": 20.8281, "step": 5885 }, { "epoch": 0.3909145248057382, "grad_norm": 273.95452880859375, "learning_rate": 1.859848305856672e-06, "loss": 21.8594, "step": 5886 }, { "epoch": 0.3909809390980939, "grad_norm": 245.9416961669922, "learning_rate": 1.8597933902755503e-06, "loss": 16.5781, "step": 5887 }, { "epoch": 0.3910473533904496, "grad_norm": 312.3916931152344, "learning_rate": 1.8597384647488342e-06, "loss": 18.8906, "step": 5888 }, { "epoch": 0.3911137676828053, "grad_norm": 295.1397705078125, "learning_rate": 1.8596835292771598e-06, "loss": 18.9688, "step": 5889 }, { "epoch": 0.3911801819751611, "grad_norm": 111.57034301757812, "learning_rate": 1.8596285838611616e-06, "loss": 13.1016, "step": 5890 }, { "epoch": 0.3912465962675168, "grad_norm": 182.17221069335938, "learning_rate": 1.859573628501476e-06, "loss": 13.4531, "step": 5891 }, { "epoch": 0.3913130105598725, "grad_norm": 153.98858642578125, "learning_rate": 1.8595186631987378e-06, "loss": 12.1562, "step": 5892 }, { "epoch": 0.3913794248522282, "grad_norm": 259.4347839355469, "learning_rate": 1.8594636879535836e-06, "loss": 19.8438, "step": 5893 }, { "epoch": 0.3914458391445839, "grad_norm": 462.56268310546875, "learning_rate": 1.8594087027666495e-06, "loss": 25.8125, "step": 5894 }, { "epoch": 0.39151225343693963, "grad_norm": 153.0560302734375, "learning_rate": 1.8593537076385703e-06, "loss": 18.6875, "step": 5895 }, { "epoch": 0.39157866772929534, "grad_norm": 726.4210815429688, "learning_rate": 1.8592987025699836e-06, "loss": 15.6875, "step": 5896 }, { "epoch": 0.39164508202165105, "grad_norm": 185.10169982910156, "learning_rate": 1.8592436875615247e-06, "loss": 23.8594, "step": 5897 }, { "epoch": 0.39171149631400676, "grad_norm": 184.17465209960938, "learning_rate": 1.8591886626138305e-06, "loss": 17.3906, "step": 5898 }, { "epoch": 0.39177791060636247, "grad_norm": 195.91175842285156, "learning_rate": 1.859133627727537e-06, "loss": 19.2031, "step": 5899 }, { "epoch": 0.3918443248987182, "grad_norm": 150.15077209472656, "learning_rate": 1.8590785829032812e-06, "loss": 19.9844, "step": 5900 }, { "epoch": 0.39191073919107394, "grad_norm": 643.5839233398438, "learning_rate": 1.8590235281416996e-06, "loss": 27.6875, "step": 5901 }, { "epoch": 0.39197715348342965, "grad_norm": 314.93817138671875, "learning_rate": 1.8589684634434293e-06, "loss": 16.375, "step": 5902 }, { "epoch": 0.39204356777578536, "grad_norm": 136.83084106445312, "learning_rate": 1.8589133888091072e-06, "loss": 19.7188, "step": 5903 }, { "epoch": 0.39210998206814107, "grad_norm": 154.8197479248047, "learning_rate": 1.85885830423937e-06, "loss": 21.1562, "step": 5904 }, { "epoch": 0.3921763963604968, "grad_norm": 211.14564514160156, "learning_rate": 1.8588032097348557e-06, "loss": 20.9688, "step": 5905 }, { "epoch": 0.3922428106528525, "grad_norm": 219.6154022216797, "learning_rate": 1.8587481052962009e-06, "loss": 16.125, "step": 5906 }, { "epoch": 0.3923092249452082, "grad_norm": 431.3760986328125, "learning_rate": 1.858692990924043e-06, "loss": 17.1719, "step": 5907 }, { "epoch": 0.3923756392375639, "grad_norm": 147.0741729736328, "learning_rate": 1.85863786661902e-06, "loss": 14.2969, "step": 5908 }, { "epoch": 0.3924420535299196, "grad_norm": 215.29522705078125, "learning_rate": 1.8585827323817689e-06, "loss": 17.0625, "step": 5909 }, { "epoch": 0.39250846782227533, "grad_norm": 378.1044921875, "learning_rate": 1.8585275882129282e-06, "loss": 24.5156, "step": 5910 }, { "epoch": 0.3925748821146311, "grad_norm": 123.80067443847656, "learning_rate": 1.858472434113135e-06, "loss": 13.8281, "step": 5911 }, { "epoch": 0.3926412964069868, "grad_norm": 236.31619262695312, "learning_rate": 1.8584172700830281e-06, "loss": 19.0, "step": 5912 }, { "epoch": 0.3927077106993425, "grad_norm": 862.8765258789062, "learning_rate": 1.8583620961232452e-06, "loss": 22.0938, "step": 5913 }, { "epoch": 0.3927741249916982, "grad_norm": 218.46853637695312, "learning_rate": 1.8583069122344244e-06, "loss": 21.9062, "step": 5914 }, { "epoch": 0.39284053928405394, "grad_norm": 514.7124633789062, "learning_rate": 1.858251718417204e-06, "loss": 15.5625, "step": 5915 }, { "epoch": 0.39290695357640965, "grad_norm": 342.203369140625, "learning_rate": 1.858196514672223e-06, "loss": 21.0625, "step": 5916 }, { "epoch": 0.39297336786876536, "grad_norm": 202.05763244628906, "learning_rate": 1.8581413010001193e-06, "loss": 18.625, "step": 5917 }, { "epoch": 0.39303978216112107, "grad_norm": 207.000244140625, "learning_rate": 1.858086077401532e-06, "loss": 12.9375, "step": 5918 }, { "epoch": 0.3931061964534768, "grad_norm": 197.42355346679688, "learning_rate": 1.8580308438770994e-06, "loss": 18.5469, "step": 5919 }, { "epoch": 0.3931726107458325, "grad_norm": 119.18598937988281, "learning_rate": 1.8579756004274612e-06, "loss": 15.2031, "step": 5920 }, { "epoch": 0.3932390250381882, "grad_norm": 198.35304260253906, "learning_rate": 1.8579203470532558e-06, "loss": 20.0469, "step": 5921 }, { "epoch": 0.39330543933054396, "grad_norm": 126.77937316894531, "learning_rate": 1.8578650837551226e-06, "loss": 17.4062, "step": 5922 }, { "epoch": 0.39337185362289967, "grad_norm": 417.4004821777344, "learning_rate": 1.857809810533701e-06, "loss": 29.9062, "step": 5923 }, { "epoch": 0.3934382679152554, "grad_norm": 261.4833068847656, "learning_rate": 1.85775452738963e-06, "loss": 22.9688, "step": 5924 }, { "epoch": 0.3935046822076111, "grad_norm": 152.9742889404297, "learning_rate": 1.8576992343235492e-06, "loss": 15.8281, "step": 5925 }, { "epoch": 0.3935710964999668, "grad_norm": 133.52511596679688, "learning_rate": 1.857643931336098e-06, "loss": 14.4531, "step": 5926 }, { "epoch": 0.3936375107923225, "grad_norm": 424.0746765136719, "learning_rate": 1.8575886184279166e-06, "loss": 19.8438, "step": 5927 }, { "epoch": 0.3937039250846782, "grad_norm": 210.3526153564453, "learning_rate": 1.8575332955996445e-06, "loss": 27.5469, "step": 5928 }, { "epoch": 0.39377033937703393, "grad_norm": 388.0259094238281, "learning_rate": 1.857477962851922e-06, "loss": 15.6562, "step": 5929 }, { "epoch": 0.39383675366938964, "grad_norm": 372.0380554199219, "learning_rate": 1.8574226201853885e-06, "loss": 23.6406, "step": 5930 }, { "epoch": 0.39390316796174535, "grad_norm": 203.32643127441406, "learning_rate": 1.8573672676006846e-06, "loss": 13.6562, "step": 5931 }, { "epoch": 0.39396958225410106, "grad_norm": 266.5053405761719, "learning_rate": 1.8573119050984505e-06, "loss": 17.4844, "step": 5932 }, { "epoch": 0.3940359965464568, "grad_norm": 343.0830993652344, "learning_rate": 1.8572565326793268e-06, "loss": 16.7188, "step": 5933 }, { "epoch": 0.39410241083881253, "grad_norm": 184.2765655517578, "learning_rate": 1.857201150343954e-06, "loss": 18.7656, "step": 5934 }, { "epoch": 0.39416882513116824, "grad_norm": 254.8212432861328, "learning_rate": 1.8571457580929722e-06, "loss": 20.9375, "step": 5935 }, { "epoch": 0.39423523942352395, "grad_norm": 175.54721069335938, "learning_rate": 1.8570903559270227e-06, "loss": 26.0625, "step": 5936 }, { "epoch": 0.39430165371587966, "grad_norm": 210.3818817138672, "learning_rate": 1.857034943846746e-06, "loss": 17.5625, "step": 5937 }, { "epoch": 0.3943680680082354, "grad_norm": 268.7743225097656, "learning_rate": 1.8569795218527836e-06, "loss": 17.5312, "step": 5938 }, { "epoch": 0.3944344823005911, "grad_norm": 129.49005126953125, "learning_rate": 1.8569240899457762e-06, "loss": 18.5469, "step": 5939 }, { "epoch": 0.3945008965929468, "grad_norm": 214.51046752929688, "learning_rate": 1.856868648126365e-06, "loss": 18.2188, "step": 5940 }, { "epoch": 0.3945673108853025, "grad_norm": 351.889404296875, "learning_rate": 1.8568131963951914e-06, "loss": 21.0625, "step": 5941 }, { "epoch": 0.3946337251776582, "grad_norm": 217.15126037597656, "learning_rate": 1.8567577347528966e-06, "loss": 16.8594, "step": 5942 }, { "epoch": 0.3947001394700139, "grad_norm": 213.12045288085938, "learning_rate": 1.8567022632001225e-06, "loss": 18.6094, "step": 5943 }, { "epoch": 0.3947665537623697, "grad_norm": 316.3440246582031, "learning_rate": 1.8566467817375106e-06, "loss": 18.7969, "step": 5944 }, { "epoch": 0.3948329680547254, "grad_norm": 156.13916015625, "learning_rate": 1.8565912903657028e-06, "loss": 21.5625, "step": 5945 }, { "epoch": 0.3948993823470811, "grad_norm": 152.67544555664062, "learning_rate": 1.856535789085341e-06, "loss": 17.875, "step": 5946 }, { "epoch": 0.3949657966394368, "grad_norm": 177.04812622070312, "learning_rate": 1.8564802778970668e-06, "loss": 20.875, "step": 5947 }, { "epoch": 0.3950322109317925, "grad_norm": 351.3174743652344, "learning_rate": 1.8564247568015229e-06, "loss": 21.6562, "step": 5948 }, { "epoch": 0.39509862522414824, "grad_norm": 457.2281494140625, "learning_rate": 1.8563692257993511e-06, "loss": 13.8281, "step": 5949 }, { "epoch": 0.39516503951650395, "grad_norm": 238.96771240234375, "learning_rate": 1.8563136848911939e-06, "loss": 25.2656, "step": 5950 }, { "epoch": 0.39523145380885966, "grad_norm": 216.41822814941406, "learning_rate": 1.8562581340776938e-06, "loss": 17.7031, "step": 5951 }, { "epoch": 0.39529786810121537, "grad_norm": 209.15966796875, "learning_rate": 1.8562025733594933e-06, "loss": 20.7812, "step": 5952 }, { "epoch": 0.3953642823935711, "grad_norm": 704.5134887695312, "learning_rate": 1.8561470027372352e-06, "loss": 17.6406, "step": 5953 }, { "epoch": 0.3954306966859268, "grad_norm": 152.00096130371094, "learning_rate": 1.8560914222115623e-06, "loss": 20.2188, "step": 5954 }, { "epoch": 0.39549711097828255, "grad_norm": 178.40187072753906, "learning_rate": 1.8560358317831173e-06, "loss": 15.875, "step": 5955 }, { "epoch": 0.39556352527063826, "grad_norm": 185.69021606445312, "learning_rate": 1.8559802314525435e-06, "loss": 20.1875, "step": 5956 }, { "epoch": 0.39562993956299397, "grad_norm": 313.05792236328125, "learning_rate": 1.8559246212204839e-06, "loss": 11.0391, "step": 5957 }, { "epoch": 0.3956963538553497, "grad_norm": 382.08148193359375, "learning_rate": 1.8558690010875821e-06, "loss": 21.125, "step": 5958 }, { "epoch": 0.3957627681477054, "grad_norm": 208.12852478027344, "learning_rate": 1.855813371054481e-06, "loss": 17.375, "step": 5959 }, { "epoch": 0.3958291824400611, "grad_norm": 230.30447387695312, "learning_rate": 1.8557577311218245e-06, "loss": 15.3125, "step": 5960 }, { "epoch": 0.3958955967324168, "grad_norm": 165.00047302246094, "learning_rate": 1.8557020812902556e-06, "loss": 17.4375, "step": 5961 }, { "epoch": 0.3959620110247725, "grad_norm": 182.29579162597656, "learning_rate": 1.8556464215604188e-06, "loss": 17.4844, "step": 5962 }, { "epoch": 0.39602842531712823, "grad_norm": 668.09814453125, "learning_rate": 1.8555907519329575e-06, "loss": 19.5625, "step": 5963 }, { "epoch": 0.39609483960948394, "grad_norm": 274.45367431640625, "learning_rate": 1.855535072408516e-06, "loss": 16.6094, "step": 5964 }, { "epoch": 0.39616125390183965, "grad_norm": 156.3287353515625, "learning_rate": 1.8554793829877378e-06, "loss": 19.6719, "step": 5965 }, { "epoch": 0.3962276681941954, "grad_norm": 265.92279052734375, "learning_rate": 1.8554236836712676e-06, "loss": 25.0312, "step": 5966 }, { "epoch": 0.3962940824865511, "grad_norm": 182.55714416503906, "learning_rate": 1.8553679744597493e-06, "loss": 15.375, "step": 5967 }, { "epoch": 0.39636049677890683, "grad_norm": 168.2051239013672, "learning_rate": 1.8553122553538274e-06, "loss": 15.3906, "step": 5968 }, { "epoch": 0.39642691107126254, "grad_norm": 438.48443603515625, "learning_rate": 1.855256526354147e-06, "loss": 17.4375, "step": 5969 }, { "epoch": 0.39649332536361825, "grad_norm": 473.879150390625, "learning_rate": 1.855200787461352e-06, "loss": 16.8281, "step": 5970 }, { "epoch": 0.39655973965597396, "grad_norm": 179.30703735351562, "learning_rate": 1.8551450386760874e-06, "loss": 18.3438, "step": 5971 }, { "epoch": 0.3966261539483297, "grad_norm": 190.62295532226562, "learning_rate": 1.8550892799989979e-06, "loss": 22.1562, "step": 5972 }, { "epoch": 0.3966925682406854, "grad_norm": 227.6957550048828, "learning_rate": 1.8550335114307289e-06, "loss": 16.6562, "step": 5973 }, { "epoch": 0.3967589825330411, "grad_norm": 155.27903747558594, "learning_rate": 1.8549777329719251e-06, "loss": 17.9844, "step": 5974 }, { "epoch": 0.3968253968253968, "grad_norm": 162.1292724609375, "learning_rate": 1.8549219446232322e-06, "loss": 16.3125, "step": 5975 }, { "epoch": 0.3968918111177525, "grad_norm": 242.38609313964844, "learning_rate": 1.8548661463852952e-06, "loss": 23.4375, "step": 5976 }, { "epoch": 0.3969582254101083, "grad_norm": 190.70742797851562, "learning_rate": 1.8548103382587595e-06, "loss": 13.125, "step": 5977 }, { "epoch": 0.397024639702464, "grad_norm": 246.506591796875, "learning_rate": 1.8547545202442705e-06, "loss": 18.5, "step": 5978 }, { "epoch": 0.3970910539948197, "grad_norm": 381.54345703125, "learning_rate": 1.8546986923424744e-06, "loss": 23.0625, "step": 5979 }, { "epoch": 0.3971574682871754, "grad_norm": 306.7049255371094, "learning_rate": 1.8546428545540165e-06, "loss": 17.9688, "step": 5980 }, { "epoch": 0.3972238825795311, "grad_norm": 324.3230285644531, "learning_rate": 1.8545870068795428e-06, "loss": 22.125, "step": 5981 }, { "epoch": 0.3972902968718868, "grad_norm": 161.1841583251953, "learning_rate": 1.8545311493196995e-06, "loss": 16.9219, "step": 5982 }, { "epoch": 0.39735671116424254, "grad_norm": 298.64312744140625, "learning_rate": 1.8544752818751328e-06, "loss": 17.3125, "step": 5983 }, { "epoch": 0.39742312545659825, "grad_norm": 196.34228515625, "learning_rate": 1.8544194045464886e-06, "loss": 19.4219, "step": 5984 }, { "epoch": 0.39748953974895396, "grad_norm": 1056.86328125, "learning_rate": 1.8543635173344131e-06, "loss": 15.0469, "step": 5985 }, { "epoch": 0.39755595404130967, "grad_norm": 193.8872528076172, "learning_rate": 1.8543076202395535e-06, "loss": 16.8906, "step": 5986 }, { "epoch": 0.39762236833366543, "grad_norm": 236.7702178955078, "learning_rate": 1.854251713262556e-06, "loss": 15.3281, "step": 5987 }, { "epoch": 0.39768878262602114, "grad_norm": 232.62002563476562, "learning_rate": 1.8541957964040669e-06, "loss": 16.5938, "step": 5988 }, { "epoch": 0.39775519691837685, "grad_norm": 637.5339965820312, "learning_rate": 1.8541398696647337e-06, "loss": 21.5625, "step": 5989 }, { "epoch": 0.39782161121073256, "grad_norm": 147.59251403808594, "learning_rate": 1.854083933045203e-06, "loss": 16.625, "step": 5990 }, { "epoch": 0.39788802550308827, "grad_norm": 427.85723876953125, "learning_rate": 1.8540279865461216e-06, "loss": 21.6562, "step": 5991 }, { "epoch": 0.397954439795444, "grad_norm": 158.58180236816406, "learning_rate": 1.853972030168137e-06, "loss": 15.4453, "step": 5992 }, { "epoch": 0.3980208540877997, "grad_norm": 217.97276306152344, "learning_rate": 1.8539160639118965e-06, "loss": 20.0312, "step": 5993 }, { "epoch": 0.3980872683801554, "grad_norm": 320.2038269042969, "learning_rate": 1.8538600877780475e-06, "loss": 22.7344, "step": 5994 }, { "epoch": 0.3981536826725111, "grad_norm": 387.51971435546875, "learning_rate": 1.853804101767237e-06, "loss": 23.8438, "step": 5995 }, { "epoch": 0.3982200969648668, "grad_norm": 86.14076232910156, "learning_rate": 1.8537481058801134e-06, "loss": 16.9062, "step": 5996 }, { "epoch": 0.39828651125722253, "grad_norm": 253.3944091796875, "learning_rate": 1.8536921001173239e-06, "loss": 24.875, "step": 5997 }, { "epoch": 0.3983529255495783, "grad_norm": 407.5182800292969, "learning_rate": 1.853636084479516e-06, "loss": 23.625, "step": 5998 }, { "epoch": 0.398419339841934, "grad_norm": 201.82131958007812, "learning_rate": 1.8535800589673385e-06, "loss": 20.5312, "step": 5999 }, { "epoch": 0.3984857541342897, "grad_norm": 329.0624084472656, "learning_rate": 1.8535240235814391e-06, "loss": 20.8438, "step": 6000 }, { "epoch": 0.3985521684266454, "grad_norm": 147.1533203125, "learning_rate": 1.8534679783224659e-06, "loss": 16.0938, "step": 6001 }, { "epoch": 0.39861858271900114, "grad_norm": 147.7583465576172, "learning_rate": 1.8534119231910674e-06, "loss": 15.5625, "step": 6002 }, { "epoch": 0.39868499701135685, "grad_norm": 361.4578857421875, "learning_rate": 1.8533558581878917e-06, "loss": 12.7812, "step": 6003 }, { "epoch": 0.39875141130371256, "grad_norm": 289.3795166015625, "learning_rate": 1.8532997833135875e-06, "loss": 18.4844, "step": 6004 }, { "epoch": 0.39881782559606826, "grad_norm": 266.9530334472656, "learning_rate": 1.8532436985688036e-06, "loss": 20.625, "step": 6005 }, { "epoch": 0.398884239888424, "grad_norm": 320.64642333984375, "learning_rate": 1.8531876039541885e-06, "loss": 25.0, "step": 6006 }, { "epoch": 0.3989506541807797, "grad_norm": 214.01968383789062, "learning_rate": 1.853131499470391e-06, "loss": 17.5312, "step": 6007 }, { "epoch": 0.3990170684731354, "grad_norm": 191.621337890625, "learning_rate": 1.8530753851180606e-06, "loss": 23.2344, "step": 6008 }, { "epoch": 0.39908348276549116, "grad_norm": 263.0097351074219, "learning_rate": 1.8530192608978457e-06, "loss": 17.7031, "step": 6009 }, { "epoch": 0.39914989705784687, "grad_norm": 175.42308044433594, "learning_rate": 1.8529631268103962e-06, "loss": 20.2812, "step": 6010 }, { "epoch": 0.3992163113502026, "grad_norm": 675.936279296875, "learning_rate": 1.852906982856361e-06, "loss": 25.375, "step": 6011 }, { "epoch": 0.3992827256425583, "grad_norm": 230.21311950683594, "learning_rate": 1.8528508290363893e-06, "loss": 19.3594, "step": 6012 }, { "epoch": 0.399349139934914, "grad_norm": 417.1519775390625, "learning_rate": 1.8527946653511313e-06, "loss": 17.375, "step": 6013 }, { "epoch": 0.3994155542272697, "grad_norm": 347.7225646972656, "learning_rate": 1.8527384918012365e-06, "loss": 16.0, "step": 6014 }, { "epoch": 0.3994819685196254, "grad_norm": 125.37321472167969, "learning_rate": 1.8526823083873544e-06, "loss": 11.5469, "step": 6015 }, { "epoch": 0.39954838281198113, "grad_norm": 211.92660522460938, "learning_rate": 1.852626115110135e-06, "loss": 19.375, "step": 6016 }, { "epoch": 0.39961479710433684, "grad_norm": 330.0710754394531, "learning_rate": 1.8525699119702285e-06, "loss": 18.5938, "step": 6017 }, { "epoch": 0.39968121139669255, "grad_norm": 158.46673583984375, "learning_rate": 1.8525136989682846e-06, "loss": 23.5469, "step": 6018 }, { "epoch": 0.39974762568904826, "grad_norm": 343.02862548828125, "learning_rate": 1.8524574761049541e-06, "loss": 15.2344, "step": 6019 }, { "epoch": 0.399814039981404, "grad_norm": 156.79208374023438, "learning_rate": 1.8524012433808868e-06, "loss": 11.6562, "step": 6020 }, { "epoch": 0.39988045427375973, "grad_norm": 295.6404724121094, "learning_rate": 1.8523450007967336e-06, "loss": 15.7656, "step": 6021 }, { "epoch": 0.39994686856611544, "grad_norm": 372.93804931640625, "learning_rate": 1.8522887483531449e-06, "loss": 28.5625, "step": 6022 }, { "epoch": 0.40001328285847115, "grad_norm": 203.88262939453125, "learning_rate": 1.8522324860507715e-06, "loss": 17.5312, "step": 6023 }, { "epoch": 0.40007969715082686, "grad_norm": 294.190673828125, "learning_rate": 1.852176213890264e-06, "loss": 22.8281, "step": 6024 }, { "epoch": 0.4001461114431826, "grad_norm": 207.26084899902344, "learning_rate": 1.8521199318722735e-06, "loss": 14.7969, "step": 6025 }, { "epoch": 0.4002125257355383, "grad_norm": 301.7430725097656, "learning_rate": 1.8520636399974511e-06, "loss": 29.7031, "step": 6026 }, { "epoch": 0.400278940027894, "grad_norm": 247.49276733398438, "learning_rate": 1.8520073382664477e-06, "loss": 23.1562, "step": 6027 }, { "epoch": 0.4003453543202497, "grad_norm": 179.21826171875, "learning_rate": 1.8519510266799149e-06, "loss": 20.0781, "step": 6028 }, { "epoch": 0.4004117686126054, "grad_norm": 173.5128936767578, "learning_rate": 1.8518947052385035e-06, "loss": 19.4688, "step": 6029 }, { "epoch": 0.4004781829049611, "grad_norm": 331.590087890625, "learning_rate": 1.8518383739428656e-06, "loss": 14.8438, "step": 6030 }, { "epoch": 0.4005445971973169, "grad_norm": 251.27816772460938, "learning_rate": 1.8517820327936525e-06, "loss": 20.5625, "step": 6031 }, { "epoch": 0.4006110114896726, "grad_norm": 173.97637939453125, "learning_rate": 1.8517256817915162e-06, "loss": 25.4062, "step": 6032 }, { "epoch": 0.4006774257820283, "grad_norm": 139.9208984375, "learning_rate": 1.8516693209371082e-06, "loss": 23.5, "step": 6033 }, { "epoch": 0.400743840074384, "grad_norm": 577.7830200195312, "learning_rate": 1.8516129502310807e-06, "loss": 22.0469, "step": 6034 }, { "epoch": 0.4008102543667397, "grad_norm": 234.483642578125, "learning_rate": 1.8515565696740856e-06, "loss": 14.9688, "step": 6035 }, { "epoch": 0.40087666865909544, "grad_norm": 185.23052978515625, "learning_rate": 1.8515001792667752e-06, "loss": 16.9688, "step": 6036 }, { "epoch": 0.40094308295145115, "grad_norm": 177.12289428710938, "learning_rate": 1.8514437790098017e-06, "loss": 21.1562, "step": 6037 }, { "epoch": 0.40100949724380686, "grad_norm": 203.06564331054688, "learning_rate": 1.8513873689038175e-06, "loss": 17.5, "step": 6038 }, { "epoch": 0.40107591153616257, "grad_norm": 136.95492553710938, "learning_rate": 1.851330948949475e-06, "loss": 19.5938, "step": 6039 }, { "epoch": 0.4011423258285183, "grad_norm": 205.90289306640625, "learning_rate": 1.8512745191474273e-06, "loss": 19.75, "step": 6040 }, { "epoch": 0.401208740120874, "grad_norm": 155.6261749267578, "learning_rate": 1.8512180794983268e-06, "loss": 19.5156, "step": 6041 }, { "epoch": 0.40127515441322975, "grad_norm": 140.99586486816406, "learning_rate": 1.851161630002826e-06, "loss": 19.3906, "step": 6042 }, { "epoch": 0.40134156870558546, "grad_norm": 237.23545837402344, "learning_rate": 1.8511051706615785e-06, "loss": 17.0625, "step": 6043 }, { "epoch": 0.40140798299794117, "grad_norm": 173.50100708007812, "learning_rate": 1.8510487014752375e-06, "loss": 17.6562, "step": 6044 }, { "epoch": 0.4014743972902969, "grad_norm": 1145.7296142578125, "learning_rate": 1.8509922224444554e-06, "loss": 22.9844, "step": 6045 }, { "epoch": 0.4015408115826526, "grad_norm": 438.519287109375, "learning_rate": 1.8509357335698864e-06, "loss": 27.9844, "step": 6046 }, { "epoch": 0.4016072258750083, "grad_norm": 208.2895965576172, "learning_rate": 1.8508792348521831e-06, "loss": 17.1406, "step": 6047 }, { "epoch": 0.401673640167364, "grad_norm": 134.49720764160156, "learning_rate": 1.8508227262919996e-06, "loss": 16.4219, "step": 6048 }, { "epoch": 0.4017400544597197, "grad_norm": 225.4462890625, "learning_rate": 1.8507662078899894e-06, "loss": 21.4844, "step": 6049 }, { "epoch": 0.40180646875207543, "grad_norm": 262.584716796875, "learning_rate": 1.8507096796468064e-06, "loss": 24.5469, "step": 6050 }, { "epoch": 0.40187288304443114, "grad_norm": 265.03668212890625, "learning_rate": 1.8506531415631042e-06, "loss": 22.8125, "step": 6051 }, { "epoch": 0.40193929733678685, "grad_norm": 155.85308837890625, "learning_rate": 1.8505965936395371e-06, "loss": 20.6875, "step": 6052 }, { "epoch": 0.4020057116291426, "grad_norm": 665.7776489257812, "learning_rate": 1.8505400358767591e-06, "loss": 25.5, "step": 6053 }, { "epoch": 0.4020721259214983, "grad_norm": 162.57656860351562, "learning_rate": 1.8504834682754243e-06, "loss": 19.9219, "step": 6054 }, { "epoch": 0.40213854021385403, "grad_norm": 152.62026977539062, "learning_rate": 1.8504268908361874e-06, "loss": 14.7031, "step": 6055 }, { "epoch": 0.40220495450620974, "grad_norm": 114.05113220214844, "learning_rate": 1.8503703035597025e-06, "loss": 15.6875, "step": 6056 }, { "epoch": 0.40227136879856545, "grad_norm": 150.0399932861328, "learning_rate": 1.850313706446624e-06, "loss": 21.6875, "step": 6057 }, { "epoch": 0.40233778309092116, "grad_norm": 129.14801025390625, "learning_rate": 1.8502570994976073e-06, "loss": 14.5781, "step": 6058 }, { "epoch": 0.4024041973832769, "grad_norm": 394.9715881347656, "learning_rate": 1.8502004827133065e-06, "loss": 20.1719, "step": 6059 }, { "epoch": 0.4024706116756326, "grad_norm": 248.71168518066406, "learning_rate": 1.8501438560943768e-06, "loss": 12.5625, "step": 6060 }, { "epoch": 0.4025370259679883, "grad_norm": 525.4161987304688, "learning_rate": 1.8500872196414733e-06, "loss": 22.7656, "step": 6061 }, { "epoch": 0.402603440260344, "grad_norm": 231.10055541992188, "learning_rate": 1.8500305733552508e-06, "loss": 20.75, "step": 6062 }, { "epoch": 0.40266985455269977, "grad_norm": 302.6937255859375, "learning_rate": 1.8499739172363648e-06, "loss": 15.8594, "step": 6063 }, { "epoch": 0.4027362688450555, "grad_norm": 157.37640380859375, "learning_rate": 1.8499172512854711e-06, "loss": 18.0625, "step": 6064 }, { "epoch": 0.4028026831374112, "grad_norm": 163.90414428710938, "learning_rate": 1.849860575503224e-06, "loss": 18.4688, "step": 6065 }, { "epoch": 0.4028690974297669, "grad_norm": 132.92860412597656, "learning_rate": 1.8498038898902804e-06, "loss": 15.2188, "step": 6066 }, { "epoch": 0.4029355117221226, "grad_norm": 170.1539306640625, "learning_rate": 1.8497471944472954e-06, "loss": 21.4531, "step": 6067 }, { "epoch": 0.4030019260144783, "grad_norm": 296.3495178222656, "learning_rate": 1.8496904891749245e-06, "loss": 19.3125, "step": 6068 }, { "epoch": 0.403068340306834, "grad_norm": 409.5465393066406, "learning_rate": 1.8496337740738244e-06, "loss": 16.8906, "step": 6069 }, { "epoch": 0.40313475459918974, "grad_norm": 122.25434112548828, "learning_rate": 1.8495770491446503e-06, "loss": 17.4219, "step": 6070 }, { "epoch": 0.40320116889154545, "grad_norm": 326.89080810546875, "learning_rate": 1.849520314388059e-06, "loss": 22.0312, "step": 6071 }, { "epoch": 0.40326758318390116, "grad_norm": 166.5597381591797, "learning_rate": 1.8494635698047066e-06, "loss": 18.4219, "step": 6072 }, { "epoch": 0.40333399747625687, "grad_norm": 214.82040405273438, "learning_rate": 1.8494068153952496e-06, "loss": 20.5469, "step": 6073 }, { "epoch": 0.40340041176861263, "grad_norm": 174.61700439453125, "learning_rate": 1.8493500511603442e-06, "loss": 20.0625, "step": 6074 }, { "epoch": 0.40346682606096834, "grad_norm": 433.76409912109375, "learning_rate": 1.849293277100647e-06, "loss": 23.875, "step": 6075 }, { "epoch": 0.40353324035332405, "grad_norm": 173.73004150390625, "learning_rate": 1.849236493216815e-06, "loss": 17.9219, "step": 6076 }, { "epoch": 0.40359965464567976, "grad_norm": 622.9986572265625, "learning_rate": 1.8491796995095049e-06, "loss": 16.4531, "step": 6077 }, { "epoch": 0.40366606893803547, "grad_norm": 355.6362609863281, "learning_rate": 1.849122895979374e-06, "loss": 17.0, "step": 6078 }, { "epoch": 0.4037324832303912, "grad_norm": 233.1381378173828, "learning_rate": 1.8490660826270788e-06, "loss": 21.6562, "step": 6079 }, { "epoch": 0.4037988975227469, "grad_norm": 175.23150634765625, "learning_rate": 1.8490092594532768e-06, "loss": 19.6094, "step": 6080 }, { "epoch": 0.4038653118151026, "grad_norm": 166.4402618408203, "learning_rate": 1.8489524264586252e-06, "loss": 16.2969, "step": 6081 }, { "epoch": 0.4039317261074583, "grad_norm": 293.9205627441406, "learning_rate": 1.8488955836437814e-06, "loss": 20.9844, "step": 6082 }, { "epoch": 0.403998140399814, "grad_norm": 186.15924072265625, "learning_rate": 1.8488387310094033e-06, "loss": 18.1875, "step": 6083 }, { "epoch": 0.40406455469216973, "grad_norm": 250.4637908935547, "learning_rate": 1.8487818685561481e-06, "loss": 24.4688, "step": 6084 }, { "epoch": 0.4041309689845255, "grad_norm": 157.6710968017578, "learning_rate": 1.8487249962846736e-06, "loss": 15.75, "step": 6085 }, { "epoch": 0.4041973832768812, "grad_norm": 285.4700622558594, "learning_rate": 1.8486681141956376e-06, "loss": 21.75, "step": 6086 }, { "epoch": 0.4042637975692369, "grad_norm": 169.94276428222656, "learning_rate": 1.8486112222896986e-06, "loss": 16.6562, "step": 6087 }, { "epoch": 0.4043302118615926, "grad_norm": 175.48609924316406, "learning_rate": 1.8485543205675138e-06, "loss": 17.7188, "step": 6088 }, { "epoch": 0.40439662615394834, "grad_norm": 334.3358154296875, "learning_rate": 1.8484974090297425e-06, "loss": 23.3906, "step": 6089 }, { "epoch": 0.40446304044630405, "grad_norm": 213.67210388183594, "learning_rate": 1.8484404876770423e-06, "loss": 24.8125, "step": 6090 }, { "epoch": 0.40452945473865976, "grad_norm": 139.06195068359375, "learning_rate": 1.8483835565100714e-06, "loss": 16.8438, "step": 6091 }, { "epoch": 0.40459586903101546, "grad_norm": 376.31396484375, "learning_rate": 1.8483266155294892e-06, "loss": 22.7188, "step": 6092 }, { "epoch": 0.4046622833233712, "grad_norm": 608.298583984375, "learning_rate": 1.8482696647359535e-06, "loss": 19.2344, "step": 6093 }, { "epoch": 0.4047286976157269, "grad_norm": 134.52767944335938, "learning_rate": 1.8482127041301236e-06, "loss": 15.9062, "step": 6094 }, { "epoch": 0.4047951119080826, "grad_norm": 235.28091430664062, "learning_rate": 1.8481557337126584e-06, "loss": 16.8281, "step": 6095 }, { "epoch": 0.40486152620043836, "grad_norm": 285.702880859375, "learning_rate": 1.8480987534842165e-06, "loss": 20.8281, "step": 6096 }, { "epoch": 0.40492794049279407, "grad_norm": 458.4817199707031, "learning_rate": 1.8480417634454575e-06, "loss": 29.5781, "step": 6097 }, { "epoch": 0.4049943547851498, "grad_norm": 204.02760314941406, "learning_rate": 1.8479847635970403e-06, "loss": 15.4375, "step": 6098 }, { "epoch": 0.4050607690775055, "grad_norm": 277.1824645996094, "learning_rate": 1.8479277539396243e-06, "loss": 23.875, "step": 6099 }, { "epoch": 0.4051271833698612, "grad_norm": 708.3137817382812, "learning_rate": 1.847870734473869e-06, "loss": 19.1719, "step": 6100 }, { "epoch": 0.4051935976622169, "grad_norm": 299.2938232421875, "learning_rate": 1.8478137052004338e-06, "loss": 20.6719, "step": 6101 }, { "epoch": 0.4052600119545726, "grad_norm": 659.4954223632812, "learning_rate": 1.8477566661199787e-06, "loss": 22.2656, "step": 6102 }, { "epoch": 0.40532642624692833, "grad_norm": 164.043212890625, "learning_rate": 1.8476996172331633e-06, "loss": 16.7344, "step": 6103 }, { "epoch": 0.40539284053928404, "grad_norm": 143.74452209472656, "learning_rate": 1.8476425585406477e-06, "loss": 14.6406, "step": 6104 }, { "epoch": 0.40545925483163975, "grad_norm": 401.50848388671875, "learning_rate": 1.8475854900430915e-06, "loss": 14.9531, "step": 6105 }, { "epoch": 0.40552566912399546, "grad_norm": 553.1861572265625, "learning_rate": 1.8475284117411555e-06, "loss": 15.9062, "step": 6106 }, { "epoch": 0.4055920834163512, "grad_norm": 1007.3754272460938, "learning_rate": 1.8474713236354991e-06, "loss": 17.625, "step": 6107 }, { "epoch": 0.40565849770870693, "grad_norm": 370.324951171875, "learning_rate": 1.847414225726783e-06, "loss": 16.0625, "step": 6108 }, { "epoch": 0.40572491200106264, "grad_norm": 427.4706115722656, "learning_rate": 1.8473571180156682e-06, "loss": 16.4375, "step": 6109 }, { "epoch": 0.40579132629341835, "grad_norm": 224.08078002929688, "learning_rate": 1.8473000005028147e-06, "loss": 22.0312, "step": 6110 }, { "epoch": 0.40585774058577406, "grad_norm": 298.858154296875, "learning_rate": 1.8472428731888833e-06, "loss": 19.8906, "step": 6111 }, { "epoch": 0.4059241548781298, "grad_norm": 137.90884399414062, "learning_rate": 1.847185736074535e-06, "loss": 16.3906, "step": 6112 }, { "epoch": 0.4059905691704855, "grad_norm": 129.30606079101562, "learning_rate": 1.8471285891604307e-06, "loss": 14.7969, "step": 6113 }, { "epoch": 0.4060569834628412, "grad_norm": 120.16019439697266, "learning_rate": 1.8470714324472315e-06, "loss": 16.6562, "step": 6114 }, { "epoch": 0.4061233977551969, "grad_norm": 376.554931640625, "learning_rate": 1.8470142659355981e-06, "loss": 14.3438, "step": 6115 }, { "epoch": 0.4061898120475526, "grad_norm": 686.3812866210938, "learning_rate": 1.8469570896261921e-06, "loss": 17.0781, "step": 6116 }, { "epoch": 0.4062562263399083, "grad_norm": 409.99737548828125, "learning_rate": 1.846899903519675e-06, "loss": 25.4844, "step": 6117 }, { "epoch": 0.4063226406322641, "grad_norm": 277.5284423828125, "learning_rate": 1.846842707616708e-06, "loss": 15.9219, "step": 6118 }, { "epoch": 0.4063890549246198, "grad_norm": 256.3945617675781, "learning_rate": 1.846785501917953e-06, "loss": 18.5156, "step": 6119 }, { "epoch": 0.4064554692169755, "grad_norm": 214.37428283691406, "learning_rate": 1.8467282864240716e-06, "loss": 18.6562, "step": 6120 }, { "epoch": 0.4065218835093312, "grad_norm": 264.9864501953125, "learning_rate": 1.8466710611357256e-06, "loss": 21.2812, "step": 6121 }, { "epoch": 0.4065882978016869, "grad_norm": 210.3462371826172, "learning_rate": 1.8466138260535772e-06, "loss": 17.3438, "step": 6122 }, { "epoch": 0.40665471209404264, "grad_norm": 131.36500549316406, "learning_rate": 1.846556581178288e-06, "loss": 18.2812, "step": 6123 }, { "epoch": 0.40672112638639835, "grad_norm": 448.21478271484375, "learning_rate": 1.8464993265105204e-06, "loss": 22.2812, "step": 6124 }, { "epoch": 0.40678754067875406, "grad_norm": 124.04448699951172, "learning_rate": 1.8464420620509369e-06, "loss": 12.5938, "step": 6125 }, { "epoch": 0.40685395497110977, "grad_norm": 207.91954040527344, "learning_rate": 1.8463847878001994e-06, "loss": 16.9531, "step": 6126 }, { "epoch": 0.4069203692634655, "grad_norm": 323.00537109375, "learning_rate": 1.8463275037589709e-06, "loss": 21.1562, "step": 6127 }, { "epoch": 0.4069867835558212, "grad_norm": 583.9448852539062, "learning_rate": 1.8462702099279143e-06, "loss": 27.6484, "step": 6128 }, { "epoch": 0.40705319784817695, "grad_norm": 196.35626220703125, "learning_rate": 1.846212906307691e-06, "loss": 23.0312, "step": 6129 }, { "epoch": 0.40711961214053266, "grad_norm": 157.58956909179688, "learning_rate": 1.8461555928989654e-06, "loss": 15.7031, "step": 6130 }, { "epoch": 0.40718602643288837, "grad_norm": 192.01475524902344, "learning_rate": 1.8460982697023997e-06, "loss": 18.3906, "step": 6131 }, { "epoch": 0.4072524407252441, "grad_norm": 167.4811248779297, "learning_rate": 1.846040936718657e-06, "loss": 17.875, "step": 6132 }, { "epoch": 0.4073188550175998, "grad_norm": 272.4468688964844, "learning_rate": 1.8459835939484008e-06, "loss": 17.0312, "step": 6133 }, { "epoch": 0.4073852693099555, "grad_norm": 322.24462890625, "learning_rate": 1.845926241392294e-06, "loss": 23.1719, "step": 6134 }, { "epoch": 0.4074516836023112, "grad_norm": 298.21539306640625, "learning_rate": 1.8458688790510001e-06, "loss": 22.9062, "step": 6135 }, { "epoch": 0.4075180978946669, "grad_norm": 229.5301513671875, "learning_rate": 1.8458115069251832e-06, "loss": 21.5156, "step": 6136 }, { "epoch": 0.40758451218702263, "grad_norm": 208.72608947753906, "learning_rate": 1.845754125015506e-06, "loss": 19.9531, "step": 6137 }, { "epoch": 0.40765092647937834, "grad_norm": 236.2302703857422, "learning_rate": 1.8456967333226332e-06, "loss": 27.4375, "step": 6138 }, { "epoch": 0.4077173407717341, "grad_norm": 247.2396697998047, "learning_rate": 1.845639331847228e-06, "loss": 21.4062, "step": 6139 }, { "epoch": 0.4077837550640898, "grad_norm": 212.2264862060547, "learning_rate": 1.845581920589955e-06, "loss": 19.7969, "step": 6140 }, { "epoch": 0.4078501693564455, "grad_norm": 284.948974609375, "learning_rate": 1.8455244995514775e-06, "loss": 18.0938, "step": 6141 }, { "epoch": 0.40791658364880123, "grad_norm": 471.14727783203125, "learning_rate": 1.8454670687324602e-06, "loss": 18.1406, "step": 6142 }, { "epoch": 0.40798299794115694, "grad_norm": 232.70059204101562, "learning_rate": 1.8454096281335675e-06, "loss": 15.9844, "step": 6143 }, { "epoch": 0.40804941223351265, "grad_norm": 369.0054626464844, "learning_rate": 1.8453521777554636e-06, "loss": 24.2188, "step": 6144 }, { "epoch": 0.40811582652586836, "grad_norm": 139.0950469970703, "learning_rate": 1.8452947175988135e-06, "loss": 13.1719, "step": 6145 }, { "epoch": 0.4081822408182241, "grad_norm": 108.28509521484375, "learning_rate": 1.8452372476642812e-06, "loss": 15.0938, "step": 6146 }, { "epoch": 0.4082486551105798, "grad_norm": 217.19471740722656, "learning_rate": 1.845179767952532e-06, "loss": 18.0625, "step": 6147 }, { "epoch": 0.4083150694029355, "grad_norm": 301.54669189453125, "learning_rate": 1.8451222784642304e-06, "loss": 22.8438, "step": 6148 }, { "epoch": 0.4083814836952912, "grad_norm": 327.0561828613281, "learning_rate": 1.8450647792000418e-06, "loss": 16.8125, "step": 6149 }, { "epoch": 0.40844789798764697, "grad_norm": 326.0561828613281, "learning_rate": 1.845007270160631e-06, "loss": 21.0469, "step": 6150 }, { "epoch": 0.4085143122800027, "grad_norm": 191.31951904296875, "learning_rate": 1.8449497513466631e-06, "loss": 18.9062, "step": 6151 }, { "epoch": 0.4085807265723584, "grad_norm": 243.32923889160156, "learning_rate": 1.8448922227588042e-06, "loss": 21.5938, "step": 6152 }, { "epoch": 0.4086471408647141, "grad_norm": 166.6351318359375, "learning_rate": 1.844834684397719e-06, "loss": 16.1719, "step": 6153 }, { "epoch": 0.4087135551570698, "grad_norm": 286.35382080078125, "learning_rate": 1.8447771362640733e-06, "loss": 18.9375, "step": 6154 }, { "epoch": 0.4087799694494255, "grad_norm": 410.6280517578125, "learning_rate": 1.844719578358533e-06, "loss": 18.3438, "step": 6155 }, { "epoch": 0.4088463837417812, "grad_norm": 353.8191833496094, "learning_rate": 1.8446620106817636e-06, "loss": 20.375, "step": 6156 }, { "epoch": 0.40891279803413694, "grad_norm": 281.42138671875, "learning_rate": 1.8446044332344312e-06, "loss": 19.5156, "step": 6157 }, { "epoch": 0.40897921232649265, "grad_norm": 698.8644409179688, "learning_rate": 1.8445468460172017e-06, "loss": 20.625, "step": 6158 }, { "epoch": 0.40904562661884836, "grad_norm": 230.92062377929688, "learning_rate": 1.844489249030741e-06, "loss": 12.7969, "step": 6159 }, { "epoch": 0.40911204091120407, "grad_norm": 164.3361053466797, "learning_rate": 1.8444316422757163e-06, "loss": 18.2344, "step": 6160 }, { "epoch": 0.40917845520355983, "grad_norm": 161.76214599609375, "learning_rate": 1.8443740257527927e-06, "loss": 16.8125, "step": 6161 }, { "epoch": 0.40924486949591554, "grad_norm": 215.86280822753906, "learning_rate": 1.8443163994626375e-06, "loss": 18.0, "step": 6162 }, { "epoch": 0.40931128378827125, "grad_norm": 214.86978149414062, "learning_rate": 1.844258763405917e-06, "loss": 17.875, "step": 6163 }, { "epoch": 0.40937769808062696, "grad_norm": 191.67080688476562, "learning_rate": 1.8442011175832976e-06, "loss": 22.7031, "step": 6164 }, { "epoch": 0.40944411237298267, "grad_norm": 277.1333923339844, "learning_rate": 1.844143461995447e-06, "loss": 19.0312, "step": 6165 }, { "epoch": 0.4095105266653384, "grad_norm": 237.21592712402344, "learning_rate": 1.8440857966430314e-06, "loss": 25.3281, "step": 6166 }, { "epoch": 0.4095769409576941, "grad_norm": 118.64085388183594, "learning_rate": 1.8440281215267182e-06, "loss": 14.9531, "step": 6167 }, { "epoch": 0.4096433552500498, "grad_norm": 127.93568420410156, "learning_rate": 1.843970436647174e-06, "loss": 18.1562, "step": 6168 }, { "epoch": 0.4097097695424055, "grad_norm": 134.64532470703125, "learning_rate": 1.8439127420050666e-06, "loss": 15.5156, "step": 6169 }, { "epoch": 0.4097761838347612, "grad_norm": 233.8057098388672, "learning_rate": 1.8438550376010636e-06, "loss": 18.5781, "step": 6170 }, { "epoch": 0.40984259812711693, "grad_norm": 469.5027770996094, "learning_rate": 1.8437973234358317e-06, "loss": 20.625, "step": 6171 }, { "epoch": 0.4099090124194727, "grad_norm": 199.87429809570312, "learning_rate": 1.8437395995100392e-06, "loss": 16.5781, "step": 6172 }, { "epoch": 0.4099754267118284, "grad_norm": 196.97776794433594, "learning_rate": 1.8436818658243532e-06, "loss": 18.1406, "step": 6173 }, { "epoch": 0.4100418410041841, "grad_norm": 139.81007385253906, "learning_rate": 1.843624122379442e-06, "loss": 18.6406, "step": 6174 }, { "epoch": 0.4101082552965398, "grad_norm": 164.6247100830078, "learning_rate": 1.8435663691759733e-06, "loss": 21.4688, "step": 6175 }, { "epoch": 0.41017466958889554, "grad_norm": 249.13999938964844, "learning_rate": 1.8435086062146156e-06, "loss": 20.7969, "step": 6176 }, { "epoch": 0.41024108388125125, "grad_norm": 313.7425537109375, "learning_rate": 1.8434508334960363e-06, "loss": 18.8594, "step": 6177 }, { "epoch": 0.41030749817360695, "grad_norm": 130.59552001953125, "learning_rate": 1.8433930510209047e-06, "loss": 21.0, "step": 6178 }, { "epoch": 0.41037391246596266, "grad_norm": 191.39486694335938, "learning_rate": 1.8433352587898883e-06, "loss": 21.9062, "step": 6179 }, { "epoch": 0.4104403267583184, "grad_norm": 261.8380432128906, "learning_rate": 1.8432774568036557e-06, "loss": 24.5625, "step": 6180 }, { "epoch": 0.4105067410506741, "grad_norm": 132.846435546875, "learning_rate": 1.843219645062876e-06, "loss": 19.4375, "step": 6181 }, { "epoch": 0.4105731553430298, "grad_norm": 230.90936279296875, "learning_rate": 1.8431618235682177e-06, "loss": 23.625, "step": 6182 }, { "epoch": 0.41063956963538556, "grad_norm": 246.6492919921875, "learning_rate": 1.8431039923203496e-06, "loss": 18.2188, "step": 6183 }, { "epoch": 0.41070598392774127, "grad_norm": 247.9337615966797, "learning_rate": 1.8430461513199406e-06, "loss": 16.8125, "step": 6184 }, { "epoch": 0.410772398220097, "grad_norm": 140.76858520507812, "learning_rate": 1.84298830056766e-06, "loss": 18.5156, "step": 6185 }, { "epoch": 0.4108388125124527, "grad_norm": 700.1099853515625, "learning_rate": 1.8429304400641766e-06, "loss": 32.0312, "step": 6186 }, { "epoch": 0.4109052268048084, "grad_norm": 323.78497314453125, "learning_rate": 1.8428725698101602e-06, "loss": 18.4844, "step": 6187 }, { "epoch": 0.4109716410971641, "grad_norm": 660.543212890625, "learning_rate": 1.8428146898062798e-06, "loss": 18.7188, "step": 6188 }, { "epoch": 0.4110380553895198, "grad_norm": 107.9827651977539, "learning_rate": 1.842756800053205e-06, "loss": 14.5625, "step": 6189 }, { "epoch": 0.41110446968187553, "grad_norm": 146.02023315429688, "learning_rate": 1.8426989005516056e-06, "loss": 17.1719, "step": 6190 }, { "epoch": 0.41117088397423124, "grad_norm": 302.0450134277344, "learning_rate": 1.8426409913021511e-06, "loss": 22.9375, "step": 6191 }, { "epoch": 0.41123729826658695, "grad_norm": 294.2602233886719, "learning_rate": 1.8425830723055116e-06, "loss": 16.7969, "step": 6192 }, { "epoch": 0.41130371255894266, "grad_norm": 229.47744750976562, "learning_rate": 1.842525143562357e-06, "loss": 22.0, "step": 6193 }, { "epoch": 0.4113701268512984, "grad_norm": 277.38433837890625, "learning_rate": 1.8424672050733574e-06, "loss": 21.25, "step": 6194 }, { "epoch": 0.41143654114365413, "grad_norm": 114.22864532470703, "learning_rate": 1.842409256839183e-06, "loss": 18.6406, "step": 6195 }, { "epoch": 0.41150295543600984, "grad_norm": 206.1202850341797, "learning_rate": 1.8423512988605038e-06, "loss": 19.0938, "step": 6196 }, { "epoch": 0.41156936972836555, "grad_norm": 115.57991027832031, "learning_rate": 1.842293331137991e-06, "loss": 19.4844, "step": 6197 }, { "epoch": 0.41163578402072126, "grad_norm": 132.2825927734375, "learning_rate": 1.8422353536723141e-06, "loss": 16.9219, "step": 6198 }, { "epoch": 0.411702198313077, "grad_norm": 268.8746032714844, "learning_rate": 1.8421773664641446e-06, "loss": 16.8594, "step": 6199 }, { "epoch": 0.4117686126054327, "grad_norm": 138.8679656982422, "learning_rate": 1.8421193695141528e-06, "loss": 19.2031, "step": 6200 }, { "epoch": 0.4118350268977884, "grad_norm": 181.90914916992188, "learning_rate": 1.84206136282301e-06, "loss": 19.1016, "step": 6201 }, { "epoch": 0.4119014411901441, "grad_norm": 183.71914672851562, "learning_rate": 1.8420033463913866e-06, "loss": 21.4531, "step": 6202 }, { "epoch": 0.4119678554824998, "grad_norm": 165.61920166015625, "learning_rate": 1.841945320219954e-06, "loss": 12.3438, "step": 6203 }, { "epoch": 0.4120342697748555, "grad_norm": 315.8891906738281, "learning_rate": 1.8418872843093838e-06, "loss": 22.625, "step": 6204 }, { "epoch": 0.4121006840672113, "grad_norm": 85.36499786376953, "learning_rate": 1.8418292386603467e-06, "loss": 10.9062, "step": 6205 }, { "epoch": 0.412167098359567, "grad_norm": 391.7265930175781, "learning_rate": 1.8417711832735144e-06, "loss": 17.0938, "step": 6206 }, { "epoch": 0.4122335126519227, "grad_norm": 611.603515625, "learning_rate": 1.8417131181495585e-06, "loss": 29.6562, "step": 6207 }, { "epoch": 0.4122999269442784, "grad_norm": 162.74197387695312, "learning_rate": 1.8416550432891507e-06, "loss": 20.0469, "step": 6208 }, { "epoch": 0.4123663412366341, "grad_norm": 126.23861694335938, "learning_rate": 1.8415969586929624e-06, "loss": 18.375, "step": 6209 }, { "epoch": 0.41243275552898984, "grad_norm": 188.86085510253906, "learning_rate": 1.8415388643616661e-06, "loss": 20.6719, "step": 6210 }, { "epoch": 0.41249916982134555, "grad_norm": 368.6258850097656, "learning_rate": 1.841480760295933e-06, "loss": 24.0625, "step": 6211 }, { "epoch": 0.41256558411370126, "grad_norm": 419.0150146484375, "learning_rate": 1.841422646496436e-06, "loss": 25.5312, "step": 6212 }, { "epoch": 0.41263199840605697, "grad_norm": 210.22288513183594, "learning_rate": 1.8413645229638472e-06, "loss": 15.2656, "step": 6213 }, { "epoch": 0.4126984126984127, "grad_norm": 201.3599090576172, "learning_rate": 1.8413063896988384e-06, "loss": 17.3438, "step": 6214 }, { "epoch": 0.41276482699076844, "grad_norm": 661.3803100585938, "learning_rate": 1.8412482467020827e-06, "loss": 26.1562, "step": 6215 }, { "epoch": 0.41283124128312415, "grad_norm": 225.2010498046875, "learning_rate": 1.8411900939742522e-06, "loss": 16.3594, "step": 6216 }, { "epoch": 0.41289765557547986, "grad_norm": 298.64349365234375, "learning_rate": 1.8411319315160198e-06, "loss": 27.625, "step": 6217 }, { "epoch": 0.41296406986783557, "grad_norm": 171.6995849609375, "learning_rate": 1.8410737593280582e-06, "loss": 17.6719, "step": 6218 }, { "epoch": 0.4130304841601913, "grad_norm": 632.8114013671875, "learning_rate": 1.8410155774110404e-06, "loss": 15.1094, "step": 6219 }, { "epoch": 0.413096898452547, "grad_norm": 187.6892547607422, "learning_rate": 1.8409573857656394e-06, "loss": 15.4531, "step": 6220 }, { "epoch": 0.4131633127449027, "grad_norm": 297.8266906738281, "learning_rate": 1.840899184392528e-06, "loss": 21.7188, "step": 6221 }, { "epoch": 0.4132297270372584, "grad_norm": 275.9458312988281, "learning_rate": 1.8408409732923804e-06, "loss": 18.3594, "step": 6222 }, { "epoch": 0.4132961413296141, "grad_norm": 234.16278076171875, "learning_rate": 1.8407827524658685e-06, "loss": 19.9844, "step": 6223 }, { "epoch": 0.41336255562196983, "grad_norm": 356.2079772949219, "learning_rate": 1.840724521913667e-06, "loss": 15.4844, "step": 6224 }, { "epoch": 0.41342896991432554, "grad_norm": 155.20274353027344, "learning_rate": 1.840666281636449e-06, "loss": 13.4062, "step": 6225 }, { "epoch": 0.4134953842066813, "grad_norm": 274.8351745605469, "learning_rate": 1.840608031634888e-06, "loss": 19.6875, "step": 6226 }, { "epoch": 0.413561798499037, "grad_norm": 384.93914794921875, "learning_rate": 1.8405497719096581e-06, "loss": 21.2188, "step": 6227 }, { "epoch": 0.4136282127913927, "grad_norm": 200.67941284179688, "learning_rate": 1.8404915024614334e-06, "loss": 19.3438, "step": 6228 }, { "epoch": 0.41369462708374843, "grad_norm": 464.9447021484375, "learning_rate": 1.8404332232908873e-06, "loss": 20.6094, "step": 6229 }, { "epoch": 0.41376104137610414, "grad_norm": 210.20877075195312, "learning_rate": 1.8403749343986943e-06, "loss": 17.2188, "step": 6230 }, { "epoch": 0.41382745566845985, "grad_norm": 121.4127197265625, "learning_rate": 1.8403166357855287e-06, "loss": 14.2812, "step": 6231 }, { "epoch": 0.41389386996081556, "grad_norm": 501.35980224609375, "learning_rate": 1.840258327452065e-06, "loss": 34.4688, "step": 6232 }, { "epoch": 0.4139602842531713, "grad_norm": 448.18328857421875, "learning_rate": 1.8402000093989774e-06, "loss": 23.9375, "step": 6233 }, { "epoch": 0.414026698545527, "grad_norm": 119.31159210205078, "learning_rate": 1.8401416816269404e-06, "loss": 16.2188, "step": 6234 }, { "epoch": 0.4140931128378827, "grad_norm": 252.65850830078125, "learning_rate": 1.840083344136629e-06, "loss": 12.6094, "step": 6235 }, { "epoch": 0.4141595271302384, "grad_norm": 275.1055908203125, "learning_rate": 1.8400249969287178e-06, "loss": 16.2031, "step": 6236 }, { "epoch": 0.41422594142259417, "grad_norm": 123.06853485107422, "learning_rate": 1.839966640003882e-06, "loss": 18.7344, "step": 6237 }, { "epoch": 0.4142923557149499, "grad_norm": 100.95050811767578, "learning_rate": 1.8399082733627965e-06, "loss": 14.2656, "step": 6238 }, { "epoch": 0.4143587700073056, "grad_norm": 186.239013671875, "learning_rate": 1.8398498970061363e-06, "loss": 19.2656, "step": 6239 }, { "epoch": 0.4144251842996613, "grad_norm": 358.1891174316406, "learning_rate": 1.8397915109345768e-06, "loss": 23.4688, "step": 6240 }, { "epoch": 0.414491598592017, "grad_norm": 118.29718780517578, "learning_rate": 1.8397331151487933e-06, "loss": 13.8438, "step": 6241 }, { "epoch": 0.4145580128843727, "grad_norm": 433.9702453613281, "learning_rate": 1.8396747096494614e-06, "loss": 16.2656, "step": 6242 }, { "epoch": 0.4146244271767284, "grad_norm": 185.1398162841797, "learning_rate": 1.8396162944372566e-06, "loss": 17.2812, "step": 6243 }, { "epoch": 0.41469084146908414, "grad_norm": 299.55322265625, "learning_rate": 1.8395578695128546e-06, "loss": 20.3125, "step": 6244 }, { "epoch": 0.41475725576143985, "grad_norm": 162.7418212890625, "learning_rate": 1.8394994348769313e-06, "loss": 18.0938, "step": 6245 }, { "epoch": 0.41482367005379556, "grad_norm": 375.5953674316406, "learning_rate": 1.8394409905301627e-06, "loss": 19.7188, "step": 6246 }, { "epoch": 0.41489008434615127, "grad_norm": 197.90025329589844, "learning_rate": 1.8393825364732247e-06, "loss": 15.5625, "step": 6247 }, { "epoch": 0.41495649863850703, "grad_norm": 165.78335571289062, "learning_rate": 1.8393240727067935e-06, "loss": 14.75, "step": 6248 }, { "epoch": 0.41502291293086274, "grad_norm": 125.69005584716797, "learning_rate": 1.8392655992315455e-06, "loss": 12.6875, "step": 6249 }, { "epoch": 0.41508932722321845, "grad_norm": 494.73828125, "learning_rate": 1.839207116048157e-06, "loss": 15.0469, "step": 6250 }, { "epoch": 0.41515574151557416, "grad_norm": 519.8650512695312, "learning_rate": 1.8391486231573046e-06, "loss": 31.375, "step": 6251 }, { "epoch": 0.41522215580792987, "grad_norm": 278.5600891113281, "learning_rate": 1.8390901205596646e-06, "loss": 24.125, "step": 6252 }, { "epoch": 0.4152885701002856, "grad_norm": 720.5757446289062, "learning_rate": 1.839031608255914e-06, "loss": 28.6875, "step": 6253 }, { "epoch": 0.4153549843926413, "grad_norm": 139.6885986328125, "learning_rate": 1.8389730862467296e-06, "loss": 14.8281, "step": 6254 }, { "epoch": 0.415421398684997, "grad_norm": 264.2380065917969, "learning_rate": 1.8389145545327883e-06, "loss": 19.0312, "step": 6255 }, { "epoch": 0.4154878129773527, "grad_norm": 149.8126678466797, "learning_rate": 1.8388560131147673e-06, "loss": 18.2031, "step": 6256 }, { "epoch": 0.4155542272697084, "grad_norm": 453.6262512207031, "learning_rate": 1.8387974619933436e-06, "loss": 22.6875, "step": 6257 }, { "epoch": 0.41562064156206413, "grad_norm": 131.67620849609375, "learning_rate": 1.8387389011691945e-06, "loss": 18.3125, "step": 6258 }, { "epoch": 0.4156870558544199, "grad_norm": 216.36572265625, "learning_rate": 1.8386803306429972e-06, "loss": 16.7188, "step": 6259 }, { "epoch": 0.4157534701467756, "grad_norm": 190.1410675048828, "learning_rate": 1.8386217504154296e-06, "loss": 16.8438, "step": 6260 }, { "epoch": 0.4158198844391313, "grad_norm": 179.5153045654297, "learning_rate": 1.8385631604871694e-06, "loss": 18.2031, "step": 6261 }, { "epoch": 0.415886298731487, "grad_norm": 269.2583312988281, "learning_rate": 1.8385045608588938e-06, "loss": 16.9844, "step": 6262 }, { "epoch": 0.41595271302384274, "grad_norm": 149.59815979003906, "learning_rate": 1.838445951531281e-06, "loss": 12.9219, "step": 6263 }, { "epoch": 0.41601912731619844, "grad_norm": 169.37298583984375, "learning_rate": 1.8383873325050091e-06, "loss": 21.875, "step": 6264 }, { "epoch": 0.41608554160855415, "grad_norm": 7148.4892578125, "learning_rate": 1.838328703780756e-06, "loss": 14.8906, "step": 6265 }, { "epoch": 0.41615195590090986, "grad_norm": 169.7064208984375, "learning_rate": 1.8382700653591993e-06, "loss": 18.9688, "step": 6266 }, { "epoch": 0.4162183701932656, "grad_norm": 225.77711486816406, "learning_rate": 1.8382114172410186e-06, "loss": 14.6094, "step": 6267 }, { "epoch": 0.4162847844856213, "grad_norm": 254.77308654785156, "learning_rate": 1.8381527594268911e-06, "loss": 21.5312, "step": 6268 }, { "epoch": 0.416351198777977, "grad_norm": 182.3145751953125, "learning_rate": 1.838094091917496e-06, "loss": 14.0938, "step": 6269 }, { "epoch": 0.41641761307033276, "grad_norm": 296.06292724609375, "learning_rate": 1.8380354147135117e-06, "loss": 25.2344, "step": 6270 }, { "epoch": 0.41648402736268847, "grad_norm": 488.7841491699219, "learning_rate": 1.837976727815617e-06, "loss": 19.0859, "step": 6271 }, { "epoch": 0.4165504416550442, "grad_norm": 168.8207550048828, "learning_rate": 1.8379180312244907e-06, "loss": 16.2812, "step": 6272 }, { "epoch": 0.4166168559473999, "grad_norm": 145.08895874023438, "learning_rate": 1.8378593249408114e-06, "loss": 15.4062, "step": 6273 }, { "epoch": 0.4166832702397556, "grad_norm": 280.7784729003906, "learning_rate": 1.8378006089652591e-06, "loss": 19.75, "step": 6274 }, { "epoch": 0.4167496845321113, "grad_norm": 192.7294158935547, "learning_rate": 1.8377418832985124e-06, "loss": 18.5938, "step": 6275 }, { "epoch": 0.416816098824467, "grad_norm": 156.01425170898438, "learning_rate": 1.8376831479412505e-06, "loss": 14.4062, "step": 6276 }, { "epoch": 0.41688251311682273, "grad_norm": 149.0343475341797, "learning_rate": 1.8376244028941527e-06, "loss": 15.8125, "step": 6277 }, { "epoch": 0.41694892740917844, "grad_norm": 141.12728881835938, "learning_rate": 1.8375656481578991e-06, "loss": 13.3125, "step": 6278 }, { "epoch": 0.41701534170153415, "grad_norm": 402.9443664550781, "learning_rate": 1.8375068837331691e-06, "loss": 24.875, "step": 6279 }, { "epoch": 0.41708175599388986, "grad_norm": 927.6321411132812, "learning_rate": 1.8374481096206425e-06, "loss": 19.1562, "step": 6280 }, { "epoch": 0.4171481702862456, "grad_norm": 343.51092529296875, "learning_rate": 1.8373893258209989e-06, "loss": 20.1562, "step": 6281 }, { "epoch": 0.41721458457860133, "grad_norm": 253.8020477294922, "learning_rate": 1.8373305323349185e-06, "loss": 22.0938, "step": 6282 }, { "epoch": 0.41728099887095704, "grad_norm": 284.5265197753906, "learning_rate": 1.8372717291630812e-06, "loss": 14.9062, "step": 6283 }, { "epoch": 0.41734741316331275, "grad_norm": 1127.6258544921875, "learning_rate": 1.8372129163061673e-06, "loss": 16.6719, "step": 6284 }, { "epoch": 0.41741382745566846, "grad_norm": 853.7990112304688, "learning_rate": 1.8371540937648576e-06, "loss": 16.5, "step": 6285 }, { "epoch": 0.4174802417480242, "grad_norm": 301.142822265625, "learning_rate": 1.8370952615398317e-06, "loss": 18.0156, "step": 6286 }, { "epoch": 0.4175466560403799, "grad_norm": 249.3697052001953, "learning_rate": 1.8370364196317706e-06, "loss": 23.4375, "step": 6287 }, { "epoch": 0.4176130703327356, "grad_norm": 441.5849914550781, "learning_rate": 1.8369775680413546e-06, "loss": 25.5625, "step": 6288 }, { "epoch": 0.4176794846250913, "grad_norm": 226.92893981933594, "learning_rate": 1.836918706769265e-06, "loss": 22.4062, "step": 6289 }, { "epoch": 0.417745898917447, "grad_norm": 229.474365234375, "learning_rate": 1.8368598358161823e-06, "loss": 18.4375, "step": 6290 }, { "epoch": 0.4178123132098028, "grad_norm": 284.76812744140625, "learning_rate": 1.8368009551827877e-06, "loss": 17.2969, "step": 6291 }, { "epoch": 0.4178787275021585, "grad_norm": 448.1343078613281, "learning_rate": 1.836742064869762e-06, "loss": 20.8125, "step": 6292 }, { "epoch": 0.4179451417945142, "grad_norm": 218.58340454101562, "learning_rate": 1.8366831648777866e-06, "loss": 16.1719, "step": 6293 }, { "epoch": 0.4180115560868699, "grad_norm": 179.8986053466797, "learning_rate": 1.836624255207543e-06, "loss": 14.5703, "step": 6294 }, { "epoch": 0.4180779703792256, "grad_norm": 126.57584381103516, "learning_rate": 1.8365653358597125e-06, "loss": 16.625, "step": 6295 }, { "epoch": 0.4181443846715813, "grad_norm": 184.08218383789062, "learning_rate": 1.8365064068349763e-06, "loss": 15.7344, "step": 6296 }, { "epoch": 0.41821079896393704, "grad_norm": 252.82107543945312, "learning_rate": 1.8364474681340164e-06, "loss": 23.8125, "step": 6297 }, { "epoch": 0.41827721325629275, "grad_norm": 870.0910034179688, "learning_rate": 1.8363885197575146e-06, "loss": 25.0312, "step": 6298 }, { "epoch": 0.41834362754864846, "grad_norm": 247.54470825195312, "learning_rate": 1.8363295617061528e-06, "loss": 12.3438, "step": 6299 }, { "epoch": 0.41841004184100417, "grad_norm": 176.25970458984375, "learning_rate": 1.8362705939806126e-06, "loss": 16.5781, "step": 6300 }, { "epoch": 0.4184764561333599, "grad_norm": 241.06442260742188, "learning_rate": 1.8362116165815767e-06, "loss": 22.4688, "step": 6301 }, { "epoch": 0.41854287042571564, "grad_norm": 299.8374938964844, "learning_rate": 1.8361526295097267e-06, "loss": 21.7344, "step": 6302 }, { "epoch": 0.41860928471807135, "grad_norm": 312.6624450683594, "learning_rate": 1.8360936327657454e-06, "loss": 21.5, "step": 6303 }, { "epoch": 0.41867569901042706, "grad_norm": 313.19024658203125, "learning_rate": 1.8360346263503152e-06, "loss": 29.7031, "step": 6304 }, { "epoch": 0.41874211330278277, "grad_norm": 262.6427001953125, "learning_rate": 1.8359756102641181e-06, "loss": 17.6875, "step": 6305 }, { "epoch": 0.4188085275951385, "grad_norm": 102.55439758300781, "learning_rate": 1.8359165845078376e-06, "loss": 15.1406, "step": 6306 }, { "epoch": 0.4188749418874942, "grad_norm": 177.75067138671875, "learning_rate": 1.835857549082156e-06, "loss": 26.1875, "step": 6307 }, { "epoch": 0.4189413561798499, "grad_norm": 399.26177978515625, "learning_rate": 1.8357985039877563e-06, "loss": 22.625, "step": 6308 }, { "epoch": 0.4190077704722056, "grad_norm": 197.13504028320312, "learning_rate": 1.8357394492253213e-06, "loss": 21.1562, "step": 6309 }, { "epoch": 0.4190741847645613, "grad_norm": 1189.9954833984375, "learning_rate": 1.8356803847955345e-06, "loss": 18.875, "step": 6310 }, { "epoch": 0.41914059905691703, "grad_norm": 151.71560668945312, "learning_rate": 1.8356213106990786e-06, "loss": 15.0625, "step": 6311 }, { "epoch": 0.41920701334927274, "grad_norm": 485.83404541015625, "learning_rate": 1.8355622269366372e-06, "loss": 20.0625, "step": 6312 }, { "epoch": 0.4192734276416285, "grad_norm": 209.6563262939453, "learning_rate": 1.835503133508894e-06, "loss": 23.1406, "step": 6313 }, { "epoch": 0.4193398419339842, "grad_norm": 385.5350646972656, "learning_rate": 1.8354440304165321e-06, "loss": 23.2031, "step": 6314 }, { "epoch": 0.4194062562263399, "grad_norm": 329.07794189453125, "learning_rate": 1.8353849176602356e-06, "loss": 19.1094, "step": 6315 }, { "epoch": 0.41947267051869563, "grad_norm": 160.77511596679688, "learning_rate": 1.8353257952406881e-06, "loss": 18.1094, "step": 6316 }, { "epoch": 0.41953908481105134, "grad_norm": 376.32666015625, "learning_rate": 1.8352666631585736e-06, "loss": 18.3281, "step": 6317 }, { "epoch": 0.41960549910340705, "grad_norm": 339.26885986328125, "learning_rate": 1.8352075214145757e-06, "loss": 20.7812, "step": 6318 }, { "epoch": 0.41967191339576276, "grad_norm": 129.68972778320312, "learning_rate": 1.835148370009379e-06, "loss": 15.125, "step": 6319 }, { "epoch": 0.4197383276881185, "grad_norm": 184.37571716308594, "learning_rate": 1.8350892089436674e-06, "loss": 13.625, "step": 6320 }, { "epoch": 0.4198047419804742, "grad_norm": 273.3520202636719, "learning_rate": 1.835030038218125e-06, "loss": 24.1406, "step": 6321 }, { "epoch": 0.4198711562728299, "grad_norm": 1028.045166015625, "learning_rate": 1.8349708578334373e-06, "loss": 18.5, "step": 6322 }, { "epoch": 0.4199375705651856, "grad_norm": 216.92990112304688, "learning_rate": 1.8349116677902877e-06, "loss": 13.25, "step": 6323 }, { "epoch": 0.42000398485754137, "grad_norm": 200.10260009765625, "learning_rate": 1.8348524680893617e-06, "loss": 18.5938, "step": 6324 }, { "epoch": 0.4200703991498971, "grad_norm": 280.60992431640625, "learning_rate": 1.8347932587313438e-06, "loss": 19.9531, "step": 6325 }, { "epoch": 0.4201368134422528, "grad_norm": 193.2469024658203, "learning_rate": 1.8347340397169186e-06, "loss": 16.1094, "step": 6326 }, { "epoch": 0.4202032277346085, "grad_norm": 189.75291442871094, "learning_rate": 1.8346748110467715e-06, "loss": 21.7188, "step": 6327 }, { "epoch": 0.4202696420269642, "grad_norm": 203.98098754882812, "learning_rate": 1.8346155727215873e-06, "loss": 28.8125, "step": 6328 }, { "epoch": 0.4203360563193199, "grad_norm": 368.39984130859375, "learning_rate": 1.8345563247420514e-06, "loss": 24.8906, "step": 6329 }, { "epoch": 0.4204024706116756, "grad_norm": 258.7388000488281, "learning_rate": 1.8344970671088493e-06, "loss": 17.7969, "step": 6330 }, { "epoch": 0.42046888490403134, "grad_norm": 235.96743774414062, "learning_rate": 1.8344377998226665e-06, "loss": 16.2031, "step": 6331 }, { "epoch": 0.42053529919638705, "grad_norm": 279.6509704589844, "learning_rate": 1.8343785228841884e-06, "loss": 20.4844, "step": 6332 }, { "epoch": 0.42060171348874276, "grad_norm": 323.7450866699219, "learning_rate": 1.8343192362941005e-06, "loss": 18.2344, "step": 6333 }, { "epoch": 0.42066812778109847, "grad_norm": 140.74761962890625, "learning_rate": 1.8342599400530885e-06, "loss": 13.2656, "step": 6334 }, { "epoch": 0.42073454207345423, "grad_norm": 263.4324951171875, "learning_rate": 1.8342006341618391e-06, "loss": 14.7969, "step": 6335 }, { "epoch": 0.42080095636580994, "grad_norm": 271.086669921875, "learning_rate": 1.8341413186210375e-06, "loss": 21.9688, "step": 6336 }, { "epoch": 0.42086737065816565, "grad_norm": 552.1581420898438, "learning_rate": 1.8340819934313703e-06, "loss": 19.6719, "step": 6337 }, { "epoch": 0.42093378495052136, "grad_norm": 705.7362670898438, "learning_rate": 1.8340226585935231e-06, "loss": 22.6562, "step": 6338 }, { "epoch": 0.42100019924287707, "grad_norm": 189.62551879882812, "learning_rate": 1.833963314108183e-06, "loss": 20.3906, "step": 6339 }, { "epoch": 0.4210666135352328, "grad_norm": 260.47003173828125, "learning_rate": 1.8339039599760365e-06, "loss": 19.6406, "step": 6340 }, { "epoch": 0.4211330278275885, "grad_norm": 291.3968505859375, "learning_rate": 1.8338445961977694e-06, "loss": 18.4688, "step": 6341 }, { "epoch": 0.4211994421199442, "grad_norm": 211.80908203125, "learning_rate": 1.8337852227740692e-06, "loss": 22.1875, "step": 6342 }, { "epoch": 0.4212658564122999, "grad_norm": 286.18109130859375, "learning_rate": 1.8337258397056222e-06, "loss": 18.7344, "step": 6343 }, { "epoch": 0.4213322707046556, "grad_norm": 470.92803955078125, "learning_rate": 1.8336664469931152e-06, "loss": 19.7188, "step": 6344 }, { "epoch": 0.42139868499701133, "grad_norm": 153.76177978515625, "learning_rate": 1.8336070446372356e-06, "loss": 17.7969, "step": 6345 }, { "epoch": 0.4214650992893671, "grad_norm": 2744.061767578125, "learning_rate": 1.8335476326386703e-06, "loss": 25.25, "step": 6346 }, { "epoch": 0.4215315135817228, "grad_norm": 250.82708740234375, "learning_rate": 1.8334882109981069e-06, "loss": 19.1094, "step": 6347 }, { "epoch": 0.4215979278740785, "grad_norm": 189.98727416992188, "learning_rate": 1.8334287797162322e-06, "loss": 20.8125, "step": 6348 }, { "epoch": 0.4216643421664342, "grad_norm": 129.592041015625, "learning_rate": 1.833369338793734e-06, "loss": 16.4219, "step": 6349 }, { "epoch": 0.42173075645878993, "grad_norm": 135.13267517089844, "learning_rate": 1.8333098882313e-06, "loss": 15.25, "step": 6350 }, { "epoch": 0.42179717075114564, "grad_norm": 252.63946533203125, "learning_rate": 1.8332504280296178e-06, "loss": 17.2812, "step": 6351 }, { "epoch": 0.42186358504350135, "grad_norm": 327.36187744140625, "learning_rate": 1.833190958189375e-06, "loss": 14.75, "step": 6352 }, { "epoch": 0.42192999933585706, "grad_norm": 169.29209899902344, "learning_rate": 1.8331314787112598e-06, "loss": 28.9375, "step": 6353 }, { "epoch": 0.4219964136282128, "grad_norm": 183.94558715820312, "learning_rate": 1.8330719895959597e-06, "loss": 18.1406, "step": 6354 }, { "epoch": 0.4220628279205685, "grad_norm": 273.8614501953125, "learning_rate": 1.8330124908441635e-06, "loss": 18.0156, "step": 6355 }, { "epoch": 0.4221292422129242, "grad_norm": 120.40938568115234, "learning_rate": 1.832952982456559e-06, "loss": 21.0625, "step": 6356 }, { "epoch": 0.42219565650527996, "grad_norm": 181.1326446533203, "learning_rate": 1.8328934644338347e-06, "loss": 17.9062, "step": 6357 }, { "epoch": 0.42226207079763567, "grad_norm": 292.5528259277344, "learning_rate": 1.8328339367766792e-06, "loss": 20.3438, "step": 6358 }, { "epoch": 0.4223284850899914, "grad_norm": 168.9955291748047, "learning_rate": 1.8327743994857808e-06, "loss": 23.0781, "step": 6359 }, { "epoch": 0.4223948993823471, "grad_norm": 991.3193359375, "learning_rate": 1.8327148525618286e-06, "loss": 21.5312, "step": 6360 }, { "epoch": 0.4224613136747028, "grad_norm": 158.01632690429688, "learning_rate": 1.832655296005511e-06, "loss": 15.0859, "step": 6361 }, { "epoch": 0.4225277279670585, "grad_norm": 217.99270629882812, "learning_rate": 1.832595729817517e-06, "loss": 18.4219, "step": 6362 }, { "epoch": 0.4225941422594142, "grad_norm": 162.2628936767578, "learning_rate": 1.8325361539985358e-06, "loss": 17.3594, "step": 6363 }, { "epoch": 0.42266055655176993, "grad_norm": 283.7868957519531, "learning_rate": 1.8324765685492563e-06, "loss": 33.8438, "step": 6364 }, { "epoch": 0.42272697084412564, "grad_norm": 208.0977783203125, "learning_rate": 1.832416973470368e-06, "loss": 21.4531, "step": 6365 }, { "epoch": 0.42279338513648135, "grad_norm": 283.25994873046875, "learning_rate": 1.8323573687625604e-06, "loss": 21.2812, "step": 6366 }, { "epoch": 0.4228597994288371, "grad_norm": 336.2755432128906, "learning_rate": 1.8322977544265223e-06, "loss": 18.8672, "step": 6367 }, { "epoch": 0.4229262137211928, "grad_norm": 134.66091918945312, "learning_rate": 1.832238130462944e-06, "loss": 16.0312, "step": 6368 }, { "epoch": 0.42299262801354853, "grad_norm": 4316.57177734375, "learning_rate": 1.832178496872515e-06, "loss": 17.9844, "step": 6369 }, { "epoch": 0.42305904230590424, "grad_norm": 298.7712097167969, "learning_rate": 1.8321188536559244e-06, "loss": 20.3906, "step": 6370 }, { "epoch": 0.42312545659825995, "grad_norm": 148.49075317382812, "learning_rate": 1.8320592008138634e-06, "loss": 16.3594, "step": 6371 }, { "epoch": 0.42319187089061566, "grad_norm": 224.532958984375, "learning_rate": 1.831999538347021e-06, "loss": 20.0469, "step": 6372 }, { "epoch": 0.42325828518297137, "grad_norm": 158.74911499023438, "learning_rate": 1.8319398662560879e-06, "loss": 14.8281, "step": 6373 }, { "epoch": 0.4233246994753271, "grad_norm": 178.0681915283203, "learning_rate": 1.8318801845417537e-06, "loss": 15.7812, "step": 6374 }, { "epoch": 0.4233911137676828, "grad_norm": 182.59808349609375, "learning_rate": 1.83182049320471e-06, "loss": 19.5, "step": 6375 }, { "epoch": 0.4234575280600385, "grad_norm": 335.3917541503906, "learning_rate": 1.831760792245646e-06, "loss": 27.4688, "step": 6376 }, { "epoch": 0.4235239423523942, "grad_norm": 194.4428253173828, "learning_rate": 1.8317010816652525e-06, "loss": 16.0781, "step": 6377 }, { "epoch": 0.42359035664475, "grad_norm": 324.1300048828125, "learning_rate": 1.831641361464221e-06, "loss": 18.1094, "step": 6378 }, { "epoch": 0.4236567709371057, "grad_norm": 203.0811309814453, "learning_rate": 1.8315816316432416e-06, "loss": 20.0469, "step": 6379 }, { "epoch": 0.4237231852294614, "grad_norm": 284.0128173828125, "learning_rate": 1.8315218922030052e-06, "loss": 19.4844, "step": 6380 }, { "epoch": 0.4237895995218171, "grad_norm": 100.76829528808594, "learning_rate": 1.8314621431442033e-06, "loss": 12.3438, "step": 6381 }, { "epoch": 0.4238560138141728, "grad_norm": 187.98826599121094, "learning_rate": 1.8314023844675265e-06, "loss": 17.5312, "step": 6382 }, { "epoch": 0.4239224281065285, "grad_norm": 154.30496215820312, "learning_rate": 1.8313426161736665e-06, "loss": 17.875, "step": 6383 }, { "epoch": 0.42398884239888424, "grad_norm": 283.9810485839844, "learning_rate": 1.8312828382633145e-06, "loss": 18.5625, "step": 6384 }, { "epoch": 0.42405525669123995, "grad_norm": 561.84130859375, "learning_rate": 1.831223050737162e-06, "loss": 18.5469, "step": 6385 }, { "epoch": 0.42412167098359566, "grad_norm": 223.9785919189453, "learning_rate": 1.8311632535959007e-06, "loss": 15.625, "step": 6386 }, { "epoch": 0.42418808527595137, "grad_norm": 242.6333465576172, "learning_rate": 1.831103446840222e-06, "loss": 18.1406, "step": 6387 }, { "epoch": 0.4242544995683071, "grad_norm": 177.20298767089844, "learning_rate": 1.831043630470818e-06, "loss": 15.6562, "step": 6388 }, { "epoch": 0.42432091386066284, "grad_norm": 220.43438720703125, "learning_rate": 1.8309838044883802e-06, "loss": 20.3438, "step": 6389 }, { "epoch": 0.42438732815301855, "grad_norm": 219.17779541015625, "learning_rate": 1.8309239688936012e-06, "loss": 16.875, "step": 6390 }, { "epoch": 0.42445374244537426, "grad_norm": 170.52578735351562, "learning_rate": 1.8308641236871725e-06, "loss": 18.5781, "step": 6391 }, { "epoch": 0.42452015673772997, "grad_norm": 411.1023254394531, "learning_rate": 1.8308042688697871e-06, "loss": 19.2344, "step": 6392 }, { "epoch": 0.4245865710300857, "grad_norm": 415.47906494140625, "learning_rate": 1.8307444044421368e-06, "loss": 20.2344, "step": 6393 }, { "epoch": 0.4246529853224414, "grad_norm": 152.4672088623047, "learning_rate": 1.8306845304049143e-06, "loss": 14.2656, "step": 6394 }, { "epoch": 0.4247193996147971, "grad_norm": 258.02740478515625, "learning_rate": 1.8306246467588123e-06, "loss": 17.8594, "step": 6395 }, { "epoch": 0.4247858139071528, "grad_norm": 176.91749572753906, "learning_rate": 1.830564753504523e-06, "loss": 16.625, "step": 6396 }, { "epoch": 0.4248522281995085, "grad_norm": 479.2696228027344, "learning_rate": 1.8305048506427401e-06, "loss": 19.9688, "step": 6397 }, { "epoch": 0.42491864249186423, "grad_norm": 485.7080078125, "learning_rate": 1.8304449381741555e-06, "loss": 19.6094, "step": 6398 }, { "epoch": 0.42498505678421994, "grad_norm": 184.43153381347656, "learning_rate": 1.830385016099463e-06, "loss": 14.8594, "step": 6399 }, { "epoch": 0.4250514710765757, "grad_norm": 111.84174346923828, "learning_rate": 1.8303250844193552e-06, "loss": 14.1719, "step": 6400 }, { "epoch": 0.4251178853689314, "grad_norm": 195.0274200439453, "learning_rate": 1.8302651431345257e-06, "loss": 16.875, "step": 6401 }, { "epoch": 0.4251842996612871, "grad_norm": 158.64559936523438, "learning_rate": 1.830205192245668e-06, "loss": 14.2656, "step": 6402 }, { "epoch": 0.42525071395364283, "grad_norm": 168.5391082763672, "learning_rate": 1.8301452317534753e-06, "loss": 19.75, "step": 6403 }, { "epoch": 0.42531712824599854, "grad_norm": 482.1900939941406, "learning_rate": 1.830085261658641e-06, "loss": 21.4688, "step": 6404 }, { "epoch": 0.42538354253835425, "grad_norm": 182.86961364746094, "learning_rate": 1.830025281961859e-06, "loss": 14.7812, "step": 6405 }, { "epoch": 0.42544995683070996, "grad_norm": 361.372314453125, "learning_rate": 1.8299652926638237e-06, "loss": 29.7188, "step": 6406 }, { "epoch": 0.4255163711230657, "grad_norm": 398.36663818359375, "learning_rate": 1.829905293765228e-06, "loss": 26.2969, "step": 6407 }, { "epoch": 0.4255827854154214, "grad_norm": 283.509765625, "learning_rate": 1.8298452852667668e-06, "loss": 23.0, "step": 6408 }, { "epoch": 0.4256491997077771, "grad_norm": 129.63023376464844, "learning_rate": 1.8297852671691335e-06, "loss": 12.6094, "step": 6409 }, { "epoch": 0.4257156140001328, "grad_norm": 288.0648498535156, "learning_rate": 1.829725239473023e-06, "loss": 15.9219, "step": 6410 }, { "epoch": 0.42578202829248857, "grad_norm": 227.27835083007812, "learning_rate": 1.829665202179129e-06, "loss": 15.8594, "step": 6411 }, { "epoch": 0.4258484425848443, "grad_norm": 237.5251007080078, "learning_rate": 1.8296051552881466e-06, "loss": 19.9688, "step": 6412 }, { "epoch": 0.4259148568772, "grad_norm": 676.5872802734375, "learning_rate": 1.82954509880077e-06, "loss": 19.3281, "step": 6413 }, { "epoch": 0.4259812711695557, "grad_norm": 419.6811218261719, "learning_rate": 1.8294850327176941e-06, "loss": 22.5938, "step": 6414 }, { "epoch": 0.4260476854619114, "grad_norm": 179.3325653076172, "learning_rate": 1.8294249570396136e-06, "loss": 18.4062, "step": 6415 }, { "epoch": 0.4261140997542671, "grad_norm": 193.12669372558594, "learning_rate": 1.8293648717672235e-06, "loss": 19.5, "step": 6416 }, { "epoch": 0.4261805140466228, "grad_norm": 328.66668701171875, "learning_rate": 1.8293047769012189e-06, "loss": 18.6406, "step": 6417 }, { "epoch": 0.42624692833897854, "grad_norm": 251.9324493408203, "learning_rate": 1.829244672442295e-06, "loss": 19.5938, "step": 6418 }, { "epoch": 0.42631334263133425, "grad_norm": 397.6443176269531, "learning_rate": 1.8291845583911468e-06, "loss": 17.8438, "step": 6419 }, { "epoch": 0.42637975692368996, "grad_norm": 612.4271240234375, "learning_rate": 1.8291244347484695e-06, "loss": 24.7188, "step": 6420 }, { "epoch": 0.42644617121604567, "grad_norm": 614.5619506835938, "learning_rate": 1.829064301514959e-06, "loss": 24.5938, "step": 6421 }, { "epoch": 0.42651258550840143, "grad_norm": 140.65249633789062, "learning_rate": 1.8290041586913108e-06, "loss": 14.7656, "step": 6422 }, { "epoch": 0.42657899980075714, "grad_norm": 175.75611877441406, "learning_rate": 1.8289440062782204e-06, "loss": 15.8125, "step": 6423 }, { "epoch": 0.42664541409311285, "grad_norm": 351.6955871582031, "learning_rate": 1.8288838442763837e-06, "loss": 21.5938, "step": 6424 }, { "epoch": 0.42671182838546856, "grad_norm": 103.03326416015625, "learning_rate": 1.828823672686497e-06, "loss": 14.7188, "step": 6425 }, { "epoch": 0.42677824267782427, "grad_norm": 176.4022216796875, "learning_rate": 1.8287634915092557e-06, "loss": 20.8281, "step": 6426 }, { "epoch": 0.42684465697018, "grad_norm": 159.67677307128906, "learning_rate": 1.828703300745356e-06, "loss": 18.1719, "step": 6427 }, { "epoch": 0.4269110712625357, "grad_norm": 266.94439697265625, "learning_rate": 1.8286431003954947e-06, "loss": 24.8906, "step": 6428 }, { "epoch": 0.4269774855548914, "grad_norm": 180.0707550048828, "learning_rate": 1.828582890460368e-06, "loss": 18.1406, "step": 6429 }, { "epoch": 0.4270438998472471, "grad_norm": 189.05125427246094, "learning_rate": 1.8285226709406718e-06, "loss": 17.5938, "step": 6430 }, { "epoch": 0.4271103141396028, "grad_norm": 317.5176696777344, "learning_rate": 1.8284624418371034e-06, "loss": 17.625, "step": 6431 }, { "epoch": 0.42717672843195853, "grad_norm": 216.02781677246094, "learning_rate": 1.8284022031503587e-06, "loss": 21.2188, "step": 6432 }, { "epoch": 0.4272431427243143, "grad_norm": 207.46292114257812, "learning_rate": 1.8283419548811354e-06, "loss": 18.0156, "step": 6433 }, { "epoch": 0.42730955701667, "grad_norm": 209.75894165039062, "learning_rate": 1.82828169703013e-06, "loss": 16.2031, "step": 6434 }, { "epoch": 0.4273759713090257, "grad_norm": 865.1652221679688, "learning_rate": 1.8282214295980396e-06, "loss": 32.2812, "step": 6435 }, { "epoch": 0.4274423856013814, "grad_norm": 640.2540893554688, "learning_rate": 1.8281611525855611e-06, "loss": 21.4219, "step": 6436 }, { "epoch": 0.42750879989373713, "grad_norm": 324.8398742675781, "learning_rate": 1.8281008659933922e-06, "loss": 19.0, "step": 6437 }, { "epoch": 0.42757521418609284, "grad_norm": 208.68417358398438, "learning_rate": 1.82804056982223e-06, "loss": 22.0156, "step": 6438 }, { "epoch": 0.42764162847844855, "grad_norm": 575.6212158203125, "learning_rate": 1.8279802640727715e-06, "loss": 16.8125, "step": 6439 }, { "epoch": 0.42770804277080426, "grad_norm": 490.95526123046875, "learning_rate": 1.8279199487457152e-06, "loss": 18.6562, "step": 6440 }, { "epoch": 0.42777445706316, "grad_norm": 196.26283264160156, "learning_rate": 1.827859623841758e-06, "loss": 17.7812, "step": 6441 }, { "epoch": 0.4278408713555157, "grad_norm": 369.627685546875, "learning_rate": 1.8277992893615983e-06, "loss": 25.0312, "step": 6442 }, { "epoch": 0.42790728564787145, "grad_norm": 432.2129211425781, "learning_rate": 1.8277389453059335e-06, "loss": 18.2969, "step": 6443 }, { "epoch": 0.42797369994022716, "grad_norm": 323.5808410644531, "learning_rate": 1.827678591675462e-06, "loss": 19.0781, "step": 6444 }, { "epoch": 0.42804011423258287, "grad_norm": 112.63737487792969, "learning_rate": 1.8276182284708818e-06, "loss": 13.5156, "step": 6445 }, { "epoch": 0.4281065285249386, "grad_norm": 170.685546875, "learning_rate": 1.8275578556928914e-06, "loss": 21.7656, "step": 6446 }, { "epoch": 0.4281729428172943, "grad_norm": 220.3030242919922, "learning_rate": 1.827497473342189e-06, "loss": 19.3125, "step": 6447 }, { "epoch": 0.42823935710965, "grad_norm": 1034.1239013671875, "learning_rate": 1.8274370814194725e-06, "loss": 25.625, "step": 6448 }, { "epoch": 0.4283057714020057, "grad_norm": 279.5697021484375, "learning_rate": 1.8273766799254414e-06, "loss": 18.7969, "step": 6449 }, { "epoch": 0.4283721856943614, "grad_norm": 175.59054565429688, "learning_rate": 1.8273162688607936e-06, "loss": 14.8281, "step": 6450 }, { "epoch": 0.42843859998671713, "grad_norm": 266.25238037109375, "learning_rate": 1.8272558482262283e-06, "loss": 17.125, "step": 6451 }, { "epoch": 0.42850501427907284, "grad_norm": 122.90467834472656, "learning_rate": 1.8271954180224446e-06, "loss": 14.5781, "step": 6452 }, { "epoch": 0.42857142857142855, "grad_norm": 288.1730041503906, "learning_rate": 1.8271349782501412e-06, "loss": 16.0781, "step": 6453 }, { "epoch": 0.4286378428637843, "grad_norm": 296.65625, "learning_rate": 1.8270745289100174e-06, "loss": 23.0, "step": 6454 }, { "epoch": 0.42870425715614, "grad_norm": 130.05368041992188, "learning_rate": 1.8270140700027722e-06, "loss": 15.875, "step": 6455 }, { "epoch": 0.42877067144849573, "grad_norm": 967.14501953125, "learning_rate": 1.8269536015291052e-06, "loss": 23.9375, "step": 6456 }, { "epoch": 0.42883708574085144, "grad_norm": 176.78289794921875, "learning_rate": 1.8268931234897159e-06, "loss": 21.0, "step": 6457 }, { "epoch": 0.42890350003320715, "grad_norm": 191.88113403320312, "learning_rate": 1.8268326358853037e-06, "loss": 16.0469, "step": 6458 }, { "epoch": 0.42896991432556286, "grad_norm": 213.34043884277344, "learning_rate": 1.8267721387165685e-06, "loss": 20.875, "step": 6459 }, { "epoch": 0.42903632861791857, "grad_norm": 140.32891845703125, "learning_rate": 1.8267116319842095e-06, "loss": 18.6562, "step": 6460 }, { "epoch": 0.4291027429102743, "grad_norm": 178.78720092773438, "learning_rate": 1.8266511156889273e-06, "loss": 22.3438, "step": 6461 }, { "epoch": 0.42916915720263, "grad_norm": 184.37501525878906, "learning_rate": 1.8265905898314218e-06, "loss": 18.0, "step": 6462 }, { "epoch": 0.4292355714949857, "grad_norm": 137.91958618164062, "learning_rate": 1.8265300544123928e-06, "loss": 12.6406, "step": 6463 }, { "epoch": 0.4293019857873414, "grad_norm": 390.27838134765625, "learning_rate": 1.8264695094325408e-06, "loss": 20.3125, "step": 6464 }, { "epoch": 0.4293684000796972, "grad_norm": 245.3484649658203, "learning_rate": 1.8264089548925661e-06, "loss": 28.125, "step": 6465 }, { "epoch": 0.4294348143720529, "grad_norm": 444.60736083984375, "learning_rate": 1.8263483907931693e-06, "loss": 32.75, "step": 6466 }, { "epoch": 0.4295012286644086, "grad_norm": 393.14373779296875, "learning_rate": 1.8262878171350505e-06, "loss": 21.125, "step": 6467 }, { "epoch": 0.4295676429567643, "grad_norm": 321.3061828613281, "learning_rate": 1.8262272339189107e-06, "loss": 23.8906, "step": 6468 }, { "epoch": 0.42963405724912, "grad_norm": 159.51426696777344, "learning_rate": 1.8261666411454508e-06, "loss": 16.1484, "step": 6469 }, { "epoch": 0.4297004715414757, "grad_norm": 391.3782958984375, "learning_rate": 1.8261060388153716e-06, "loss": 13.6562, "step": 6470 }, { "epoch": 0.42976688583383144, "grad_norm": 199.8199920654297, "learning_rate": 1.826045426929374e-06, "loss": 14.75, "step": 6471 }, { "epoch": 0.42983330012618715, "grad_norm": 419.6096496582031, "learning_rate": 1.8259848054881597e-06, "loss": 19.25, "step": 6472 }, { "epoch": 0.42989971441854286, "grad_norm": 714.424560546875, "learning_rate": 1.825924174492429e-06, "loss": 18.0, "step": 6473 }, { "epoch": 0.42996612871089857, "grad_norm": 446.3755187988281, "learning_rate": 1.8258635339428835e-06, "loss": 20.9375, "step": 6474 }, { "epoch": 0.4300325430032543, "grad_norm": 255.95423889160156, "learning_rate": 1.8258028838402255e-06, "loss": 21.4844, "step": 6475 }, { "epoch": 0.43009895729561004, "grad_norm": 735.0001831054688, "learning_rate": 1.8257422241851557e-06, "loss": 22.3281, "step": 6476 }, { "epoch": 0.43016537158796575, "grad_norm": 276.0615539550781, "learning_rate": 1.8256815549783757e-06, "loss": 17.1562, "step": 6477 }, { "epoch": 0.43023178588032146, "grad_norm": 161.7783660888672, "learning_rate": 1.8256208762205878e-06, "loss": 18.9062, "step": 6478 }, { "epoch": 0.43029820017267717, "grad_norm": 404.9249267578125, "learning_rate": 1.8255601879124938e-06, "loss": 18.2969, "step": 6479 }, { "epoch": 0.4303646144650329, "grad_norm": 159.9363250732422, "learning_rate": 1.8254994900547954e-06, "loss": 18.5312, "step": 6480 }, { "epoch": 0.4304310287573886, "grad_norm": 174.7811279296875, "learning_rate": 1.825438782648195e-06, "loss": 15.8906, "step": 6481 }, { "epoch": 0.4304974430497443, "grad_norm": 654.6982421875, "learning_rate": 1.8253780656933946e-06, "loss": 27.75, "step": 6482 }, { "epoch": 0.4305638573421, "grad_norm": 513.0305786132812, "learning_rate": 1.825317339191097e-06, "loss": 20.25, "step": 6483 }, { "epoch": 0.4306302716344557, "grad_norm": 113.16588592529297, "learning_rate": 1.825256603142004e-06, "loss": 12.3438, "step": 6484 }, { "epoch": 0.43069668592681143, "grad_norm": 245.92263793945312, "learning_rate": 1.8251958575468187e-06, "loss": 25.8906, "step": 6485 }, { "epoch": 0.43076310021916714, "grad_norm": 258.2556457519531, "learning_rate": 1.8251351024062433e-06, "loss": 19.3125, "step": 6486 }, { "epoch": 0.4308295145115229, "grad_norm": 210.9698028564453, "learning_rate": 1.825074337720981e-06, "loss": 17.4219, "step": 6487 }, { "epoch": 0.4308959288038786, "grad_norm": 258.2554931640625, "learning_rate": 1.8250135634917345e-06, "loss": 24.6562, "step": 6488 }, { "epoch": 0.4309623430962343, "grad_norm": 175.44082641601562, "learning_rate": 1.8249527797192068e-06, "loss": 21.2188, "step": 6489 }, { "epoch": 0.43102875738859003, "grad_norm": 227.7216339111328, "learning_rate": 1.824891986404101e-06, "loss": 28.8125, "step": 6490 }, { "epoch": 0.43109517168094574, "grad_norm": 151.0636444091797, "learning_rate": 1.8248311835471206e-06, "loss": 14.7656, "step": 6491 }, { "epoch": 0.43116158597330145, "grad_norm": 157.51654052734375, "learning_rate": 1.8247703711489684e-06, "loss": 19.0469, "step": 6492 }, { "epoch": 0.43122800026565716, "grad_norm": 118.00993347167969, "learning_rate": 1.8247095492103484e-06, "loss": 18.125, "step": 6493 }, { "epoch": 0.4312944145580129, "grad_norm": 133.39944458007812, "learning_rate": 1.8246487177319638e-06, "loss": 16.4219, "step": 6494 }, { "epoch": 0.4313608288503686, "grad_norm": 229.41622924804688, "learning_rate": 1.8245878767145182e-06, "loss": 23.2656, "step": 6495 }, { "epoch": 0.4314272431427243, "grad_norm": 275.8479309082031, "learning_rate": 1.8245270261587158e-06, "loss": 20.9062, "step": 6496 }, { "epoch": 0.43149365743508, "grad_norm": 174.1787109375, "learning_rate": 1.8244661660652598e-06, "loss": 18.7812, "step": 6497 }, { "epoch": 0.43156007172743577, "grad_norm": 265.4161682128906, "learning_rate": 1.8244052964348549e-06, "loss": 21.9688, "step": 6498 }, { "epoch": 0.4316264860197915, "grad_norm": 135.1295166015625, "learning_rate": 1.824344417268205e-06, "loss": 14.7031, "step": 6499 }, { "epoch": 0.4316929003121472, "grad_norm": 259.66455078125, "learning_rate": 1.8242835285660141e-06, "loss": 16.9375, "step": 6500 }, { "epoch": 0.4317593146045029, "grad_norm": 458.517333984375, "learning_rate": 1.8242226303289865e-06, "loss": 21.125, "step": 6501 }, { "epoch": 0.4318257288968586, "grad_norm": 202.40513610839844, "learning_rate": 1.824161722557827e-06, "loss": 18.5156, "step": 6502 }, { "epoch": 0.4318921431892143, "grad_norm": 167.74830627441406, "learning_rate": 1.8241008052532399e-06, "loss": 16.5938, "step": 6503 }, { "epoch": 0.43195855748157, "grad_norm": 615.1797485351562, "learning_rate": 1.8240398784159299e-06, "loss": 29.0625, "step": 6504 }, { "epoch": 0.43202497177392574, "grad_norm": 201.71827697753906, "learning_rate": 1.8239789420466017e-06, "loss": 12.8047, "step": 6505 }, { "epoch": 0.43209138606628145, "grad_norm": 126.43038177490234, "learning_rate": 1.8239179961459602e-06, "loss": 15.7188, "step": 6506 }, { "epoch": 0.43215780035863716, "grad_norm": 978.780029296875, "learning_rate": 1.8238570407147106e-06, "loss": 28.0312, "step": 6507 }, { "epoch": 0.43222421465099287, "grad_norm": 133.00013732910156, "learning_rate": 1.8237960757535572e-06, "loss": 17.0156, "step": 6508 }, { "epoch": 0.43229062894334863, "grad_norm": 149.49131774902344, "learning_rate": 1.8237351012632068e-06, "loss": 16.9219, "step": 6509 }, { "epoch": 0.43235704323570434, "grad_norm": 502.1435852050781, "learning_rate": 1.823674117244363e-06, "loss": 18.3125, "step": 6510 }, { "epoch": 0.43242345752806005, "grad_norm": 423.9896545410156, "learning_rate": 1.8236131236977324e-06, "loss": 16.3906, "step": 6511 }, { "epoch": 0.43248987182041576, "grad_norm": 105.99372863769531, "learning_rate": 1.82355212062402e-06, "loss": 15.4219, "step": 6512 }, { "epoch": 0.43255628611277147, "grad_norm": 251.2140350341797, "learning_rate": 1.8234911080239311e-06, "loss": 21.6562, "step": 6513 }, { "epoch": 0.4326227004051272, "grad_norm": 195.26950073242188, "learning_rate": 1.8234300858981723e-06, "loss": 14.4688, "step": 6514 }, { "epoch": 0.4326891146974829, "grad_norm": 216.07156372070312, "learning_rate": 1.8233690542474492e-06, "loss": 19.7188, "step": 6515 }, { "epoch": 0.4327555289898386, "grad_norm": 228.5420684814453, "learning_rate": 1.8233080130724674e-06, "loss": 20.1406, "step": 6516 }, { "epoch": 0.4328219432821943, "grad_norm": 173.27291870117188, "learning_rate": 1.8232469623739336e-06, "loss": 23.25, "step": 6517 }, { "epoch": 0.43288835757455, "grad_norm": 223.95030212402344, "learning_rate": 1.8231859021525535e-06, "loss": 15.8438, "step": 6518 }, { "epoch": 0.4329547718669058, "grad_norm": 240.1011962890625, "learning_rate": 1.8231248324090333e-06, "loss": 20.875, "step": 6519 }, { "epoch": 0.4330211861592615, "grad_norm": 204.4309539794922, "learning_rate": 1.8230637531440798e-06, "loss": 15.0312, "step": 6520 }, { "epoch": 0.4330876004516172, "grad_norm": 226.8258056640625, "learning_rate": 1.8230026643583996e-06, "loss": 22.7812, "step": 6521 }, { "epoch": 0.4331540147439729, "grad_norm": 299.9203186035156, "learning_rate": 1.822941566052699e-06, "loss": 21.1719, "step": 6522 }, { "epoch": 0.4332204290363286, "grad_norm": 230.98785400390625, "learning_rate": 1.8228804582276852e-06, "loss": 19.8906, "step": 6523 }, { "epoch": 0.43328684332868433, "grad_norm": 312.29449462890625, "learning_rate": 1.8228193408840642e-06, "loss": 17.3125, "step": 6524 }, { "epoch": 0.43335325762104004, "grad_norm": 197.38943481445312, "learning_rate": 1.822758214022544e-06, "loss": 15.375, "step": 6525 }, { "epoch": 0.43341967191339575, "grad_norm": 258.0301513671875, "learning_rate": 1.8226970776438308e-06, "loss": 21.6875, "step": 6526 }, { "epoch": 0.43348608620575146, "grad_norm": 127.87212371826172, "learning_rate": 1.8226359317486324e-06, "loss": 13.375, "step": 6527 }, { "epoch": 0.4335525004981072, "grad_norm": 200.6487274169922, "learning_rate": 1.822574776337656e-06, "loss": 20.25, "step": 6528 }, { "epoch": 0.4336189147904629, "grad_norm": 225.400634765625, "learning_rate": 1.822513611411609e-06, "loss": 20.7656, "step": 6529 }, { "epoch": 0.43368532908281865, "grad_norm": 202.6874237060547, "learning_rate": 1.8224524369711984e-06, "loss": 16.0781, "step": 6530 }, { "epoch": 0.43375174337517436, "grad_norm": 205.69102478027344, "learning_rate": 1.8223912530171326e-06, "loss": 22.1094, "step": 6531 }, { "epoch": 0.43381815766753007, "grad_norm": 162.1970672607422, "learning_rate": 1.8223300595501189e-06, "loss": 24.0312, "step": 6532 }, { "epoch": 0.4338845719598858, "grad_norm": 226.7290802001953, "learning_rate": 1.8222688565708655e-06, "loss": 25.5312, "step": 6533 }, { "epoch": 0.4339509862522415, "grad_norm": 288.9637756347656, "learning_rate": 1.82220764408008e-06, "loss": 18.2344, "step": 6534 }, { "epoch": 0.4340174005445972, "grad_norm": 93.62423706054688, "learning_rate": 1.8221464220784703e-06, "loss": 11.3594, "step": 6535 }, { "epoch": 0.4340838148369529, "grad_norm": 137.91639709472656, "learning_rate": 1.8220851905667452e-06, "loss": 16.4531, "step": 6536 }, { "epoch": 0.4341502291293086, "grad_norm": 491.58563232421875, "learning_rate": 1.8220239495456125e-06, "loss": 21.6094, "step": 6537 }, { "epoch": 0.43421664342166433, "grad_norm": 531.5831298828125, "learning_rate": 1.8219626990157808e-06, "loss": 20.25, "step": 6538 }, { "epoch": 0.43428305771402004, "grad_norm": 142.8192596435547, "learning_rate": 1.8219014389779584e-06, "loss": 17.0, "step": 6539 }, { "epoch": 0.43434947200637575, "grad_norm": 335.04931640625, "learning_rate": 1.8218401694328542e-06, "loss": 15.5469, "step": 6540 }, { "epoch": 0.4344158862987315, "grad_norm": 212.66021728515625, "learning_rate": 1.8217788903811768e-06, "loss": 17.6406, "step": 6541 }, { "epoch": 0.4344823005910872, "grad_norm": 172.78126525878906, "learning_rate": 1.8217176018236354e-06, "loss": 23.0, "step": 6542 }, { "epoch": 0.43454871488344293, "grad_norm": 196.53875732421875, "learning_rate": 1.821656303760938e-06, "loss": 15.9531, "step": 6543 }, { "epoch": 0.43461512917579864, "grad_norm": 226.28933715820312, "learning_rate": 1.8215949961937947e-06, "loss": 18.8125, "step": 6544 }, { "epoch": 0.43468154346815435, "grad_norm": 336.6059875488281, "learning_rate": 1.8215336791229142e-06, "loss": 23.5, "step": 6545 }, { "epoch": 0.43474795776051006, "grad_norm": 366.559326171875, "learning_rate": 1.8214723525490056e-06, "loss": 19.4219, "step": 6546 }, { "epoch": 0.43481437205286577, "grad_norm": 227.08041381835938, "learning_rate": 1.821411016472779e-06, "loss": 22.3594, "step": 6547 }, { "epoch": 0.4348807863452215, "grad_norm": 223.02981567382812, "learning_rate": 1.821349670894943e-06, "loss": 16.2969, "step": 6548 }, { "epoch": 0.4349472006375772, "grad_norm": 235.35391235351562, "learning_rate": 1.8212883158162076e-06, "loss": 21.7188, "step": 6549 }, { "epoch": 0.4350136149299329, "grad_norm": 281.41156005859375, "learning_rate": 1.8212269512372826e-06, "loss": 22.4062, "step": 6550 }, { "epoch": 0.4350800292222886, "grad_norm": 323.4064636230469, "learning_rate": 1.821165577158878e-06, "loss": 23.5312, "step": 6551 }, { "epoch": 0.4351464435146444, "grad_norm": 286.9398498535156, "learning_rate": 1.8211041935817034e-06, "loss": 18.0781, "step": 6552 }, { "epoch": 0.4352128578070001, "grad_norm": 156.46041870117188, "learning_rate": 1.8210428005064688e-06, "loss": 18.2812, "step": 6553 }, { "epoch": 0.4352792720993558, "grad_norm": 225.47019958496094, "learning_rate": 1.8209813979338845e-06, "loss": 21.2969, "step": 6554 }, { "epoch": 0.4353456863917115, "grad_norm": 159.44749450683594, "learning_rate": 1.820919985864661e-06, "loss": 15.5469, "step": 6555 }, { "epoch": 0.4354121006840672, "grad_norm": 940.0283203125, "learning_rate": 1.8208585642995084e-06, "loss": 21.5938, "step": 6556 }, { "epoch": 0.4354785149764229, "grad_norm": 452.4514465332031, "learning_rate": 1.8207971332391372e-06, "loss": 24.25, "step": 6557 }, { "epoch": 0.43554492926877864, "grad_norm": 183.99732971191406, "learning_rate": 1.8207356926842583e-06, "loss": 21.5469, "step": 6558 }, { "epoch": 0.43561134356113435, "grad_norm": 381.2472839355469, "learning_rate": 1.8206742426355821e-06, "loss": 18.8594, "step": 6559 }, { "epoch": 0.43567775785349006, "grad_norm": 113.81608581542969, "learning_rate": 1.8206127830938191e-06, "loss": 15.8438, "step": 6560 }, { "epoch": 0.43574417214584577, "grad_norm": 362.4425964355469, "learning_rate": 1.8205513140596809e-06, "loss": 23.6094, "step": 6561 }, { "epoch": 0.4358105864382015, "grad_norm": 158.44207763671875, "learning_rate": 1.8204898355338781e-06, "loss": 19.8281, "step": 6562 }, { "epoch": 0.43587700073055724, "grad_norm": 159.41273498535156, "learning_rate": 1.8204283475171225e-06, "loss": 19.1719, "step": 6563 }, { "epoch": 0.43594341502291295, "grad_norm": 365.1080627441406, "learning_rate": 1.8203668500101244e-06, "loss": 16.6562, "step": 6564 }, { "epoch": 0.43600982931526866, "grad_norm": 365.0271911621094, "learning_rate": 1.820305343013596e-06, "loss": 16.9375, "step": 6565 }, { "epoch": 0.43607624360762437, "grad_norm": 329.29852294921875, "learning_rate": 1.820243826528248e-06, "loss": 15.2344, "step": 6566 }, { "epoch": 0.4361426578999801, "grad_norm": 587.6507568359375, "learning_rate": 1.8201823005547925e-06, "loss": 18.1094, "step": 6567 }, { "epoch": 0.4362090721923358, "grad_norm": 471.42108154296875, "learning_rate": 1.8201207650939412e-06, "loss": 20.75, "step": 6568 }, { "epoch": 0.4362754864846915, "grad_norm": 283.51788330078125, "learning_rate": 1.8200592201464059e-06, "loss": 16.0469, "step": 6569 }, { "epoch": 0.4363419007770472, "grad_norm": 239.3380584716797, "learning_rate": 1.8199976657128981e-06, "loss": 21.7344, "step": 6570 }, { "epoch": 0.4364083150694029, "grad_norm": 139.05410766601562, "learning_rate": 1.8199361017941301e-06, "loss": 19.7812, "step": 6571 }, { "epoch": 0.43647472936175863, "grad_norm": 169.27352905273438, "learning_rate": 1.8198745283908145e-06, "loss": 18.3438, "step": 6572 }, { "epoch": 0.43654114365411434, "grad_norm": 172.9163360595703, "learning_rate": 1.8198129455036631e-06, "loss": 23.8438, "step": 6573 }, { "epoch": 0.4366075579464701, "grad_norm": 333.92803955078125, "learning_rate": 1.819751353133388e-06, "loss": 23.9844, "step": 6574 }, { "epoch": 0.4366739722388258, "grad_norm": 211.20510864257812, "learning_rate": 1.819689751280702e-06, "loss": 19.4375, "step": 6575 }, { "epoch": 0.4367403865311815, "grad_norm": 213.06178283691406, "learning_rate": 1.8196281399463175e-06, "loss": 21.125, "step": 6576 }, { "epoch": 0.43680680082353723, "grad_norm": 351.6597595214844, "learning_rate": 1.8195665191309476e-06, "loss": 17.375, "step": 6577 }, { "epoch": 0.43687321511589294, "grad_norm": 222.13885498046875, "learning_rate": 1.8195048888353046e-06, "loss": 13.8906, "step": 6578 }, { "epoch": 0.43693962940824865, "grad_norm": 173.9904022216797, "learning_rate": 1.8194432490601014e-06, "loss": 19.2031, "step": 6579 }, { "epoch": 0.43700604370060436, "grad_norm": 423.4021301269531, "learning_rate": 1.8193815998060518e-06, "loss": 20.7812, "step": 6580 }, { "epoch": 0.4370724579929601, "grad_norm": 287.9979248046875, "learning_rate": 1.8193199410738677e-06, "loss": 20.9219, "step": 6581 }, { "epoch": 0.4371388722853158, "grad_norm": 285.20037841796875, "learning_rate": 1.8192582728642634e-06, "loss": 16.5, "step": 6582 }, { "epoch": 0.4372052865776715, "grad_norm": 360.26605224609375, "learning_rate": 1.8191965951779516e-06, "loss": 17.2656, "step": 6583 }, { "epoch": 0.4372717008700272, "grad_norm": 170.8833770751953, "learning_rate": 1.8191349080156463e-06, "loss": 14.2188, "step": 6584 }, { "epoch": 0.43733811516238297, "grad_norm": 258.3677673339844, "learning_rate": 1.8190732113780602e-06, "loss": 17.9844, "step": 6585 }, { "epoch": 0.4374045294547387, "grad_norm": 173.36256408691406, "learning_rate": 1.8190115052659078e-06, "loss": 20.0781, "step": 6586 }, { "epoch": 0.4374709437470944, "grad_norm": 183.1927032470703, "learning_rate": 1.8189497896799025e-06, "loss": 18.1406, "step": 6587 }, { "epoch": 0.4375373580394501, "grad_norm": 119.0404052734375, "learning_rate": 1.8188880646207582e-06, "loss": 16.6562, "step": 6588 }, { "epoch": 0.4376037723318058, "grad_norm": 284.116943359375, "learning_rate": 1.8188263300891893e-06, "loss": 24.4062, "step": 6589 }, { "epoch": 0.4376701866241615, "grad_norm": 262.6750183105469, "learning_rate": 1.8187645860859094e-06, "loss": 23.7188, "step": 6590 }, { "epoch": 0.4377366009165172, "grad_norm": 118.34024810791016, "learning_rate": 1.8187028326116325e-06, "loss": 17.7812, "step": 6591 }, { "epoch": 0.43780301520887294, "grad_norm": 235.03787231445312, "learning_rate": 1.8186410696670739e-06, "loss": 18.0469, "step": 6592 }, { "epoch": 0.43786942950122865, "grad_norm": 196.61593627929688, "learning_rate": 1.8185792972529471e-06, "loss": 19.4531, "step": 6593 }, { "epoch": 0.43793584379358436, "grad_norm": 194.02392578125, "learning_rate": 1.818517515369967e-06, "loss": 16.2188, "step": 6594 }, { "epoch": 0.4380022580859401, "grad_norm": 179.2421112060547, "learning_rate": 1.8184557240188485e-06, "loss": 15.2969, "step": 6595 }, { "epoch": 0.43806867237829583, "grad_norm": 130.60411071777344, "learning_rate": 1.818393923200306e-06, "loss": 18.2344, "step": 6596 }, { "epoch": 0.43813508667065154, "grad_norm": 1123.9215087890625, "learning_rate": 1.8183321129150546e-06, "loss": 19.1094, "step": 6597 }, { "epoch": 0.43820150096300725, "grad_norm": 637.11083984375, "learning_rate": 1.8182702931638091e-06, "loss": 20.5469, "step": 6598 }, { "epoch": 0.43826791525536296, "grad_norm": 176.37669372558594, "learning_rate": 1.8182084639472847e-06, "loss": 15.9375, "step": 6599 }, { "epoch": 0.43833432954771867, "grad_norm": 136.93408203125, "learning_rate": 1.8181466252661965e-06, "loss": 20.0312, "step": 6600 }, { "epoch": 0.4384007438400744, "grad_norm": 336.3482971191406, "learning_rate": 1.81808477712126e-06, "loss": 17.6406, "step": 6601 }, { "epoch": 0.4384671581324301, "grad_norm": 184.84051513671875, "learning_rate": 1.8180229195131906e-06, "loss": 24.625, "step": 6602 }, { "epoch": 0.4385335724247858, "grad_norm": 246.51318359375, "learning_rate": 1.8179610524427038e-06, "loss": 16.2031, "step": 6603 }, { "epoch": 0.4385999867171415, "grad_norm": 227.83009338378906, "learning_rate": 1.8178991759105152e-06, "loss": 16.9531, "step": 6604 }, { "epoch": 0.4386664010094972, "grad_norm": 188.28176879882812, "learning_rate": 1.8178372899173403e-06, "loss": 25.4062, "step": 6605 }, { "epoch": 0.438732815301853, "grad_norm": 237.0931854248047, "learning_rate": 1.8177753944638955e-06, "loss": 16.1562, "step": 6606 }, { "epoch": 0.4387992295942087, "grad_norm": 267.33099365234375, "learning_rate": 1.8177134895508966e-06, "loss": 18.3281, "step": 6607 }, { "epoch": 0.4388656438865644, "grad_norm": 247.5387420654297, "learning_rate": 1.8176515751790591e-06, "loss": 16.6875, "step": 6608 }, { "epoch": 0.4389320581789201, "grad_norm": 263.6996765136719, "learning_rate": 1.8175896513491002e-06, "loss": 26.0312, "step": 6609 }, { "epoch": 0.4389984724712758, "grad_norm": 219.91847229003906, "learning_rate": 1.8175277180617355e-06, "loss": 16.4062, "step": 6610 }, { "epoch": 0.43906488676363153, "grad_norm": 337.56884765625, "learning_rate": 1.8174657753176814e-06, "loss": 24.5938, "step": 6611 }, { "epoch": 0.43913130105598724, "grad_norm": 214.6964874267578, "learning_rate": 1.8174038231176545e-06, "loss": 17.3438, "step": 6612 }, { "epoch": 0.43919771534834295, "grad_norm": 168.6497802734375, "learning_rate": 1.8173418614623719e-06, "loss": 19.25, "step": 6613 }, { "epoch": 0.43926412964069866, "grad_norm": 189.35830688476562, "learning_rate": 1.8172798903525496e-06, "loss": 14.5312, "step": 6614 }, { "epoch": 0.4393305439330544, "grad_norm": 352.7201843261719, "learning_rate": 1.8172179097889049e-06, "loss": 23.1875, "step": 6615 }, { "epoch": 0.4393969582254101, "grad_norm": 209.11793518066406, "learning_rate": 1.8171559197721546e-06, "loss": 19.8438, "step": 6616 }, { "epoch": 0.43946337251776585, "grad_norm": 500.90606689453125, "learning_rate": 1.8170939203030158e-06, "loss": 19.3125, "step": 6617 }, { "epoch": 0.43952978681012156, "grad_norm": 204.31166076660156, "learning_rate": 1.817031911382206e-06, "loss": 19.75, "step": 6618 }, { "epoch": 0.43959620110247727, "grad_norm": 187.04541015625, "learning_rate": 1.8169698930104419e-06, "loss": 21.2812, "step": 6619 }, { "epoch": 0.439662615394833, "grad_norm": 302.6784973144531, "learning_rate": 1.8169078651884413e-06, "loss": 21.3438, "step": 6620 }, { "epoch": 0.4397290296871887, "grad_norm": 136.63352966308594, "learning_rate": 1.8168458279169214e-06, "loss": 15.2031, "step": 6621 }, { "epoch": 0.4397954439795444, "grad_norm": 515.5523681640625, "learning_rate": 1.8167837811966002e-06, "loss": 20.9219, "step": 6622 }, { "epoch": 0.4398618582719001, "grad_norm": 172.2833251953125, "learning_rate": 1.816721725028195e-06, "loss": 18.5938, "step": 6623 }, { "epoch": 0.4399282725642558, "grad_norm": 1463.114501953125, "learning_rate": 1.8166596594124242e-06, "loss": 18.0938, "step": 6624 }, { "epoch": 0.4399946868566115, "grad_norm": 822.935302734375, "learning_rate": 1.816597584350005e-06, "loss": 23.9062, "step": 6625 }, { "epoch": 0.44006110114896724, "grad_norm": 267.2371520996094, "learning_rate": 1.816535499841656e-06, "loss": 23.2656, "step": 6626 }, { "epoch": 0.44012751544132295, "grad_norm": 271.5439758300781, "learning_rate": 1.816473405888095e-06, "loss": 23.1562, "step": 6627 }, { "epoch": 0.4401939297336787, "grad_norm": 420.51171875, "learning_rate": 1.8164113024900405e-06, "loss": 30.625, "step": 6628 }, { "epoch": 0.4402603440260344, "grad_norm": 900.928466796875, "learning_rate": 1.8163491896482111e-06, "loss": 16.5781, "step": 6629 }, { "epoch": 0.44032675831839013, "grad_norm": 168.55120849609375, "learning_rate": 1.8162870673633247e-06, "loss": 22.8125, "step": 6630 }, { "epoch": 0.44039317261074584, "grad_norm": 132.10720825195312, "learning_rate": 1.8162249356361001e-06, "loss": 15.75, "step": 6631 }, { "epoch": 0.44045958690310155, "grad_norm": 258.8515319824219, "learning_rate": 1.8161627944672566e-06, "loss": 21.3281, "step": 6632 }, { "epoch": 0.44052600119545726, "grad_norm": 233.0418701171875, "learning_rate": 1.8161006438575122e-06, "loss": 29.8438, "step": 6633 }, { "epoch": 0.44059241548781297, "grad_norm": 179.3273162841797, "learning_rate": 1.816038483807586e-06, "loss": 16.2188, "step": 6634 }, { "epoch": 0.4406588297801687, "grad_norm": 121.1488037109375, "learning_rate": 1.8159763143181971e-06, "loss": 16.5781, "step": 6635 }, { "epoch": 0.4407252440725244, "grad_norm": 169.77960205078125, "learning_rate": 1.815914135390065e-06, "loss": 17.5938, "step": 6636 }, { "epoch": 0.4407916583648801, "grad_norm": 301.26495361328125, "learning_rate": 1.815851947023909e-06, "loss": 19.375, "step": 6637 }, { "epoch": 0.4408580726572358, "grad_norm": 196.3968963623047, "learning_rate": 1.8157897492204475e-06, "loss": 19.4375, "step": 6638 }, { "epoch": 0.4409244869495916, "grad_norm": 458.1899108886719, "learning_rate": 1.8157275419804007e-06, "loss": 23.3438, "step": 6639 }, { "epoch": 0.4409909012419473, "grad_norm": 357.9945373535156, "learning_rate": 1.8156653253044882e-06, "loss": 21.9844, "step": 6640 }, { "epoch": 0.441057315534303, "grad_norm": 222.75491333007812, "learning_rate": 1.8156030991934297e-06, "loss": 18.6094, "step": 6641 }, { "epoch": 0.4411237298266587, "grad_norm": 264.0449523925781, "learning_rate": 1.8155408636479448e-06, "loss": 22.9531, "step": 6642 }, { "epoch": 0.4411901441190144, "grad_norm": 221.27825927734375, "learning_rate": 1.8154786186687532e-06, "loss": 19.0469, "step": 6643 }, { "epoch": 0.4412565584113701, "grad_norm": 363.2460021972656, "learning_rate": 1.8154163642565755e-06, "loss": 17.7812, "step": 6644 }, { "epoch": 0.44132297270372584, "grad_norm": 178.4960479736328, "learning_rate": 1.8153541004121312e-06, "loss": 15.3438, "step": 6645 }, { "epoch": 0.44138938699608155, "grad_norm": 266.1373596191406, "learning_rate": 1.815291827136141e-06, "loss": 20.25, "step": 6646 }, { "epoch": 0.44145580128843726, "grad_norm": 223.0067138671875, "learning_rate": 1.815229544429325e-06, "loss": 20.2656, "step": 6647 }, { "epoch": 0.44152221558079296, "grad_norm": 281.22198486328125, "learning_rate": 1.815167252292404e-06, "loss": 23.2188, "step": 6648 }, { "epoch": 0.4415886298731487, "grad_norm": 138.2582550048828, "learning_rate": 1.8151049507260978e-06, "loss": 17.3438, "step": 6649 }, { "epoch": 0.44165504416550444, "grad_norm": 266.6038818359375, "learning_rate": 1.8150426397311279e-06, "loss": 17.1094, "step": 6650 }, { "epoch": 0.44172145845786015, "grad_norm": 328.989990234375, "learning_rate": 1.8149803193082144e-06, "loss": 18.375, "step": 6651 }, { "epoch": 0.44178787275021586, "grad_norm": 575.184814453125, "learning_rate": 1.814917989458079e-06, "loss": 17.9375, "step": 6652 }, { "epoch": 0.44185428704257157, "grad_norm": 186.73800659179688, "learning_rate": 1.8148556501814417e-06, "loss": 16.6406, "step": 6653 }, { "epoch": 0.4419207013349273, "grad_norm": 502.4089050292969, "learning_rate": 1.8147933014790242e-06, "loss": 27.1562, "step": 6654 }, { "epoch": 0.441987115627283, "grad_norm": 236.11915588378906, "learning_rate": 1.8147309433515478e-06, "loss": 20.4062, "step": 6655 }, { "epoch": 0.4420535299196387, "grad_norm": 299.22113037109375, "learning_rate": 1.8146685757997338e-06, "loss": 17.7188, "step": 6656 }, { "epoch": 0.4421199442119944, "grad_norm": 205.2014617919922, "learning_rate": 1.814606198824303e-06, "loss": 19.8125, "step": 6657 }, { "epoch": 0.4421863585043501, "grad_norm": 146.34457397460938, "learning_rate": 1.814543812425978e-06, "loss": 15.2031, "step": 6658 }, { "epoch": 0.44225277279670583, "grad_norm": 2140.353759765625, "learning_rate": 1.8144814166054796e-06, "loss": 19.5938, "step": 6659 }, { "epoch": 0.44231918708906154, "grad_norm": 171.34091186523438, "learning_rate": 1.8144190113635296e-06, "loss": 21.1719, "step": 6660 }, { "epoch": 0.4423856013814173, "grad_norm": 186.0833740234375, "learning_rate": 1.8143565967008503e-06, "loss": 20.7031, "step": 6661 }, { "epoch": 0.442452015673773, "grad_norm": 579.1019897460938, "learning_rate": 1.8142941726181636e-06, "loss": 15.0469, "step": 6662 }, { "epoch": 0.4425184299661287, "grad_norm": 308.77044677734375, "learning_rate": 1.8142317391161913e-06, "loss": 26.4375, "step": 6663 }, { "epoch": 0.44258484425848443, "grad_norm": 287.6676940917969, "learning_rate": 1.8141692961956556e-06, "loss": 19.9688, "step": 6664 }, { "epoch": 0.44265125855084014, "grad_norm": 323.3377990722656, "learning_rate": 1.8141068438572794e-06, "loss": 27.3125, "step": 6665 }, { "epoch": 0.44271767284319585, "grad_norm": 301.1867980957031, "learning_rate": 1.8140443821017843e-06, "loss": 23.9688, "step": 6666 }, { "epoch": 0.44278408713555156, "grad_norm": 291.6349792480469, "learning_rate": 1.8139819109298932e-06, "loss": 27.5938, "step": 6667 }, { "epoch": 0.4428505014279073, "grad_norm": 187.2499237060547, "learning_rate": 1.813919430342329e-06, "loss": 16.6406, "step": 6668 }, { "epoch": 0.442916915720263, "grad_norm": 277.364013671875, "learning_rate": 1.813856940339814e-06, "loss": 19.1562, "step": 6669 }, { "epoch": 0.4429833300126187, "grad_norm": 277.89434814453125, "learning_rate": 1.813794440923071e-06, "loss": 14.75, "step": 6670 }, { "epoch": 0.44304974430497446, "grad_norm": 185.41134643554688, "learning_rate": 1.8137319320928235e-06, "loss": 16.2812, "step": 6671 }, { "epoch": 0.44311615859733017, "grad_norm": 363.78302001953125, "learning_rate": 1.8136694138497939e-06, "loss": 19.25, "step": 6672 }, { "epoch": 0.4431825728896859, "grad_norm": 152.5916748046875, "learning_rate": 1.813606886194706e-06, "loss": 15.4844, "step": 6673 }, { "epoch": 0.4432489871820416, "grad_norm": 278.6358642578125, "learning_rate": 1.8135443491282828e-06, "loss": 17.0, "step": 6674 }, { "epoch": 0.4433154014743973, "grad_norm": 151.828369140625, "learning_rate": 1.8134818026512476e-06, "loss": 11.1328, "step": 6675 }, { "epoch": 0.443381815766753, "grad_norm": 366.9125061035156, "learning_rate": 1.8134192467643238e-06, "loss": 16.9219, "step": 6676 }, { "epoch": 0.4434482300591087, "grad_norm": 260.8114318847656, "learning_rate": 1.8133566814682355e-06, "loss": 17.2656, "step": 6677 }, { "epoch": 0.4435146443514644, "grad_norm": 407.1119079589844, "learning_rate": 1.813294106763706e-06, "loss": 18.7969, "step": 6678 }, { "epoch": 0.44358105864382014, "grad_norm": 202.4217529296875, "learning_rate": 1.8132315226514595e-06, "loss": 15.1719, "step": 6679 }, { "epoch": 0.44364747293617585, "grad_norm": 288.1859436035156, "learning_rate": 1.8131689291322192e-06, "loss": 20.5, "step": 6680 }, { "epoch": 0.44371388722853156, "grad_norm": 140.55271911621094, "learning_rate": 1.8131063262067098e-06, "loss": 18.3906, "step": 6681 }, { "epoch": 0.4437803015208873, "grad_norm": 226.57437133789062, "learning_rate": 1.8130437138756554e-06, "loss": 19.7188, "step": 6682 }, { "epoch": 0.44384671581324303, "grad_norm": 269.6678161621094, "learning_rate": 1.8129810921397802e-06, "loss": 17.3594, "step": 6683 }, { "epoch": 0.44391313010559874, "grad_norm": 370.5259704589844, "learning_rate": 1.8129184609998083e-06, "loss": 16.5625, "step": 6684 }, { "epoch": 0.44397954439795445, "grad_norm": 172.87611389160156, "learning_rate": 1.8128558204564644e-06, "loss": 17.7812, "step": 6685 }, { "epoch": 0.44404595869031016, "grad_norm": 131.45150756835938, "learning_rate": 1.8127931705104733e-06, "loss": 12.3594, "step": 6686 }, { "epoch": 0.44411237298266587, "grad_norm": 140.16842651367188, "learning_rate": 1.8127305111625594e-06, "loss": 15.7344, "step": 6687 }, { "epoch": 0.4441787872750216, "grad_norm": 258.4992980957031, "learning_rate": 1.8126678424134476e-06, "loss": 22.0625, "step": 6688 }, { "epoch": 0.4442452015673773, "grad_norm": 179.13070678710938, "learning_rate": 1.8126051642638627e-06, "loss": 17.2656, "step": 6689 }, { "epoch": 0.444311615859733, "grad_norm": 679.0914306640625, "learning_rate": 1.8125424767145298e-06, "loss": 18.0781, "step": 6690 }, { "epoch": 0.4443780301520887, "grad_norm": 320.52130126953125, "learning_rate": 1.8124797797661744e-06, "loss": 22.125, "step": 6691 }, { "epoch": 0.4444444444444444, "grad_norm": 288.5365905761719, "learning_rate": 1.812417073419521e-06, "loss": 20.5781, "step": 6692 }, { "epoch": 0.4445108587368002, "grad_norm": 184.49485778808594, "learning_rate": 1.8123543576752955e-06, "loss": 16.4531, "step": 6693 }, { "epoch": 0.4445772730291559, "grad_norm": 278.43621826171875, "learning_rate": 1.8122916325342232e-06, "loss": 25.6406, "step": 6694 }, { "epoch": 0.4446436873215116, "grad_norm": 229.77882385253906, "learning_rate": 1.8122288979970299e-06, "loss": 20.1094, "step": 6695 }, { "epoch": 0.4447101016138673, "grad_norm": 176.9989013671875, "learning_rate": 1.812166154064441e-06, "loss": 20.7812, "step": 6696 }, { "epoch": 0.444776515906223, "grad_norm": 288.9454345703125, "learning_rate": 1.8121034007371823e-06, "loss": 18.4688, "step": 6697 }, { "epoch": 0.44484293019857873, "grad_norm": 424.45379638671875, "learning_rate": 1.8120406380159797e-06, "loss": 25.625, "step": 6698 }, { "epoch": 0.44490934449093444, "grad_norm": 179.7084503173828, "learning_rate": 1.8119778659015593e-06, "loss": 17.875, "step": 6699 }, { "epoch": 0.44497575878329015, "grad_norm": 221.64968872070312, "learning_rate": 1.811915084394647e-06, "loss": 15.7969, "step": 6700 }, { "epoch": 0.44504217307564586, "grad_norm": 131.62709045410156, "learning_rate": 1.8118522934959692e-06, "loss": 18.375, "step": 6701 }, { "epoch": 0.4451085873680016, "grad_norm": 155.18478393554688, "learning_rate": 1.8117894932062526e-06, "loss": 14.7812, "step": 6702 }, { "epoch": 0.4451750016603573, "grad_norm": 119.55007934570312, "learning_rate": 1.8117266835262229e-06, "loss": 13.7969, "step": 6703 }, { "epoch": 0.44524141595271305, "grad_norm": 323.054931640625, "learning_rate": 1.811663864456607e-06, "loss": 16.0156, "step": 6704 }, { "epoch": 0.44530783024506876, "grad_norm": 279.2728271484375, "learning_rate": 1.8116010359981318e-06, "loss": 19.4531, "step": 6705 }, { "epoch": 0.44537424453742447, "grad_norm": 287.15252685546875, "learning_rate": 1.8115381981515236e-06, "loss": 15.625, "step": 6706 }, { "epoch": 0.4454406588297802, "grad_norm": 144.09902954101562, "learning_rate": 1.8114753509175094e-06, "loss": 17.0938, "step": 6707 }, { "epoch": 0.4455070731221359, "grad_norm": 134.3903350830078, "learning_rate": 1.8114124942968164e-06, "loss": 13.9375, "step": 6708 }, { "epoch": 0.4455734874144916, "grad_norm": 834.318115234375, "learning_rate": 1.8113496282901714e-06, "loss": 14.4766, "step": 6709 }, { "epoch": 0.4456399017068473, "grad_norm": 205.90850830078125, "learning_rate": 1.8112867528983019e-06, "loss": 21.2812, "step": 6710 }, { "epoch": 0.445706315999203, "grad_norm": 194.43438720703125, "learning_rate": 1.8112238681219351e-06, "loss": 17.7188, "step": 6711 }, { "epoch": 0.4457727302915587, "grad_norm": 275.9717712402344, "learning_rate": 1.8111609739617984e-06, "loss": 20.4844, "step": 6712 }, { "epoch": 0.44583914458391444, "grad_norm": 271.69842529296875, "learning_rate": 1.8110980704186192e-06, "loss": 20.75, "step": 6713 }, { "epoch": 0.44590555887627015, "grad_norm": 233.1396026611328, "learning_rate": 1.811035157493125e-06, "loss": 18.0312, "step": 6714 }, { "epoch": 0.4459719731686259, "grad_norm": 1982.6123046875, "learning_rate": 1.810972235186044e-06, "loss": 20.3438, "step": 6715 }, { "epoch": 0.4460383874609816, "grad_norm": 289.5739440917969, "learning_rate": 1.810909303498104e-06, "loss": 13.875, "step": 6716 }, { "epoch": 0.44610480175333733, "grad_norm": 194.6631317138672, "learning_rate": 1.8108463624300326e-06, "loss": 19.5, "step": 6717 }, { "epoch": 0.44617121604569304, "grad_norm": 189.8536376953125, "learning_rate": 1.810783411982558e-06, "loss": 15.1562, "step": 6718 }, { "epoch": 0.44623763033804875, "grad_norm": 163.17054748535156, "learning_rate": 1.8107204521564086e-06, "loss": 14.4688, "step": 6719 }, { "epoch": 0.44630404463040446, "grad_norm": 323.83514404296875, "learning_rate": 1.8106574829523122e-06, "loss": 24.4688, "step": 6720 }, { "epoch": 0.44637045892276017, "grad_norm": 1064.3018798828125, "learning_rate": 1.8105945043709975e-06, "loss": 16.1562, "step": 6721 }, { "epoch": 0.4464368732151159, "grad_norm": 139.25750732421875, "learning_rate": 1.810531516413193e-06, "loss": 15.0625, "step": 6722 }, { "epoch": 0.4465032875074716, "grad_norm": 160.5968475341797, "learning_rate": 1.8104685190796278e-06, "loss": 12.8906, "step": 6723 }, { "epoch": 0.4465697017998273, "grad_norm": 288.829345703125, "learning_rate": 1.8104055123710296e-06, "loss": 18.0938, "step": 6724 }, { "epoch": 0.446636116092183, "grad_norm": 170.68341064453125, "learning_rate": 1.8103424962881278e-06, "loss": 17.5156, "step": 6725 }, { "epoch": 0.4467025303845388, "grad_norm": 278.61114501953125, "learning_rate": 1.8102794708316514e-06, "loss": 26.5625, "step": 6726 }, { "epoch": 0.4467689446768945, "grad_norm": 316.949951171875, "learning_rate": 1.8102164360023293e-06, "loss": 24.8906, "step": 6727 }, { "epoch": 0.4468353589692502, "grad_norm": 96.91716766357422, "learning_rate": 1.8101533918008905e-06, "loss": 16.6875, "step": 6728 }, { "epoch": 0.4469017732616059, "grad_norm": 585.2666015625, "learning_rate": 1.8100903382280646e-06, "loss": 21.1719, "step": 6729 }, { "epoch": 0.4469681875539616, "grad_norm": 221.62506103515625, "learning_rate": 1.8100272752845806e-06, "loss": 20.6094, "step": 6730 }, { "epoch": 0.4470346018463173, "grad_norm": 164.97938537597656, "learning_rate": 1.8099642029711686e-06, "loss": 12.7188, "step": 6731 }, { "epoch": 0.44710101613867304, "grad_norm": 731.605224609375, "learning_rate": 1.8099011212885574e-06, "loss": 23.4688, "step": 6732 }, { "epoch": 0.44716743043102875, "grad_norm": 283.2335205078125, "learning_rate": 1.809838030237477e-06, "loss": 28.5781, "step": 6733 }, { "epoch": 0.44723384472338445, "grad_norm": 170.35848999023438, "learning_rate": 1.8097749298186576e-06, "loss": 16.5, "step": 6734 }, { "epoch": 0.44730025901574016, "grad_norm": 435.2010803222656, "learning_rate": 1.8097118200328284e-06, "loss": 11.6406, "step": 6735 }, { "epoch": 0.4473666733080959, "grad_norm": 265.7499694824219, "learning_rate": 1.8096487008807197e-06, "loss": 18.7656, "step": 6736 }, { "epoch": 0.44743308760045164, "grad_norm": 242.96104431152344, "learning_rate": 1.809585572363062e-06, "loss": 16.5469, "step": 6737 }, { "epoch": 0.44749950189280735, "grad_norm": 293.9724426269531, "learning_rate": 1.809522434480585e-06, "loss": 21.125, "step": 6738 }, { "epoch": 0.44756591618516306, "grad_norm": 280.99237060546875, "learning_rate": 1.8094592872340195e-06, "loss": 16.8906, "step": 6739 }, { "epoch": 0.44763233047751877, "grad_norm": 342.77813720703125, "learning_rate": 1.8093961306240955e-06, "loss": 19.6562, "step": 6740 }, { "epoch": 0.4476987447698745, "grad_norm": 309.92181396484375, "learning_rate": 1.8093329646515438e-06, "loss": 18.0, "step": 6741 }, { "epoch": 0.4477651590622302, "grad_norm": 386.1025695800781, "learning_rate": 1.8092697893170952e-06, "loss": 26.4375, "step": 6742 }, { "epoch": 0.4478315733545859, "grad_norm": 296.0861511230469, "learning_rate": 1.8092066046214804e-06, "loss": 18.4844, "step": 6743 }, { "epoch": 0.4478979876469416, "grad_norm": 243.2169647216797, "learning_rate": 1.8091434105654302e-06, "loss": 16.9375, "step": 6744 }, { "epoch": 0.4479644019392973, "grad_norm": 223.68511962890625, "learning_rate": 1.8090802071496754e-06, "loss": 14.6406, "step": 6745 }, { "epoch": 0.44803081623165303, "grad_norm": 354.8623046875, "learning_rate": 1.8090169943749474e-06, "loss": 17.7188, "step": 6746 }, { "epoch": 0.4480972305240088, "grad_norm": 232.5537109375, "learning_rate": 1.8089537722419772e-06, "loss": 16.0469, "step": 6747 }, { "epoch": 0.4481636448163645, "grad_norm": 335.9494323730469, "learning_rate": 1.8088905407514963e-06, "loss": 18.125, "step": 6748 }, { "epoch": 0.4482300591087202, "grad_norm": 180.4803924560547, "learning_rate": 1.8088272999042362e-06, "loss": 18.25, "step": 6749 }, { "epoch": 0.4482964734010759, "grad_norm": 296.5313720703125, "learning_rate": 1.8087640497009283e-06, "loss": 22.7969, "step": 6750 }, { "epoch": 0.44836288769343163, "grad_norm": 373.0848693847656, "learning_rate": 1.808700790142304e-06, "loss": 16.6719, "step": 6751 }, { "epoch": 0.44842930198578734, "grad_norm": 154.8417205810547, "learning_rate": 1.8086375212290955e-06, "loss": 21.2812, "step": 6752 }, { "epoch": 0.44849571627814305, "grad_norm": 271.6968994140625, "learning_rate": 1.8085742429620345e-06, "loss": 21.4688, "step": 6753 }, { "epoch": 0.44856213057049876, "grad_norm": 288.6866149902344, "learning_rate": 1.8085109553418527e-06, "loss": 19.2969, "step": 6754 }, { "epoch": 0.4486285448628545, "grad_norm": 279.9149475097656, "learning_rate": 1.8084476583692822e-06, "loss": 20.75, "step": 6755 }, { "epoch": 0.4486949591552102, "grad_norm": 131.7559814453125, "learning_rate": 1.808384352045056e-06, "loss": 13.5938, "step": 6756 }, { "epoch": 0.4487613734475659, "grad_norm": 175.45753479003906, "learning_rate": 1.808321036369905e-06, "loss": 22.9219, "step": 6757 }, { "epoch": 0.44882778773992166, "grad_norm": 139.03175354003906, "learning_rate": 1.808257711344563e-06, "loss": 11.4297, "step": 6758 }, { "epoch": 0.44889420203227737, "grad_norm": 196.33908081054688, "learning_rate": 1.8081943769697615e-06, "loss": 17.8906, "step": 6759 }, { "epoch": 0.4489606163246331, "grad_norm": 148.21046447753906, "learning_rate": 1.8081310332462337e-06, "loss": 15.7344, "step": 6760 }, { "epoch": 0.4490270306169888, "grad_norm": 335.5892333984375, "learning_rate": 1.808067680174712e-06, "loss": 18.8438, "step": 6761 }, { "epoch": 0.4490934449093445, "grad_norm": 253.92242431640625, "learning_rate": 1.808004317755929e-06, "loss": 13.6094, "step": 6762 }, { "epoch": 0.4491598592017002, "grad_norm": 511.302490234375, "learning_rate": 1.8079409459906182e-06, "loss": 26.0781, "step": 6763 }, { "epoch": 0.4492262734940559, "grad_norm": 532.2079467773438, "learning_rate": 1.8078775648795129e-06, "loss": 16.4688, "step": 6764 }, { "epoch": 0.4492926877864116, "grad_norm": 260.9821472167969, "learning_rate": 1.807814174423345e-06, "loss": 28.7188, "step": 6765 }, { "epoch": 0.44935910207876734, "grad_norm": 195.8964080810547, "learning_rate": 1.8077507746228492e-06, "loss": 20.8906, "step": 6766 }, { "epoch": 0.44942551637112305, "grad_norm": 153.2120819091797, "learning_rate": 1.807687365478758e-06, "loss": 19.25, "step": 6767 }, { "epoch": 0.44949193066347876, "grad_norm": 329.80792236328125, "learning_rate": 1.807623946991805e-06, "loss": 22.1719, "step": 6768 }, { "epoch": 0.4495583449558345, "grad_norm": 226.3430938720703, "learning_rate": 1.8075605191627238e-06, "loss": 22.9531, "step": 6769 }, { "epoch": 0.44962475924819023, "grad_norm": 182.78338623046875, "learning_rate": 1.8074970819922482e-06, "loss": 17.2812, "step": 6770 }, { "epoch": 0.44969117354054594, "grad_norm": 200.5898895263672, "learning_rate": 1.8074336354811122e-06, "loss": 23.0156, "step": 6771 }, { "epoch": 0.44975758783290165, "grad_norm": 162.74716186523438, "learning_rate": 1.8073701796300493e-06, "loss": 17.9844, "step": 6772 }, { "epoch": 0.44982400212525736, "grad_norm": 353.49639892578125, "learning_rate": 1.807306714439794e-06, "loss": 23.875, "step": 6773 }, { "epoch": 0.44989041641761307, "grad_norm": 306.34613037109375, "learning_rate": 1.8072432399110797e-06, "loss": 28.4219, "step": 6774 }, { "epoch": 0.4499568307099688, "grad_norm": 132.59521484375, "learning_rate": 1.8071797560446415e-06, "loss": 16.3281, "step": 6775 }, { "epoch": 0.4500232450023245, "grad_norm": 163.82704162597656, "learning_rate": 1.8071162628412131e-06, "loss": 17.3906, "step": 6776 }, { "epoch": 0.4500896592946802, "grad_norm": 228.83340454101562, "learning_rate": 1.8070527603015293e-06, "loss": 21.8125, "step": 6777 }, { "epoch": 0.4501560735870359, "grad_norm": 239.5203857421875, "learning_rate": 1.8069892484263246e-06, "loss": 16.4219, "step": 6778 }, { "epoch": 0.4502224878793916, "grad_norm": 166.79833984375, "learning_rate": 1.8069257272163333e-06, "loss": 15.9531, "step": 6779 }, { "epoch": 0.4502889021717474, "grad_norm": 297.55224609375, "learning_rate": 1.8068621966722906e-06, "loss": 15.6875, "step": 6780 }, { "epoch": 0.4503553164641031, "grad_norm": 544.8529052734375, "learning_rate": 1.8067986567949313e-06, "loss": 24.3125, "step": 6781 }, { "epoch": 0.4504217307564588, "grad_norm": 272.75482177734375, "learning_rate": 1.8067351075849904e-06, "loss": 19.0, "step": 6782 }, { "epoch": 0.4504881450488145, "grad_norm": 305.3340148925781, "learning_rate": 1.806671549043203e-06, "loss": 17.0312, "step": 6783 }, { "epoch": 0.4505545593411702, "grad_norm": 236.48599243164062, "learning_rate": 1.8066079811703043e-06, "loss": 23.0, "step": 6784 }, { "epoch": 0.45062097363352593, "grad_norm": 195.82638549804688, "learning_rate": 1.8065444039670296e-06, "loss": 14.9531, "step": 6785 }, { "epoch": 0.45068738792588164, "grad_norm": 170.8313751220703, "learning_rate": 1.806480817434114e-06, "loss": 13.0781, "step": 6786 }, { "epoch": 0.45075380221823735, "grad_norm": 254.4202117919922, "learning_rate": 1.8064172215722939e-06, "loss": 22.1719, "step": 6787 }, { "epoch": 0.45082021651059306, "grad_norm": 154.8213348388672, "learning_rate": 1.8063536163823042e-06, "loss": 14.9844, "step": 6788 }, { "epoch": 0.4508866308029488, "grad_norm": 216.14736938476562, "learning_rate": 1.8062900018648808e-06, "loss": 17.2812, "step": 6789 }, { "epoch": 0.4509530450953045, "grad_norm": 559.5089721679688, "learning_rate": 1.8062263780207594e-06, "loss": 22.125, "step": 6790 }, { "epoch": 0.45101945938766025, "grad_norm": 379.2005920410156, "learning_rate": 1.8061627448506762e-06, "loss": 24.7188, "step": 6791 }, { "epoch": 0.45108587368001596, "grad_norm": 538.3079833984375, "learning_rate": 1.8060991023553673e-06, "loss": 26.4062, "step": 6792 }, { "epoch": 0.45115228797237167, "grad_norm": 350.587158203125, "learning_rate": 1.8060354505355688e-06, "loss": 24.0156, "step": 6793 }, { "epoch": 0.4512187022647274, "grad_norm": 184.468017578125, "learning_rate": 1.805971789392017e-06, "loss": 18.0312, "step": 6794 }, { "epoch": 0.4512851165570831, "grad_norm": 143.07818603515625, "learning_rate": 1.8059081189254485e-06, "loss": 15.3281, "step": 6795 }, { "epoch": 0.4513515308494388, "grad_norm": 186.81173706054688, "learning_rate": 1.8058444391365994e-06, "loss": 15.5156, "step": 6796 }, { "epoch": 0.4514179451417945, "grad_norm": 214.9300994873047, "learning_rate": 1.8057807500262063e-06, "loss": 17.2656, "step": 6797 }, { "epoch": 0.4514843594341502, "grad_norm": 189.66476440429688, "learning_rate": 1.8057170515950064e-06, "loss": 15.625, "step": 6798 }, { "epoch": 0.4515507737265059, "grad_norm": 322.2180480957031, "learning_rate": 1.805653343843736e-06, "loss": 27.7812, "step": 6799 }, { "epoch": 0.45161718801886164, "grad_norm": 511.0039978027344, "learning_rate": 1.8055896267731327e-06, "loss": 21.125, "step": 6800 }, { "epoch": 0.45168360231121735, "grad_norm": 223.86709594726562, "learning_rate": 1.805525900383933e-06, "loss": 19.5, "step": 6801 }, { "epoch": 0.4517500166035731, "grad_norm": 377.56414794921875, "learning_rate": 1.8054621646768736e-06, "loss": 20.5156, "step": 6802 }, { "epoch": 0.4518164308959288, "grad_norm": 428.6448059082031, "learning_rate": 1.8053984196526933e-06, "loss": 16.2812, "step": 6803 }, { "epoch": 0.45188284518828453, "grad_norm": 217.27040100097656, "learning_rate": 1.8053346653121278e-06, "loss": 15.125, "step": 6804 }, { "epoch": 0.45194925948064024, "grad_norm": 216.51905822753906, "learning_rate": 1.8052709016559153e-06, "loss": 18.9062, "step": 6805 }, { "epoch": 0.45201567377299595, "grad_norm": 233.83477783203125, "learning_rate": 1.8052071286847936e-06, "loss": 20.0938, "step": 6806 }, { "epoch": 0.45208208806535166, "grad_norm": 189.1671905517578, "learning_rate": 1.8051433463995003e-06, "loss": 17.4844, "step": 6807 }, { "epoch": 0.45214850235770737, "grad_norm": 354.2171936035156, "learning_rate": 1.8050795548007727e-06, "loss": 29.6562, "step": 6808 }, { "epoch": 0.4522149166500631, "grad_norm": 362.8148498535156, "learning_rate": 1.8050157538893492e-06, "loss": 19.7344, "step": 6809 }, { "epoch": 0.4522813309424188, "grad_norm": 219.9755096435547, "learning_rate": 1.8049519436659675e-06, "loss": 17.1562, "step": 6810 }, { "epoch": 0.4523477452347745, "grad_norm": 346.99334716796875, "learning_rate": 1.8048881241313661e-06, "loss": 23.125, "step": 6811 }, { "epoch": 0.4524141595271302, "grad_norm": 107.2380142211914, "learning_rate": 1.804824295286283e-06, "loss": 15.1719, "step": 6812 }, { "epoch": 0.452480573819486, "grad_norm": 551.7142944335938, "learning_rate": 1.8047604571314562e-06, "loss": 15.0156, "step": 6813 }, { "epoch": 0.4525469881118417, "grad_norm": 563.1181640625, "learning_rate": 1.8046966096676247e-06, "loss": 21.5781, "step": 6814 }, { "epoch": 0.4526134024041974, "grad_norm": 287.8948974609375, "learning_rate": 1.804632752895527e-06, "loss": 26.7812, "step": 6815 }, { "epoch": 0.4526798166965531, "grad_norm": 414.58465576171875, "learning_rate": 1.8045688868159016e-06, "loss": 21.4531, "step": 6816 }, { "epoch": 0.4527462309889088, "grad_norm": 124.37887573242188, "learning_rate": 1.804505011429487e-06, "loss": 18.8281, "step": 6817 }, { "epoch": 0.4528126452812645, "grad_norm": 239.8953857421875, "learning_rate": 1.8044411267370224e-06, "loss": 16.1875, "step": 6818 }, { "epoch": 0.45287905957362024, "grad_norm": 546.60205078125, "learning_rate": 1.8043772327392466e-06, "loss": 26.0312, "step": 6819 }, { "epoch": 0.45294547386597594, "grad_norm": 1341.4447021484375, "learning_rate": 1.8043133294368992e-06, "loss": 23.3125, "step": 6820 }, { "epoch": 0.45301188815833165, "grad_norm": 166.7917938232422, "learning_rate": 1.8042494168307188e-06, "loss": 29.9062, "step": 6821 }, { "epoch": 0.45307830245068736, "grad_norm": 183.30484008789062, "learning_rate": 1.8041854949214445e-06, "loss": 19.4219, "step": 6822 }, { "epoch": 0.45314471674304313, "grad_norm": 203.69729614257812, "learning_rate": 1.8041215637098166e-06, "loss": 14.0469, "step": 6823 }, { "epoch": 0.45321113103539884, "grad_norm": 194.75308227539062, "learning_rate": 1.804057623196574e-06, "loss": 18.1094, "step": 6824 }, { "epoch": 0.45327754532775455, "grad_norm": 198.7252655029297, "learning_rate": 1.8039936733824566e-06, "loss": 18.6719, "step": 6825 }, { "epoch": 0.45334395962011026, "grad_norm": 158.34410095214844, "learning_rate": 1.8039297142682037e-06, "loss": 13.1719, "step": 6826 }, { "epoch": 0.45341037391246597, "grad_norm": 149.85983276367188, "learning_rate": 1.8038657458545556e-06, "loss": 15.5938, "step": 6827 }, { "epoch": 0.4534767882048217, "grad_norm": 267.39312744140625, "learning_rate": 1.8038017681422522e-06, "loss": 21.8281, "step": 6828 }, { "epoch": 0.4535432024971774, "grad_norm": 195.76612854003906, "learning_rate": 1.8037377811320333e-06, "loss": 25.9062, "step": 6829 }, { "epoch": 0.4536096167895331, "grad_norm": 115.72747039794922, "learning_rate": 1.8036737848246393e-06, "loss": 16.5, "step": 6830 }, { "epoch": 0.4536760310818888, "grad_norm": 710.3801879882812, "learning_rate": 1.8036097792208102e-06, "loss": 26.75, "step": 6831 }, { "epoch": 0.4537424453742445, "grad_norm": 203.04965209960938, "learning_rate": 1.8035457643212868e-06, "loss": 20.0938, "step": 6832 }, { "epoch": 0.45380885966660023, "grad_norm": 149.36846923828125, "learning_rate": 1.803481740126809e-06, "loss": 14.6406, "step": 6833 }, { "epoch": 0.453875273958956, "grad_norm": 175.96238708496094, "learning_rate": 1.8034177066381182e-06, "loss": 16.6406, "step": 6834 }, { "epoch": 0.4539416882513117, "grad_norm": 101.63972473144531, "learning_rate": 1.8033536638559547e-06, "loss": 16.7344, "step": 6835 }, { "epoch": 0.4540081025436674, "grad_norm": 300.7713928222656, "learning_rate": 1.8032896117810587e-06, "loss": 20.5312, "step": 6836 }, { "epoch": 0.4540745168360231, "grad_norm": 118.22566223144531, "learning_rate": 1.8032255504141719e-06, "loss": 11.7969, "step": 6837 }, { "epoch": 0.45414093112837883, "grad_norm": 172.9648895263672, "learning_rate": 1.803161479756035e-06, "loss": 17.625, "step": 6838 }, { "epoch": 0.45420734542073454, "grad_norm": 162.4067840576172, "learning_rate": 1.8030973998073894e-06, "loss": 18.6094, "step": 6839 }, { "epoch": 0.45427375971309025, "grad_norm": 166.90267944335938, "learning_rate": 1.8030333105689764e-06, "loss": 16.3281, "step": 6840 }, { "epoch": 0.45434017400544596, "grad_norm": 166.9287872314453, "learning_rate": 1.8029692120415366e-06, "loss": 23.6562, "step": 6841 }, { "epoch": 0.4544065882978017, "grad_norm": 231.54208374023438, "learning_rate": 1.8029051042258122e-06, "loss": 21.375, "step": 6842 }, { "epoch": 0.4544730025901574, "grad_norm": 191.4614715576172, "learning_rate": 1.8028409871225448e-06, "loss": 23.5312, "step": 6843 }, { "epoch": 0.4545394168825131, "grad_norm": 220.46539306640625, "learning_rate": 1.8027768607324756e-06, "loss": 19.5781, "step": 6844 }, { "epoch": 0.45460583117486886, "grad_norm": 230.112060546875, "learning_rate": 1.8027127250563465e-06, "loss": 14.3438, "step": 6845 }, { "epoch": 0.45467224546722457, "grad_norm": 211.13194274902344, "learning_rate": 1.8026485800948996e-06, "loss": 18.4531, "step": 6846 }, { "epoch": 0.4547386597595803, "grad_norm": 296.58843994140625, "learning_rate": 1.802584425848877e-06, "loss": 21.1406, "step": 6847 }, { "epoch": 0.454805074051936, "grad_norm": 191.13473510742188, "learning_rate": 1.80252026231902e-06, "loss": 26.5312, "step": 6848 }, { "epoch": 0.4548714883442917, "grad_norm": 487.8001708984375, "learning_rate": 1.8024560895060716e-06, "loss": 17.6875, "step": 6849 }, { "epoch": 0.4549379026366474, "grad_norm": 207.95443725585938, "learning_rate": 1.802391907410774e-06, "loss": 21.7188, "step": 6850 }, { "epoch": 0.4550043169290031, "grad_norm": 332.7835693359375, "learning_rate": 1.8023277160338695e-06, "loss": 22.3125, "step": 6851 }, { "epoch": 0.4550707312213588, "grad_norm": 195.22996520996094, "learning_rate": 1.8022635153761004e-06, "loss": 13.5625, "step": 6852 }, { "epoch": 0.45513714551371454, "grad_norm": 185.95574951171875, "learning_rate": 1.80219930543821e-06, "loss": 18.125, "step": 6853 }, { "epoch": 0.45520355980607025, "grad_norm": 309.48101806640625, "learning_rate": 1.8021350862209402e-06, "loss": 16.0938, "step": 6854 }, { "epoch": 0.45526997409842596, "grad_norm": 304.1016845703125, "learning_rate": 1.802070857725034e-06, "loss": 21.8438, "step": 6855 }, { "epoch": 0.4553363883907817, "grad_norm": 227.54318237304688, "learning_rate": 1.8020066199512352e-06, "loss": 17.1562, "step": 6856 }, { "epoch": 0.45540280268313743, "grad_norm": 194.98374938964844, "learning_rate": 1.8019423729002861e-06, "loss": 20.7031, "step": 6857 }, { "epoch": 0.45546921697549314, "grad_norm": 335.3562316894531, "learning_rate": 1.8018781165729302e-06, "loss": 23.6875, "step": 6858 }, { "epoch": 0.45553563126784885, "grad_norm": 146.6259002685547, "learning_rate": 1.8018138509699104e-06, "loss": 14.2031, "step": 6859 }, { "epoch": 0.45560204556020456, "grad_norm": 1053.35888671875, "learning_rate": 1.8017495760919703e-06, "loss": 12.9922, "step": 6860 }, { "epoch": 0.45566845985256027, "grad_norm": 214.73861694335938, "learning_rate": 1.8016852919398535e-06, "loss": 16.1562, "step": 6861 }, { "epoch": 0.455734874144916, "grad_norm": 212.78921508789062, "learning_rate": 1.8016209985143036e-06, "loss": 18.0781, "step": 6862 }, { "epoch": 0.4558012884372717, "grad_norm": 207.11163330078125, "learning_rate": 1.801556695816064e-06, "loss": 21.4688, "step": 6863 }, { "epoch": 0.4558677027296274, "grad_norm": 220.7411651611328, "learning_rate": 1.8014923838458791e-06, "loss": 18.25, "step": 6864 }, { "epoch": 0.4559341170219831, "grad_norm": 258.5095520019531, "learning_rate": 1.8014280626044922e-06, "loss": 15.7188, "step": 6865 }, { "epoch": 0.4560005313143388, "grad_norm": 253.34371948242188, "learning_rate": 1.8013637320926476e-06, "loss": 19.6094, "step": 6866 }, { "epoch": 0.4560669456066946, "grad_norm": 157.9611358642578, "learning_rate": 1.8012993923110895e-06, "loss": 24.4062, "step": 6867 }, { "epoch": 0.4561333598990503, "grad_norm": 280.0532531738281, "learning_rate": 1.8012350432605623e-06, "loss": 18.9531, "step": 6868 }, { "epoch": 0.456199774191406, "grad_norm": 183.6565704345703, "learning_rate": 1.8011706849418098e-06, "loss": 14.7344, "step": 6869 }, { "epoch": 0.4562661884837617, "grad_norm": 664.3778686523438, "learning_rate": 1.8011063173555772e-06, "loss": 21.7031, "step": 6870 }, { "epoch": 0.4563326027761174, "grad_norm": 231.53680419921875, "learning_rate": 1.8010419405026083e-06, "loss": 16.4375, "step": 6871 }, { "epoch": 0.45639901706847313, "grad_norm": 169.1484375, "learning_rate": 1.8009775543836482e-06, "loss": 14.2188, "step": 6872 }, { "epoch": 0.45646543136082884, "grad_norm": 384.4726867675781, "learning_rate": 1.8009131589994416e-06, "loss": 18.8906, "step": 6873 }, { "epoch": 0.45653184565318455, "grad_norm": 280.0833435058594, "learning_rate": 1.8008487543507335e-06, "loss": 23.3125, "step": 6874 }, { "epoch": 0.45659825994554026, "grad_norm": 158.0209197998047, "learning_rate": 1.8007843404382687e-06, "loss": 21.3281, "step": 6875 }, { "epoch": 0.456664674237896, "grad_norm": 340.8450927734375, "learning_rate": 1.8007199172627927e-06, "loss": 13.7344, "step": 6876 }, { "epoch": 0.4567310885302517, "grad_norm": 400.74688720703125, "learning_rate": 1.8006554848250502e-06, "loss": 30.3438, "step": 6877 }, { "epoch": 0.45679750282260745, "grad_norm": 130.77630615234375, "learning_rate": 1.8005910431257867e-06, "loss": 13.1875, "step": 6878 }, { "epoch": 0.45686391711496316, "grad_norm": 258.5776062011719, "learning_rate": 1.8005265921657475e-06, "loss": 20.8594, "step": 6879 }, { "epoch": 0.45693033140731887, "grad_norm": 235.19497680664062, "learning_rate": 1.8004621319456787e-06, "loss": 17.7031, "step": 6880 }, { "epoch": 0.4569967456996746, "grad_norm": 230.73623657226562, "learning_rate": 1.8003976624663252e-06, "loss": 21.5938, "step": 6881 }, { "epoch": 0.4570631599920303, "grad_norm": 267.76263427734375, "learning_rate": 1.8003331837284334e-06, "loss": 26.4062, "step": 6882 }, { "epoch": 0.457129574284386, "grad_norm": 293.5484619140625, "learning_rate": 1.8002686957327487e-06, "loss": 16.75, "step": 6883 }, { "epoch": 0.4571959885767417, "grad_norm": 227.00196838378906, "learning_rate": 1.800204198480017e-06, "loss": 15.5781, "step": 6884 }, { "epoch": 0.4572624028690974, "grad_norm": 169.89219665527344, "learning_rate": 1.8001396919709846e-06, "loss": 16.875, "step": 6885 }, { "epoch": 0.4573288171614531, "grad_norm": 270.4099426269531, "learning_rate": 1.8000751762063979e-06, "loss": 17.7656, "step": 6886 }, { "epoch": 0.45739523145380884, "grad_norm": 345.45123291015625, "learning_rate": 1.8000106511870028e-06, "loss": 27.4062, "step": 6887 }, { "epoch": 0.45746164574616455, "grad_norm": 178.48318481445312, "learning_rate": 1.7999461169135457e-06, "loss": 17.8906, "step": 6888 }, { "epoch": 0.4575280600385203, "grad_norm": 500.09674072265625, "learning_rate": 1.7998815733867731e-06, "loss": 23.7188, "step": 6889 }, { "epoch": 0.457594474330876, "grad_norm": 281.4011535644531, "learning_rate": 1.7998170206074317e-06, "loss": 21.5312, "step": 6890 }, { "epoch": 0.45766088862323173, "grad_norm": 186.99478149414062, "learning_rate": 1.7997524585762685e-06, "loss": 22.6406, "step": 6891 }, { "epoch": 0.45772730291558744, "grad_norm": 101.10619354248047, "learning_rate": 1.7996878872940298e-06, "loss": 17.3906, "step": 6892 }, { "epoch": 0.45779371720794315, "grad_norm": 183.02198791503906, "learning_rate": 1.7996233067614629e-06, "loss": 17.2969, "step": 6893 }, { "epoch": 0.45786013150029886, "grad_norm": 373.31298828125, "learning_rate": 1.7995587169793144e-06, "loss": 20.5625, "step": 6894 }, { "epoch": 0.45792654579265457, "grad_norm": 243.68746948242188, "learning_rate": 1.7994941179483318e-06, "loss": 22.5469, "step": 6895 }, { "epoch": 0.4579929600850103, "grad_norm": 152.33131408691406, "learning_rate": 1.7994295096692624e-06, "loss": 18.2656, "step": 6896 }, { "epoch": 0.458059374377366, "grad_norm": 225.62049865722656, "learning_rate": 1.7993648921428534e-06, "loss": 14.6797, "step": 6897 }, { "epoch": 0.4581257886697217, "grad_norm": 304.729736328125, "learning_rate": 1.7993002653698522e-06, "loss": 24.3594, "step": 6898 }, { "epoch": 0.45819220296207747, "grad_norm": 606.4058837890625, "learning_rate": 1.7992356293510063e-06, "loss": 14.0312, "step": 6899 }, { "epoch": 0.4582586172544332, "grad_norm": 391.39990234375, "learning_rate": 1.7991709840870638e-06, "loss": 12.0078, "step": 6900 }, { "epoch": 0.4583250315467889, "grad_norm": 208.99429321289062, "learning_rate": 1.7991063295787722e-06, "loss": 16.0938, "step": 6901 }, { "epoch": 0.4583914458391446, "grad_norm": 224.69056701660156, "learning_rate": 1.799041665826879e-06, "loss": 29.3125, "step": 6902 }, { "epoch": 0.4584578601315003, "grad_norm": 190.83717346191406, "learning_rate": 1.798976992832133e-06, "loss": 17.25, "step": 6903 }, { "epoch": 0.458524274423856, "grad_norm": 339.9564514160156, "learning_rate": 1.7989123105952814e-06, "loss": 16.1406, "step": 6904 }, { "epoch": 0.4585906887162117, "grad_norm": 355.3117370605469, "learning_rate": 1.7988476191170732e-06, "loss": 19.7188, "step": 6905 }, { "epoch": 0.45865710300856743, "grad_norm": 182.5959930419922, "learning_rate": 1.7987829183982562e-06, "loss": 17.8438, "step": 6906 }, { "epoch": 0.45872351730092314, "grad_norm": 293.005615234375, "learning_rate": 1.7987182084395794e-06, "loss": 20.6719, "step": 6907 }, { "epoch": 0.45878993159327885, "grad_norm": 244.22250366210938, "learning_rate": 1.7986534892417904e-06, "loss": 16.0938, "step": 6908 }, { "epoch": 0.45885634588563456, "grad_norm": 179.6851043701172, "learning_rate": 1.7985887608056386e-06, "loss": 19.9375, "step": 6909 }, { "epoch": 0.45892276017799033, "grad_norm": 251.932861328125, "learning_rate": 1.7985240231318723e-06, "loss": 19.5312, "step": 6910 }, { "epoch": 0.45898917447034604, "grad_norm": 269.8127746582031, "learning_rate": 1.7984592762212409e-06, "loss": 18.7188, "step": 6911 }, { "epoch": 0.45905558876270175, "grad_norm": 334.3197021484375, "learning_rate": 1.7983945200744928e-06, "loss": 20.8125, "step": 6912 }, { "epoch": 0.45912200305505746, "grad_norm": 258.40240478515625, "learning_rate": 1.7983297546923774e-06, "loss": 24.0938, "step": 6913 }, { "epoch": 0.45918841734741317, "grad_norm": 257.31805419921875, "learning_rate": 1.7982649800756437e-06, "loss": 21.4375, "step": 6914 }, { "epoch": 0.4592548316397689, "grad_norm": 99.6321029663086, "learning_rate": 1.798200196225041e-06, "loss": 11.4219, "step": 6915 }, { "epoch": 0.4593212459321246, "grad_norm": 345.25860595703125, "learning_rate": 1.7981354031413187e-06, "loss": 20.3281, "step": 6916 }, { "epoch": 0.4593876602244803, "grad_norm": 654.1442260742188, "learning_rate": 1.7980706008252264e-06, "loss": 20.7812, "step": 6917 }, { "epoch": 0.459454074516836, "grad_norm": 350.8928527832031, "learning_rate": 1.7980057892775133e-06, "loss": 21.1719, "step": 6918 }, { "epoch": 0.4595204888091917, "grad_norm": 151.76377868652344, "learning_rate": 1.7979409684989297e-06, "loss": 17.625, "step": 6919 }, { "epoch": 0.45958690310154743, "grad_norm": 307.5041198730469, "learning_rate": 1.797876138490225e-06, "loss": 17.5, "step": 6920 }, { "epoch": 0.4596533173939032, "grad_norm": 409.86663818359375, "learning_rate": 1.797811299252149e-06, "loss": 20.4375, "step": 6921 }, { "epoch": 0.4597197316862589, "grad_norm": 213.3348388671875, "learning_rate": 1.797746450785452e-06, "loss": 17.0938, "step": 6922 }, { "epoch": 0.4597861459786146, "grad_norm": 345.900146484375, "learning_rate": 1.7976815930908843e-06, "loss": 21.25, "step": 6923 }, { "epoch": 0.4598525602709703, "grad_norm": 214.59124755859375, "learning_rate": 1.797616726169196e-06, "loss": 19.9375, "step": 6924 }, { "epoch": 0.45991897456332603, "grad_norm": 232.977783203125, "learning_rate": 1.7975518500211368e-06, "loss": 20.6875, "step": 6925 }, { "epoch": 0.45998538885568174, "grad_norm": 168.48880004882812, "learning_rate": 1.797486964647458e-06, "loss": 23.25, "step": 6926 }, { "epoch": 0.46005180314803745, "grad_norm": 159.53973388671875, "learning_rate": 1.79742207004891e-06, "loss": 14.2656, "step": 6927 }, { "epoch": 0.46011821744039316, "grad_norm": 2181.28369140625, "learning_rate": 1.7973571662262431e-06, "loss": 20.25, "step": 6928 }, { "epoch": 0.46018463173274887, "grad_norm": 294.2008361816406, "learning_rate": 1.7972922531802085e-06, "loss": 19.4531, "step": 6929 }, { "epoch": 0.4602510460251046, "grad_norm": 488.614501953125, "learning_rate": 1.7972273309115567e-06, "loss": 15.7031, "step": 6930 }, { "epoch": 0.4603174603174603, "grad_norm": 127.59310913085938, "learning_rate": 1.7971623994210385e-06, "loss": 15.4062, "step": 6931 }, { "epoch": 0.46038387460981606, "grad_norm": 156.39443969726562, "learning_rate": 1.797097458709406e-06, "loss": 16.25, "step": 6932 }, { "epoch": 0.46045028890217177, "grad_norm": 282.95220947265625, "learning_rate": 1.7970325087774095e-06, "loss": 21.9062, "step": 6933 }, { "epoch": 0.4605167031945275, "grad_norm": 252.17080688476562, "learning_rate": 1.7969675496258003e-06, "loss": 16.0312, "step": 6934 }, { "epoch": 0.4605831174868832, "grad_norm": 386.52899169921875, "learning_rate": 1.7969025812553303e-06, "loss": 24.375, "step": 6935 }, { "epoch": 0.4606495317792389, "grad_norm": 124.99748992919922, "learning_rate": 1.7968376036667505e-06, "loss": 18.4688, "step": 6936 }, { "epoch": 0.4607159460715946, "grad_norm": 140.95831298828125, "learning_rate": 1.796772616860813e-06, "loss": 15.9219, "step": 6937 }, { "epoch": 0.4607823603639503, "grad_norm": 114.94310760498047, "learning_rate": 1.7967076208382691e-06, "loss": 17.7344, "step": 6938 }, { "epoch": 0.460848774656306, "grad_norm": 136.3773956298828, "learning_rate": 1.7966426155998713e-06, "loss": 17.3906, "step": 6939 }, { "epoch": 0.46091518894866174, "grad_norm": 130.7021484375, "learning_rate": 1.7965776011463705e-06, "loss": 17.1094, "step": 6940 }, { "epoch": 0.46098160324101745, "grad_norm": 222.3597412109375, "learning_rate": 1.79651257747852e-06, "loss": 20.9062, "step": 6941 }, { "epoch": 0.46104801753337316, "grad_norm": 148.206298828125, "learning_rate": 1.7964475445970705e-06, "loss": 17.2812, "step": 6942 }, { "epoch": 0.4611144318257289, "grad_norm": 193.0983428955078, "learning_rate": 1.7963825025027756e-06, "loss": 17.5781, "step": 6943 }, { "epoch": 0.46118084611808463, "grad_norm": 165.7583465576172, "learning_rate": 1.7963174511963873e-06, "loss": 14.7188, "step": 6944 }, { "epoch": 0.46124726041044034, "grad_norm": 153.27755737304688, "learning_rate": 1.7962523906786576e-06, "loss": 20.2812, "step": 6945 }, { "epoch": 0.46131367470279605, "grad_norm": 250.0909423828125, "learning_rate": 1.7961873209503392e-06, "loss": 16.3594, "step": 6946 }, { "epoch": 0.46138008899515176, "grad_norm": 242.7252960205078, "learning_rate": 1.7961222420121853e-06, "loss": 16.5, "step": 6947 }, { "epoch": 0.46144650328750747, "grad_norm": 206.05177307128906, "learning_rate": 1.7960571538649483e-06, "loss": 27.0938, "step": 6948 }, { "epoch": 0.4615129175798632, "grad_norm": 361.52520751953125, "learning_rate": 1.795992056509381e-06, "loss": 21.8438, "step": 6949 }, { "epoch": 0.4615793318722189, "grad_norm": 407.48486328125, "learning_rate": 1.7959269499462369e-06, "loss": 20.7344, "step": 6950 }, { "epoch": 0.4616457461645746, "grad_norm": 185.070068359375, "learning_rate": 1.7958618341762687e-06, "loss": 21.0625, "step": 6951 }, { "epoch": 0.4617121604569303, "grad_norm": 173.2974395751953, "learning_rate": 1.7957967092002297e-06, "loss": 18.3594, "step": 6952 }, { "epoch": 0.461778574749286, "grad_norm": 196.29656982421875, "learning_rate": 1.7957315750188733e-06, "loss": 20.5625, "step": 6953 }, { "epoch": 0.4618449890416418, "grad_norm": 215.30706787109375, "learning_rate": 1.7956664316329527e-06, "loss": 18.5781, "step": 6954 }, { "epoch": 0.4619114033339975, "grad_norm": 174.94493103027344, "learning_rate": 1.7956012790432219e-06, "loss": 17.2969, "step": 6955 }, { "epoch": 0.4619778176263532, "grad_norm": 121.94561767578125, "learning_rate": 1.795536117250434e-06, "loss": 20.0469, "step": 6956 }, { "epoch": 0.4620442319187089, "grad_norm": 318.57757568359375, "learning_rate": 1.795470946255343e-06, "loss": 23.0625, "step": 6957 }, { "epoch": 0.4621106462110646, "grad_norm": 147.0266571044922, "learning_rate": 1.795405766058703e-06, "loss": 21.0312, "step": 6958 }, { "epoch": 0.46217706050342033, "grad_norm": 179.46456909179688, "learning_rate": 1.7953405766612674e-06, "loss": 21.6875, "step": 6959 }, { "epoch": 0.46224347479577604, "grad_norm": 195.01052856445312, "learning_rate": 1.795275378063791e-06, "loss": 16.0469, "step": 6960 }, { "epoch": 0.46230988908813175, "grad_norm": 227.17813110351562, "learning_rate": 1.7952101702670274e-06, "loss": 27.9375, "step": 6961 }, { "epoch": 0.46237630338048746, "grad_norm": 163.92587280273438, "learning_rate": 1.7951449532717312e-06, "loss": 16.8594, "step": 6962 }, { "epoch": 0.4624427176728432, "grad_norm": 410.1266784667969, "learning_rate": 1.795079727078657e-06, "loss": 25.875, "step": 6963 }, { "epoch": 0.46250913196519894, "grad_norm": 339.8448181152344, "learning_rate": 1.7950144916885587e-06, "loss": 31.1875, "step": 6964 }, { "epoch": 0.46257554625755465, "grad_norm": 231.6926727294922, "learning_rate": 1.794949247102191e-06, "loss": 19.2812, "step": 6965 }, { "epoch": 0.46264196054991036, "grad_norm": 200.52745056152344, "learning_rate": 1.7948839933203091e-06, "loss": 18.3281, "step": 6966 }, { "epoch": 0.46270837484226607, "grad_norm": 252.28759765625, "learning_rate": 1.7948187303436674e-06, "loss": 22.8281, "step": 6967 }, { "epoch": 0.4627747891346218, "grad_norm": 236.8069610595703, "learning_rate": 1.7947534581730208e-06, "loss": 18.5469, "step": 6968 }, { "epoch": 0.4628412034269775, "grad_norm": 236.8507843017578, "learning_rate": 1.7946881768091249e-06, "loss": 27.0312, "step": 6969 }, { "epoch": 0.4629076177193332, "grad_norm": 450.8732604980469, "learning_rate": 1.794622886252734e-06, "loss": 24.4844, "step": 6970 }, { "epoch": 0.4629740320116889, "grad_norm": 231.56878662109375, "learning_rate": 1.7945575865046038e-06, "loss": 14.9688, "step": 6971 }, { "epoch": 0.4630404463040446, "grad_norm": 184.3968048095703, "learning_rate": 1.7944922775654895e-06, "loss": 18.8125, "step": 6972 }, { "epoch": 0.4631068605964003, "grad_norm": 732.1090087890625, "learning_rate": 1.7944269594361472e-06, "loss": 24.625, "step": 6973 }, { "epoch": 0.46317327488875604, "grad_norm": 248.74827575683594, "learning_rate": 1.7943616321173315e-06, "loss": 17.625, "step": 6974 }, { "epoch": 0.4632396891811118, "grad_norm": 196.37445068359375, "learning_rate": 1.7942962956097986e-06, "loss": 19.4531, "step": 6975 }, { "epoch": 0.4633061034734675, "grad_norm": 154.39593505859375, "learning_rate": 1.7942309499143041e-06, "loss": 22.4531, "step": 6976 }, { "epoch": 0.4633725177658232, "grad_norm": 236.12908935546875, "learning_rate": 1.7941655950316042e-06, "loss": 21.9531, "step": 6977 }, { "epoch": 0.46343893205817893, "grad_norm": 274.88006591796875, "learning_rate": 1.7941002309624542e-06, "loss": 22.375, "step": 6978 }, { "epoch": 0.46350534635053464, "grad_norm": 569.2470703125, "learning_rate": 1.794034857707611e-06, "loss": 26.6875, "step": 6979 }, { "epoch": 0.46357176064289035, "grad_norm": 261.3047180175781, "learning_rate": 1.7939694752678306e-06, "loss": 18.0938, "step": 6980 }, { "epoch": 0.46363817493524606, "grad_norm": 249.54251098632812, "learning_rate": 1.7939040836438687e-06, "loss": 16.375, "step": 6981 }, { "epoch": 0.46370458922760177, "grad_norm": 261.0899963378906, "learning_rate": 1.793838682836482e-06, "loss": 17.9688, "step": 6982 }, { "epoch": 0.4637710035199575, "grad_norm": 437.3614807128906, "learning_rate": 1.7937732728464276e-06, "loss": 30.5938, "step": 6983 }, { "epoch": 0.4638374178123132, "grad_norm": 246.13916015625, "learning_rate": 1.7937078536744617e-06, "loss": 16.4688, "step": 6984 }, { "epoch": 0.4639038321046689, "grad_norm": 198.3740997314453, "learning_rate": 1.793642425321341e-06, "loss": 16.1719, "step": 6985 }, { "epoch": 0.46397024639702467, "grad_norm": 298.5306091308594, "learning_rate": 1.7935769877878219e-06, "loss": 19.1719, "step": 6986 }, { "epoch": 0.4640366606893804, "grad_norm": 150.43441772460938, "learning_rate": 1.7935115410746625e-06, "loss": 13.8125, "step": 6987 }, { "epoch": 0.4641030749817361, "grad_norm": 227.23570251464844, "learning_rate": 1.7934460851826184e-06, "loss": 19.3594, "step": 6988 }, { "epoch": 0.4641694892740918, "grad_norm": 173.30575561523438, "learning_rate": 1.7933806201124481e-06, "loss": 17.6875, "step": 6989 }, { "epoch": 0.4642359035664475, "grad_norm": 174.37413024902344, "learning_rate": 1.7933151458649083e-06, "loss": 16.7812, "step": 6990 }, { "epoch": 0.4643023178588032, "grad_norm": 151.3824005126953, "learning_rate": 1.7932496624407563e-06, "loss": 18.0, "step": 6991 }, { "epoch": 0.4643687321511589, "grad_norm": 432.5563049316406, "learning_rate": 1.7931841698407493e-06, "loss": 20.1562, "step": 6992 }, { "epoch": 0.46443514644351463, "grad_norm": 256.8312072753906, "learning_rate": 1.7931186680656455e-06, "loss": 17.5156, "step": 6993 }, { "epoch": 0.46450156073587034, "grad_norm": 271.7926940917969, "learning_rate": 1.7930531571162022e-06, "loss": 17.6719, "step": 6994 }, { "epoch": 0.46456797502822605, "grad_norm": 813.00537109375, "learning_rate": 1.7929876369931771e-06, "loss": 16.5469, "step": 6995 }, { "epoch": 0.46463438932058176, "grad_norm": 295.29351806640625, "learning_rate": 1.7929221076973284e-06, "loss": 23.3125, "step": 6996 }, { "epoch": 0.46470080361293753, "grad_norm": 231.30044555664062, "learning_rate": 1.792856569229414e-06, "loss": 19.4219, "step": 6997 }, { "epoch": 0.46476721790529324, "grad_norm": 274.6061096191406, "learning_rate": 1.792791021590192e-06, "loss": 23.5781, "step": 6998 }, { "epoch": 0.46483363219764895, "grad_norm": 164.96466064453125, "learning_rate": 1.7927254647804206e-06, "loss": 18.25, "step": 6999 }, { "epoch": 0.46490004649000466, "grad_norm": 277.8180847167969, "learning_rate": 1.792659898800858e-06, "loss": 21.7031, "step": 7000 }, { "epoch": 0.46496646078236037, "grad_norm": 176.1807861328125, "learning_rate": 1.792594323652263e-06, "loss": 14.1094, "step": 7001 }, { "epoch": 0.4650328750747161, "grad_norm": 254.2904510498047, "learning_rate": 1.7925287393353939e-06, "loss": 15.6719, "step": 7002 }, { "epoch": 0.4650992893670718, "grad_norm": 158.375, "learning_rate": 1.7924631458510092e-06, "loss": 13.0469, "step": 7003 }, { "epoch": 0.4651657036594275, "grad_norm": 165.87057495117188, "learning_rate": 1.7923975431998679e-06, "loss": 19.3125, "step": 7004 }, { "epoch": 0.4652321179517832, "grad_norm": 235.27220153808594, "learning_rate": 1.7923319313827285e-06, "loss": 18.2969, "step": 7005 }, { "epoch": 0.4652985322441389, "grad_norm": 319.0000305175781, "learning_rate": 1.7922663104003505e-06, "loss": 18.5781, "step": 7006 }, { "epoch": 0.46536494653649463, "grad_norm": 144.5729217529297, "learning_rate": 1.7922006802534925e-06, "loss": 17.9688, "step": 7007 }, { "epoch": 0.4654313608288504, "grad_norm": 185.52093505859375, "learning_rate": 1.792135040942914e-06, "loss": 20.2188, "step": 7008 }, { "epoch": 0.4654977751212061, "grad_norm": 125.49823760986328, "learning_rate": 1.7920693924693739e-06, "loss": 14.875, "step": 7009 }, { "epoch": 0.4655641894135618, "grad_norm": 210.1021728515625, "learning_rate": 1.7920037348336322e-06, "loss": 16.7031, "step": 7010 }, { "epoch": 0.4656306037059175, "grad_norm": 171.83815002441406, "learning_rate": 1.7919380680364476e-06, "loss": 16.8125, "step": 7011 }, { "epoch": 0.46569701799827323, "grad_norm": 142.34764099121094, "learning_rate": 1.7918723920785803e-06, "loss": 17.1562, "step": 7012 }, { "epoch": 0.46576343229062894, "grad_norm": 132.2679901123047, "learning_rate": 1.7918067069607896e-06, "loss": 16.2969, "step": 7013 }, { "epoch": 0.46582984658298465, "grad_norm": 198.93724060058594, "learning_rate": 1.7917410126838356e-06, "loss": 24.625, "step": 7014 }, { "epoch": 0.46589626087534036, "grad_norm": 409.84429931640625, "learning_rate": 1.791675309248478e-06, "loss": 16.6875, "step": 7015 }, { "epoch": 0.46596267516769607, "grad_norm": 143.19459533691406, "learning_rate": 1.791609596655477e-06, "loss": 14.6562, "step": 7016 }, { "epoch": 0.4660290894600518, "grad_norm": 176.6438751220703, "learning_rate": 1.7915438749055926e-06, "loss": 18.1406, "step": 7017 }, { "epoch": 0.4660955037524075, "grad_norm": 326.96014404296875, "learning_rate": 1.7914781439995852e-06, "loss": 18.6875, "step": 7018 }, { "epoch": 0.46616191804476326, "grad_norm": 190.39990234375, "learning_rate": 1.791412403938215e-06, "loss": 11.875, "step": 7019 }, { "epoch": 0.46622833233711897, "grad_norm": 440.1648864746094, "learning_rate": 1.7913466547222425e-06, "loss": 18.8281, "step": 7020 }, { "epoch": 0.4662947466294747, "grad_norm": 209.42967224121094, "learning_rate": 1.7912808963524282e-06, "loss": 12.9531, "step": 7021 }, { "epoch": 0.4663611609218304, "grad_norm": 186.92947387695312, "learning_rate": 1.7912151288295325e-06, "loss": 26.1562, "step": 7022 }, { "epoch": 0.4664275752141861, "grad_norm": 223.51742553710938, "learning_rate": 1.7911493521543167e-06, "loss": 17.7969, "step": 7023 }, { "epoch": 0.4664939895065418, "grad_norm": 859.8792724609375, "learning_rate": 1.7910835663275415e-06, "loss": 9.6641, "step": 7024 }, { "epoch": 0.4665604037988975, "grad_norm": 206.94456481933594, "learning_rate": 1.7910177713499674e-06, "loss": 17.7031, "step": 7025 }, { "epoch": 0.4666268180912532, "grad_norm": 214.71136474609375, "learning_rate": 1.7909519672223563e-06, "loss": 15.3594, "step": 7026 }, { "epoch": 0.46669323238360894, "grad_norm": 196.7074432373047, "learning_rate": 1.7908861539454687e-06, "loss": 18.5781, "step": 7027 }, { "epoch": 0.46675964667596465, "grad_norm": 137.28448486328125, "learning_rate": 1.790820331520066e-06, "loss": 19.9062, "step": 7028 }, { "epoch": 0.46682606096832036, "grad_norm": 261.8912048339844, "learning_rate": 1.7907544999469097e-06, "loss": 16.4062, "step": 7029 }, { "epoch": 0.4668924752606761, "grad_norm": 231.9635009765625, "learning_rate": 1.7906886592267618e-06, "loss": 15.1406, "step": 7030 }, { "epoch": 0.46695888955303183, "grad_norm": 140.6375732421875, "learning_rate": 1.790622809360383e-06, "loss": 15.2188, "step": 7031 }, { "epoch": 0.46702530384538754, "grad_norm": 376.3713684082031, "learning_rate": 1.7905569503485354e-06, "loss": 20.8906, "step": 7032 }, { "epoch": 0.46709171813774325, "grad_norm": 285.9868469238281, "learning_rate": 1.7904910821919812e-06, "loss": 20.3594, "step": 7033 }, { "epoch": 0.46715813243009896, "grad_norm": 273.0746765136719, "learning_rate": 1.7904252048914818e-06, "loss": 21.6562, "step": 7034 }, { "epoch": 0.46722454672245467, "grad_norm": 584.6735229492188, "learning_rate": 1.7903593184477993e-06, "loss": 19.3281, "step": 7035 }, { "epoch": 0.4672909610148104, "grad_norm": 219.93106079101562, "learning_rate": 1.790293422861696e-06, "loss": 15.1094, "step": 7036 }, { "epoch": 0.4673573753071661, "grad_norm": 460.4759826660156, "learning_rate": 1.7902275181339342e-06, "loss": 22.6953, "step": 7037 }, { "epoch": 0.4674237895995218, "grad_norm": 246.1913299560547, "learning_rate": 1.790161604265276e-06, "loss": 17.8906, "step": 7038 }, { "epoch": 0.4674902038918775, "grad_norm": 211.64122009277344, "learning_rate": 1.790095681256484e-06, "loss": 17.5469, "step": 7039 }, { "epoch": 0.4675566181842333, "grad_norm": 276.2713317871094, "learning_rate": 1.790029749108321e-06, "loss": 15.375, "step": 7040 }, { "epoch": 0.467623032476589, "grad_norm": 274.5052185058594, "learning_rate": 1.7899638078215492e-06, "loss": 22.625, "step": 7041 }, { "epoch": 0.4676894467689447, "grad_norm": 405.6425476074219, "learning_rate": 1.7898978573969317e-06, "loss": 22.1875, "step": 7042 }, { "epoch": 0.4677558610613004, "grad_norm": 209.9380645751953, "learning_rate": 1.7898318978352312e-06, "loss": 28.125, "step": 7043 }, { "epoch": 0.4678222753536561, "grad_norm": 243.84890747070312, "learning_rate": 1.7897659291372106e-06, "loss": 23.2188, "step": 7044 }, { "epoch": 0.4678886896460118, "grad_norm": 208.7187957763672, "learning_rate": 1.7896999513036333e-06, "loss": 18.1875, "step": 7045 }, { "epoch": 0.46795510393836753, "grad_norm": 377.5207214355469, "learning_rate": 1.7896339643352626e-06, "loss": 21.5938, "step": 7046 }, { "epoch": 0.46802151823072324, "grad_norm": 296.8357849121094, "learning_rate": 1.789567968232861e-06, "loss": 16.7344, "step": 7047 }, { "epoch": 0.46808793252307895, "grad_norm": 174.7033233642578, "learning_rate": 1.7895019629971927e-06, "loss": 18.0938, "step": 7048 }, { "epoch": 0.46815434681543466, "grad_norm": 195.7488555908203, "learning_rate": 1.7894359486290212e-06, "loss": 15.3281, "step": 7049 }, { "epoch": 0.4682207611077904, "grad_norm": 158.238525390625, "learning_rate": 1.7893699251291095e-06, "loss": 16.9688, "step": 7050 }, { "epoch": 0.46828717540014614, "grad_norm": 725.3101196289062, "learning_rate": 1.7893038924982217e-06, "loss": 22.0, "step": 7051 }, { "epoch": 0.46835358969250185, "grad_norm": 191.6986846923828, "learning_rate": 1.7892378507371216e-06, "loss": 16.0391, "step": 7052 }, { "epoch": 0.46842000398485756, "grad_norm": 359.83099365234375, "learning_rate": 1.7891717998465735e-06, "loss": 21.1406, "step": 7053 }, { "epoch": 0.46848641827721327, "grad_norm": 135.66488647460938, "learning_rate": 1.7891057398273406e-06, "loss": 21.3125, "step": 7054 }, { "epoch": 0.468552832569569, "grad_norm": 179.08447265625, "learning_rate": 1.7890396706801878e-06, "loss": 12.7031, "step": 7055 }, { "epoch": 0.4686192468619247, "grad_norm": 183.2117919921875, "learning_rate": 1.7889735924058788e-06, "loss": 20.75, "step": 7056 }, { "epoch": 0.4686856611542804, "grad_norm": 250.39312744140625, "learning_rate": 1.7889075050051785e-06, "loss": 23.8438, "step": 7057 }, { "epoch": 0.4687520754466361, "grad_norm": 135.29750061035156, "learning_rate": 1.7888414084788512e-06, "loss": 12.125, "step": 7058 }, { "epoch": 0.4688184897389918, "grad_norm": 265.2024841308594, "learning_rate": 1.788775302827661e-06, "loss": 14.0469, "step": 7059 }, { "epoch": 0.4688849040313475, "grad_norm": 279.9967346191406, "learning_rate": 1.7887091880523733e-06, "loss": 16.0312, "step": 7060 }, { "epoch": 0.46895131832370324, "grad_norm": 280.02740478515625, "learning_rate": 1.7886430641537522e-06, "loss": 15.5312, "step": 7061 }, { "epoch": 0.469017732616059, "grad_norm": 246.5723419189453, "learning_rate": 1.7885769311325632e-06, "loss": 22.75, "step": 7062 }, { "epoch": 0.4690841469084147, "grad_norm": 164.69439697265625, "learning_rate": 1.7885107889895705e-06, "loss": 22.9375, "step": 7063 }, { "epoch": 0.4691505612007704, "grad_norm": 198.3480224609375, "learning_rate": 1.7884446377255402e-06, "loss": 20.3906, "step": 7064 }, { "epoch": 0.46921697549312613, "grad_norm": 150.99114990234375, "learning_rate": 1.788378477341237e-06, "loss": 18.7188, "step": 7065 }, { "epoch": 0.46928338978548184, "grad_norm": 307.5867004394531, "learning_rate": 1.7883123078374256e-06, "loss": 20.9688, "step": 7066 }, { "epoch": 0.46934980407783755, "grad_norm": 249.6598358154297, "learning_rate": 1.7882461292148726e-06, "loss": 21.7188, "step": 7067 }, { "epoch": 0.46941621837019326, "grad_norm": 607.955810546875, "learning_rate": 1.7881799414743423e-06, "loss": 21.2188, "step": 7068 }, { "epoch": 0.46948263266254897, "grad_norm": 157.5939483642578, "learning_rate": 1.7881137446166012e-06, "loss": 16.4688, "step": 7069 }, { "epoch": 0.4695490469549047, "grad_norm": 276.6815490722656, "learning_rate": 1.788047538642415e-06, "loss": 18.0469, "step": 7070 }, { "epoch": 0.4696154612472604, "grad_norm": 242.8278350830078, "learning_rate": 1.7879813235525492e-06, "loss": 15.4062, "step": 7071 }, { "epoch": 0.4696818755396161, "grad_norm": 214.60914611816406, "learning_rate": 1.7879150993477696e-06, "loss": 22.875, "step": 7072 }, { "epoch": 0.46974828983197187, "grad_norm": 318.97613525390625, "learning_rate": 1.7878488660288426e-06, "loss": 26.375, "step": 7073 }, { "epoch": 0.4698147041243276, "grad_norm": 165.88021850585938, "learning_rate": 1.7877826235965344e-06, "loss": 18.5, "step": 7074 }, { "epoch": 0.4698811184166833, "grad_norm": 256.95269775390625, "learning_rate": 1.787716372051611e-06, "loss": 19.4844, "step": 7075 }, { "epoch": 0.469947532709039, "grad_norm": 246.42274475097656, "learning_rate": 1.7876501113948383e-06, "loss": 17.0156, "step": 7076 }, { "epoch": 0.4700139470013947, "grad_norm": 160.7545623779297, "learning_rate": 1.7875838416269837e-06, "loss": 20.9062, "step": 7077 }, { "epoch": 0.4700803612937504, "grad_norm": 269.02899169921875, "learning_rate": 1.7875175627488136e-06, "loss": 19.3906, "step": 7078 }, { "epoch": 0.4701467755861061, "grad_norm": 285.3047790527344, "learning_rate": 1.7874512747610941e-06, "loss": 16.0156, "step": 7079 }, { "epoch": 0.47021318987846183, "grad_norm": 115.50745391845703, "learning_rate": 1.7873849776645927e-06, "loss": 15.2031, "step": 7080 }, { "epoch": 0.47027960417081754, "grad_norm": 273.0755310058594, "learning_rate": 1.7873186714600754e-06, "loss": 19.7188, "step": 7081 }, { "epoch": 0.47034601846317325, "grad_norm": 149.01649475097656, "learning_rate": 1.78725235614831e-06, "loss": 18.7812, "step": 7082 }, { "epoch": 0.47041243275552896, "grad_norm": 188.91183471679688, "learning_rate": 1.787186031730063e-06, "loss": 20.4844, "step": 7083 }, { "epoch": 0.47047884704788473, "grad_norm": 247.97042846679688, "learning_rate": 1.7871196982061023e-06, "loss": 17.75, "step": 7084 }, { "epoch": 0.47054526134024044, "grad_norm": 201.9667205810547, "learning_rate": 1.787053355577195e-06, "loss": 18.2344, "step": 7085 }, { "epoch": 0.47061167563259615, "grad_norm": 181.83763122558594, "learning_rate": 1.7869870038441077e-06, "loss": 17.2656, "step": 7086 }, { "epoch": 0.47067808992495186, "grad_norm": 907.183349609375, "learning_rate": 1.786920643007609e-06, "loss": 17.0, "step": 7087 }, { "epoch": 0.47074450421730757, "grad_norm": 167.51075744628906, "learning_rate": 1.786854273068466e-06, "loss": 16.6875, "step": 7088 }, { "epoch": 0.4708109185096633, "grad_norm": 553.701171875, "learning_rate": 1.786787894027446e-06, "loss": 18.0312, "step": 7089 }, { "epoch": 0.470877332802019, "grad_norm": 751.292236328125, "learning_rate": 1.786721505885318e-06, "loss": 19.0312, "step": 7090 }, { "epoch": 0.4709437470943747, "grad_norm": 151.39366149902344, "learning_rate": 1.786655108642849e-06, "loss": 14.1719, "step": 7091 }, { "epoch": 0.4710101613867304, "grad_norm": 508.0611267089844, "learning_rate": 1.7865887023008073e-06, "loss": 21.9375, "step": 7092 }, { "epoch": 0.4710765756790861, "grad_norm": 260.64178466796875, "learning_rate": 1.786522286859961e-06, "loss": 26.625, "step": 7093 }, { "epoch": 0.47114298997144183, "grad_norm": 264.6623229980469, "learning_rate": 1.7864558623210786e-06, "loss": 26.4688, "step": 7094 }, { "epoch": 0.4712094042637976, "grad_norm": 432.4461364746094, "learning_rate": 1.7863894286849281e-06, "loss": 22.4062, "step": 7095 }, { "epoch": 0.4712758185561533, "grad_norm": 571.7866821289062, "learning_rate": 1.786322985952278e-06, "loss": 15.3438, "step": 7096 }, { "epoch": 0.471342232848509, "grad_norm": 245.63949584960938, "learning_rate": 1.7862565341238972e-06, "loss": 19.9688, "step": 7097 }, { "epoch": 0.4714086471408647, "grad_norm": 279.0260314941406, "learning_rate": 1.7861900732005544e-06, "loss": 17.1719, "step": 7098 }, { "epoch": 0.47147506143322043, "grad_norm": 422.8459777832031, "learning_rate": 1.786123603183018e-06, "loss": 20.4844, "step": 7099 }, { "epoch": 0.47154147572557614, "grad_norm": 213.62615966796875, "learning_rate": 1.786057124072057e-06, "loss": 19.4688, "step": 7100 }, { "epoch": 0.47160789001793185, "grad_norm": 365.40423583984375, "learning_rate": 1.7859906358684406e-06, "loss": 24.5625, "step": 7101 }, { "epoch": 0.47167430431028756, "grad_norm": 129.36900329589844, "learning_rate": 1.7859241385729378e-06, "loss": 19.0625, "step": 7102 }, { "epoch": 0.47174071860264327, "grad_norm": 217.748291015625, "learning_rate": 1.7858576321863178e-06, "loss": 17.4375, "step": 7103 }, { "epoch": 0.471807132894999, "grad_norm": 195.74075317382812, "learning_rate": 1.7857911167093495e-06, "loss": 17.9062, "step": 7104 }, { "epoch": 0.4718735471873547, "grad_norm": 109.23750305175781, "learning_rate": 1.7857245921428027e-06, "loss": 14.9375, "step": 7105 }, { "epoch": 0.47193996147971046, "grad_norm": 211.80453491210938, "learning_rate": 1.7856580584874474e-06, "loss": 14.5625, "step": 7106 }, { "epoch": 0.47200637577206617, "grad_norm": 181.29000854492188, "learning_rate": 1.7855915157440525e-06, "loss": 22.1719, "step": 7107 }, { "epoch": 0.4720727900644219, "grad_norm": 275.2141418457031, "learning_rate": 1.7855249639133877e-06, "loss": 26.1875, "step": 7108 }, { "epoch": 0.4721392043567776, "grad_norm": 217.91737365722656, "learning_rate": 1.7854584029962232e-06, "loss": 16.5469, "step": 7109 }, { "epoch": 0.4722056186491333, "grad_norm": 173.76612854003906, "learning_rate": 1.7853918329933286e-06, "loss": 15.8594, "step": 7110 }, { "epoch": 0.472272032941489, "grad_norm": 324.6644592285156, "learning_rate": 1.7853252539054745e-06, "loss": 14.5156, "step": 7111 }, { "epoch": 0.4723384472338447, "grad_norm": 221.83921813964844, "learning_rate": 1.7852586657334308e-06, "loss": 16.3438, "step": 7112 }, { "epoch": 0.4724048615262004, "grad_norm": 182.80010986328125, "learning_rate": 1.785192068477967e-06, "loss": 19.625, "step": 7113 }, { "epoch": 0.47247127581855614, "grad_norm": 499.93341064453125, "learning_rate": 1.7851254621398548e-06, "loss": 23.375, "step": 7114 }, { "epoch": 0.47253769011091185, "grad_norm": 161.0838165283203, "learning_rate": 1.7850588467198637e-06, "loss": 14.9531, "step": 7115 }, { "epoch": 0.4726041044032676, "grad_norm": 111.73112487792969, "learning_rate": 1.7849922222187645e-06, "loss": 15.375, "step": 7116 }, { "epoch": 0.4726705186956233, "grad_norm": 156.9224395751953, "learning_rate": 1.7849255886373283e-06, "loss": 19.3906, "step": 7117 }, { "epoch": 0.47273693298797903, "grad_norm": 197.64366149902344, "learning_rate": 1.784858945976325e-06, "loss": 19.2969, "step": 7118 }, { "epoch": 0.47280334728033474, "grad_norm": 133.61241149902344, "learning_rate": 1.7847922942365264e-06, "loss": 14.7969, "step": 7119 }, { "epoch": 0.47286976157269045, "grad_norm": 385.4378967285156, "learning_rate": 1.784725633418703e-06, "loss": 24.4062, "step": 7120 }, { "epoch": 0.47293617586504616, "grad_norm": 333.7594299316406, "learning_rate": 1.7846589635236258e-06, "loss": 16.125, "step": 7121 }, { "epoch": 0.47300259015740187, "grad_norm": 211.38916015625, "learning_rate": 1.7845922845520662e-06, "loss": 18.875, "step": 7122 }, { "epoch": 0.4730690044497576, "grad_norm": 163.7265167236328, "learning_rate": 1.7845255965047955e-06, "loss": 21.5938, "step": 7123 }, { "epoch": 0.4731354187421133, "grad_norm": 294.2230224609375, "learning_rate": 1.784458899382585e-06, "loss": 22.75, "step": 7124 }, { "epoch": 0.473201833034469, "grad_norm": 469.1732482910156, "learning_rate": 1.7843921931862064e-06, "loss": 21.7656, "step": 7125 }, { "epoch": 0.4732682473268247, "grad_norm": 307.99176025390625, "learning_rate": 1.7843254779164314e-06, "loss": 18.1875, "step": 7126 }, { "epoch": 0.4733346616191805, "grad_norm": 194.46849060058594, "learning_rate": 1.7842587535740313e-06, "loss": 17.6875, "step": 7127 }, { "epoch": 0.4734010759115362, "grad_norm": 153.6965789794922, "learning_rate": 1.7841920201597782e-06, "loss": 15.375, "step": 7128 }, { "epoch": 0.4734674902038919, "grad_norm": 146.2307891845703, "learning_rate": 1.784125277674444e-06, "loss": 19.3906, "step": 7129 }, { "epoch": 0.4735339044962476, "grad_norm": 190.61326599121094, "learning_rate": 1.7840585261188007e-06, "loss": 15.9375, "step": 7130 }, { "epoch": 0.4736003187886033, "grad_norm": 210.0861358642578, "learning_rate": 1.7839917654936208e-06, "loss": 17.9688, "step": 7131 }, { "epoch": 0.473666733080959, "grad_norm": 153.25381469726562, "learning_rate": 1.7839249957996759e-06, "loss": 14.4531, "step": 7132 }, { "epoch": 0.47373314737331473, "grad_norm": 142.81207275390625, "learning_rate": 1.7838582170377388e-06, "loss": 14.9219, "step": 7133 }, { "epoch": 0.47379956166567044, "grad_norm": 225.51092529296875, "learning_rate": 1.7837914292085816e-06, "loss": 19.3438, "step": 7134 }, { "epoch": 0.47386597595802615, "grad_norm": 218.4439239501953, "learning_rate": 1.7837246323129774e-06, "loss": 17.25, "step": 7135 }, { "epoch": 0.47393239025038186, "grad_norm": 286.2750244140625, "learning_rate": 1.7836578263516983e-06, "loss": 19.0938, "step": 7136 }, { "epoch": 0.4739988045427376, "grad_norm": 155.85142517089844, "learning_rate": 1.7835910113255178e-06, "loss": 15.7812, "step": 7137 }, { "epoch": 0.47406521883509334, "grad_norm": 223.3866424560547, "learning_rate": 1.783524187235208e-06, "loss": 18.2656, "step": 7138 }, { "epoch": 0.47413163312744905, "grad_norm": 347.537841796875, "learning_rate": 1.783457354081542e-06, "loss": 18.1562, "step": 7139 }, { "epoch": 0.47419804741980476, "grad_norm": 430.5411071777344, "learning_rate": 1.7833905118652936e-06, "loss": 19.6875, "step": 7140 }, { "epoch": 0.47426446171216047, "grad_norm": 232.62493896484375, "learning_rate": 1.7833236605872352e-06, "loss": 22.3438, "step": 7141 }, { "epoch": 0.4743308760045162, "grad_norm": 192.92388916015625, "learning_rate": 1.7832568002481406e-06, "loss": 20.5, "step": 7142 }, { "epoch": 0.4743972902968719, "grad_norm": 172.69595336914062, "learning_rate": 1.7831899308487827e-06, "loss": 15.125, "step": 7143 }, { "epoch": 0.4744637045892276, "grad_norm": 2660.00927734375, "learning_rate": 1.7831230523899355e-06, "loss": 16.8438, "step": 7144 }, { "epoch": 0.4745301188815833, "grad_norm": 100.2077407836914, "learning_rate": 1.7830561648723723e-06, "loss": 17.4531, "step": 7145 }, { "epoch": 0.474596533173939, "grad_norm": 275.6755676269531, "learning_rate": 1.782989268296867e-06, "loss": 19.9062, "step": 7146 }, { "epoch": 0.4746629474662947, "grad_norm": 133.74954223632812, "learning_rate": 1.7829223626641932e-06, "loss": 20.7812, "step": 7147 }, { "epoch": 0.47472936175865044, "grad_norm": 219.98623657226562, "learning_rate": 1.782855447975125e-06, "loss": 21.7969, "step": 7148 }, { "epoch": 0.4747957760510062, "grad_norm": 294.0550537109375, "learning_rate": 1.7827885242304364e-06, "loss": 18.8281, "step": 7149 }, { "epoch": 0.4748621903433619, "grad_norm": 725.6036987304688, "learning_rate": 1.7827215914309015e-06, "loss": 22.3906, "step": 7150 }, { "epoch": 0.4749286046357176, "grad_norm": 190.97842407226562, "learning_rate": 1.782654649577295e-06, "loss": 20.9375, "step": 7151 }, { "epoch": 0.47499501892807333, "grad_norm": 256.0082092285156, "learning_rate": 1.7825876986703903e-06, "loss": 18.3906, "step": 7152 }, { "epoch": 0.47506143322042904, "grad_norm": 168.6579132080078, "learning_rate": 1.7825207387109623e-06, "loss": 16.5938, "step": 7153 }, { "epoch": 0.47512784751278475, "grad_norm": 116.9045639038086, "learning_rate": 1.7824537696997862e-06, "loss": 18.8125, "step": 7154 }, { "epoch": 0.47519426180514046, "grad_norm": 197.40447998046875, "learning_rate": 1.7823867916376355e-06, "loss": 23.4688, "step": 7155 }, { "epoch": 0.47526067609749617, "grad_norm": 180.67247009277344, "learning_rate": 1.7823198045252861e-06, "loss": 21.3281, "step": 7156 }, { "epoch": 0.4753270903898519, "grad_norm": 266.0850830078125, "learning_rate": 1.7822528083635118e-06, "loss": 20.3281, "step": 7157 }, { "epoch": 0.4753935046822076, "grad_norm": 792.6607055664062, "learning_rate": 1.7821858031530886e-06, "loss": 18.6875, "step": 7158 }, { "epoch": 0.4754599189745633, "grad_norm": 132.39068603515625, "learning_rate": 1.7821187888947904e-06, "loss": 18.9062, "step": 7159 }, { "epoch": 0.47552633326691907, "grad_norm": 294.007080078125, "learning_rate": 1.7820517655893936e-06, "loss": 19.4219, "step": 7160 }, { "epoch": 0.4755927475592748, "grad_norm": 154.79115295410156, "learning_rate": 1.7819847332376728e-06, "loss": 19.1094, "step": 7161 }, { "epoch": 0.4756591618516305, "grad_norm": 127.86709594726562, "learning_rate": 1.7819176918404034e-06, "loss": 13.7812, "step": 7162 }, { "epoch": 0.4757255761439862, "grad_norm": 339.23199462890625, "learning_rate": 1.781850641398361e-06, "loss": 17.9531, "step": 7163 }, { "epoch": 0.4757919904363419, "grad_norm": 316.71673583984375, "learning_rate": 1.7817835819123213e-06, "loss": 19.4688, "step": 7164 }, { "epoch": 0.4758584047286976, "grad_norm": 159.83950805664062, "learning_rate": 1.7817165133830597e-06, "loss": 15.7812, "step": 7165 }, { "epoch": 0.4759248190210533, "grad_norm": 171.79698181152344, "learning_rate": 1.7816494358113528e-06, "loss": 19.6562, "step": 7166 }, { "epoch": 0.47599123331340903, "grad_norm": 527.1869506835938, "learning_rate": 1.7815823491979756e-06, "loss": 26.6562, "step": 7167 }, { "epoch": 0.47605764760576474, "grad_norm": 186.78176879882812, "learning_rate": 1.7815152535437043e-06, "loss": 12.5938, "step": 7168 }, { "epoch": 0.47612406189812045, "grad_norm": 389.2489013671875, "learning_rate": 1.7814481488493154e-06, "loss": 21.5469, "step": 7169 }, { "epoch": 0.47619047619047616, "grad_norm": 330.6747741699219, "learning_rate": 1.781381035115585e-06, "loss": 16.1406, "step": 7170 }, { "epoch": 0.47625689048283193, "grad_norm": 143.32852172851562, "learning_rate": 1.7813139123432892e-06, "loss": 21.2344, "step": 7171 }, { "epoch": 0.47632330477518764, "grad_norm": 305.4170837402344, "learning_rate": 1.7812467805332046e-06, "loss": 22.7656, "step": 7172 }, { "epoch": 0.47638971906754335, "grad_norm": 624.7872314453125, "learning_rate": 1.7811796396861078e-06, "loss": 24.4531, "step": 7173 }, { "epoch": 0.47645613335989906, "grad_norm": 373.3346252441406, "learning_rate": 1.7811124898027753e-06, "loss": 21.4219, "step": 7174 }, { "epoch": 0.47652254765225477, "grad_norm": 270.7396240234375, "learning_rate": 1.7810453308839842e-06, "loss": 18.0469, "step": 7175 }, { "epoch": 0.4765889619446105, "grad_norm": 325.5307922363281, "learning_rate": 1.780978162930511e-06, "loss": 25.25, "step": 7176 }, { "epoch": 0.4766553762369662, "grad_norm": 283.9944763183594, "learning_rate": 1.7809109859431323e-06, "loss": 19.25, "step": 7177 }, { "epoch": 0.4767217905293219, "grad_norm": 257.65325927734375, "learning_rate": 1.7808437999226263e-06, "loss": 18.7344, "step": 7178 }, { "epoch": 0.4767882048216776, "grad_norm": 199.8212890625, "learning_rate": 1.780776604869769e-06, "loss": 19.8438, "step": 7179 }, { "epoch": 0.4768546191140333, "grad_norm": 179.6190185546875, "learning_rate": 1.7807094007853382e-06, "loss": 23.1562, "step": 7180 }, { "epoch": 0.476921033406389, "grad_norm": 566.3076171875, "learning_rate": 1.7806421876701115e-06, "loss": 17.9531, "step": 7181 }, { "epoch": 0.4769874476987448, "grad_norm": 284.81292724609375, "learning_rate": 1.780574965524866e-06, "loss": 13.6875, "step": 7182 }, { "epoch": 0.4770538619911005, "grad_norm": 335.7895812988281, "learning_rate": 1.7805077343503792e-06, "loss": 19.4062, "step": 7183 }, { "epoch": 0.4771202762834562, "grad_norm": 149.4384307861328, "learning_rate": 1.7804404941474291e-06, "loss": 18.0469, "step": 7184 }, { "epoch": 0.4771866905758119, "grad_norm": 162.23086547851562, "learning_rate": 1.7803732449167934e-06, "loss": 21.7969, "step": 7185 }, { "epoch": 0.47725310486816763, "grad_norm": 415.53497314453125, "learning_rate": 1.78030598665925e-06, "loss": 17.6875, "step": 7186 }, { "epoch": 0.47731951916052334, "grad_norm": 351.5398254394531, "learning_rate": 1.780238719375577e-06, "loss": 18.5625, "step": 7187 }, { "epoch": 0.47738593345287905, "grad_norm": 156.41595458984375, "learning_rate": 1.7801714430665524e-06, "loss": 22.5938, "step": 7188 }, { "epoch": 0.47745234774523476, "grad_norm": 248.4420623779297, "learning_rate": 1.7801041577329544e-06, "loss": 29.2344, "step": 7189 }, { "epoch": 0.47751876203759047, "grad_norm": 166.4094696044922, "learning_rate": 1.7800368633755612e-06, "loss": 19.4688, "step": 7190 }, { "epoch": 0.4775851763299462, "grad_norm": 284.2726745605469, "learning_rate": 1.7799695599951514e-06, "loss": 18.6719, "step": 7191 }, { "epoch": 0.47765159062230195, "grad_norm": 140.13717651367188, "learning_rate": 1.7799022475925037e-06, "loss": 17.6562, "step": 7192 }, { "epoch": 0.47771800491465766, "grad_norm": 239.516357421875, "learning_rate": 1.7798349261683962e-06, "loss": 15.3125, "step": 7193 }, { "epoch": 0.47778441920701337, "grad_norm": 2652.407470703125, "learning_rate": 1.779767595723608e-06, "loss": 17.2422, "step": 7194 }, { "epoch": 0.4778508334993691, "grad_norm": 203.6732940673828, "learning_rate": 1.779700256258918e-06, "loss": 13.7969, "step": 7195 }, { "epoch": 0.4779172477917248, "grad_norm": 168.61114501953125, "learning_rate": 1.7796329077751052e-06, "loss": 15.8125, "step": 7196 }, { "epoch": 0.4779836620840805, "grad_norm": 180.40260314941406, "learning_rate": 1.7795655502729484e-06, "loss": 17.2969, "step": 7197 }, { "epoch": 0.4780500763764362, "grad_norm": 105.36137390136719, "learning_rate": 1.7794981837532265e-06, "loss": 17.7812, "step": 7198 }, { "epoch": 0.4781164906687919, "grad_norm": 537.8704833984375, "learning_rate": 1.7794308082167193e-06, "loss": 22.0938, "step": 7199 }, { "epoch": 0.4781829049611476, "grad_norm": 254.81431579589844, "learning_rate": 1.779363423664206e-06, "loss": 25.1562, "step": 7200 }, { "epoch": 0.47824931925350334, "grad_norm": 181.06895446777344, "learning_rate": 1.779296030096466e-06, "loss": 20.8281, "step": 7201 }, { "epoch": 0.47831573354585905, "grad_norm": 415.96466064453125, "learning_rate": 1.7792286275142788e-06, "loss": 22.5312, "step": 7202 }, { "epoch": 0.4783821478382148, "grad_norm": 271.318115234375, "learning_rate": 1.7791612159184246e-06, "loss": 12.7969, "step": 7203 }, { "epoch": 0.4784485621305705, "grad_norm": 255.12539672851562, "learning_rate": 1.7790937953096823e-06, "loss": 24.5625, "step": 7204 }, { "epoch": 0.47851497642292623, "grad_norm": 121.89238739013672, "learning_rate": 1.7790263656888325e-06, "loss": 14.8438, "step": 7205 }, { "epoch": 0.47858139071528194, "grad_norm": 456.8875427246094, "learning_rate": 1.7789589270566547e-06, "loss": 22.4062, "step": 7206 }, { "epoch": 0.47864780500763765, "grad_norm": 249.86500549316406, "learning_rate": 1.778891479413929e-06, "loss": 21.2656, "step": 7207 }, { "epoch": 0.47871421929999336, "grad_norm": 255.76058959960938, "learning_rate": 1.7788240227614364e-06, "loss": 12.25, "step": 7208 }, { "epoch": 0.47878063359234907, "grad_norm": 196.15640258789062, "learning_rate": 1.7787565570999561e-06, "loss": 20.1562, "step": 7209 }, { "epoch": 0.4788470478847048, "grad_norm": 216.4442138671875, "learning_rate": 1.7786890824302692e-06, "loss": 17.9688, "step": 7210 }, { "epoch": 0.4789134621770605, "grad_norm": 577.4068603515625, "learning_rate": 1.778621598753156e-06, "loss": 17.4844, "step": 7211 }, { "epoch": 0.4789798764694162, "grad_norm": 230.4617919921875, "learning_rate": 1.7785541060693972e-06, "loss": 19.4219, "step": 7212 }, { "epoch": 0.4790462907617719, "grad_norm": 123.05430603027344, "learning_rate": 1.7784866043797733e-06, "loss": 15.9219, "step": 7213 }, { "epoch": 0.4791127050541277, "grad_norm": 413.5591735839844, "learning_rate": 1.7784190936850653e-06, "loss": 23.4688, "step": 7214 }, { "epoch": 0.4791791193464834, "grad_norm": 473.8348388671875, "learning_rate": 1.7783515739860541e-06, "loss": 22.7969, "step": 7215 }, { "epoch": 0.4792455336388391, "grad_norm": 240.60885620117188, "learning_rate": 1.7782840452835206e-06, "loss": 28.4688, "step": 7216 }, { "epoch": 0.4793119479311948, "grad_norm": 381.4591369628906, "learning_rate": 1.7782165075782466e-06, "loss": 25.0312, "step": 7217 }, { "epoch": 0.4793783622235505, "grad_norm": 179.8711395263672, "learning_rate": 1.7781489608710121e-06, "loss": 14.9219, "step": 7218 }, { "epoch": 0.4794447765159062, "grad_norm": 133.49974060058594, "learning_rate": 1.7780814051625994e-06, "loss": 13.8594, "step": 7219 }, { "epoch": 0.47951119080826193, "grad_norm": 210.6067352294922, "learning_rate": 1.7780138404537896e-06, "loss": 17.0, "step": 7220 }, { "epoch": 0.47957760510061764, "grad_norm": 276.8589782714844, "learning_rate": 1.7779462667453645e-06, "loss": 15.3594, "step": 7221 }, { "epoch": 0.47964401939297335, "grad_norm": 693.9723510742188, "learning_rate": 1.7778786840381054e-06, "loss": 22.6094, "step": 7222 }, { "epoch": 0.47971043368532906, "grad_norm": 160.47549438476562, "learning_rate": 1.7778110923327941e-06, "loss": 17.9062, "step": 7223 }, { "epoch": 0.4797768479776848, "grad_norm": 238.54013061523438, "learning_rate": 1.7777434916302129e-06, "loss": 21.6875, "step": 7224 }, { "epoch": 0.47984326227004054, "grad_norm": 273.75555419921875, "learning_rate": 1.7776758819311433e-06, "loss": 18.0, "step": 7225 }, { "epoch": 0.47990967656239625, "grad_norm": 168.98651123046875, "learning_rate": 1.7776082632363677e-06, "loss": 18.75, "step": 7226 }, { "epoch": 0.47997609085475196, "grad_norm": 170.1465301513672, "learning_rate": 1.7775406355466677e-06, "loss": 21.0156, "step": 7227 }, { "epoch": 0.48004250514710767, "grad_norm": 562.8358154296875, "learning_rate": 1.7774729988628265e-06, "loss": 23.5, "step": 7228 }, { "epoch": 0.4801089194394634, "grad_norm": 99.58377075195312, "learning_rate": 1.7774053531856257e-06, "loss": 16.2031, "step": 7229 }, { "epoch": 0.4801753337318191, "grad_norm": 189.40997314453125, "learning_rate": 1.777337698515848e-06, "loss": 17.9688, "step": 7230 }, { "epoch": 0.4802417480241748, "grad_norm": 276.605712890625, "learning_rate": 1.7772700348542762e-06, "loss": 19.5781, "step": 7231 }, { "epoch": 0.4803081623165305, "grad_norm": 290.41644287109375, "learning_rate": 1.7772023622016927e-06, "loss": 21.5625, "step": 7232 }, { "epoch": 0.4803745766088862, "grad_norm": 178.82936096191406, "learning_rate": 1.7771346805588805e-06, "loss": 17.7812, "step": 7233 }, { "epoch": 0.4804409909012419, "grad_norm": 184.81251525878906, "learning_rate": 1.7770669899266224e-06, "loss": 23.3125, "step": 7234 }, { "epoch": 0.48050740519359764, "grad_norm": 261.7384033203125, "learning_rate": 1.7769992903057017e-06, "loss": 24.625, "step": 7235 }, { "epoch": 0.4805738194859534, "grad_norm": 144.69189453125, "learning_rate": 1.7769315816969008e-06, "loss": 18.4219, "step": 7236 }, { "epoch": 0.4806402337783091, "grad_norm": 269.8171081542969, "learning_rate": 1.7768638641010037e-06, "loss": 19.2188, "step": 7237 }, { "epoch": 0.4807066480706648, "grad_norm": 233.21238708496094, "learning_rate": 1.7767961375187932e-06, "loss": 21.5, "step": 7238 }, { "epoch": 0.48077306236302053, "grad_norm": 341.3163757324219, "learning_rate": 1.7767284019510531e-06, "loss": 22.0625, "step": 7239 }, { "epoch": 0.48083947665537624, "grad_norm": 123.52738952636719, "learning_rate": 1.7766606573985667e-06, "loss": 17.1719, "step": 7240 }, { "epoch": 0.48090589094773195, "grad_norm": 181.21339416503906, "learning_rate": 1.7765929038621174e-06, "loss": 14.625, "step": 7241 }, { "epoch": 0.48097230524008766, "grad_norm": 161.66932678222656, "learning_rate": 1.7765251413424896e-06, "loss": 21.0312, "step": 7242 }, { "epoch": 0.48103871953244337, "grad_norm": 172.94203186035156, "learning_rate": 1.7764573698404664e-06, "loss": 13.7031, "step": 7243 }, { "epoch": 0.4811051338247991, "grad_norm": 94.0063247680664, "learning_rate": 1.7763895893568322e-06, "loss": 14.8594, "step": 7244 }, { "epoch": 0.4811715481171548, "grad_norm": 274.9347229003906, "learning_rate": 1.7763217998923708e-06, "loss": 19.0156, "step": 7245 }, { "epoch": 0.4812379624095105, "grad_norm": 408.43194580078125, "learning_rate": 1.7762540014478667e-06, "loss": 30.7188, "step": 7246 }, { "epoch": 0.48130437670186627, "grad_norm": 299.3272705078125, "learning_rate": 1.776186194024104e-06, "loss": 25.0, "step": 7247 }, { "epoch": 0.481370790994222, "grad_norm": 161.6673126220703, "learning_rate": 1.7761183776218665e-06, "loss": 18.625, "step": 7248 }, { "epoch": 0.4814372052865777, "grad_norm": 339.652099609375, "learning_rate": 1.7760505522419395e-06, "loss": 29.9531, "step": 7249 }, { "epoch": 0.4815036195789334, "grad_norm": 238.41305541992188, "learning_rate": 1.7759827178851072e-06, "loss": 18.4375, "step": 7250 }, { "epoch": 0.4815700338712891, "grad_norm": 194.17767333984375, "learning_rate": 1.775914874552154e-06, "loss": 20.6406, "step": 7251 }, { "epoch": 0.4816364481636448, "grad_norm": 322.8678894042969, "learning_rate": 1.7758470222438653e-06, "loss": 17.1719, "step": 7252 }, { "epoch": 0.4817028624560005, "grad_norm": 579.3181762695312, "learning_rate": 1.7757791609610256e-06, "loss": 16.2656, "step": 7253 }, { "epoch": 0.48176927674835623, "grad_norm": 163.1080322265625, "learning_rate": 1.7757112907044198e-06, "loss": 15.0781, "step": 7254 }, { "epoch": 0.48183569104071194, "grad_norm": 218.81504821777344, "learning_rate": 1.7756434114748331e-06, "loss": 27.9375, "step": 7255 }, { "epoch": 0.48190210533306765, "grad_norm": 405.20928955078125, "learning_rate": 1.7755755232730504e-06, "loss": 23.0156, "step": 7256 }, { "epoch": 0.48196851962542336, "grad_norm": 203.6715087890625, "learning_rate": 1.7755076260998579e-06, "loss": 19.9688, "step": 7257 }, { "epoch": 0.48203493391777913, "grad_norm": 196.7897186279297, "learning_rate": 1.7754397199560401e-06, "loss": 15.1875, "step": 7258 }, { "epoch": 0.48210134821013484, "grad_norm": 134.3408203125, "learning_rate": 1.7753718048423825e-06, "loss": 11.0469, "step": 7259 }, { "epoch": 0.48216776250249055, "grad_norm": 377.0549011230469, "learning_rate": 1.7753038807596709e-06, "loss": 23.5312, "step": 7260 }, { "epoch": 0.48223417679484626, "grad_norm": 291.9601745605469, "learning_rate": 1.7752359477086913e-06, "loss": 16.9531, "step": 7261 }, { "epoch": 0.48230059108720197, "grad_norm": 107.15953826904297, "learning_rate": 1.7751680056902294e-06, "loss": 14.3281, "step": 7262 }, { "epoch": 0.4823670053795577, "grad_norm": 140.33189392089844, "learning_rate": 1.775100054705071e-06, "loss": 21.0156, "step": 7263 }, { "epoch": 0.4824334196719134, "grad_norm": 333.6523742675781, "learning_rate": 1.7750320947540017e-06, "loss": 28.875, "step": 7264 }, { "epoch": 0.4824998339642691, "grad_norm": 359.9781799316406, "learning_rate": 1.7749641258378087e-06, "loss": 17.375, "step": 7265 }, { "epoch": 0.4825662482566248, "grad_norm": 160.24009704589844, "learning_rate": 1.774896147957277e-06, "loss": 21.6719, "step": 7266 }, { "epoch": 0.4826326625489805, "grad_norm": 231.6424102783203, "learning_rate": 1.7748281611131936e-06, "loss": 19.0938, "step": 7267 }, { "epoch": 0.4826990768413363, "grad_norm": 147.7456817626953, "learning_rate": 1.774760165306345e-06, "loss": 17.875, "step": 7268 }, { "epoch": 0.482765491133692, "grad_norm": 200.8044891357422, "learning_rate": 1.7746921605375177e-06, "loss": 19.7188, "step": 7269 }, { "epoch": 0.4828319054260477, "grad_norm": 171.34788513183594, "learning_rate": 1.7746241468074977e-06, "loss": 15.0938, "step": 7270 }, { "epoch": 0.4828983197184034, "grad_norm": 219.88812255859375, "learning_rate": 1.7745561241170727e-06, "loss": 18.6719, "step": 7271 }, { "epoch": 0.4829647340107591, "grad_norm": 134.0154266357422, "learning_rate": 1.7744880924670289e-06, "loss": 16.4844, "step": 7272 }, { "epoch": 0.48303114830311483, "grad_norm": 225.49786376953125, "learning_rate": 1.7744200518581533e-06, "loss": 20.4219, "step": 7273 }, { "epoch": 0.48309756259547054, "grad_norm": 114.56298065185547, "learning_rate": 1.7743520022912335e-06, "loss": 14.9219, "step": 7274 }, { "epoch": 0.48316397688782625, "grad_norm": 320.8777160644531, "learning_rate": 1.774283943767056e-06, "loss": 20.9844, "step": 7275 }, { "epoch": 0.48323039118018196, "grad_norm": 273.1084899902344, "learning_rate": 1.7742158762864082e-06, "loss": 19.0156, "step": 7276 }, { "epoch": 0.48329680547253767, "grad_norm": 153.22647094726562, "learning_rate": 1.7741477998500778e-06, "loss": 14.2969, "step": 7277 }, { "epoch": 0.4833632197648934, "grad_norm": 137.221923828125, "learning_rate": 1.774079714458852e-06, "loss": 16.9688, "step": 7278 }, { "epoch": 0.48342963405724915, "grad_norm": 159.8457489013672, "learning_rate": 1.7740116201135182e-06, "loss": 14.3281, "step": 7279 }, { "epoch": 0.48349604834960486, "grad_norm": 197.47061157226562, "learning_rate": 1.7739435168148647e-06, "loss": 20.6094, "step": 7280 }, { "epoch": 0.48356246264196057, "grad_norm": 209.3958740234375, "learning_rate": 1.7738754045636786e-06, "loss": 18.7188, "step": 7281 }, { "epoch": 0.4836288769343163, "grad_norm": 154.52125549316406, "learning_rate": 1.773807283360748e-06, "loss": 19.6562, "step": 7282 }, { "epoch": 0.483695291226672, "grad_norm": 444.6119384765625, "learning_rate": 1.773739153206861e-06, "loss": 23.9688, "step": 7283 }, { "epoch": 0.4837617055190277, "grad_norm": 293.5155029296875, "learning_rate": 1.7736710141028058e-06, "loss": 21.25, "step": 7284 }, { "epoch": 0.4838281198113834, "grad_norm": 158.01132202148438, "learning_rate": 1.7736028660493702e-06, "loss": 14.7344, "step": 7285 }, { "epoch": 0.4838945341037391, "grad_norm": 272.146240234375, "learning_rate": 1.773534709047343e-06, "loss": 16.2812, "step": 7286 }, { "epoch": 0.4839609483960948, "grad_norm": 180.0962371826172, "learning_rate": 1.7734665430975123e-06, "loss": 16.8438, "step": 7287 }, { "epoch": 0.48402736268845054, "grad_norm": 236.59571838378906, "learning_rate": 1.7733983682006666e-06, "loss": 18.1875, "step": 7288 }, { "epoch": 0.48409377698080625, "grad_norm": 275.4654235839844, "learning_rate": 1.7733301843575943e-06, "loss": 16.9844, "step": 7289 }, { "epoch": 0.484160191273162, "grad_norm": 254.9404754638672, "learning_rate": 1.7732619915690847e-06, "loss": 24.75, "step": 7290 }, { "epoch": 0.4842266055655177, "grad_norm": 249.98114013671875, "learning_rate": 1.7731937898359258e-06, "loss": 17.1719, "step": 7291 }, { "epoch": 0.48429301985787343, "grad_norm": 162.64346313476562, "learning_rate": 1.7731255791589076e-06, "loss": 15.9375, "step": 7292 }, { "epoch": 0.48435943415022914, "grad_norm": 145.73072814941406, "learning_rate": 1.773057359538818e-06, "loss": 12.6406, "step": 7293 }, { "epoch": 0.48442584844258485, "grad_norm": 448.6411437988281, "learning_rate": 1.7729891309764468e-06, "loss": 12.75, "step": 7294 }, { "epoch": 0.48449226273494056, "grad_norm": 192.3960418701172, "learning_rate": 1.7729208934725835e-06, "loss": 18.9375, "step": 7295 }, { "epoch": 0.48455867702729627, "grad_norm": 369.63690185546875, "learning_rate": 1.7728526470280163e-06, "loss": 17.0781, "step": 7296 }, { "epoch": 0.484625091319652, "grad_norm": 203.0376739501953, "learning_rate": 1.7727843916435356e-06, "loss": 28.2188, "step": 7297 }, { "epoch": 0.4846915056120077, "grad_norm": 299.7751770019531, "learning_rate": 1.7727161273199306e-06, "loss": 24.0938, "step": 7298 }, { "epoch": 0.4847579199043634, "grad_norm": 206.81752014160156, "learning_rate": 1.7726478540579912e-06, "loss": 22.5625, "step": 7299 }, { "epoch": 0.4848243341967191, "grad_norm": 309.1612243652344, "learning_rate": 1.772579571858507e-06, "loss": 19.8438, "step": 7300 }, { "epoch": 0.4848907484890749, "grad_norm": 174.66329956054688, "learning_rate": 1.7725112807222678e-06, "loss": 16.8125, "step": 7301 }, { "epoch": 0.4849571627814306, "grad_norm": 218.92636108398438, "learning_rate": 1.772442980650063e-06, "loss": 16.375, "step": 7302 }, { "epoch": 0.4850235770737863, "grad_norm": 332.86932373046875, "learning_rate": 1.772374671642684e-06, "loss": 20.9844, "step": 7303 }, { "epoch": 0.485089991366142, "grad_norm": 215.83538818359375, "learning_rate": 1.7723063537009198e-06, "loss": 21.2812, "step": 7304 }, { "epoch": 0.4851564056584977, "grad_norm": 102.1091079711914, "learning_rate": 1.772238026825561e-06, "loss": 17.75, "step": 7305 }, { "epoch": 0.4852228199508534, "grad_norm": 282.40069580078125, "learning_rate": 1.7721696910173983e-06, "loss": 25.7188, "step": 7306 }, { "epoch": 0.48528923424320913, "grad_norm": 161.5570526123047, "learning_rate": 1.7721013462772213e-06, "loss": 15.625, "step": 7307 }, { "epoch": 0.48535564853556484, "grad_norm": 171.45126342773438, "learning_rate": 1.7720329926058216e-06, "loss": 18.3281, "step": 7308 }, { "epoch": 0.48542206282792055, "grad_norm": 255.64788818359375, "learning_rate": 1.7719646300039895e-06, "loss": 16.125, "step": 7309 }, { "epoch": 0.48548847712027626, "grad_norm": 426.8081970214844, "learning_rate": 1.7718962584725153e-06, "loss": 18.8125, "step": 7310 }, { "epoch": 0.485554891412632, "grad_norm": 141.08395385742188, "learning_rate": 1.7718278780121905e-06, "loss": 17.9219, "step": 7311 }, { "epoch": 0.48562130570498774, "grad_norm": 82.3072280883789, "learning_rate": 1.771759488623806e-06, "loss": 14.9062, "step": 7312 }, { "epoch": 0.48568771999734345, "grad_norm": 397.3146667480469, "learning_rate": 1.7716910903081525e-06, "loss": 19.7656, "step": 7313 }, { "epoch": 0.48575413428969916, "grad_norm": 260.4026184082031, "learning_rate": 1.7716226830660217e-06, "loss": 14.3906, "step": 7314 }, { "epoch": 0.48582054858205487, "grad_norm": 139.05511474609375, "learning_rate": 1.7715542668982043e-06, "loss": 17.5312, "step": 7315 }, { "epoch": 0.4858869628744106, "grad_norm": 174.5233612060547, "learning_rate": 1.7714858418054922e-06, "loss": 18.75, "step": 7316 }, { "epoch": 0.4859533771667663, "grad_norm": 139.14776611328125, "learning_rate": 1.771417407788677e-06, "loss": 17.1875, "step": 7317 }, { "epoch": 0.486019791459122, "grad_norm": 361.6206970214844, "learning_rate": 1.77134896484855e-06, "loss": 27.2812, "step": 7318 }, { "epoch": 0.4860862057514777, "grad_norm": 126.42032623291016, "learning_rate": 1.7712805129859024e-06, "loss": 17.5469, "step": 7319 }, { "epoch": 0.4861526200438334, "grad_norm": 384.365478515625, "learning_rate": 1.7712120522015273e-06, "loss": 13.0, "step": 7320 }, { "epoch": 0.4862190343361891, "grad_norm": 312.4836730957031, "learning_rate": 1.7711435824962156e-06, "loss": 21.7344, "step": 7321 }, { "epoch": 0.48628544862854484, "grad_norm": 205.53848266601562, "learning_rate": 1.7710751038707597e-06, "loss": 20.0, "step": 7322 }, { "epoch": 0.4863518629209006, "grad_norm": 260.7569885253906, "learning_rate": 1.7710066163259513e-06, "loss": 15.0156, "step": 7323 }, { "epoch": 0.4864182772132563, "grad_norm": 315.5211486816406, "learning_rate": 1.7709381198625832e-06, "loss": 21.8125, "step": 7324 }, { "epoch": 0.486484691505612, "grad_norm": 744.0975341796875, "learning_rate": 1.7708696144814475e-06, "loss": 19.8438, "step": 7325 }, { "epoch": 0.48655110579796773, "grad_norm": 207.59637451171875, "learning_rate": 1.7708011001833365e-06, "loss": 20.0469, "step": 7326 }, { "epoch": 0.48661752009032344, "grad_norm": 283.1785888671875, "learning_rate": 1.7707325769690428e-06, "loss": 19.0312, "step": 7327 }, { "epoch": 0.48668393438267915, "grad_norm": 163.47140502929688, "learning_rate": 1.7706640448393593e-06, "loss": 16.9375, "step": 7328 }, { "epoch": 0.48675034867503486, "grad_norm": 162.03668212890625, "learning_rate": 1.7705955037950784e-06, "loss": 19.3281, "step": 7329 }, { "epoch": 0.48681676296739057, "grad_norm": 350.2209777832031, "learning_rate": 1.7705269538369931e-06, "loss": 24.0156, "step": 7330 }, { "epoch": 0.4868831772597463, "grad_norm": 829.9994506835938, "learning_rate": 1.7704583949658963e-06, "loss": 15.875, "step": 7331 }, { "epoch": 0.486949591552102, "grad_norm": 303.99761962890625, "learning_rate": 1.7703898271825808e-06, "loss": 19.5625, "step": 7332 }, { "epoch": 0.4870160058444577, "grad_norm": 174.5199432373047, "learning_rate": 1.7703212504878402e-06, "loss": 16.3594, "step": 7333 }, { "epoch": 0.48708242013681347, "grad_norm": 801.0604248046875, "learning_rate": 1.7702526648824678e-06, "loss": 27.2969, "step": 7334 }, { "epoch": 0.4871488344291692, "grad_norm": 197.13121032714844, "learning_rate": 1.7701840703672564e-06, "loss": 21.4375, "step": 7335 }, { "epoch": 0.4872152487215249, "grad_norm": 167.87847900390625, "learning_rate": 1.7701154669429999e-06, "loss": 20.2188, "step": 7336 }, { "epoch": 0.4872816630138806, "grad_norm": 184.6754150390625, "learning_rate": 1.770046854610492e-06, "loss": 22.8594, "step": 7337 }, { "epoch": 0.4873480773062363, "grad_norm": 214.3009033203125, "learning_rate": 1.7699782333705256e-06, "loss": 21.4219, "step": 7338 }, { "epoch": 0.487414491598592, "grad_norm": 219.3026123046875, "learning_rate": 1.7699096032238954e-06, "loss": 24.625, "step": 7339 }, { "epoch": 0.4874809058909477, "grad_norm": 229.99842834472656, "learning_rate": 1.7698409641713947e-06, "loss": 24.5625, "step": 7340 }, { "epoch": 0.48754732018330343, "grad_norm": 132.02615356445312, "learning_rate": 1.7697723162138175e-06, "loss": 15.9531, "step": 7341 }, { "epoch": 0.48761373447565914, "grad_norm": 137.37075805664062, "learning_rate": 1.7697036593519585e-06, "loss": 19.6562, "step": 7342 }, { "epoch": 0.48768014876801485, "grad_norm": 315.3000793457031, "learning_rate": 1.7696349935866111e-06, "loss": 13.7656, "step": 7343 }, { "epoch": 0.4877465630603706, "grad_norm": 231.03390502929688, "learning_rate": 1.76956631891857e-06, "loss": 24.875, "step": 7344 }, { "epoch": 0.48781297735272633, "grad_norm": 227.87974548339844, "learning_rate": 1.769497635348629e-06, "loss": 29.0781, "step": 7345 }, { "epoch": 0.48787939164508204, "grad_norm": 205.7100067138672, "learning_rate": 1.7694289428775834e-06, "loss": 16.0469, "step": 7346 }, { "epoch": 0.48794580593743775, "grad_norm": 448.70086669921875, "learning_rate": 1.7693602415062276e-06, "loss": 22.1406, "step": 7347 }, { "epoch": 0.48801222022979346, "grad_norm": 162.24539184570312, "learning_rate": 1.7692915312353562e-06, "loss": 20.7656, "step": 7348 }, { "epoch": 0.48807863452214917, "grad_norm": 202.3382568359375, "learning_rate": 1.769222812065764e-06, "loss": 18.4375, "step": 7349 }, { "epoch": 0.4881450488145049, "grad_norm": 235.14682006835938, "learning_rate": 1.7691540839982456e-06, "loss": 19.125, "step": 7350 }, { "epoch": 0.4882114631068606, "grad_norm": 429.3651428222656, "learning_rate": 1.7690853470335965e-06, "loss": 17.7812, "step": 7351 }, { "epoch": 0.4882778773992163, "grad_norm": 232.1879425048828, "learning_rate": 1.7690166011726112e-06, "loss": 14.0625, "step": 7352 }, { "epoch": 0.488344291691572, "grad_norm": 364.9378662109375, "learning_rate": 1.768947846416086e-06, "loss": 17.5938, "step": 7353 }, { "epoch": 0.4884107059839277, "grad_norm": 223.02377319335938, "learning_rate": 1.7688790827648147e-06, "loss": 17.5781, "step": 7354 }, { "epoch": 0.4884771202762835, "grad_norm": 193.72103881835938, "learning_rate": 1.7688103102195942e-06, "loss": 14.7812, "step": 7355 }, { "epoch": 0.4885435345686392, "grad_norm": 152.9416961669922, "learning_rate": 1.768741528781219e-06, "loss": 15.9844, "step": 7356 }, { "epoch": 0.4886099488609949, "grad_norm": 264.60955810546875, "learning_rate": 1.768672738450485e-06, "loss": 23.3125, "step": 7357 }, { "epoch": 0.4886763631533506, "grad_norm": 317.11700439453125, "learning_rate": 1.7686039392281882e-06, "loss": 14.4531, "step": 7358 }, { "epoch": 0.4887427774457063, "grad_norm": 108.93936920166016, "learning_rate": 1.7685351311151244e-06, "loss": 12.1406, "step": 7359 }, { "epoch": 0.48880919173806203, "grad_norm": 352.7522888183594, "learning_rate": 1.768466314112089e-06, "loss": 16.4219, "step": 7360 }, { "epoch": 0.48887560603041774, "grad_norm": 140.6700439453125, "learning_rate": 1.7683974882198784e-06, "loss": 18.8438, "step": 7361 }, { "epoch": 0.48894202032277345, "grad_norm": 195.72239685058594, "learning_rate": 1.7683286534392892e-06, "loss": 19.5938, "step": 7362 }, { "epoch": 0.48900843461512916, "grad_norm": 272.4718017578125, "learning_rate": 1.7682598097711169e-06, "loss": 25.8438, "step": 7363 }, { "epoch": 0.48907484890748487, "grad_norm": 196.12257385253906, "learning_rate": 1.768190957216158e-06, "loss": 18.7188, "step": 7364 }, { "epoch": 0.4891412631998406, "grad_norm": 430.3874816894531, "learning_rate": 1.768122095775209e-06, "loss": 26.0469, "step": 7365 }, { "epoch": 0.48920767749219635, "grad_norm": 260.7723693847656, "learning_rate": 1.7680532254490667e-06, "loss": 28.2188, "step": 7366 }, { "epoch": 0.48927409178455206, "grad_norm": 472.107666015625, "learning_rate": 1.7679843462385277e-06, "loss": 22.5156, "step": 7367 }, { "epoch": 0.48934050607690777, "grad_norm": 577.1629638671875, "learning_rate": 1.7679154581443884e-06, "loss": 20.4844, "step": 7368 }, { "epoch": 0.4894069203692635, "grad_norm": 895.4645385742188, "learning_rate": 1.767846561167446e-06, "loss": 17.9219, "step": 7369 }, { "epoch": 0.4894733346616192, "grad_norm": 148.63890075683594, "learning_rate": 1.7677776553084972e-06, "loss": 18.1719, "step": 7370 }, { "epoch": 0.4895397489539749, "grad_norm": 263.5883483886719, "learning_rate": 1.7677087405683394e-06, "loss": 19.3281, "step": 7371 }, { "epoch": 0.4896061632463306, "grad_norm": 481.11334228515625, "learning_rate": 1.7676398169477698e-06, "loss": 26.5312, "step": 7372 }, { "epoch": 0.4896725775386863, "grad_norm": 197.13211059570312, "learning_rate": 1.767570884447585e-06, "loss": 21.0156, "step": 7373 }, { "epoch": 0.489738991831042, "grad_norm": 183.1324462890625, "learning_rate": 1.767501943068583e-06, "loss": 22.6562, "step": 7374 }, { "epoch": 0.48980540612339774, "grad_norm": 558.6284790039062, "learning_rate": 1.767432992811561e-06, "loss": 17.5, "step": 7375 }, { "epoch": 0.48987182041575345, "grad_norm": 479.0135498046875, "learning_rate": 1.7673640336773166e-06, "loss": 18.5156, "step": 7376 }, { "epoch": 0.4899382347081092, "grad_norm": 259.3951110839844, "learning_rate": 1.767295065666648e-06, "loss": 22.5625, "step": 7377 }, { "epoch": 0.4900046490004649, "grad_norm": 304.9668273925781, "learning_rate": 1.767226088780352e-06, "loss": 18.0, "step": 7378 }, { "epoch": 0.49007106329282063, "grad_norm": 202.28204345703125, "learning_rate": 1.7671571030192274e-06, "loss": 21.4219, "step": 7379 }, { "epoch": 0.49013747758517634, "grad_norm": 318.867431640625, "learning_rate": 1.7670881083840715e-06, "loss": 20.3438, "step": 7380 }, { "epoch": 0.49020389187753205, "grad_norm": 154.73043823242188, "learning_rate": 1.7670191048756826e-06, "loss": 19.5469, "step": 7381 }, { "epoch": 0.49027030616988776, "grad_norm": 385.28350830078125, "learning_rate": 1.7669500924948593e-06, "loss": 22.375, "step": 7382 }, { "epoch": 0.49033672046224347, "grad_norm": 287.66766357421875, "learning_rate": 1.7668810712423997e-06, "loss": 17.7656, "step": 7383 }, { "epoch": 0.4904031347545992, "grad_norm": 697.9901123046875, "learning_rate": 1.7668120411191016e-06, "loss": 26.2969, "step": 7384 }, { "epoch": 0.4904695490469549, "grad_norm": 407.1893615722656, "learning_rate": 1.7667430021257644e-06, "loss": 18.3438, "step": 7385 }, { "epoch": 0.4905359633393106, "grad_norm": 127.94656372070312, "learning_rate": 1.7666739542631862e-06, "loss": 14.1875, "step": 7386 }, { "epoch": 0.4906023776316663, "grad_norm": 115.99149322509766, "learning_rate": 1.7666048975321654e-06, "loss": 14.9219, "step": 7387 }, { "epoch": 0.4906687919240221, "grad_norm": 436.1665954589844, "learning_rate": 1.7665358319335015e-06, "loss": 17.6719, "step": 7388 }, { "epoch": 0.4907352062163778, "grad_norm": 477.9533386230469, "learning_rate": 1.766466757467993e-06, "loss": 21.0938, "step": 7389 }, { "epoch": 0.4908016205087335, "grad_norm": 321.58575439453125, "learning_rate": 1.7663976741364392e-06, "loss": 13.6562, "step": 7390 }, { "epoch": 0.4908680348010892, "grad_norm": 226.4971160888672, "learning_rate": 1.766328581939639e-06, "loss": 21.8594, "step": 7391 }, { "epoch": 0.4909344490934449, "grad_norm": 174.902587890625, "learning_rate": 1.7662594808783915e-06, "loss": 20.7188, "step": 7392 }, { "epoch": 0.4910008633858006, "grad_norm": 154.78773498535156, "learning_rate": 1.7661903709534962e-06, "loss": 19.3281, "step": 7393 }, { "epoch": 0.49106727767815633, "grad_norm": 321.65740966796875, "learning_rate": 1.7661212521657526e-06, "loss": 15.7188, "step": 7394 }, { "epoch": 0.49113369197051204, "grad_norm": 184.32635498046875, "learning_rate": 1.7660521245159603e-06, "loss": 17.625, "step": 7395 }, { "epoch": 0.49120010626286775, "grad_norm": 192.76150512695312, "learning_rate": 1.7659829880049183e-06, "loss": 14.2031, "step": 7396 }, { "epoch": 0.49126652055522346, "grad_norm": 143.31565856933594, "learning_rate": 1.7659138426334273e-06, "loss": 17.7344, "step": 7397 }, { "epoch": 0.4913329348475792, "grad_norm": 242.13861083984375, "learning_rate": 1.7658446884022862e-06, "loss": 18.4219, "step": 7398 }, { "epoch": 0.49139934913993494, "grad_norm": 432.8749694824219, "learning_rate": 1.7657755253122957e-06, "loss": 22.7969, "step": 7399 }, { "epoch": 0.49146576343229065, "grad_norm": 325.42572021484375, "learning_rate": 1.7657063533642552e-06, "loss": 16.4375, "step": 7400 }, { "epoch": 0.49153217772464636, "grad_norm": 196.71261596679688, "learning_rate": 1.7656371725589651e-06, "loss": 15.75, "step": 7401 }, { "epoch": 0.49159859201700207, "grad_norm": 199.83541870117188, "learning_rate": 1.765567982897226e-06, "loss": 21.5, "step": 7402 }, { "epoch": 0.4916650063093578, "grad_norm": 356.3998107910156, "learning_rate": 1.7654987843798379e-06, "loss": 19.9375, "step": 7403 }, { "epoch": 0.4917314206017135, "grad_norm": 112.98497009277344, "learning_rate": 1.765429577007601e-06, "loss": 16.4531, "step": 7404 }, { "epoch": 0.4917978348940692, "grad_norm": 507.1144104003906, "learning_rate": 1.7653603607813162e-06, "loss": 23.5, "step": 7405 }, { "epoch": 0.4918642491864249, "grad_norm": 124.16802215576172, "learning_rate": 1.7652911357017844e-06, "loss": 16.5156, "step": 7406 }, { "epoch": 0.4919306634787806, "grad_norm": 310.6821594238281, "learning_rate": 1.7652219017698056e-06, "loss": 24.9531, "step": 7407 }, { "epoch": 0.4919970777711363, "grad_norm": 173.21890258789062, "learning_rate": 1.7651526589861813e-06, "loss": 19.3125, "step": 7408 }, { "epoch": 0.49206349206349204, "grad_norm": 267.3455810546875, "learning_rate": 1.7650834073517121e-06, "loss": 20.125, "step": 7409 }, { "epoch": 0.4921299063558478, "grad_norm": 281.8037109375, "learning_rate": 1.7650141468671992e-06, "loss": 14.5, "step": 7410 }, { "epoch": 0.4921963206482035, "grad_norm": 124.28541564941406, "learning_rate": 1.7649448775334439e-06, "loss": 18.7031, "step": 7411 }, { "epoch": 0.4922627349405592, "grad_norm": 185.96383666992188, "learning_rate": 1.7648755993512473e-06, "loss": 20.9688, "step": 7412 }, { "epoch": 0.49232914923291493, "grad_norm": 861.1162109375, "learning_rate": 1.764806312321411e-06, "loss": 18.8281, "step": 7413 }, { "epoch": 0.49239556352527064, "grad_norm": 269.106689453125, "learning_rate": 1.7647370164447362e-06, "loss": 16.5938, "step": 7414 }, { "epoch": 0.49246197781762635, "grad_norm": 147.35748291015625, "learning_rate": 1.7646677117220245e-06, "loss": 16.6875, "step": 7415 }, { "epoch": 0.49252839210998206, "grad_norm": 259.894287109375, "learning_rate": 1.7645983981540778e-06, "loss": 22.2344, "step": 7416 }, { "epoch": 0.49259480640233777, "grad_norm": 143.7158966064453, "learning_rate": 1.7645290757416975e-06, "loss": 15.2812, "step": 7417 }, { "epoch": 0.4926612206946935, "grad_norm": 224.68545532226562, "learning_rate": 1.7644597444856858e-06, "loss": 26.0938, "step": 7418 }, { "epoch": 0.4927276349870492, "grad_norm": 420.8787536621094, "learning_rate": 1.7643904043868445e-06, "loss": 18.625, "step": 7419 }, { "epoch": 0.49279404927940496, "grad_norm": 95.89578247070312, "learning_rate": 1.764321055445976e-06, "loss": 15.1875, "step": 7420 }, { "epoch": 0.49286046357176067, "grad_norm": 242.64398193359375, "learning_rate": 1.7642516976638822e-06, "loss": 19.4219, "step": 7421 }, { "epoch": 0.4929268778641164, "grad_norm": 128.2557373046875, "learning_rate": 1.7641823310413652e-06, "loss": 11.625, "step": 7422 }, { "epoch": 0.4929932921564721, "grad_norm": 153.45034790039062, "learning_rate": 1.764112955579228e-06, "loss": 15.4688, "step": 7423 }, { "epoch": 0.4930597064488278, "grad_norm": 147.87026977539062, "learning_rate": 1.7640435712782727e-06, "loss": 14.5781, "step": 7424 }, { "epoch": 0.4931261207411835, "grad_norm": 175.43743896484375, "learning_rate": 1.7639741781393017e-06, "loss": 16.7812, "step": 7425 }, { "epoch": 0.4931925350335392, "grad_norm": 217.56883239746094, "learning_rate": 1.763904776163118e-06, "loss": 18.6875, "step": 7426 }, { "epoch": 0.4932589493258949, "grad_norm": 269.2115173339844, "learning_rate": 1.7638353653505245e-06, "loss": 22.4062, "step": 7427 }, { "epoch": 0.49332536361825063, "grad_norm": 236.1919708251953, "learning_rate": 1.7637659457023237e-06, "loss": 20.2812, "step": 7428 }, { "epoch": 0.49339177791060634, "grad_norm": 208.3337860107422, "learning_rate": 1.7636965172193192e-06, "loss": 16.625, "step": 7429 }, { "epoch": 0.49345819220296205, "grad_norm": 266.4848327636719, "learning_rate": 1.7636270799023135e-06, "loss": 18.875, "step": 7430 }, { "epoch": 0.4935246064953178, "grad_norm": 231.44374084472656, "learning_rate": 1.76355763375211e-06, "loss": 13.4062, "step": 7431 }, { "epoch": 0.49359102078767353, "grad_norm": 196.86746215820312, "learning_rate": 1.763488178769512e-06, "loss": 28.4062, "step": 7432 }, { "epoch": 0.49365743508002924, "grad_norm": 220.2508544921875, "learning_rate": 1.7634187149553234e-06, "loss": 23.75, "step": 7433 }, { "epoch": 0.49372384937238495, "grad_norm": 309.38201904296875, "learning_rate": 1.7633492423103474e-06, "loss": 16.3906, "step": 7434 }, { "epoch": 0.49379026366474066, "grad_norm": 186.28372192382812, "learning_rate": 1.763279760835387e-06, "loss": 14.5156, "step": 7435 }, { "epoch": 0.49385667795709637, "grad_norm": 518.0523681640625, "learning_rate": 1.763210270531247e-06, "loss": 25.4062, "step": 7436 }, { "epoch": 0.4939230922494521, "grad_norm": 270.34417724609375, "learning_rate": 1.7631407713987303e-06, "loss": 22.6875, "step": 7437 }, { "epoch": 0.4939895065418078, "grad_norm": 186.35145568847656, "learning_rate": 1.7630712634386413e-06, "loss": 20.75, "step": 7438 }, { "epoch": 0.4940559208341635, "grad_norm": 136.9712677001953, "learning_rate": 1.7630017466517839e-06, "loss": 20.4219, "step": 7439 }, { "epoch": 0.4941223351265192, "grad_norm": 247.0944061279297, "learning_rate": 1.7629322210389622e-06, "loss": 18.3438, "step": 7440 }, { "epoch": 0.4941887494188749, "grad_norm": 302.65283203125, "learning_rate": 1.7628626866009807e-06, "loss": 17.9375, "step": 7441 }, { "epoch": 0.4942551637112307, "grad_norm": 209.0771942138672, "learning_rate": 1.7627931433386437e-06, "loss": 17.7188, "step": 7442 }, { "epoch": 0.4943215780035864, "grad_norm": 436.1822509765625, "learning_rate": 1.762723591252755e-06, "loss": 18.6562, "step": 7443 }, { "epoch": 0.4943879922959421, "grad_norm": 211.69378662109375, "learning_rate": 1.76265403034412e-06, "loss": 17.6719, "step": 7444 }, { "epoch": 0.4944544065882978, "grad_norm": 153.37037658691406, "learning_rate": 1.7625844606135426e-06, "loss": 20.2969, "step": 7445 }, { "epoch": 0.4945208208806535, "grad_norm": 199.3938446044922, "learning_rate": 1.7625148820618284e-06, "loss": 19.5, "step": 7446 }, { "epoch": 0.49458723517300923, "grad_norm": 207.59129333496094, "learning_rate": 1.7624452946897811e-06, "loss": 18.0156, "step": 7447 }, { "epoch": 0.49465364946536494, "grad_norm": 887.2671508789062, "learning_rate": 1.7623756984982068e-06, "loss": 19.3125, "step": 7448 }, { "epoch": 0.49472006375772065, "grad_norm": 179.93496704101562, "learning_rate": 1.7623060934879096e-06, "loss": 22.6562, "step": 7449 }, { "epoch": 0.49478647805007636, "grad_norm": 255.5361328125, "learning_rate": 1.7622364796596956e-06, "loss": 17.5, "step": 7450 }, { "epoch": 0.49485289234243207, "grad_norm": 231.36306762695312, "learning_rate": 1.762166857014369e-06, "loss": 16.0469, "step": 7451 }, { "epoch": 0.4949193066347878, "grad_norm": 231.51087951660156, "learning_rate": 1.762097225552736e-06, "loss": 17.1406, "step": 7452 }, { "epoch": 0.49498572092714355, "grad_norm": 241.8477783203125, "learning_rate": 1.7620275852756017e-06, "loss": 18.7344, "step": 7453 }, { "epoch": 0.49505213521949926, "grad_norm": 215.60171508789062, "learning_rate": 1.7619579361837715e-06, "loss": 16.3594, "step": 7454 }, { "epoch": 0.49511854951185497, "grad_norm": 311.144775390625, "learning_rate": 1.7618882782780515e-06, "loss": 16.9531, "step": 7455 }, { "epoch": 0.4951849638042107, "grad_norm": 351.9779357910156, "learning_rate": 1.7618186115592472e-06, "loss": 20.7812, "step": 7456 }, { "epoch": 0.4952513780965664, "grad_norm": 189.96238708496094, "learning_rate": 1.7617489360281645e-06, "loss": 18.0938, "step": 7457 }, { "epoch": 0.4953177923889221, "grad_norm": 147.5640869140625, "learning_rate": 1.7616792516856095e-06, "loss": 17.7812, "step": 7458 }, { "epoch": 0.4953842066812778, "grad_norm": 201.78480529785156, "learning_rate": 1.7616095585323879e-06, "loss": 19.4219, "step": 7459 }, { "epoch": 0.4954506209736335, "grad_norm": 505.6708679199219, "learning_rate": 1.761539856569306e-06, "loss": 20.8438, "step": 7460 }, { "epoch": 0.4955170352659892, "grad_norm": 222.99896240234375, "learning_rate": 1.7614701457971705e-06, "loss": 17.1719, "step": 7461 }, { "epoch": 0.49558344955834494, "grad_norm": 468.0816650390625, "learning_rate": 1.7614004262167871e-06, "loss": 16.1875, "step": 7462 }, { "epoch": 0.49564986385070064, "grad_norm": 213.8810272216797, "learning_rate": 1.7613306978289626e-06, "loss": 20.8125, "step": 7463 }, { "epoch": 0.4957162781430564, "grad_norm": 208.89915466308594, "learning_rate": 1.761260960634504e-06, "loss": 19.7031, "step": 7464 }, { "epoch": 0.4957826924354121, "grad_norm": 288.36175537109375, "learning_rate": 1.7611912146342173e-06, "loss": 20.75, "step": 7465 }, { "epoch": 0.49584910672776783, "grad_norm": 219.55377197265625, "learning_rate": 1.7611214598289096e-06, "loss": 15.6875, "step": 7466 }, { "epoch": 0.49591552102012354, "grad_norm": 155.3925018310547, "learning_rate": 1.7610516962193875e-06, "loss": 16.7969, "step": 7467 }, { "epoch": 0.49598193531247925, "grad_norm": 179.1393585205078, "learning_rate": 1.7609819238064587e-06, "loss": 16.4844, "step": 7468 }, { "epoch": 0.49604834960483496, "grad_norm": 172.3376007080078, "learning_rate": 1.7609121425909295e-06, "loss": 13.7969, "step": 7469 }, { "epoch": 0.49611476389719067, "grad_norm": 131.71095275878906, "learning_rate": 1.7608423525736076e-06, "loss": 17.0312, "step": 7470 }, { "epoch": 0.4961811781895464, "grad_norm": 196.3153839111328, "learning_rate": 1.7607725537553e-06, "loss": 19.9062, "step": 7471 }, { "epoch": 0.4962475924819021, "grad_norm": 211.3580780029297, "learning_rate": 1.7607027461368142e-06, "loss": 17.1875, "step": 7472 }, { "epoch": 0.4963140067742578, "grad_norm": 222.70437622070312, "learning_rate": 1.7606329297189576e-06, "loss": 20.2344, "step": 7473 }, { "epoch": 0.4963804210666135, "grad_norm": 546.1806030273438, "learning_rate": 1.760563104502538e-06, "loss": 19.3438, "step": 7474 }, { "epoch": 0.4964468353589693, "grad_norm": 124.43323516845703, "learning_rate": 1.7604932704883626e-06, "loss": 19.25, "step": 7475 }, { "epoch": 0.496513249651325, "grad_norm": 331.1407165527344, "learning_rate": 1.76042342767724e-06, "loss": 18.2344, "step": 7476 }, { "epoch": 0.4965796639436807, "grad_norm": 283.392578125, "learning_rate": 1.7603535760699775e-06, "loss": 17.4062, "step": 7477 }, { "epoch": 0.4966460782360364, "grad_norm": 128.10528564453125, "learning_rate": 1.7602837156673834e-06, "loss": 16.0156, "step": 7478 }, { "epoch": 0.4967124925283921, "grad_norm": 232.8906707763672, "learning_rate": 1.7602138464702656e-06, "loss": 20.8594, "step": 7479 }, { "epoch": 0.4967789068207478, "grad_norm": 323.0563659667969, "learning_rate": 1.7601439684794324e-06, "loss": 24.4062, "step": 7480 }, { "epoch": 0.49684532111310353, "grad_norm": 181.28602600097656, "learning_rate": 1.7600740816956919e-06, "loss": 22.125, "step": 7481 }, { "epoch": 0.49691173540545924, "grad_norm": 190.91073608398438, "learning_rate": 1.7600041861198529e-06, "loss": 17.8594, "step": 7482 }, { "epoch": 0.49697814969781495, "grad_norm": 247.67967224121094, "learning_rate": 1.7599342817527237e-06, "loss": 21.1406, "step": 7483 }, { "epoch": 0.49704456399017066, "grad_norm": 130.06056213378906, "learning_rate": 1.7598643685951129e-06, "loss": 18.5938, "step": 7484 }, { "epoch": 0.4971109782825264, "grad_norm": 144.05946350097656, "learning_rate": 1.7597944466478293e-06, "loss": 15.2969, "step": 7485 }, { "epoch": 0.49717739257488214, "grad_norm": 273.7989807128906, "learning_rate": 1.7597245159116816e-06, "loss": 20.9688, "step": 7486 }, { "epoch": 0.49724380686723785, "grad_norm": 147.87290954589844, "learning_rate": 1.7596545763874789e-06, "loss": 15.9688, "step": 7487 }, { "epoch": 0.49731022115959356, "grad_norm": 165.5003204345703, "learning_rate": 1.75958462807603e-06, "loss": 20.3438, "step": 7488 }, { "epoch": 0.49737663545194927, "grad_norm": 343.60791015625, "learning_rate": 1.759514670978144e-06, "loss": 24.6719, "step": 7489 }, { "epoch": 0.497443049744305, "grad_norm": 188.20831298828125, "learning_rate": 1.7594447050946306e-06, "loss": 23.9375, "step": 7490 }, { "epoch": 0.4975094640366607, "grad_norm": 126.77854919433594, "learning_rate": 1.7593747304262986e-06, "loss": 13.6562, "step": 7491 }, { "epoch": 0.4975758783290164, "grad_norm": 258.0827941894531, "learning_rate": 1.7593047469739575e-06, "loss": 20.9062, "step": 7492 }, { "epoch": 0.4976422926213721, "grad_norm": 151.9171905517578, "learning_rate": 1.759234754738417e-06, "loss": 17.5938, "step": 7493 }, { "epoch": 0.4977087069137278, "grad_norm": 275.5208740234375, "learning_rate": 1.7591647537204866e-06, "loss": 16.7969, "step": 7494 }, { "epoch": 0.4977751212060835, "grad_norm": 199.4359588623047, "learning_rate": 1.7590947439209763e-06, "loss": 22.1719, "step": 7495 }, { "epoch": 0.4978415354984393, "grad_norm": 201.2508087158203, "learning_rate": 1.7590247253406955e-06, "loss": 20.0078, "step": 7496 }, { "epoch": 0.497907949790795, "grad_norm": 226.8178253173828, "learning_rate": 1.7589546979804543e-06, "loss": 22.375, "step": 7497 }, { "epoch": 0.4979743640831507, "grad_norm": 180.35023498535156, "learning_rate": 1.7588846618410628e-06, "loss": 17.6875, "step": 7498 }, { "epoch": 0.4980407783755064, "grad_norm": 268.0811462402344, "learning_rate": 1.7588146169233316e-06, "loss": 15.4844, "step": 7499 }, { "epoch": 0.49810719266786213, "grad_norm": 178.85702514648438, "learning_rate": 1.75874456322807e-06, "loss": 18.7188, "step": 7500 }, { "epoch": 0.49817360696021784, "grad_norm": 445.9586486816406, "learning_rate": 1.7586745007560887e-06, "loss": 17.5, "step": 7501 }, { "epoch": 0.49824002125257355, "grad_norm": 432.7598876953125, "learning_rate": 1.7586044295081987e-06, "loss": 24.7969, "step": 7502 }, { "epoch": 0.49830643554492926, "grad_norm": 313.70458984375, "learning_rate": 1.7585343494852097e-06, "loss": 22.6406, "step": 7503 }, { "epoch": 0.49837284983728497, "grad_norm": 196.6586151123047, "learning_rate": 1.758464260687933e-06, "loss": 16.0781, "step": 7504 }, { "epoch": 0.4984392641296407, "grad_norm": 158.3070068359375, "learning_rate": 1.7583941631171788e-06, "loss": 16.4688, "step": 7505 }, { "epoch": 0.4985056784219964, "grad_norm": 416.68212890625, "learning_rate": 1.7583240567737584e-06, "loss": 23.6562, "step": 7506 }, { "epoch": 0.49857209271435216, "grad_norm": 139.27169799804688, "learning_rate": 1.758253941658483e-06, "loss": 17.5, "step": 7507 }, { "epoch": 0.49863850700670787, "grad_norm": 211.875244140625, "learning_rate": 1.7581838177721627e-06, "loss": 16.75, "step": 7508 }, { "epoch": 0.4987049212990636, "grad_norm": 319.9992980957031, "learning_rate": 1.7581136851156093e-06, "loss": 20.9531, "step": 7509 }, { "epoch": 0.4987713355914193, "grad_norm": 350.1322326660156, "learning_rate": 1.758043543689634e-06, "loss": 18.7344, "step": 7510 }, { "epoch": 0.498837749883775, "grad_norm": 132.42115783691406, "learning_rate": 1.7579733934950483e-06, "loss": 18.6562, "step": 7511 }, { "epoch": 0.4989041641761307, "grad_norm": 233.04730224609375, "learning_rate": 1.7579032345326632e-06, "loss": 16.9062, "step": 7512 }, { "epoch": 0.4989705784684864, "grad_norm": 499.5998229980469, "learning_rate": 1.7578330668032905e-06, "loss": 26.4219, "step": 7513 }, { "epoch": 0.4990369927608421, "grad_norm": 478.2796936035156, "learning_rate": 1.7577628903077417e-06, "loss": 29.375, "step": 7514 }, { "epoch": 0.49910340705319783, "grad_norm": 147.48419189453125, "learning_rate": 1.7576927050468293e-06, "loss": 14.8594, "step": 7515 }, { "epoch": 0.49916982134555354, "grad_norm": 239.08261108398438, "learning_rate": 1.7576225110213642e-06, "loss": 25.1875, "step": 7516 }, { "epoch": 0.49923623563790925, "grad_norm": 94.8294677734375, "learning_rate": 1.7575523082321587e-06, "loss": 17.6406, "step": 7517 }, { "epoch": 0.499302649930265, "grad_norm": 282.6513977050781, "learning_rate": 1.7574820966800253e-06, "loss": 21.7188, "step": 7518 }, { "epoch": 0.49936906422262073, "grad_norm": 229.43475341796875, "learning_rate": 1.7574118763657753e-06, "loss": 15.5469, "step": 7519 }, { "epoch": 0.49943547851497644, "grad_norm": 127.0654296875, "learning_rate": 1.7573416472902217e-06, "loss": 16.875, "step": 7520 }, { "epoch": 0.49950189280733215, "grad_norm": 213.954833984375, "learning_rate": 1.7572714094541768e-06, "loss": 13.9062, "step": 7521 }, { "epoch": 0.49956830709968786, "grad_norm": 122.54290771484375, "learning_rate": 1.7572011628584527e-06, "loss": 14.0, "step": 7522 }, { "epoch": 0.49963472139204357, "grad_norm": 179.87179565429688, "learning_rate": 1.7571309075038623e-06, "loss": 12.8438, "step": 7523 }, { "epoch": 0.4997011356843993, "grad_norm": 189.70802307128906, "learning_rate": 1.757060643391218e-06, "loss": 15.7188, "step": 7524 }, { "epoch": 0.499767549976755, "grad_norm": 171.351806640625, "learning_rate": 1.7569903705213328e-06, "loss": 16.2812, "step": 7525 }, { "epoch": 0.4998339642691107, "grad_norm": 511.3426513671875, "learning_rate": 1.7569200888950196e-06, "loss": 21.0156, "step": 7526 }, { "epoch": 0.4999003785614664, "grad_norm": 248.07174682617188, "learning_rate": 1.756849798513091e-06, "loss": 21.2344, "step": 7527 }, { "epoch": 0.4999667928538221, "grad_norm": 1859.369873046875, "learning_rate": 1.7567794993763606e-06, "loss": 19.6406, "step": 7528 }, { "epoch": 0.5000332071461778, "grad_norm": 314.3440856933594, "learning_rate": 1.7567091914856413e-06, "loss": 20.125, "step": 7529 }, { "epoch": 0.5000996214385336, "grad_norm": 148.12718200683594, "learning_rate": 1.7566388748417462e-06, "loss": 13.8125, "step": 7530 }, { "epoch": 0.5001660357308892, "grad_norm": 121.7836685180664, "learning_rate": 1.7565685494454891e-06, "loss": 15.4219, "step": 7531 }, { "epoch": 0.500232450023245, "grad_norm": 299.54638671875, "learning_rate": 1.7564982152976834e-06, "loss": 22.6875, "step": 7532 }, { "epoch": 0.5002988643156007, "grad_norm": 253.1870574951172, "learning_rate": 1.7564278723991426e-06, "loss": 16.2031, "step": 7533 }, { "epoch": 0.5003652786079564, "grad_norm": 139.757080078125, "learning_rate": 1.7563575207506803e-06, "loss": 15.3125, "step": 7534 }, { "epoch": 0.5004316929003122, "grad_norm": 205.12025451660156, "learning_rate": 1.75628716035311e-06, "loss": 26.875, "step": 7535 }, { "epoch": 0.5004981071926679, "grad_norm": 608.4498291015625, "learning_rate": 1.7562167912072461e-06, "loss": 29.1875, "step": 7536 }, { "epoch": 0.5005645214850236, "grad_norm": 289.2459411621094, "learning_rate": 1.7561464133139023e-06, "loss": 18.5312, "step": 7537 }, { "epoch": 0.5006309357773793, "grad_norm": 339.0848388671875, "learning_rate": 1.7560760266738931e-06, "loss": 19.2344, "step": 7538 }, { "epoch": 0.500697350069735, "grad_norm": 209.91543579101562, "learning_rate": 1.7560056312880325e-06, "loss": 17.6094, "step": 7539 }, { "epoch": 0.5007637643620907, "grad_norm": 159.8761749267578, "learning_rate": 1.7559352271571343e-06, "loss": 16.8125, "step": 7540 }, { "epoch": 0.5008301786544465, "grad_norm": 200.69717407226562, "learning_rate": 1.7558648142820135e-06, "loss": 17.4844, "step": 7541 }, { "epoch": 0.5008965929468021, "grad_norm": 177.8839874267578, "learning_rate": 1.7557943926634845e-06, "loss": 18.375, "step": 7542 }, { "epoch": 0.5009630072391579, "grad_norm": 174.8694305419922, "learning_rate": 1.7557239623023614e-06, "loss": 12.3281, "step": 7543 }, { "epoch": 0.5010294215315135, "grad_norm": 284.7313232421875, "learning_rate": 1.7556535231994598e-06, "loss": 13.7188, "step": 7544 }, { "epoch": 0.5010958358238693, "grad_norm": 290.62646484375, "learning_rate": 1.7555830753555936e-06, "loss": 16.3906, "step": 7545 }, { "epoch": 0.5011622501162251, "grad_norm": 167.72938537597656, "learning_rate": 1.755512618771578e-06, "loss": 15.0469, "step": 7546 }, { "epoch": 0.5012286644085807, "grad_norm": 779.8138427734375, "learning_rate": 1.7554421534482285e-06, "loss": 30.3906, "step": 7547 }, { "epoch": 0.5012950787009365, "grad_norm": 275.3206787109375, "learning_rate": 1.7553716793863593e-06, "loss": 22.2812, "step": 7548 }, { "epoch": 0.5013614929932921, "grad_norm": 163.89390563964844, "learning_rate": 1.7553011965867866e-06, "loss": 16.7344, "step": 7549 }, { "epoch": 0.5014279072856479, "grad_norm": 249.677978515625, "learning_rate": 1.7552307050503248e-06, "loss": 20.5938, "step": 7550 }, { "epoch": 0.5014943215780036, "grad_norm": 197.42848205566406, "learning_rate": 1.75516020477779e-06, "loss": 13.6406, "step": 7551 }, { "epoch": 0.5015607358703593, "grad_norm": 114.68836212158203, "learning_rate": 1.755089695769997e-06, "loss": 17.0312, "step": 7552 }, { "epoch": 0.501627150162715, "grad_norm": 371.4895935058594, "learning_rate": 1.7550191780277622e-06, "loss": 12.4375, "step": 7553 }, { "epoch": 0.5016935644550707, "grad_norm": 336.871337890625, "learning_rate": 1.7549486515519005e-06, "loss": 21.5938, "step": 7554 }, { "epoch": 0.5017599787474264, "grad_norm": 372.38421630859375, "learning_rate": 1.7548781163432285e-06, "loss": 16.8125, "step": 7555 }, { "epoch": 0.5018263930397822, "grad_norm": 720.6771850585938, "learning_rate": 1.7548075724025616e-06, "loss": 29.6562, "step": 7556 }, { "epoch": 0.5018928073321379, "grad_norm": 174.95623779296875, "learning_rate": 1.7547370197307159e-06, "loss": 20.4688, "step": 7557 }, { "epoch": 0.5019592216244936, "grad_norm": 251.77830505371094, "learning_rate": 1.7546664583285075e-06, "loss": 19.1875, "step": 7558 }, { "epoch": 0.5020256359168493, "grad_norm": 298.6749267578125, "learning_rate": 1.7545958881967529e-06, "loss": 14.5312, "step": 7559 }, { "epoch": 0.502092050209205, "grad_norm": 226.83871459960938, "learning_rate": 1.754525309336268e-06, "loss": 18.8281, "step": 7560 }, { "epoch": 0.5021584645015608, "grad_norm": 559.9622192382812, "learning_rate": 1.7544547217478696e-06, "loss": 15.3203, "step": 7561 }, { "epoch": 0.5022248787939164, "grad_norm": 242.30711364746094, "learning_rate": 1.7543841254323737e-06, "loss": 17.3594, "step": 7562 }, { "epoch": 0.5022912930862722, "grad_norm": 354.0476379394531, "learning_rate": 1.7543135203905974e-06, "loss": 21.2812, "step": 7563 }, { "epoch": 0.5023577073786278, "grad_norm": 244.66510009765625, "learning_rate": 1.7542429066233575e-06, "loss": 15.0625, "step": 7564 }, { "epoch": 0.5024241216709836, "grad_norm": 149.69923400878906, "learning_rate": 1.7541722841314704e-06, "loss": 19.4531, "step": 7565 }, { "epoch": 0.5024905359633393, "grad_norm": 727.2445678710938, "learning_rate": 1.7541016529157533e-06, "loss": 19.3906, "step": 7566 }, { "epoch": 0.502556950255695, "grad_norm": 126.01481628417969, "learning_rate": 1.7540310129770228e-06, "loss": 12.4375, "step": 7567 }, { "epoch": 0.5026233645480508, "grad_norm": 125.96976470947266, "learning_rate": 1.7539603643160965e-06, "loss": 13.2969, "step": 7568 }, { "epoch": 0.5026897788404064, "grad_norm": 283.161865234375, "learning_rate": 1.7538897069337916e-06, "loss": 24.8438, "step": 7569 }, { "epoch": 0.5027561931327622, "grad_norm": 539.021240234375, "learning_rate": 1.7538190408309252e-06, "loss": 21.4531, "step": 7570 }, { "epoch": 0.5028226074251179, "grad_norm": 233.4184112548828, "learning_rate": 1.7537483660083148e-06, "loss": 16.0781, "step": 7571 }, { "epoch": 0.5028890217174736, "grad_norm": 218.6002655029297, "learning_rate": 1.7536776824667778e-06, "loss": 16.4219, "step": 7572 }, { "epoch": 0.5029554360098293, "grad_norm": 198.32606506347656, "learning_rate": 1.7536069902071321e-06, "loss": 18.7656, "step": 7573 }, { "epoch": 0.503021850302185, "grad_norm": 235.70274353027344, "learning_rate": 1.7535362892301952e-06, "loss": 19.3438, "step": 7574 }, { "epoch": 0.5030882645945407, "grad_norm": 557.4194946289062, "learning_rate": 1.7534655795367855e-06, "loss": 20.5312, "step": 7575 }, { "epoch": 0.5031546788868965, "grad_norm": 139.91162109375, "learning_rate": 1.7533948611277198e-06, "loss": 13.125, "step": 7576 }, { "epoch": 0.5032210931792521, "grad_norm": 206.11695861816406, "learning_rate": 1.7533241340038173e-06, "loss": 23.2969, "step": 7577 }, { "epoch": 0.5032875074716079, "grad_norm": 282.3487854003906, "learning_rate": 1.7532533981658952e-06, "loss": 18.375, "step": 7578 }, { "epoch": 0.5033539217639637, "grad_norm": 226.1313934326172, "learning_rate": 1.7531826536147721e-06, "loss": 15.8438, "step": 7579 }, { "epoch": 0.5034203360563193, "grad_norm": 718.2666625976562, "learning_rate": 1.7531119003512669e-06, "loss": 16.9062, "step": 7580 }, { "epoch": 0.5034867503486751, "grad_norm": 691.3695678710938, "learning_rate": 1.7530411383761971e-06, "loss": 25.8438, "step": 7581 }, { "epoch": 0.5035531646410307, "grad_norm": 207.76461791992188, "learning_rate": 1.7529703676903818e-06, "loss": 16.8125, "step": 7582 }, { "epoch": 0.5036195789333865, "grad_norm": 172.18060302734375, "learning_rate": 1.7528995882946393e-06, "loss": 15.7656, "step": 7583 }, { "epoch": 0.5036859932257421, "grad_norm": 264.2359313964844, "learning_rate": 1.7528288001897886e-06, "loss": 15.0938, "step": 7584 }, { "epoch": 0.5037524075180979, "grad_norm": 254.3079071044922, "learning_rate": 1.7527580033766482e-06, "loss": 21.0781, "step": 7585 }, { "epoch": 0.5038188218104536, "grad_norm": 166.1864471435547, "learning_rate": 1.7526871978560378e-06, "loss": 18.0469, "step": 7586 }, { "epoch": 0.5038852361028093, "grad_norm": 93.44808959960938, "learning_rate": 1.7526163836287754e-06, "loss": 14.75, "step": 7587 }, { "epoch": 0.503951650395165, "grad_norm": 181.68931579589844, "learning_rate": 1.7525455606956811e-06, "loss": 13.9688, "step": 7588 }, { "epoch": 0.5040180646875208, "grad_norm": 280.55950927734375, "learning_rate": 1.7524747290575733e-06, "loss": 24.0625, "step": 7589 }, { "epoch": 0.5040844789798765, "grad_norm": 189.24655151367188, "learning_rate": 1.7524038887152722e-06, "loss": 17.5938, "step": 7590 }, { "epoch": 0.5041508932722322, "grad_norm": 273.3303527832031, "learning_rate": 1.7523330396695965e-06, "loss": 15.8906, "step": 7591 }, { "epoch": 0.5042173075645879, "grad_norm": 246.5099639892578, "learning_rate": 1.7522621819213659e-06, "loss": 23.875, "step": 7592 }, { "epoch": 0.5042837218569436, "grad_norm": 183.9687957763672, "learning_rate": 1.7521913154714004e-06, "loss": 16.0781, "step": 7593 }, { "epoch": 0.5043501361492994, "grad_norm": 270.6231994628906, "learning_rate": 1.7521204403205191e-06, "loss": 22.6562, "step": 7594 }, { "epoch": 0.504416550441655, "grad_norm": 221.80235290527344, "learning_rate": 1.7520495564695429e-06, "loss": 15.3594, "step": 7595 }, { "epoch": 0.5044829647340108, "grad_norm": 505.4842529296875, "learning_rate": 1.7519786639192907e-06, "loss": 13.8594, "step": 7596 }, { "epoch": 0.5045493790263664, "grad_norm": 253.40968322753906, "learning_rate": 1.7519077626705828e-06, "loss": 24.7812, "step": 7597 }, { "epoch": 0.5046157933187222, "grad_norm": 276.1862487792969, "learning_rate": 1.7518368527242398e-06, "loss": 28.9688, "step": 7598 }, { "epoch": 0.5046822076110778, "grad_norm": 3414.7861328125, "learning_rate": 1.7517659340810814e-06, "loss": 13.4062, "step": 7599 }, { "epoch": 0.5047486219034336, "grad_norm": 252.20172119140625, "learning_rate": 1.751695006741928e-06, "loss": 16.7969, "step": 7600 }, { "epoch": 0.5048150361957894, "grad_norm": 159.73080444335938, "learning_rate": 1.7516240707076007e-06, "loss": 15.25, "step": 7601 }, { "epoch": 0.504881450488145, "grad_norm": 117.5727310180664, "learning_rate": 1.7515531259789191e-06, "loss": 16.625, "step": 7602 }, { "epoch": 0.5049478647805008, "grad_norm": 205.70584106445312, "learning_rate": 1.7514821725567046e-06, "loss": 13.5781, "step": 7603 }, { "epoch": 0.5050142790728565, "grad_norm": 229.58255004882812, "learning_rate": 1.7514112104417775e-06, "loss": 16.5, "step": 7604 }, { "epoch": 0.5050806933652122, "grad_norm": 198.89642333984375, "learning_rate": 1.7513402396349588e-06, "loss": 18.75, "step": 7605 }, { "epoch": 0.5051471076575679, "grad_norm": 175.8175811767578, "learning_rate": 1.7512692601370698e-06, "loss": 17.3438, "step": 7606 }, { "epoch": 0.5052135219499236, "grad_norm": 415.07916259765625, "learning_rate": 1.7511982719489308e-06, "loss": 20.0156, "step": 7607 }, { "epoch": 0.5052799362422793, "grad_norm": 392.6674499511719, "learning_rate": 1.7511272750713636e-06, "loss": 21.1719, "step": 7608 }, { "epoch": 0.5053463505346351, "grad_norm": 171.32203674316406, "learning_rate": 1.751056269505189e-06, "loss": 15.2656, "step": 7609 }, { "epoch": 0.5054127648269908, "grad_norm": 207.77113342285156, "learning_rate": 1.7509852552512285e-06, "loss": 19.0234, "step": 7610 }, { "epoch": 0.5054791791193465, "grad_norm": 664.9947509765625, "learning_rate": 1.7509142323103037e-06, "loss": 25.0312, "step": 7611 }, { "epoch": 0.5055455934117022, "grad_norm": 330.3480529785156, "learning_rate": 1.7508432006832363e-06, "loss": 24.25, "step": 7612 }, { "epoch": 0.5056120077040579, "grad_norm": 193.2767333984375, "learning_rate": 1.7507721603708476e-06, "loss": 12.5, "step": 7613 }, { "epoch": 0.5056784219964137, "grad_norm": 150.9563446044922, "learning_rate": 1.7507011113739595e-06, "loss": 19.6406, "step": 7614 }, { "epoch": 0.5057448362887693, "grad_norm": 181.6916046142578, "learning_rate": 1.7506300536933935e-06, "loss": 17.9844, "step": 7615 }, { "epoch": 0.5058112505811251, "grad_norm": 563.266357421875, "learning_rate": 1.7505589873299723e-06, "loss": 29.875, "step": 7616 }, { "epoch": 0.5058776648734807, "grad_norm": 321.5381774902344, "learning_rate": 1.7504879122845173e-06, "loss": 26.8438, "step": 7617 }, { "epoch": 0.5059440791658365, "grad_norm": 258.756591796875, "learning_rate": 1.7504168285578507e-06, "loss": 23.5625, "step": 7618 }, { "epoch": 0.5060104934581922, "grad_norm": 245.91941833496094, "learning_rate": 1.7503457361507953e-06, "loss": 22.1406, "step": 7619 }, { "epoch": 0.5060769077505479, "grad_norm": 126.24150848388672, "learning_rate": 1.750274635064173e-06, "loss": 19.9375, "step": 7620 }, { "epoch": 0.5061433220429037, "grad_norm": 384.6807556152344, "learning_rate": 1.750203525298806e-06, "loss": 20.1562, "step": 7621 }, { "epoch": 0.5062097363352593, "grad_norm": 183.32351684570312, "learning_rate": 1.750132406855518e-06, "loss": 27.6406, "step": 7622 }, { "epoch": 0.5062761506276151, "grad_norm": 586.662353515625, "learning_rate": 1.7500612797351302e-06, "loss": 19.9531, "step": 7623 }, { "epoch": 0.5063425649199708, "grad_norm": 251.41064453125, "learning_rate": 1.7499901439384662e-06, "loss": 25.9219, "step": 7624 }, { "epoch": 0.5064089792123265, "grad_norm": 120.35786437988281, "learning_rate": 1.7499189994663488e-06, "loss": 17.1875, "step": 7625 }, { "epoch": 0.5064753935046822, "grad_norm": 154.29464721679688, "learning_rate": 1.7498478463196007e-06, "loss": 14.4609, "step": 7626 }, { "epoch": 0.506541807797038, "grad_norm": 493.939208984375, "learning_rate": 1.7497766844990452e-06, "loss": 19.1406, "step": 7627 }, { "epoch": 0.5066082220893936, "grad_norm": 173.18890380859375, "learning_rate": 1.7497055140055053e-06, "loss": 17.2031, "step": 7628 }, { "epoch": 0.5066746363817494, "grad_norm": 426.6758117675781, "learning_rate": 1.7496343348398041e-06, "loss": 20.625, "step": 7629 }, { "epoch": 0.506741050674105, "grad_norm": 124.33842468261719, "learning_rate": 1.7495631470027655e-06, "loss": 19.2031, "step": 7630 }, { "epoch": 0.5068074649664608, "grad_norm": 155.6115264892578, "learning_rate": 1.7494919504952127e-06, "loss": 19.0, "step": 7631 }, { "epoch": 0.5068738792588166, "grad_norm": 223.11557006835938, "learning_rate": 1.749420745317969e-06, "loss": 14.7344, "step": 7632 }, { "epoch": 0.5069402935511722, "grad_norm": 382.96875, "learning_rate": 1.7493495314718583e-06, "loss": 22.0938, "step": 7633 }, { "epoch": 0.507006707843528, "grad_norm": 137.7279815673828, "learning_rate": 1.7492783089577044e-06, "loss": 15.0312, "step": 7634 }, { "epoch": 0.5070731221358836, "grad_norm": 329.07666015625, "learning_rate": 1.7492070777763308e-06, "loss": 19.2188, "step": 7635 }, { "epoch": 0.5071395364282394, "grad_norm": 189.33316040039062, "learning_rate": 1.7491358379285622e-06, "loss": 19.375, "step": 7636 }, { "epoch": 0.507205950720595, "grad_norm": 274.463623046875, "learning_rate": 1.7490645894152216e-06, "loss": 17.3906, "step": 7637 }, { "epoch": 0.5072723650129508, "grad_norm": 149.12440490722656, "learning_rate": 1.7489933322371341e-06, "loss": 20.875, "step": 7638 }, { "epoch": 0.5073387793053065, "grad_norm": 119.15518951416016, "learning_rate": 1.7489220663951238e-06, "loss": 18.9531, "step": 7639 }, { "epoch": 0.5074051935976622, "grad_norm": 388.3784484863281, "learning_rate": 1.7488507918900148e-06, "loss": 15.6719, "step": 7640 }, { "epoch": 0.5074716078900179, "grad_norm": 149.8217010498047, "learning_rate": 1.7487795087226316e-06, "loss": 17.7188, "step": 7641 }, { "epoch": 0.5075380221823736, "grad_norm": 193.21124267578125, "learning_rate": 1.7487082168937987e-06, "loss": 21.5312, "step": 7642 }, { "epoch": 0.5076044364747294, "grad_norm": 116.7923812866211, "learning_rate": 1.748636916404341e-06, "loss": 13.9844, "step": 7643 }, { "epoch": 0.5076708507670851, "grad_norm": 305.6228332519531, "learning_rate": 1.7485656072550833e-06, "loss": 22.8125, "step": 7644 }, { "epoch": 0.5077372650594408, "grad_norm": 166.24615478515625, "learning_rate": 1.74849428944685e-06, "loss": 16.9219, "step": 7645 }, { "epoch": 0.5078036793517965, "grad_norm": 1344.5672607421875, "learning_rate": 1.7484229629804666e-06, "loss": 12.5, "step": 7646 }, { "epoch": 0.5078700936441523, "grad_norm": 674.2281494140625, "learning_rate": 1.7483516278567576e-06, "loss": 25.8594, "step": 7647 }, { "epoch": 0.5079365079365079, "grad_norm": 212.53201293945312, "learning_rate": 1.7482802840765488e-06, "loss": 23.4375, "step": 7648 }, { "epoch": 0.5080029222288637, "grad_norm": 369.2741394042969, "learning_rate": 1.748208931640665e-06, "loss": 17.6094, "step": 7649 }, { "epoch": 0.5080693365212193, "grad_norm": 209.12188720703125, "learning_rate": 1.7481375705499319e-06, "loss": 13.0, "step": 7650 }, { "epoch": 0.5081357508135751, "grad_norm": 178.20167541503906, "learning_rate": 1.7480662008051745e-06, "loss": 19.5781, "step": 7651 }, { "epoch": 0.5082021651059307, "grad_norm": 198.2127227783203, "learning_rate": 1.747994822407219e-06, "loss": 14.2344, "step": 7652 }, { "epoch": 0.5082685793982865, "grad_norm": 178.24905395507812, "learning_rate": 1.7479234353568906e-06, "loss": 19.5156, "step": 7653 }, { "epoch": 0.5083349936906423, "grad_norm": 263.5736999511719, "learning_rate": 1.747852039655015e-06, "loss": 16.6406, "step": 7654 }, { "epoch": 0.5084014079829979, "grad_norm": 257.0820007324219, "learning_rate": 1.7477806353024183e-06, "loss": 19.3125, "step": 7655 }, { "epoch": 0.5084678222753537, "grad_norm": 338.9314270019531, "learning_rate": 1.7477092222999263e-06, "loss": 20.375, "step": 7656 }, { "epoch": 0.5085342365677094, "grad_norm": 721.9393310546875, "learning_rate": 1.7476378006483655e-06, "loss": 22.8281, "step": 7657 }, { "epoch": 0.5086006508600651, "grad_norm": 447.4128112792969, "learning_rate": 1.7475663703485615e-06, "loss": 33.25, "step": 7658 }, { "epoch": 0.5086670651524208, "grad_norm": 127.50416564941406, "learning_rate": 1.7474949314013409e-06, "loss": 15.6406, "step": 7659 }, { "epoch": 0.5087334794447765, "grad_norm": 375.874267578125, "learning_rate": 1.7474234838075298e-06, "loss": 20.2812, "step": 7660 }, { "epoch": 0.5087998937371322, "grad_norm": 252.10165405273438, "learning_rate": 1.7473520275679552e-06, "loss": 18.2344, "step": 7661 }, { "epoch": 0.508866308029488, "grad_norm": 450.3668212890625, "learning_rate": 1.747280562683443e-06, "loss": 22.9531, "step": 7662 }, { "epoch": 0.5089327223218436, "grad_norm": 427.44647216796875, "learning_rate": 1.74720908915482e-06, "loss": 17.0859, "step": 7663 }, { "epoch": 0.5089991366141994, "grad_norm": 369.457763671875, "learning_rate": 1.7471376069829136e-06, "loss": 16.4688, "step": 7664 }, { "epoch": 0.5090655509065551, "grad_norm": 259.416015625, "learning_rate": 1.7470661161685497e-06, "loss": 14.375, "step": 7665 }, { "epoch": 0.5091319651989108, "grad_norm": 160.43479919433594, "learning_rate": 1.746994616712556e-06, "loss": 17.0938, "step": 7666 }, { "epoch": 0.5091983794912666, "grad_norm": 226.9033966064453, "learning_rate": 1.7469231086157592e-06, "loss": 17.9375, "step": 7667 }, { "epoch": 0.5092647937836222, "grad_norm": 127.6436767578125, "learning_rate": 1.7468515918789868e-06, "loss": 17.2656, "step": 7668 }, { "epoch": 0.509331208075978, "grad_norm": 310.3808288574219, "learning_rate": 1.7467800665030656e-06, "loss": 18.3125, "step": 7669 }, { "epoch": 0.5093976223683336, "grad_norm": 199.2528839111328, "learning_rate": 1.7467085324888235e-06, "loss": 18.2656, "step": 7670 }, { "epoch": 0.5094640366606894, "grad_norm": 214.48867797851562, "learning_rate": 1.7466369898370875e-06, "loss": 17.2188, "step": 7671 }, { "epoch": 0.509530450953045, "grad_norm": 169.1205596923828, "learning_rate": 1.7465654385486852e-06, "loss": 22.625, "step": 7672 }, { "epoch": 0.5095968652454008, "grad_norm": 1316.8094482421875, "learning_rate": 1.7464938786244445e-06, "loss": 14.1562, "step": 7673 }, { "epoch": 0.5096632795377565, "grad_norm": 339.2361755371094, "learning_rate": 1.7464223100651931e-06, "loss": 18.1719, "step": 7674 }, { "epoch": 0.5097296938301122, "grad_norm": 285.55010986328125, "learning_rate": 1.7463507328717588e-06, "loss": 25.5469, "step": 7675 }, { "epoch": 0.509796108122468, "grad_norm": 248.6013946533203, "learning_rate": 1.7462791470449696e-06, "loss": 18.2969, "step": 7676 }, { "epoch": 0.5098625224148237, "grad_norm": 147.95167541503906, "learning_rate": 1.7462075525856536e-06, "loss": 17.1875, "step": 7677 }, { "epoch": 0.5099289367071794, "grad_norm": 190.1712646484375, "learning_rate": 1.746135949494639e-06, "loss": 19.0625, "step": 7678 }, { "epoch": 0.5099953509995351, "grad_norm": 542.4373779296875, "learning_rate": 1.7460643377727537e-06, "loss": 15.1875, "step": 7679 }, { "epoch": 0.5100617652918908, "grad_norm": 299.32171630859375, "learning_rate": 1.7459927174208266e-06, "loss": 20.4688, "step": 7680 }, { "epoch": 0.5101281795842465, "grad_norm": 164.548828125, "learning_rate": 1.7459210884396858e-06, "loss": 18.125, "step": 7681 }, { "epoch": 0.5101945938766023, "grad_norm": 299.35546875, "learning_rate": 1.74584945083016e-06, "loss": 25.0625, "step": 7682 }, { "epoch": 0.5102610081689579, "grad_norm": 312.645751953125, "learning_rate": 1.7457778045930777e-06, "loss": 24.8125, "step": 7683 }, { "epoch": 0.5103274224613137, "grad_norm": 126.93800354003906, "learning_rate": 1.745706149729268e-06, "loss": 17.2969, "step": 7684 }, { "epoch": 0.5103938367536693, "grad_norm": 261.7878112792969, "learning_rate": 1.7456344862395596e-06, "loss": 17.5312, "step": 7685 }, { "epoch": 0.5104602510460251, "grad_norm": 330.9433288574219, "learning_rate": 1.7455628141247814e-06, "loss": 17.4219, "step": 7686 }, { "epoch": 0.5105266653383809, "grad_norm": 227.4175567626953, "learning_rate": 1.7454911333857623e-06, "loss": 20.1406, "step": 7687 }, { "epoch": 0.5105930796307365, "grad_norm": 266.91119384765625, "learning_rate": 1.7454194440233316e-06, "loss": 21.3281, "step": 7688 }, { "epoch": 0.5106594939230923, "grad_norm": 187.2108612060547, "learning_rate": 1.7453477460383187e-06, "loss": 18.9844, "step": 7689 }, { "epoch": 0.5107259082154479, "grad_norm": 370.0240478515625, "learning_rate": 1.7452760394315532e-06, "loss": 24.625, "step": 7690 }, { "epoch": 0.5107923225078037, "grad_norm": 222.59463500976562, "learning_rate": 1.745204324203864e-06, "loss": 22.3438, "step": 7691 }, { "epoch": 0.5108587368001594, "grad_norm": 279.2948913574219, "learning_rate": 1.7451326003560807e-06, "loss": 23.0156, "step": 7692 }, { "epoch": 0.5109251510925151, "grad_norm": 129.31735229492188, "learning_rate": 1.7450608678890331e-06, "loss": 18.0, "step": 7693 }, { "epoch": 0.5109915653848708, "grad_norm": 1153.4510498046875, "learning_rate": 1.7449891268035511e-06, "loss": 16.875, "step": 7694 }, { "epoch": 0.5110579796772265, "grad_norm": 339.0588684082031, "learning_rate": 1.7449173771004645e-06, "loss": 25.9688, "step": 7695 }, { "epoch": 0.5111243939695822, "grad_norm": 196.112060546875, "learning_rate": 1.7448456187806032e-06, "loss": 16.3594, "step": 7696 }, { "epoch": 0.511190808261938, "grad_norm": 164.57864379882812, "learning_rate": 1.7447738518447975e-06, "loss": 21.5938, "step": 7697 }, { "epoch": 0.5112572225542937, "grad_norm": 195.2382354736328, "learning_rate": 1.7447020762938774e-06, "loss": 19.0781, "step": 7698 }, { "epoch": 0.5113236368466494, "grad_norm": 180.79881286621094, "learning_rate": 1.7446302921286725e-06, "loss": 22.6094, "step": 7699 }, { "epoch": 0.5113900511390052, "grad_norm": 209.78250122070312, "learning_rate": 1.7445584993500145e-06, "loss": 11.6406, "step": 7700 }, { "epoch": 0.5114564654313608, "grad_norm": 486.1246337890625, "learning_rate": 1.7444866979587328e-06, "loss": 23.5312, "step": 7701 }, { "epoch": 0.5115228797237166, "grad_norm": 324.0416564941406, "learning_rate": 1.7444148879556582e-06, "loss": 15.4688, "step": 7702 }, { "epoch": 0.5115892940160722, "grad_norm": 234.31991577148438, "learning_rate": 1.7443430693416215e-06, "loss": 21.8438, "step": 7703 }, { "epoch": 0.511655708308428, "grad_norm": 97.78985595703125, "learning_rate": 1.744271242117453e-06, "loss": 19.9688, "step": 7704 }, { "epoch": 0.5117221226007836, "grad_norm": 518.9608764648438, "learning_rate": 1.7441994062839847e-06, "loss": 18.7812, "step": 7705 }, { "epoch": 0.5117885368931394, "grad_norm": 215.30979919433594, "learning_rate": 1.7441275618420463e-06, "loss": 24.7656, "step": 7706 }, { "epoch": 0.5118549511854951, "grad_norm": 142.43287658691406, "learning_rate": 1.7440557087924694e-06, "loss": 18.5312, "step": 7707 }, { "epoch": 0.5119213654778508, "grad_norm": 157.51170349121094, "learning_rate": 1.743983847136085e-06, "loss": 19.4375, "step": 7708 }, { "epoch": 0.5119877797702066, "grad_norm": 171.50579833984375, "learning_rate": 1.7439119768737246e-06, "loss": 16.8906, "step": 7709 }, { "epoch": 0.5120541940625623, "grad_norm": 196.71347045898438, "learning_rate": 1.7438400980062195e-06, "loss": 15.8438, "step": 7710 }, { "epoch": 0.512120608354918, "grad_norm": 130.81851196289062, "learning_rate": 1.7437682105344012e-06, "loss": 14.0312, "step": 7711 }, { "epoch": 0.5121870226472737, "grad_norm": 586.7755737304688, "learning_rate": 1.7436963144591009e-06, "loss": 13.0156, "step": 7712 }, { "epoch": 0.5122534369396294, "grad_norm": 387.5320739746094, "learning_rate": 1.7436244097811504e-06, "loss": 15.8281, "step": 7713 }, { "epoch": 0.5123198512319851, "grad_norm": 321.349853515625, "learning_rate": 1.7435524965013816e-06, "loss": 14.8125, "step": 7714 }, { "epoch": 0.5123862655243409, "grad_norm": 243.8701629638672, "learning_rate": 1.7434805746206265e-06, "loss": 17.9688, "step": 7715 }, { "epoch": 0.5124526798166965, "grad_norm": 374.921875, "learning_rate": 1.7434086441397167e-06, "loss": 20.7812, "step": 7716 }, { "epoch": 0.5125190941090523, "grad_norm": 221.15188598632812, "learning_rate": 1.7433367050594843e-06, "loss": 20.875, "step": 7717 }, { "epoch": 0.5125855084014079, "grad_norm": 316.1587219238281, "learning_rate": 1.7432647573807615e-06, "loss": 26.7812, "step": 7718 }, { "epoch": 0.5126519226937637, "grad_norm": 234.9651336669922, "learning_rate": 1.7431928011043806e-06, "loss": 20.9062, "step": 7719 }, { "epoch": 0.5127183369861195, "grad_norm": 364.6719055175781, "learning_rate": 1.7431208362311738e-06, "loss": 23.6562, "step": 7720 }, { "epoch": 0.5127847512784751, "grad_norm": 298.7925720214844, "learning_rate": 1.7430488627619738e-06, "loss": 21.6719, "step": 7721 }, { "epoch": 0.5128511655708309, "grad_norm": 500.3346252441406, "learning_rate": 1.742976880697613e-06, "loss": 22.9219, "step": 7722 }, { "epoch": 0.5129175798631865, "grad_norm": 237.67813110351562, "learning_rate": 1.742904890038924e-06, "loss": 20.2188, "step": 7723 }, { "epoch": 0.5129839941555423, "grad_norm": 259.5222473144531, "learning_rate": 1.7428328907867398e-06, "loss": 24.1094, "step": 7724 }, { "epoch": 0.513050408447898, "grad_norm": 188.69261169433594, "learning_rate": 1.7427608829418928e-06, "loss": 24.9062, "step": 7725 }, { "epoch": 0.5131168227402537, "grad_norm": 215.06883239746094, "learning_rate": 1.7426888665052167e-06, "loss": 17.8125, "step": 7726 }, { "epoch": 0.5131832370326094, "grad_norm": 351.60125732421875, "learning_rate": 1.7426168414775437e-06, "loss": 32.125, "step": 7727 }, { "epoch": 0.5132496513249651, "grad_norm": 210.12884521484375, "learning_rate": 1.7425448078597074e-06, "loss": 20.7031, "step": 7728 }, { "epoch": 0.5133160656173208, "grad_norm": 323.58447265625, "learning_rate": 1.7424727656525409e-06, "loss": 19.2031, "step": 7729 }, { "epoch": 0.5133824799096766, "grad_norm": 249.5282745361328, "learning_rate": 1.7424007148568775e-06, "loss": 20.2031, "step": 7730 }, { "epoch": 0.5134488942020323, "grad_norm": 193.5209197998047, "learning_rate": 1.7423286554735508e-06, "loss": 13.875, "step": 7731 }, { "epoch": 0.513515308494388, "grad_norm": 184.51698303222656, "learning_rate": 1.7422565875033944e-06, "loss": 17.6562, "step": 7732 }, { "epoch": 0.5135817227867437, "grad_norm": 543.8467407226562, "learning_rate": 1.7421845109472416e-06, "loss": 32.5625, "step": 7733 }, { "epoch": 0.5136481370790994, "grad_norm": 305.3021545410156, "learning_rate": 1.7421124258059263e-06, "loss": 21.9062, "step": 7734 }, { "epoch": 0.5137145513714552, "grad_norm": 237.06185913085938, "learning_rate": 1.7420403320802826e-06, "loss": 14.4062, "step": 7735 }, { "epoch": 0.5137809656638108, "grad_norm": 373.1573181152344, "learning_rate": 1.741968229771144e-06, "loss": 15.625, "step": 7736 }, { "epoch": 0.5138473799561666, "grad_norm": 134.32652282714844, "learning_rate": 1.7418961188793448e-06, "loss": 17.4219, "step": 7737 }, { "epoch": 0.5139137942485222, "grad_norm": 360.3860168457031, "learning_rate": 1.7418239994057192e-06, "loss": 10.2891, "step": 7738 }, { "epoch": 0.513980208540878, "grad_norm": 142.57513427734375, "learning_rate": 1.7417518713511013e-06, "loss": 15.2812, "step": 7739 }, { "epoch": 0.5140466228332337, "grad_norm": 451.5603942871094, "learning_rate": 1.7416797347163255e-06, "loss": 20.0625, "step": 7740 }, { "epoch": 0.5141130371255894, "grad_norm": 333.39373779296875, "learning_rate": 1.7416075895022262e-06, "loss": 26.9844, "step": 7741 }, { "epoch": 0.5141794514179452, "grad_norm": 238.7808837890625, "learning_rate": 1.741535435709638e-06, "loss": 23.1562, "step": 7742 }, { "epoch": 0.5142458657103008, "grad_norm": 205.85997009277344, "learning_rate": 1.7414632733393956e-06, "loss": 16.0312, "step": 7743 }, { "epoch": 0.5143122800026566, "grad_norm": 182.3170928955078, "learning_rate": 1.741391102392333e-06, "loss": 14.7656, "step": 7744 }, { "epoch": 0.5143786942950123, "grad_norm": 216.2703399658203, "learning_rate": 1.7413189228692862e-06, "loss": 25.9375, "step": 7745 }, { "epoch": 0.514445108587368, "grad_norm": 500.66912841796875, "learning_rate": 1.7412467347710895e-06, "loss": 22.6562, "step": 7746 }, { "epoch": 0.5145115228797237, "grad_norm": 171.15872192382812, "learning_rate": 1.7411745380985776e-06, "loss": 22.0, "step": 7747 }, { "epoch": 0.5145779371720794, "grad_norm": 166.77963256835938, "learning_rate": 1.7411023328525864e-06, "loss": 15.0156, "step": 7748 }, { "epoch": 0.5146443514644351, "grad_norm": 287.9735412597656, "learning_rate": 1.741030119033951e-06, "loss": 22.4062, "step": 7749 }, { "epoch": 0.5147107657567909, "grad_norm": 193.7784423828125, "learning_rate": 1.7409578966435058e-06, "loss": 17.6875, "step": 7750 }, { "epoch": 0.5147771800491465, "grad_norm": 333.8194274902344, "learning_rate": 1.7408856656820876e-06, "loss": 17.5, "step": 7751 }, { "epoch": 0.5148435943415023, "grad_norm": 131.9455108642578, "learning_rate": 1.740813426150531e-06, "loss": 15.6094, "step": 7752 }, { "epoch": 0.514910008633858, "grad_norm": 2245.03271484375, "learning_rate": 1.7407411780496717e-06, "loss": 21.6875, "step": 7753 }, { "epoch": 0.5149764229262137, "grad_norm": 190.39837646484375, "learning_rate": 1.7406689213803457e-06, "loss": 20.5156, "step": 7754 }, { "epoch": 0.5150428372185695, "grad_norm": 138.62489318847656, "learning_rate": 1.7405966561433885e-06, "loss": 16.1875, "step": 7755 }, { "epoch": 0.5151092515109251, "grad_norm": 151.31076049804688, "learning_rate": 1.7405243823396365e-06, "loss": 14.2344, "step": 7756 }, { "epoch": 0.5151756658032809, "grad_norm": 232.6850128173828, "learning_rate": 1.7404520999699255e-06, "loss": 25.2344, "step": 7757 }, { "epoch": 0.5152420800956365, "grad_norm": 354.2771301269531, "learning_rate": 1.7403798090350911e-06, "loss": 16.0781, "step": 7758 }, { "epoch": 0.5153084943879923, "grad_norm": 269.7509460449219, "learning_rate": 1.7403075095359708e-06, "loss": 15.6094, "step": 7759 }, { "epoch": 0.515374908680348, "grad_norm": 329.6848449707031, "learning_rate": 1.7402352014733995e-06, "loss": 22.5938, "step": 7760 }, { "epoch": 0.5154413229727037, "grad_norm": 210.2277069091797, "learning_rate": 1.7401628848482145e-06, "loss": 14.2188, "step": 7761 }, { "epoch": 0.5155077372650595, "grad_norm": 236.17782592773438, "learning_rate": 1.7400905596612518e-06, "loss": 20.9062, "step": 7762 }, { "epoch": 0.5155741515574152, "grad_norm": 458.79217529296875, "learning_rate": 1.7400182259133484e-06, "loss": 19.0, "step": 7763 }, { "epoch": 0.5156405658497709, "grad_norm": 284.416748046875, "learning_rate": 1.739945883605341e-06, "loss": 18.5781, "step": 7764 }, { "epoch": 0.5157069801421266, "grad_norm": 164.6840057373047, "learning_rate": 1.7398735327380662e-06, "loss": 15.0, "step": 7765 }, { "epoch": 0.5157733944344823, "grad_norm": 235.70530700683594, "learning_rate": 1.739801173312361e-06, "loss": 14.375, "step": 7766 }, { "epoch": 0.515839808726838, "grad_norm": 418.1978759765625, "learning_rate": 1.7397288053290624e-06, "loss": 24.4688, "step": 7767 }, { "epoch": 0.5159062230191938, "grad_norm": 146.5555419921875, "learning_rate": 1.7396564287890073e-06, "loss": 19.7656, "step": 7768 }, { "epoch": 0.5159726373115494, "grad_norm": 218.36260986328125, "learning_rate": 1.7395840436930337e-06, "loss": 21.0312, "step": 7769 }, { "epoch": 0.5160390516039052, "grad_norm": 180.2408905029297, "learning_rate": 1.7395116500419783e-06, "loss": 16.8438, "step": 7770 }, { "epoch": 0.5161054658962608, "grad_norm": 215.14308166503906, "learning_rate": 1.7394392478366782e-06, "loss": 15.3438, "step": 7771 }, { "epoch": 0.5161718801886166, "grad_norm": 130.86766052246094, "learning_rate": 1.7393668370779712e-06, "loss": 18.0156, "step": 7772 }, { "epoch": 0.5162382944809724, "grad_norm": 241.25758361816406, "learning_rate": 1.7392944177666952e-06, "loss": 20.4375, "step": 7773 }, { "epoch": 0.516304708773328, "grad_norm": 449.4220886230469, "learning_rate": 1.7392219899036874e-06, "loss": 18.7969, "step": 7774 }, { "epoch": 0.5163711230656838, "grad_norm": 235.41961669921875, "learning_rate": 1.7391495534897863e-06, "loss": 13.5938, "step": 7775 }, { "epoch": 0.5164375373580394, "grad_norm": 174.37533569335938, "learning_rate": 1.7390771085258291e-06, "loss": 13.9062, "step": 7776 }, { "epoch": 0.5165039516503952, "grad_norm": 317.59686279296875, "learning_rate": 1.7390046550126543e-06, "loss": 23.5, "step": 7777 }, { "epoch": 0.5165703659427509, "grad_norm": 343.2755432128906, "learning_rate": 1.7389321929510997e-06, "loss": 18.7969, "step": 7778 }, { "epoch": 0.5166367802351066, "grad_norm": 350.6610107421875, "learning_rate": 1.7388597223420035e-06, "loss": 17.0156, "step": 7779 }, { "epoch": 0.5167031945274623, "grad_norm": 145.8090057373047, "learning_rate": 1.7387872431862043e-06, "loss": 17.4219, "step": 7780 }, { "epoch": 0.516769608819818, "grad_norm": 172.5646209716797, "learning_rate": 1.7387147554845401e-06, "loss": 18.2812, "step": 7781 }, { "epoch": 0.5168360231121737, "grad_norm": 461.36785888671875, "learning_rate": 1.7386422592378494e-06, "loss": 21.3438, "step": 7782 }, { "epoch": 0.5169024374045295, "grad_norm": 204.81944274902344, "learning_rate": 1.738569754446971e-06, "loss": 21.3438, "step": 7783 }, { "epoch": 0.5169688516968852, "grad_norm": 274.3617248535156, "learning_rate": 1.738497241112744e-06, "loss": 19.5781, "step": 7784 }, { "epoch": 0.5170352659892409, "grad_norm": 137.36900329589844, "learning_rate": 1.7384247192360064e-06, "loss": 15.7344, "step": 7785 }, { "epoch": 0.5171016802815966, "grad_norm": 555.0038452148438, "learning_rate": 1.7383521888175974e-06, "loss": 18.6094, "step": 7786 }, { "epoch": 0.5171680945739523, "grad_norm": 188.8982696533203, "learning_rate": 1.7382796498583562e-06, "loss": 21.3438, "step": 7787 }, { "epoch": 0.5172345088663081, "grad_norm": 213.12889099121094, "learning_rate": 1.738207102359122e-06, "loss": 17.3125, "step": 7788 }, { "epoch": 0.5173009231586637, "grad_norm": 173.5155487060547, "learning_rate": 1.7381345463207331e-06, "loss": 16.0781, "step": 7789 }, { "epoch": 0.5173673374510195, "grad_norm": 279.9559631347656, "learning_rate": 1.73806198174403e-06, "loss": 20.8438, "step": 7790 }, { "epoch": 0.5174337517433751, "grad_norm": 445.1811828613281, "learning_rate": 1.737989408629851e-06, "loss": 22.6406, "step": 7791 }, { "epoch": 0.5175001660357309, "grad_norm": 326.7896423339844, "learning_rate": 1.7379168269790362e-06, "loss": 16.2656, "step": 7792 }, { "epoch": 0.5175665803280866, "grad_norm": 429.4970703125, "learning_rate": 1.737844236792425e-06, "loss": 24.3281, "step": 7793 }, { "epoch": 0.5176329946204423, "grad_norm": 266.13409423828125, "learning_rate": 1.7377716380708574e-06, "loss": 21.2812, "step": 7794 }, { "epoch": 0.5176994089127981, "grad_norm": 209.95831298828125, "learning_rate": 1.7376990308151726e-06, "loss": 17.0156, "step": 7795 }, { "epoch": 0.5177658232051537, "grad_norm": 137.45199584960938, "learning_rate": 1.7376264150262111e-06, "loss": 16.3125, "step": 7796 }, { "epoch": 0.5178322374975095, "grad_norm": 173.4893035888672, "learning_rate": 1.7375537907048123e-06, "loss": 16.375, "step": 7797 }, { "epoch": 0.5178986517898652, "grad_norm": 181.23008728027344, "learning_rate": 1.7374811578518166e-06, "loss": 15.4375, "step": 7798 }, { "epoch": 0.5179650660822209, "grad_norm": 283.92974853515625, "learning_rate": 1.7374085164680645e-06, "loss": 12.2969, "step": 7799 }, { "epoch": 0.5180314803745766, "grad_norm": 268.1419372558594, "learning_rate": 1.7373358665543955e-06, "loss": 22.2188, "step": 7800 }, { "epoch": 0.5180978946669323, "grad_norm": 321.9707336425781, "learning_rate": 1.7372632081116504e-06, "loss": 18.0156, "step": 7801 }, { "epoch": 0.518164308959288, "grad_norm": 259.7934265136719, "learning_rate": 1.7371905411406697e-06, "loss": 15.2656, "step": 7802 }, { "epoch": 0.5182307232516438, "grad_norm": 223.1533203125, "learning_rate": 1.737117865642294e-06, "loss": 14.7656, "step": 7803 }, { "epoch": 0.5182971375439994, "grad_norm": 272.5929870605469, "learning_rate": 1.737045181617364e-06, "loss": 18.4219, "step": 7804 }, { "epoch": 0.5183635518363552, "grad_norm": 151.6232147216797, "learning_rate": 1.73697248906672e-06, "loss": 13.4062, "step": 7805 }, { "epoch": 0.518429966128711, "grad_norm": 680.6880493164062, "learning_rate": 1.7368997879912033e-06, "loss": 21.25, "step": 7806 }, { "epoch": 0.5184963804210666, "grad_norm": 185.3098602294922, "learning_rate": 1.7368270783916547e-06, "loss": 16.1406, "step": 7807 }, { "epoch": 0.5185627947134224, "grad_norm": 602.2832641601562, "learning_rate": 1.7367543602689155e-06, "loss": 18.5312, "step": 7808 }, { "epoch": 0.518629209005778, "grad_norm": 147.7498016357422, "learning_rate": 1.7366816336238266e-06, "loss": 19.0, "step": 7809 }, { "epoch": 0.5186956232981338, "grad_norm": 147.49874877929688, "learning_rate": 1.7366088984572293e-06, "loss": 15.9844, "step": 7810 }, { "epoch": 0.5187620375904894, "grad_norm": 306.8368225097656, "learning_rate": 1.7365361547699653e-06, "loss": 17.0156, "step": 7811 }, { "epoch": 0.5188284518828452, "grad_norm": 772.310546875, "learning_rate": 1.7364634025628754e-06, "loss": 23.0312, "step": 7812 }, { "epoch": 0.5188948661752009, "grad_norm": 246.3915557861328, "learning_rate": 1.736390641836802e-06, "loss": 21.7812, "step": 7813 }, { "epoch": 0.5189612804675566, "grad_norm": 302.05889892578125, "learning_rate": 1.7363178725925858e-06, "loss": 19.0625, "step": 7814 }, { "epoch": 0.5190276947599123, "grad_norm": 171.74948120117188, "learning_rate": 1.7362450948310694e-06, "loss": 15.8438, "step": 7815 }, { "epoch": 0.519094109052268, "grad_norm": 139.16192626953125, "learning_rate": 1.736172308553094e-06, "loss": 13.7969, "step": 7816 }, { "epoch": 0.5191605233446238, "grad_norm": 465.2672424316406, "learning_rate": 1.736099513759502e-06, "loss": 20.875, "step": 7817 }, { "epoch": 0.5192269376369795, "grad_norm": 190.8260498046875, "learning_rate": 1.7360267104511354e-06, "loss": 19.125, "step": 7818 }, { "epoch": 0.5192933519293352, "grad_norm": 259.1199035644531, "learning_rate": 1.735953898628836e-06, "loss": 17.3906, "step": 7819 }, { "epoch": 0.5193597662216909, "grad_norm": 153.56532287597656, "learning_rate": 1.7358810782934462e-06, "loss": 17.3438, "step": 7820 }, { "epoch": 0.5194261805140467, "grad_norm": 343.3268127441406, "learning_rate": 1.7358082494458088e-06, "loss": 24.0938, "step": 7821 }, { "epoch": 0.5194925948064023, "grad_norm": 2197.35595703125, "learning_rate": 1.7357354120867655e-06, "loss": 22.1875, "step": 7822 }, { "epoch": 0.5195590090987581, "grad_norm": 755.0144653320312, "learning_rate": 1.7356625662171592e-06, "loss": 20.2031, "step": 7823 }, { "epoch": 0.5196254233911137, "grad_norm": 245.3135528564453, "learning_rate": 1.7355897118378327e-06, "loss": 17.6094, "step": 7824 }, { "epoch": 0.5196918376834695, "grad_norm": 347.2318115234375, "learning_rate": 1.7355168489496288e-06, "loss": 19.3672, "step": 7825 }, { "epoch": 0.5197582519758251, "grad_norm": 276.68524169921875, "learning_rate": 1.7354439775533898e-06, "loss": 21.875, "step": 7826 }, { "epoch": 0.5198246662681809, "grad_norm": 356.20440673828125, "learning_rate": 1.7353710976499589e-06, "loss": 16.1562, "step": 7827 }, { "epoch": 0.5198910805605367, "grad_norm": 224.22457885742188, "learning_rate": 1.7352982092401795e-06, "loss": 22.5312, "step": 7828 }, { "epoch": 0.5199574948528923, "grad_norm": 992.8853759765625, "learning_rate": 1.7352253123248941e-06, "loss": 19.3125, "step": 7829 }, { "epoch": 0.5200239091452481, "grad_norm": 251.62911987304688, "learning_rate": 1.7351524069049464e-06, "loss": 18.8438, "step": 7830 }, { "epoch": 0.5200903234376038, "grad_norm": 390.91326904296875, "learning_rate": 1.7350794929811793e-06, "loss": 19.0938, "step": 7831 }, { "epoch": 0.5201567377299595, "grad_norm": 224.90191650390625, "learning_rate": 1.7350065705544369e-06, "loss": 14.6719, "step": 7832 }, { "epoch": 0.5202231520223152, "grad_norm": 157.86660766601562, "learning_rate": 1.734933639625562e-06, "loss": 15.0625, "step": 7833 }, { "epoch": 0.5202895663146709, "grad_norm": 332.0311279296875, "learning_rate": 1.7348607001953987e-06, "loss": 16.5156, "step": 7834 }, { "epoch": 0.5203559806070266, "grad_norm": 340.7710266113281, "learning_rate": 1.7347877522647907e-06, "loss": 12.5938, "step": 7835 }, { "epoch": 0.5204223948993824, "grad_norm": 210.8294219970703, "learning_rate": 1.7347147958345816e-06, "loss": 14.9688, "step": 7836 }, { "epoch": 0.520488809191738, "grad_norm": 409.0704650878906, "learning_rate": 1.7346418309056152e-06, "loss": 21.6562, "step": 7837 }, { "epoch": 0.5205552234840938, "grad_norm": 157.2402801513672, "learning_rate": 1.734568857478736e-06, "loss": 16.3594, "step": 7838 }, { "epoch": 0.5206216377764495, "grad_norm": 235.98741149902344, "learning_rate": 1.7344958755547876e-06, "loss": 16.375, "step": 7839 }, { "epoch": 0.5206880520688052, "grad_norm": 515.298095703125, "learning_rate": 1.7344228851346143e-06, "loss": 22.2656, "step": 7840 }, { "epoch": 0.520754466361161, "grad_norm": 483.7336120605469, "learning_rate": 1.7343498862190611e-06, "loss": 28.5, "step": 7841 }, { "epoch": 0.5208208806535166, "grad_norm": 239.06787109375, "learning_rate": 1.7342768788089714e-06, "loss": 16.0156, "step": 7842 }, { "epoch": 0.5208872949458724, "grad_norm": 270.8443603515625, "learning_rate": 1.7342038629051903e-06, "loss": 19.75, "step": 7843 }, { "epoch": 0.520953709238228, "grad_norm": 396.9515075683594, "learning_rate": 1.7341308385085625e-06, "loss": 22.2188, "step": 7844 }, { "epoch": 0.5210201235305838, "grad_norm": 281.8521728515625, "learning_rate": 1.7340578056199322e-06, "loss": 13.8125, "step": 7845 }, { "epoch": 0.5210865378229395, "grad_norm": 206.41990661621094, "learning_rate": 1.7339847642401447e-06, "loss": 19.5938, "step": 7846 }, { "epoch": 0.5211529521152952, "grad_norm": 99.96150970458984, "learning_rate": 1.7339117143700443e-06, "loss": 13.0312, "step": 7847 }, { "epoch": 0.5212193664076509, "grad_norm": 314.8128967285156, "learning_rate": 1.7338386560104768e-06, "loss": 17.4219, "step": 7848 }, { "epoch": 0.5212857807000066, "grad_norm": 172.28289794921875, "learning_rate": 1.7337655891622869e-06, "loss": 16.125, "step": 7849 }, { "epoch": 0.5213521949923624, "grad_norm": 215.36544799804688, "learning_rate": 1.7336925138263195e-06, "loss": 19.9688, "step": 7850 }, { "epoch": 0.5214186092847181, "grad_norm": 161.59747314453125, "learning_rate": 1.7336194300034203e-06, "loss": 17.125, "step": 7851 }, { "epoch": 0.5214850235770738, "grad_norm": 537.1979370117188, "learning_rate": 1.7335463376944345e-06, "loss": 21.1562, "step": 7852 }, { "epoch": 0.5215514378694295, "grad_norm": 131.5631103515625, "learning_rate": 1.7334732369002079e-06, "loss": 13.5625, "step": 7853 }, { "epoch": 0.5216178521617852, "grad_norm": 202.82308959960938, "learning_rate": 1.7334001276215853e-06, "loss": 17.3594, "step": 7854 }, { "epoch": 0.5216842664541409, "grad_norm": 172.71092224121094, "learning_rate": 1.7333270098594132e-06, "loss": 16.25, "step": 7855 }, { "epoch": 0.5217506807464967, "grad_norm": 578.1304321289062, "learning_rate": 1.7332538836145373e-06, "loss": 45.2812, "step": 7856 }, { "epoch": 0.5218170950388523, "grad_norm": 281.0404357910156, "learning_rate": 1.7331807488878032e-06, "loss": 27.125, "step": 7857 }, { "epoch": 0.5218835093312081, "grad_norm": 212.6780548095703, "learning_rate": 1.733107605680057e-06, "loss": 19.4531, "step": 7858 }, { "epoch": 0.5219499236235637, "grad_norm": 169.40538024902344, "learning_rate": 1.7330344539921446e-06, "loss": 13.0469, "step": 7859 }, { "epoch": 0.5220163379159195, "grad_norm": 145.2909698486328, "learning_rate": 1.7329612938249127e-06, "loss": 19.4375, "step": 7860 }, { "epoch": 0.5220827522082753, "grad_norm": 202.9923858642578, "learning_rate": 1.732888125179207e-06, "loss": 15.6406, "step": 7861 }, { "epoch": 0.5221491665006309, "grad_norm": 497.7169494628906, "learning_rate": 1.7328149480558739e-06, "loss": 21.6562, "step": 7862 }, { "epoch": 0.5222155807929867, "grad_norm": 79.85368347167969, "learning_rate": 1.73274176245576e-06, "loss": 11.2812, "step": 7863 }, { "epoch": 0.5222819950853423, "grad_norm": 163.08912658691406, "learning_rate": 1.7326685683797122e-06, "loss": 12.8281, "step": 7864 }, { "epoch": 0.5223484093776981, "grad_norm": 187.06100463867188, "learning_rate": 1.7325953658285765e-06, "loss": 18.9219, "step": 7865 }, { "epoch": 0.5224148236700538, "grad_norm": 214.83607482910156, "learning_rate": 1.7325221548032005e-06, "loss": 18.0, "step": 7866 }, { "epoch": 0.5224812379624095, "grad_norm": 223.4736785888672, "learning_rate": 1.7324489353044305e-06, "loss": 18.4844, "step": 7867 }, { "epoch": 0.5225476522547652, "grad_norm": 273.4626770019531, "learning_rate": 1.7323757073331133e-06, "loss": 20.6875, "step": 7868 }, { "epoch": 0.522614066547121, "grad_norm": 277.95855712890625, "learning_rate": 1.7323024708900965e-06, "loss": 14.9219, "step": 7869 }, { "epoch": 0.5226804808394766, "grad_norm": 130.25576782226562, "learning_rate": 1.7322292259762268e-06, "loss": 17.7031, "step": 7870 }, { "epoch": 0.5227468951318324, "grad_norm": 113.46644592285156, "learning_rate": 1.7321559725923518e-06, "loss": 15.9531, "step": 7871 }, { "epoch": 0.5228133094241881, "grad_norm": 368.2248229980469, "learning_rate": 1.7320827107393186e-06, "loss": 12.9219, "step": 7872 }, { "epoch": 0.5228797237165438, "grad_norm": 503.525390625, "learning_rate": 1.7320094404179748e-06, "loss": 17.9062, "step": 7873 }, { "epoch": 0.5229461380088996, "grad_norm": 131.30039978027344, "learning_rate": 1.731936161629168e-06, "loss": 16.2969, "step": 7874 }, { "epoch": 0.5230125523012552, "grad_norm": 372.6728515625, "learning_rate": 1.7318628743737453e-06, "loss": 18.4844, "step": 7875 }, { "epoch": 0.523078966593611, "grad_norm": 267.1163330078125, "learning_rate": 1.7317895786525554e-06, "loss": 26.8125, "step": 7876 }, { "epoch": 0.5231453808859666, "grad_norm": 218.4456024169922, "learning_rate": 1.7317162744664456e-06, "loss": 12.625, "step": 7877 }, { "epoch": 0.5232117951783224, "grad_norm": 167.84683227539062, "learning_rate": 1.7316429618162636e-06, "loss": 13.9062, "step": 7878 }, { "epoch": 0.523278209470678, "grad_norm": 222.96923828125, "learning_rate": 1.7315696407028576e-06, "loss": 18.5781, "step": 7879 }, { "epoch": 0.5233446237630338, "grad_norm": 139.95318603515625, "learning_rate": 1.7314963111270762e-06, "loss": 21.7188, "step": 7880 }, { "epoch": 0.5234110380553895, "grad_norm": 127.98883056640625, "learning_rate": 1.7314229730897671e-06, "loss": 15.8125, "step": 7881 }, { "epoch": 0.5234774523477452, "grad_norm": 559.03173828125, "learning_rate": 1.7313496265917788e-06, "loss": 21.3438, "step": 7882 }, { "epoch": 0.523543866640101, "grad_norm": 212.93930053710938, "learning_rate": 1.7312762716339597e-06, "loss": 24.3125, "step": 7883 }, { "epoch": 0.5236102809324567, "grad_norm": 170.8173828125, "learning_rate": 1.7312029082171585e-06, "loss": 16.3594, "step": 7884 }, { "epoch": 0.5236766952248124, "grad_norm": 248.48471069335938, "learning_rate": 1.7311295363422236e-06, "loss": 21.8125, "step": 7885 }, { "epoch": 0.5237431095171681, "grad_norm": 164.86746215820312, "learning_rate": 1.731056156010004e-06, "loss": 19.4531, "step": 7886 }, { "epoch": 0.5238095238095238, "grad_norm": 330.5518493652344, "learning_rate": 1.7309827672213482e-06, "loss": 22.1875, "step": 7887 }, { "epoch": 0.5238759381018795, "grad_norm": 131.97996520996094, "learning_rate": 1.730909369977105e-06, "loss": 17.7344, "step": 7888 }, { "epoch": 0.5239423523942353, "grad_norm": 287.8839416503906, "learning_rate": 1.7308359642781241e-06, "loss": 14.875, "step": 7889 }, { "epoch": 0.5240087666865909, "grad_norm": 323.3790588378906, "learning_rate": 1.7307625501252538e-06, "loss": 18.3438, "step": 7890 }, { "epoch": 0.5240751809789467, "grad_norm": 192.11972045898438, "learning_rate": 1.7306891275193441e-06, "loss": 17.0625, "step": 7891 }, { "epoch": 0.5241415952713023, "grad_norm": 689.0618896484375, "learning_rate": 1.7306156964612437e-06, "loss": 20.875, "step": 7892 }, { "epoch": 0.5242080095636581, "grad_norm": 398.4509582519531, "learning_rate": 1.7305422569518022e-06, "loss": 15.0469, "step": 7893 }, { "epoch": 0.5242744238560139, "grad_norm": 214.08311462402344, "learning_rate": 1.7304688089918691e-06, "loss": 19.7812, "step": 7894 }, { "epoch": 0.5243408381483695, "grad_norm": 143.9698028564453, "learning_rate": 1.7303953525822941e-06, "loss": 17.7344, "step": 7895 }, { "epoch": 0.5244072524407253, "grad_norm": 173.88662719726562, "learning_rate": 1.730321887723927e-06, "loss": 18.6719, "step": 7896 }, { "epoch": 0.5244736667330809, "grad_norm": 165.236572265625, "learning_rate": 1.7302484144176171e-06, "loss": 14.5312, "step": 7897 }, { "epoch": 0.5245400810254367, "grad_norm": 517.4205932617188, "learning_rate": 1.730174932664215e-06, "loss": 19.125, "step": 7898 }, { "epoch": 0.5246064953177924, "grad_norm": 143.88441467285156, "learning_rate": 1.7301014424645702e-06, "loss": 20.4062, "step": 7899 }, { "epoch": 0.5246729096101481, "grad_norm": 238.91627502441406, "learning_rate": 1.7300279438195327e-06, "loss": 22.7344, "step": 7900 }, { "epoch": 0.5247393239025038, "grad_norm": 243.3868865966797, "learning_rate": 1.729954436729953e-06, "loss": 18.5938, "step": 7901 }, { "epoch": 0.5248057381948595, "grad_norm": 234.25634765625, "learning_rate": 1.7298809211966811e-06, "loss": 19.3906, "step": 7902 }, { "epoch": 0.5248721524872152, "grad_norm": 163.0276336669922, "learning_rate": 1.729807397220568e-06, "loss": 14.8906, "step": 7903 }, { "epoch": 0.524938566779571, "grad_norm": 152.26271057128906, "learning_rate": 1.7297338648024635e-06, "loss": 14.9688, "step": 7904 }, { "epoch": 0.5250049810719267, "grad_norm": 204.22705078125, "learning_rate": 1.7296603239432183e-06, "loss": 18.0625, "step": 7905 }, { "epoch": 0.5250713953642824, "grad_norm": 197.186279296875, "learning_rate": 1.7295867746436836e-06, "loss": 16.2656, "step": 7906 }, { "epoch": 0.5251378096566381, "grad_norm": 201.3148193359375, "learning_rate": 1.7295132169047095e-06, "loss": 15.1875, "step": 7907 }, { "epoch": 0.5252042239489938, "grad_norm": 150.28573608398438, "learning_rate": 1.7294396507271475e-06, "loss": 21.2812, "step": 7908 }, { "epoch": 0.5252706382413496, "grad_norm": 422.5943908691406, "learning_rate": 1.729366076111848e-06, "loss": 17.9062, "step": 7909 }, { "epoch": 0.5253370525337052, "grad_norm": 441.2827453613281, "learning_rate": 1.7292924930596623e-06, "loss": 18.7188, "step": 7910 }, { "epoch": 0.525403466826061, "grad_norm": 212.40248107910156, "learning_rate": 1.7292189015714416e-06, "loss": 18.4062, "step": 7911 }, { "epoch": 0.5254698811184166, "grad_norm": 154.73873901367188, "learning_rate": 1.7291453016480372e-06, "loss": 19.9062, "step": 7912 }, { "epoch": 0.5255362954107724, "grad_norm": 252.27340698242188, "learning_rate": 1.7290716932903002e-06, "loss": 17.6562, "step": 7913 }, { "epoch": 0.5256027097031282, "grad_norm": 236.26536560058594, "learning_rate": 1.7289980764990823e-06, "loss": 14.9375, "step": 7914 }, { "epoch": 0.5256691239954838, "grad_norm": 190.16238403320312, "learning_rate": 1.7289244512752355e-06, "loss": 15.3125, "step": 7915 }, { "epoch": 0.5257355382878396, "grad_norm": 195.63873291015625, "learning_rate": 1.7288508176196104e-06, "loss": 19.7188, "step": 7916 }, { "epoch": 0.5258019525801952, "grad_norm": 235.3174591064453, "learning_rate": 1.7287771755330596e-06, "loss": 25.5625, "step": 7917 }, { "epoch": 0.525868366872551, "grad_norm": 123.5604476928711, "learning_rate": 1.7287035250164346e-06, "loss": 15.5938, "step": 7918 }, { "epoch": 0.5259347811649067, "grad_norm": 244.83111572265625, "learning_rate": 1.7286298660705873e-06, "loss": 15.7188, "step": 7919 }, { "epoch": 0.5260011954572624, "grad_norm": 293.4317321777344, "learning_rate": 1.72855619869637e-06, "loss": 26.8594, "step": 7920 }, { "epoch": 0.5260676097496181, "grad_norm": 447.82269287109375, "learning_rate": 1.7284825228946347e-06, "loss": 21.9375, "step": 7921 }, { "epoch": 0.5261340240419738, "grad_norm": 168.2023162841797, "learning_rate": 1.7284088386662335e-06, "loss": 12.9688, "step": 7922 }, { "epoch": 0.5262004383343295, "grad_norm": 151.307373046875, "learning_rate": 1.7283351460120188e-06, "loss": 18.3906, "step": 7923 }, { "epoch": 0.5262668526266853, "grad_norm": 355.0651550292969, "learning_rate": 1.7282614449328437e-06, "loss": 30.2969, "step": 7924 }, { "epoch": 0.526333266919041, "grad_norm": 226.1155548095703, "learning_rate": 1.7281877354295595e-06, "loss": 17.1719, "step": 7925 }, { "epoch": 0.5263996812113967, "grad_norm": 157.22206115722656, "learning_rate": 1.7281140175030198e-06, "loss": 13.9219, "step": 7926 }, { "epoch": 0.5264660955037525, "grad_norm": 406.24078369140625, "learning_rate": 1.7280402911540767e-06, "loss": 20.0469, "step": 7927 }, { "epoch": 0.5265325097961081, "grad_norm": 174.7232666015625, "learning_rate": 1.7279665563835838e-06, "loss": 16.6094, "step": 7928 }, { "epoch": 0.5265989240884639, "grad_norm": 92.9596176147461, "learning_rate": 1.727892813192393e-06, "loss": 15.7656, "step": 7929 }, { "epoch": 0.5266653383808195, "grad_norm": 202.9163360595703, "learning_rate": 1.7278190615813583e-06, "loss": 21.2969, "step": 7930 }, { "epoch": 0.5267317526731753, "grad_norm": 440.8778991699219, "learning_rate": 1.7277453015513324e-06, "loss": 24.6875, "step": 7931 }, { "epoch": 0.526798166965531, "grad_norm": 280.90875244140625, "learning_rate": 1.7276715331031682e-06, "loss": 13.375, "step": 7932 }, { "epoch": 0.5268645812578867, "grad_norm": 481.1612243652344, "learning_rate": 1.7275977562377193e-06, "loss": 28.7188, "step": 7933 }, { "epoch": 0.5269309955502424, "grad_norm": 337.931640625, "learning_rate": 1.7275239709558393e-06, "loss": 25.4688, "step": 7934 }, { "epoch": 0.5269974098425981, "grad_norm": 303.70654296875, "learning_rate": 1.7274501772583815e-06, "loss": 20.75, "step": 7935 }, { "epoch": 0.5270638241349539, "grad_norm": 271.4400329589844, "learning_rate": 1.7273763751461993e-06, "loss": 21.1562, "step": 7936 }, { "epoch": 0.5271302384273095, "grad_norm": 169.30917358398438, "learning_rate": 1.7273025646201469e-06, "loss": 16.4688, "step": 7937 }, { "epoch": 0.5271966527196653, "grad_norm": 149.00338745117188, "learning_rate": 1.727228745681078e-06, "loss": 12.7812, "step": 7938 }, { "epoch": 0.527263067012021, "grad_norm": 335.2011413574219, "learning_rate": 1.727154918329846e-06, "loss": 24.0312, "step": 7939 }, { "epoch": 0.5273294813043767, "grad_norm": 198.38604736328125, "learning_rate": 1.727081082567305e-06, "loss": 18.0625, "step": 7940 }, { "epoch": 0.5273958955967324, "grad_norm": 409.2187194824219, "learning_rate": 1.7270072383943094e-06, "loss": 22.9062, "step": 7941 }, { "epoch": 0.5274623098890882, "grad_norm": 238.0561981201172, "learning_rate": 1.7269333858117134e-06, "loss": 22.3125, "step": 7942 }, { "epoch": 0.5275287241814438, "grad_norm": 230.5055694580078, "learning_rate": 1.7268595248203712e-06, "loss": 22.3906, "step": 7943 }, { "epoch": 0.5275951384737996, "grad_norm": 285.7333068847656, "learning_rate": 1.7267856554211372e-06, "loss": 23.6875, "step": 7944 }, { "epoch": 0.5276615527661552, "grad_norm": 401.64691162109375, "learning_rate": 1.7267117776148658e-06, "loss": 20.6875, "step": 7945 }, { "epoch": 0.527727967058511, "grad_norm": 1407.14697265625, "learning_rate": 1.7266378914024116e-06, "loss": 18.4531, "step": 7946 }, { "epoch": 0.5277943813508668, "grad_norm": 369.8612060546875, "learning_rate": 1.726563996784629e-06, "loss": 22.625, "step": 7947 }, { "epoch": 0.5278607956432224, "grad_norm": 441.4884948730469, "learning_rate": 1.7264900937623733e-06, "loss": 23.7344, "step": 7948 }, { "epoch": 0.5279272099355782, "grad_norm": 150.6550750732422, "learning_rate": 1.7264161823364992e-06, "loss": 17.4219, "step": 7949 }, { "epoch": 0.5279936242279338, "grad_norm": 198.33193969726562, "learning_rate": 1.7263422625078616e-06, "loss": 17.8906, "step": 7950 }, { "epoch": 0.5280600385202896, "grad_norm": 169.13999938964844, "learning_rate": 1.7262683342773154e-06, "loss": 16.1094, "step": 7951 }, { "epoch": 0.5281264528126453, "grad_norm": 187.02685546875, "learning_rate": 1.7261943976457159e-06, "loss": 23.4844, "step": 7952 }, { "epoch": 0.528192867105001, "grad_norm": 132.81956481933594, "learning_rate": 1.7261204526139186e-06, "loss": 14.0312, "step": 7953 }, { "epoch": 0.5282592813973567, "grad_norm": 479.0052185058594, "learning_rate": 1.7260464991827781e-06, "loss": 25.25, "step": 7954 }, { "epoch": 0.5283256956897124, "grad_norm": 116.065185546875, "learning_rate": 1.7259725373531508e-06, "loss": 13.8281, "step": 7955 }, { "epoch": 0.5283921099820681, "grad_norm": 114.00503540039062, "learning_rate": 1.7258985671258914e-06, "loss": 16.0, "step": 7956 }, { "epoch": 0.5284585242744239, "grad_norm": 432.1898498535156, "learning_rate": 1.7258245885018563e-06, "loss": 22.0938, "step": 7957 }, { "epoch": 0.5285249385667796, "grad_norm": 361.7710266113281, "learning_rate": 1.7257506014819007e-06, "loss": 33.75, "step": 7958 }, { "epoch": 0.5285913528591353, "grad_norm": 209.3590087890625, "learning_rate": 1.7256766060668808e-06, "loss": 17.9688, "step": 7959 }, { "epoch": 0.528657767151491, "grad_norm": 290.7583923339844, "learning_rate": 1.7256026022576523e-06, "loss": 18.2812, "step": 7960 }, { "epoch": 0.5287241814438467, "grad_norm": 234.58193969726562, "learning_rate": 1.7255285900550713e-06, "loss": 17.0156, "step": 7961 }, { "epoch": 0.5287905957362025, "grad_norm": 252.34422302246094, "learning_rate": 1.725454569459994e-06, "loss": 16.9531, "step": 7962 }, { "epoch": 0.5288570100285581, "grad_norm": 644.27392578125, "learning_rate": 1.7253805404732764e-06, "loss": 18.7031, "step": 7963 }, { "epoch": 0.5289234243209139, "grad_norm": 268.384521484375, "learning_rate": 1.7253065030957753e-06, "loss": 16.6562, "step": 7964 }, { "epoch": 0.5289898386132695, "grad_norm": 536.6509399414062, "learning_rate": 1.7252324573283465e-06, "loss": 15.4688, "step": 7965 }, { "epoch": 0.5290562529056253, "grad_norm": 130.08154296875, "learning_rate": 1.7251584031718469e-06, "loss": 15.9375, "step": 7966 }, { "epoch": 0.529122667197981, "grad_norm": 174.03558349609375, "learning_rate": 1.725084340627133e-06, "loss": 14.9844, "step": 7967 }, { "epoch": 0.5291890814903367, "grad_norm": 198.5491943359375, "learning_rate": 1.7250102696950618e-06, "loss": 15.0469, "step": 7968 }, { "epoch": 0.5292554957826925, "grad_norm": 252.1041717529297, "learning_rate": 1.7249361903764896e-06, "loss": 28.8438, "step": 7969 }, { "epoch": 0.5293219100750481, "grad_norm": 172.66827392578125, "learning_rate": 1.7248621026722737e-06, "loss": 18.2031, "step": 7970 }, { "epoch": 0.5293883243674039, "grad_norm": 146.482666015625, "learning_rate": 1.724788006583271e-06, "loss": 17.6094, "step": 7971 }, { "epoch": 0.5294547386597596, "grad_norm": 211.12010192871094, "learning_rate": 1.7247139021103383e-06, "loss": 20.8125, "step": 7972 }, { "epoch": 0.5295211529521153, "grad_norm": 166.63705444335938, "learning_rate": 1.7246397892543333e-06, "loss": 17.5, "step": 7973 }, { "epoch": 0.529587567244471, "grad_norm": 204.39871215820312, "learning_rate": 1.724565668016113e-06, "loss": 17.9688, "step": 7974 }, { "epoch": 0.5296539815368267, "grad_norm": 160.6220703125, "learning_rate": 1.724491538396535e-06, "loss": 19.7344, "step": 7975 }, { "epoch": 0.5297203958291824, "grad_norm": 1004.0631713867188, "learning_rate": 1.7244174003964567e-06, "loss": 22.4219, "step": 7976 }, { "epoch": 0.5297868101215382, "grad_norm": 458.39251708984375, "learning_rate": 1.7243432540167354e-06, "loss": 16.4688, "step": 7977 }, { "epoch": 0.5298532244138938, "grad_norm": 230.7624053955078, "learning_rate": 1.7242690992582292e-06, "loss": 14.1406, "step": 7978 }, { "epoch": 0.5299196387062496, "grad_norm": 237.75579833984375, "learning_rate": 1.7241949361217953e-06, "loss": 20.4062, "step": 7979 }, { "epoch": 0.5299860529986054, "grad_norm": 122.970947265625, "learning_rate": 1.7241207646082923e-06, "loss": 19.2969, "step": 7980 }, { "epoch": 0.530052467290961, "grad_norm": 186.9766387939453, "learning_rate": 1.7240465847185778e-06, "loss": 16.9375, "step": 7981 }, { "epoch": 0.5301188815833168, "grad_norm": 131.5819091796875, "learning_rate": 1.7239723964535103e-06, "loss": 18.0312, "step": 7982 }, { "epoch": 0.5301852958756724, "grad_norm": 125.07111358642578, "learning_rate": 1.723898199813947e-06, "loss": 15.2812, "step": 7983 }, { "epoch": 0.5302517101680282, "grad_norm": 141.67507934570312, "learning_rate": 1.723823994800747e-06, "loss": 13.9219, "step": 7984 }, { "epoch": 0.5303181244603838, "grad_norm": 249.4420928955078, "learning_rate": 1.7237497814147685e-06, "loss": 28.6875, "step": 7985 }, { "epoch": 0.5303845387527396, "grad_norm": 368.8595886230469, "learning_rate": 1.7236755596568697e-06, "loss": 17.6562, "step": 7986 }, { "epoch": 0.5304509530450953, "grad_norm": 246.2570343017578, "learning_rate": 1.7236013295279092e-06, "loss": 17.3906, "step": 7987 }, { "epoch": 0.530517367337451, "grad_norm": 134.9993133544922, "learning_rate": 1.723527091028746e-06, "loss": 14.7969, "step": 7988 }, { "epoch": 0.5305837816298067, "grad_norm": 662.771484375, "learning_rate": 1.7234528441602384e-06, "loss": 22.1094, "step": 7989 }, { "epoch": 0.5306501959221624, "grad_norm": 366.8173522949219, "learning_rate": 1.723378588923246e-06, "loss": 12.7969, "step": 7990 }, { "epoch": 0.5307166102145182, "grad_norm": 835.8413696289062, "learning_rate": 1.723304325318627e-06, "loss": 14.0, "step": 7991 }, { "epoch": 0.5307830245068739, "grad_norm": 400.6131286621094, "learning_rate": 1.7232300533472405e-06, "loss": 17.3281, "step": 7992 }, { "epoch": 0.5308494387992296, "grad_norm": 130.29774475097656, "learning_rate": 1.7231557730099455e-06, "loss": 14.9688, "step": 7993 }, { "epoch": 0.5309158530915853, "grad_norm": 168.7952880859375, "learning_rate": 1.7230814843076017e-06, "loss": 17.2969, "step": 7994 }, { "epoch": 0.530982267383941, "grad_norm": 324.7363586425781, "learning_rate": 1.7230071872410683e-06, "loss": 21.6562, "step": 7995 }, { "epoch": 0.5310486816762967, "grad_norm": 125.50772857666016, "learning_rate": 1.7229328818112047e-06, "loss": 17.4375, "step": 7996 }, { "epoch": 0.5311150959686525, "grad_norm": 183.89991760253906, "learning_rate": 1.7228585680188705e-06, "loss": 18.2188, "step": 7997 }, { "epoch": 0.5311815102610081, "grad_norm": 138.8056182861328, "learning_rate": 1.722784245864925e-06, "loss": 16.8438, "step": 7998 }, { "epoch": 0.5312479245533639, "grad_norm": 139.0156707763672, "learning_rate": 1.7227099153502283e-06, "loss": 16.4844, "step": 7999 }, { "epoch": 0.5313143388457195, "grad_norm": 227.4409637451172, "learning_rate": 1.72263557647564e-06, "loss": 18.0312, "step": 8000 }, { "epoch": 0.5313807531380753, "grad_norm": 314.462646484375, "learning_rate": 1.7225612292420199e-06, "loss": 22.125, "step": 8001 }, { "epoch": 0.5314471674304311, "grad_norm": 607.5193481445312, "learning_rate": 1.7224868736502282e-06, "loss": 19.1406, "step": 8002 }, { "epoch": 0.5315135817227867, "grad_norm": 250.34217834472656, "learning_rate": 1.7224125097011251e-06, "loss": 18.0469, "step": 8003 }, { "epoch": 0.5315799960151425, "grad_norm": 261.42205810546875, "learning_rate": 1.7223381373955705e-06, "loss": 20.0625, "step": 8004 }, { "epoch": 0.5316464103074982, "grad_norm": 171.03512573242188, "learning_rate": 1.7222637567344248e-06, "loss": 13.5312, "step": 8005 }, { "epoch": 0.5317128245998539, "grad_norm": 593.649658203125, "learning_rate": 1.7221893677185487e-06, "loss": 15.7969, "step": 8006 }, { "epoch": 0.5317792388922096, "grad_norm": 293.8585510253906, "learning_rate": 1.7221149703488025e-06, "loss": 26.2188, "step": 8007 }, { "epoch": 0.5318456531845653, "grad_norm": 314.6246643066406, "learning_rate": 1.7220405646260465e-06, "loss": 18.7344, "step": 8008 }, { "epoch": 0.531912067476921, "grad_norm": 497.7841796875, "learning_rate": 1.7219661505511413e-06, "loss": 19.0469, "step": 8009 }, { "epoch": 0.5319784817692768, "grad_norm": 305.6834716796875, "learning_rate": 1.7218917281249486e-06, "loss": 22.1875, "step": 8010 }, { "epoch": 0.5320448960616324, "grad_norm": 194.71780395507812, "learning_rate": 1.7218172973483283e-06, "loss": 23.875, "step": 8011 }, { "epoch": 0.5321113103539882, "grad_norm": 315.7125244140625, "learning_rate": 1.7217428582221416e-06, "loss": 22.9688, "step": 8012 }, { "epoch": 0.5321777246463439, "grad_norm": 200.28384399414062, "learning_rate": 1.7216684107472499e-06, "loss": 15.9062, "step": 8013 }, { "epoch": 0.5322441389386996, "grad_norm": 195.3638153076172, "learning_rate": 1.721593954924514e-06, "loss": 20.8125, "step": 8014 }, { "epoch": 0.5323105532310554, "grad_norm": 123.79594421386719, "learning_rate": 1.7215194907547956e-06, "loss": 17.1875, "step": 8015 }, { "epoch": 0.532376967523411, "grad_norm": 672.1458740234375, "learning_rate": 1.7214450182389558e-06, "loss": 25.8594, "step": 8016 }, { "epoch": 0.5324433818157668, "grad_norm": 140.60113525390625, "learning_rate": 1.7213705373778558e-06, "loss": 15.6406, "step": 8017 }, { "epoch": 0.5325097961081224, "grad_norm": 156.1002655029297, "learning_rate": 1.7212960481723572e-06, "loss": 12.7656, "step": 8018 }, { "epoch": 0.5325762104004782, "grad_norm": 383.5840759277344, "learning_rate": 1.7212215506233224e-06, "loss": 26.3438, "step": 8019 }, { "epoch": 0.5326426246928339, "grad_norm": 130.88523864746094, "learning_rate": 1.7211470447316121e-06, "loss": 19.9688, "step": 8020 }, { "epoch": 0.5327090389851896, "grad_norm": 540.0320434570312, "learning_rate": 1.7210725304980888e-06, "loss": 30.9688, "step": 8021 }, { "epoch": 0.5327754532775453, "grad_norm": 355.8465881347656, "learning_rate": 1.7209980079236145e-06, "loss": 24.0938, "step": 8022 }, { "epoch": 0.532841867569901, "grad_norm": 276.9205627441406, "learning_rate": 1.7209234770090506e-06, "loss": 24.6875, "step": 8023 }, { "epoch": 0.5329082818622568, "grad_norm": 250.1746063232422, "learning_rate": 1.7208489377552597e-06, "loss": 20.4844, "step": 8024 }, { "epoch": 0.5329746961546125, "grad_norm": 126.29374694824219, "learning_rate": 1.7207743901631042e-06, "loss": 17.2656, "step": 8025 }, { "epoch": 0.5330411104469682, "grad_norm": 145.1389617919922, "learning_rate": 1.720699834233446e-06, "loss": 15.0312, "step": 8026 }, { "epoch": 0.5331075247393239, "grad_norm": 216.36199951171875, "learning_rate": 1.7206252699671477e-06, "loss": 19.375, "step": 8027 }, { "epoch": 0.5331739390316796, "grad_norm": 242.7393035888672, "learning_rate": 1.720550697365072e-06, "loss": 19.9688, "step": 8028 }, { "epoch": 0.5332403533240353, "grad_norm": 361.1454772949219, "learning_rate": 1.720476116428081e-06, "loss": 20.5469, "step": 8029 }, { "epoch": 0.5333067676163911, "grad_norm": 800.0941162109375, "learning_rate": 1.720401527157038e-06, "loss": 21.4062, "step": 8030 }, { "epoch": 0.5333731819087467, "grad_norm": 251.55531311035156, "learning_rate": 1.7203269295528053e-06, "loss": 22.8438, "step": 8031 }, { "epoch": 0.5334395962011025, "grad_norm": 331.9626770019531, "learning_rate": 1.7202523236162461e-06, "loss": 18.8281, "step": 8032 }, { "epoch": 0.5335060104934581, "grad_norm": 176.98153686523438, "learning_rate": 1.7201777093482237e-06, "loss": 23.4688, "step": 8033 }, { "epoch": 0.5335724247858139, "grad_norm": 1405.355712890625, "learning_rate": 1.7201030867496003e-06, "loss": 13.9062, "step": 8034 }, { "epoch": 0.5336388390781697, "grad_norm": 105.1067123413086, "learning_rate": 1.72002845582124e-06, "loss": 15.5312, "step": 8035 }, { "epoch": 0.5337052533705253, "grad_norm": 179.6195831298828, "learning_rate": 1.7199538165640056e-06, "loss": 12.5547, "step": 8036 }, { "epoch": 0.5337716676628811, "grad_norm": 77.2093734741211, "learning_rate": 1.7198791689787606e-06, "loss": 15.2812, "step": 8037 }, { "epoch": 0.5338380819552367, "grad_norm": 161.8237762451172, "learning_rate": 1.7198045130663684e-06, "loss": 21.2969, "step": 8038 }, { "epoch": 0.5339044962475925, "grad_norm": 195.18882751464844, "learning_rate": 1.719729848827693e-06, "loss": 22.5625, "step": 8039 }, { "epoch": 0.5339709105399482, "grad_norm": 462.0542297363281, "learning_rate": 1.7196551762635973e-06, "loss": 20.7344, "step": 8040 }, { "epoch": 0.5340373248323039, "grad_norm": 162.2556610107422, "learning_rate": 1.7195804953749456e-06, "loss": 13.5156, "step": 8041 }, { "epoch": 0.5341037391246596, "grad_norm": 539.27783203125, "learning_rate": 1.7195058061626018e-06, "loss": 15.6406, "step": 8042 }, { "epoch": 0.5341701534170153, "grad_norm": 284.04296875, "learning_rate": 1.71943110862743e-06, "loss": 15.0078, "step": 8043 }, { "epoch": 0.534236567709371, "grad_norm": 245.14288330078125, "learning_rate": 1.7193564027702933e-06, "loss": 15.9688, "step": 8044 }, { "epoch": 0.5343029820017268, "grad_norm": 184.50733947753906, "learning_rate": 1.7192816885920568e-06, "loss": 19.7656, "step": 8045 }, { "epoch": 0.5343693962940825, "grad_norm": 247.76394653320312, "learning_rate": 1.7192069660935847e-06, "loss": 16.3594, "step": 8046 }, { "epoch": 0.5344358105864382, "grad_norm": 178.81097412109375, "learning_rate": 1.719132235275741e-06, "loss": 15.3438, "step": 8047 }, { "epoch": 0.534502224878794, "grad_norm": 299.2554016113281, "learning_rate": 1.7190574961393902e-06, "loss": 25.0625, "step": 8048 }, { "epoch": 0.5345686391711496, "grad_norm": 244.72071838378906, "learning_rate": 1.7189827486853971e-06, "loss": 15.1875, "step": 8049 }, { "epoch": 0.5346350534635054, "grad_norm": 464.34375, "learning_rate": 1.718907992914626e-06, "loss": 26.9062, "step": 8050 }, { "epoch": 0.534701467755861, "grad_norm": 235.8640899658203, "learning_rate": 1.7188332288279419e-06, "loss": 13.0312, "step": 8051 }, { "epoch": 0.5347678820482168, "grad_norm": 265.02252197265625, "learning_rate": 1.7187584564262092e-06, "loss": 16.7031, "step": 8052 }, { "epoch": 0.5348342963405724, "grad_norm": 214.66299438476562, "learning_rate": 1.7186836757102934e-06, "loss": 25.375, "step": 8053 }, { "epoch": 0.5349007106329282, "grad_norm": 203.260009765625, "learning_rate": 1.7186088866810594e-06, "loss": 17.625, "step": 8054 }, { "epoch": 0.5349671249252839, "grad_norm": 319.41552734375, "learning_rate": 1.7185340893393718e-06, "loss": 23.8125, "step": 8055 }, { "epoch": 0.5350335392176396, "grad_norm": 199.08567810058594, "learning_rate": 1.7184592836860964e-06, "loss": 18.7656, "step": 8056 }, { "epoch": 0.5350999535099954, "grad_norm": 362.93194580078125, "learning_rate": 1.7183844697220984e-06, "loss": 17.9219, "step": 8057 }, { "epoch": 0.535166367802351, "grad_norm": 263.0087890625, "learning_rate": 1.718309647448243e-06, "loss": 17.8281, "step": 8058 }, { "epoch": 0.5352327820947068, "grad_norm": 416.1296691894531, "learning_rate": 1.7182348168653958e-06, "loss": 18.4688, "step": 8059 }, { "epoch": 0.5352991963870625, "grad_norm": 385.8599853515625, "learning_rate": 1.7181599779744221e-06, "loss": 21.5312, "step": 8060 }, { "epoch": 0.5353656106794182, "grad_norm": 208.42311096191406, "learning_rate": 1.7180851307761883e-06, "loss": 15.75, "step": 8061 }, { "epoch": 0.5354320249717739, "grad_norm": 153.39881896972656, "learning_rate": 1.7180102752715598e-06, "loss": 17.2031, "step": 8062 }, { "epoch": 0.5354984392641297, "grad_norm": 492.4439697265625, "learning_rate": 1.7179354114614023e-06, "loss": 13.2031, "step": 8063 }, { "epoch": 0.5355648535564853, "grad_norm": 436.1521911621094, "learning_rate": 1.717860539346582e-06, "loss": 27.5312, "step": 8064 }, { "epoch": 0.5356312678488411, "grad_norm": 182.67657470703125, "learning_rate": 1.717785658927965e-06, "loss": 14.9922, "step": 8065 }, { "epoch": 0.5356976821411968, "grad_norm": 207.040283203125, "learning_rate": 1.717710770206417e-06, "loss": 16.8281, "step": 8066 }, { "epoch": 0.5357640964335525, "grad_norm": 238.8594512939453, "learning_rate": 1.717635873182805e-06, "loss": 22.6406, "step": 8067 }, { "epoch": 0.5358305107259083, "grad_norm": 219.26150512695312, "learning_rate": 1.717560967857995e-06, "loss": 18.3906, "step": 8068 }, { "epoch": 0.5358969250182639, "grad_norm": 175.47714233398438, "learning_rate": 1.7174860542328533e-06, "loss": 14.8281, "step": 8069 }, { "epoch": 0.5359633393106197, "grad_norm": 218.7083740234375, "learning_rate": 1.717411132308247e-06, "loss": 14.4062, "step": 8070 }, { "epoch": 0.5360297536029753, "grad_norm": 331.4581298828125, "learning_rate": 1.7173362020850422e-06, "loss": 18.375, "step": 8071 }, { "epoch": 0.5360961678953311, "grad_norm": 349.4594421386719, "learning_rate": 1.7172612635641058e-06, "loss": 15.7812, "step": 8072 }, { "epoch": 0.5361625821876868, "grad_norm": 144.058349609375, "learning_rate": 1.7171863167463048e-06, "loss": 14.7188, "step": 8073 }, { "epoch": 0.5362289964800425, "grad_norm": 230.40708923339844, "learning_rate": 1.717111361632506e-06, "loss": 13.3984, "step": 8074 }, { "epoch": 0.5362954107723982, "grad_norm": 247.26380920410156, "learning_rate": 1.7170363982235764e-06, "loss": 16.7656, "step": 8075 }, { "epoch": 0.5363618250647539, "grad_norm": 160.9830322265625, "learning_rate": 1.7169614265203832e-06, "loss": 16.0625, "step": 8076 }, { "epoch": 0.5364282393571097, "grad_norm": 157.33790588378906, "learning_rate": 1.716886446523794e-06, "loss": 19.375, "step": 8077 }, { "epoch": 0.5364946536494654, "grad_norm": 184.97933959960938, "learning_rate": 1.7168114582346756e-06, "loss": 17.1094, "step": 8078 }, { "epoch": 0.5365610679418211, "grad_norm": 250.75265502929688, "learning_rate": 1.7167364616538955e-06, "loss": 22.5938, "step": 8079 }, { "epoch": 0.5366274822341768, "grad_norm": 217.6050262451172, "learning_rate": 1.7166614567823213e-06, "loss": 14.2344, "step": 8080 }, { "epoch": 0.5366938965265325, "grad_norm": 183.0878448486328, "learning_rate": 1.7165864436208206e-06, "loss": 31.5625, "step": 8081 }, { "epoch": 0.5367603108188882, "grad_norm": 239.0550537109375, "learning_rate": 1.716511422170261e-06, "loss": 14.7031, "step": 8082 }, { "epoch": 0.536826725111244, "grad_norm": 186.13087463378906, "learning_rate": 1.7164363924315106e-06, "loss": 17.1562, "step": 8083 }, { "epoch": 0.5368931394035996, "grad_norm": 207.34263610839844, "learning_rate": 1.7163613544054374e-06, "loss": 18.4375, "step": 8084 }, { "epoch": 0.5369595536959554, "grad_norm": 198.29833984375, "learning_rate": 1.7162863080929086e-06, "loss": 15.1406, "step": 8085 }, { "epoch": 0.537025967988311, "grad_norm": 718.9483032226562, "learning_rate": 1.716211253494793e-06, "loss": 15.9531, "step": 8086 }, { "epoch": 0.5370923822806668, "grad_norm": 160.84669494628906, "learning_rate": 1.7161361906119586e-06, "loss": 17.6875, "step": 8087 }, { "epoch": 0.5371587965730226, "grad_norm": 163.3136749267578, "learning_rate": 1.7160611194452738e-06, "loss": 16.6562, "step": 8088 }, { "epoch": 0.5372252108653782, "grad_norm": 206.4572296142578, "learning_rate": 1.715986039995607e-06, "loss": 18.0469, "step": 8089 }, { "epoch": 0.537291625157734, "grad_norm": 201.31239318847656, "learning_rate": 1.7159109522638265e-06, "loss": 20.1875, "step": 8090 }, { "epoch": 0.5373580394500896, "grad_norm": 270.8914794921875, "learning_rate": 1.7158358562508007e-06, "loss": 19.4219, "step": 8091 }, { "epoch": 0.5374244537424454, "grad_norm": 318.73992919921875, "learning_rate": 1.7157607519573986e-06, "loss": 18.875, "step": 8092 }, { "epoch": 0.5374908680348011, "grad_norm": 596.2969970703125, "learning_rate": 1.715685639384489e-06, "loss": 18.3594, "step": 8093 }, { "epoch": 0.5375572823271568, "grad_norm": 371.83172607421875, "learning_rate": 1.7156105185329404e-06, "loss": 17.4219, "step": 8094 }, { "epoch": 0.5376236966195125, "grad_norm": 307.8975830078125, "learning_rate": 1.7155353894036223e-06, "loss": 25.5312, "step": 8095 }, { "epoch": 0.5376901109118682, "grad_norm": 307.2715759277344, "learning_rate": 1.7154602519974032e-06, "loss": 20.7969, "step": 8096 }, { "epoch": 0.5377565252042239, "grad_norm": 159.4077911376953, "learning_rate": 1.715385106315152e-06, "loss": 15.7188, "step": 8097 }, { "epoch": 0.5378229394965797, "grad_norm": 402.3904113769531, "learning_rate": 1.715309952357739e-06, "loss": 19.9688, "step": 8098 }, { "epoch": 0.5378893537889354, "grad_norm": 151.1819305419922, "learning_rate": 1.7152347901260329e-06, "loss": 13.7656, "step": 8099 }, { "epoch": 0.5379557680812911, "grad_norm": 291.4684753417969, "learning_rate": 1.7151596196209027e-06, "loss": 15.6094, "step": 8100 }, { "epoch": 0.5380221823736469, "grad_norm": 182.16433715820312, "learning_rate": 1.715084440843219e-06, "loss": 16.6875, "step": 8101 }, { "epoch": 0.5380885966660025, "grad_norm": 334.7706604003906, "learning_rate": 1.7150092537938506e-06, "loss": 26.6562, "step": 8102 }, { "epoch": 0.5381550109583583, "grad_norm": 259.1689147949219, "learning_rate": 1.7149340584736676e-06, "loss": 15.8438, "step": 8103 }, { "epoch": 0.5382214252507139, "grad_norm": 290.4275207519531, "learning_rate": 1.7148588548835393e-06, "loss": 22.7812, "step": 8104 }, { "epoch": 0.5382878395430697, "grad_norm": 341.40557861328125, "learning_rate": 1.714783643024336e-06, "loss": 14.5469, "step": 8105 }, { "epoch": 0.5383542538354253, "grad_norm": 168.42079162597656, "learning_rate": 1.714708422896928e-06, "loss": 22.8594, "step": 8106 }, { "epoch": 0.5384206681277811, "grad_norm": 115.1194839477539, "learning_rate": 1.714633194502185e-06, "loss": 17.2656, "step": 8107 }, { "epoch": 0.5384870824201368, "grad_norm": 327.6538391113281, "learning_rate": 1.7145579578409772e-06, "loss": 23.4375, "step": 8108 }, { "epoch": 0.5385534967124925, "grad_norm": 190.15374755859375, "learning_rate": 1.714482712914175e-06, "loss": 18.6094, "step": 8109 }, { "epoch": 0.5386199110048483, "grad_norm": 273.9770202636719, "learning_rate": 1.7144074597226488e-06, "loss": 20.1406, "step": 8110 }, { "epoch": 0.538686325297204, "grad_norm": 217.3231964111328, "learning_rate": 1.714332198267269e-06, "loss": 15.8438, "step": 8111 }, { "epoch": 0.5387527395895597, "grad_norm": 170.1095733642578, "learning_rate": 1.7142569285489064e-06, "loss": 18.1719, "step": 8112 }, { "epoch": 0.5388191538819154, "grad_norm": 252.3469696044922, "learning_rate": 1.7141816505684315e-06, "loss": 18.9219, "step": 8113 }, { "epoch": 0.5388855681742711, "grad_norm": 122.3340072631836, "learning_rate": 1.7141063643267148e-06, "loss": 10.9375, "step": 8114 }, { "epoch": 0.5389519824666268, "grad_norm": 217.75009155273438, "learning_rate": 1.7140310698246276e-06, "loss": 16.2188, "step": 8115 }, { "epoch": 0.5390183967589826, "grad_norm": 170.57275390625, "learning_rate": 1.7139557670630408e-06, "loss": 14.6406, "step": 8116 }, { "epoch": 0.5390848110513382, "grad_norm": 3901.58154296875, "learning_rate": 1.7138804560428254e-06, "loss": 15.875, "step": 8117 }, { "epoch": 0.539151225343694, "grad_norm": 178.97219848632812, "learning_rate": 1.7138051367648525e-06, "loss": 23.1406, "step": 8118 }, { "epoch": 0.5392176396360496, "grad_norm": 554.0601806640625, "learning_rate": 1.7137298092299934e-06, "loss": 18.9062, "step": 8119 }, { "epoch": 0.5392840539284054, "grad_norm": 162.75521850585938, "learning_rate": 1.7136544734391194e-06, "loss": 16.9062, "step": 8120 }, { "epoch": 0.5393504682207612, "grad_norm": 245.13644409179688, "learning_rate": 1.7135791293931018e-06, "loss": 15.8594, "step": 8121 }, { "epoch": 0.5394168825131168, "grad_norm": 129.8968963623047, "learning_rate": 1.7135037770928124e-06, "loss": 13.4141, "step": 8122 }, { "epoch": 0.5394832968054726, "grad_norm": 188.49440002441406, "learning_rate": 1.713428416539123e-06, "loss": 23.2812, "step": 8123 }, { "epoch": 0.5395497110978282, "grad_norm": 128.87896728515625, "learning_rate": 1.7133530477329048e-06, "loss": 19.7344, "step": 8124 }, { "epoch": 0.539616125390184, "grad_norm": 345.549560546875, "learning_rate": 1.7132776706750298e-06, "loss": 16.9375, "step": 8125 }, { "epoch": 0.5396825396825397, "grad_norm": 192.92958068847656, "learning_rate": 1.7132022853663704e-06, "loss": 14.5469, "step": 8126 }, { "epoch": 0.5397489539748954, "grad_norm": 318.2650146484375, "learning_rate": 1.7131268918077977e-06, "loss": 17.6562, "step": 8127 }, { "epoch": 0.5398153682672511, "grad_norm": 173.4212188720703, "learning_rate": 1.7130514900001846e-06, "loss": 21.875, "step": 8128 }, { "epoch": 0.5398817825596068, "grad_norm": 384.9448547363281, "learning_rate": 1.7129760799444033e-06, "loss": 21.4688, "step": 8129 }, { "epoch": 0.5399481968519625, "grad_norm": 193.71743774414062, "learning_rate": 1.7129006616413256e-06, "loss": 15.4531, "step": 8130 }, { "epoch": 0.5400146111443183, "grad_norm": 289.25274658203125, "learning_rate": 1.7128252350918238e-06, "loss": 13.9219, "step": 8131 }, { "epoch": 0.540081025436674, "grad_norm": 128.96173095703125, "learning_rate": 1.712749800296771e-06, "loss": 18.375, "step": 8132 }, { "epoch": 0.5401474397290297, "grad_norm": 180.7025604248047, "learning_rate": 1.7126743572570397e-06, "loss": 15.9375, "step": 8133 }, { "epoch": 0.5402138540213854, "grad_norm": 320.46746826171875, "learning_rate": 1.7125989059735022e-06, "loss": 18.0938, "step": 8134 }, { "epoch": 0.5402802683137411, "grad_norm": 597.4251708984375, "learning_rate": 1.7125234464470312e-06, "loss": 17.4531, "step": 8135 }, { "epoch": 0.5403466826060969, "grad_norm": 143.14248657226562, "learning_rate": 1.7124479786785002e-06, "loss": 17.9062, "step": 8136 }, { "epoch": 0.5404130968984525, "grad_norm": 269.66497802734375, "learning_rate": 1.712372502668782e-06, "loss": 16.1328, "step": 8137 }, { "epoch": 0.5404795111908083, "grad_norm": 217.23805236816406, "learning_rate": 1.712297018418749e-06, "loss": 13.7188, "step": 8138 }, { "epoch": 0.5405459254831639, "grad_norm": 456.7830505371094, "learning_rate": 1.712221525929275e-06, "loss": 31.3438, "step": 8139 }, { "epoch": 0.5406123397755197, "grad_norm": 161.45974731445312, "learning_rate": 1.7121460252012333e-06, "loss": 19.0, "step": 8140 }, { "epoch": 0.5406787540678754, "grad_norm": 236.5973663330078, "learning_rate": 1.7120705162354964e-06, "loss": 20.1406, "step": 8141 }, { "epoch": 0.5407451683602311, "grad_norm": 248.73025512695312, "learning_rate": 1.7119949990329388e-06, "loss": 13.4844, "step": 8142 }, { "epoch": 0.5408115826525869, "grad_norm": 129.42164611816406, "learning_rate": 1.7119194735944336e-06, "loss": 19.5469, "step": 8143 }, { "epoch": 0.5408779969449425, "grad_norm": 381.226806640625, "learning_rate": 1.7118439399208543e-06, "loss": 17.5469, "step": 8144 }, { "epoch": 0.5409444112372983, "grad_norm": 698.4984741210938, "learning_rate": 1.711768398013075e-06, "loss": 19.7344, "step": 8145 }, { "epoch": 0.541010825529654, "grad_norm": 276.4189147949219, "learning_rate": 1.7116928478719692e-06, "loss": 23.8125, "step": 8146 }, { "epoch": 0.5410772398220097, "grad_norm": 153.9807586669922, "learning_rate": 1.711617289498411e-06, "loss": 14.75, "step": 8147 }, { "epoch": 0.5411436541143654, "grad_norm": 314.2732238769531, "learning_rate": 1.7115417228932742e-06, "loss": 15.4688, "step": 8148 }, { "epoch": 0.5412100684067211, "grad_norm": 192.72865295410156, "learning_rate": 1.7114661480574331e-06, "loss": 14.375, "step": 8149 }, { "epoch": 0.5412764826990768, "grad_norm": 259.0125427246094, "learning_rate": 1.711390564991762e-06, "loss": 18.5156, "step": 8150 }, { "epoch": 0.5413428969914326, "grad_norm": 184.5396728515625, "learning_rate": 1.7113149736971348e-06, "loss": 16.2656, "step": 8151 }, { "epoch": 0.5414093112837882, "grad_norm": 221.6222381591797, "learning_rate": 1.7112393741744262e-06, "loss": 22.4531, "step": 8152 }, { "epoch": 0.541475725576144, "grad_norm": 176.94625854492188, "learning_rate": 1.711163766424511e-06, "loss": 20.8906, "step": 8153 }, { "epoch": 0.5415421398684998, "grad_norm": 235.59735107421875, "learning_rate": 1.711088150448263e-06, "loss": 13.3359, "step": 8154 }, { "epoch": 0.5416085541608554, "grad_norm": 212.7416534423828, "learning_rate": 1.7110125262465577e-06, "loss": 17.7188, "step": 8155 }, { "epoch": 0.5416749684532112, "grad_norm": 198.4540252685547, "learning_rate": 1.7109368938202691e-06, "loss": 20.4375, "step": 8156 }, { "epoch": 0.5417413827455668, "grad_norm": 336.2037353515625, "learning_rate": 1.7108612531702726e-06, "loss": 17.4688, "step": 8157 }, { "epoch": 0.5418077970379226, "grad_norm": 211.08409118652344, "learning_rate": 1.710785604297443e-06, "loss": 18.25, "step": 8158 }, { "epoch": 0.5418742113302782, "grad_norm": 427.84393310546875, "learning_rate": 1.7107099472026553e-06, "loss": 22.0938, "step": 8159 }, { "epoch": 0.541940625622634, "grad_norm": 417.42486572265625, "learning_rate": 1.710634281886785e-06, "loss": 29.1406, "step": 8160 }, { "epoch": 0.5420070399149897, "grad_norm": 629.476318359375, "learning_rate": 1.710558608350707e-06, "loss": 31.9688, "step": 8161 }, { "epoch": 0.5420734542073454, "grad_norm": 294.45318603515625, "learning_rate": 1.7104829265952967e-06, "loss": 15.8438, "step": 8162 }, { "epoch": 0.5421398684997011, "grad_norm": 312.5231628417969, "learning_rate": 1.7104072366214295e-06, "loss": 17.3594, "step": 8163 }, { "epoch": 0.5422062827920568, "grad_norm": 144.70608520507812, "learning_rate": 1.710331538429981e-06, "loss": 13.1094, "step": 8164 }, { "epoch": 0.5422726970844126, "grad_norm": 431.65460205078125, "learning_rate": 1.7102558320218272e-06, "loss": 17.9844, "step": 8165 }, { "epoch": 0.5423391113767683, "grad_norm": 306.1992492675781, "learning_rate": 1.7101801173978433e-06, "loss": 19.9219, "step": 8166 }, { "epoch": 0.542405525669124, "grad_norm": 486.2827453613281, "learning_rate": 1.7101043945589054e-06, "loss": 19.7031, "step": 8167 }, { "epoch": 0.5424719399614797, "grad_norm": 265.53643798828125, "learning_rate": 1.7100286635058892e-06, "loss": 17.3125, "step": 8168 }, { "epoch": 0.5425383542538355, "grad_norm": 446.0332336425781, "learning_rate": 1.7099529242396708e-06, "loss": 17.8594, "step": 8169 }, { "epoch": 0.5426047685461911, "grad_norm": 541.184326171875, "learning_rate": 1.7098771767611266e-06, "loss": 22.5312, "step": 8170 }, { "epoch": 0.5426711828385469, "grad_norm": 198.76577758789062, "learning_rate": 1.7098014210711324e-06, "loss": 18.3125, "step": 8171 }, { "epoch": 0.5427375971309025, "grad_norm": 135.37844848632812, "learning_rate": 1.7097256571705648e-06, "loss": 17.7812, "step": 8172 }, { "epoch": 0.5428040114232583, "grad_norm": 515.0011596679688, "learning_rate": 1.7096498850602998e-06, "loss": 18.3594, "step": 8173 }, { "epoch": 0.542870425715614, "grad_norm": 406.9145202636719, "learning_rate": 1.7095741047412142e-06, "loss": 20.0625, "step": 8174 }, { "epoch": 0.5429368400079697, "grad_norm": 217.36505126953125, "learning_rate": 1.7094983162141847e-06, "loss": 13.8281, "step": 8175 }, { "epoch": 0.5430032543003255, "grad_norm": 271.01617431640625, "learning_rate": 1.7094225194800878e-06, "loss": 17.3125, "step": 8176 }, { "epoch": 0.5430696685926811, "grad_norm": 246.2906951904297, "learning_rate": 1.7093467145398e-06, "loss": 16.875, "step": 8177 }, { "epoch": 0.5431360828850369, "grad_norm": 371.3646240234375, "learning_rate": 1.7092709013941986e-06, "loss": 14.1875, "step": 8178 }, { "epoch": 0.5432024971773926, "grad_norm": 857.6600952148438, "learning_rate": 1.7091950800441603e-06, "loss": 17.0312, "step": 8179 }, { "epoch": 0.5432689114697483, "grad_norm": 198.80372619628906, "learning_rate": 1.7091192504905623e-06, "loss": 22.8438, "step": 8180 }, { "epoch": 0.543335325762104, "grad_norm": 2185.439453125, "learning_rate": 1.7090434127342818e-06, "loss": 19.4375, "step": 8181 }, { "epoch": 0.5434017400544597, "grad_norm": 131.9246368408203, "learning_rate": 1.708967566776196e-06, "loss": 19.2656, "step": 8182 }, { "epoch": 0.5434681543468154, "grad_norm": 110.07500457763672, "learning_rate": 1.708891712617182e-06, "loss": 11.9219, "step": 8183 }, { "epoch": 0.5435345686391712, "grad_norm": 514.745849609375, "learning_rate": 1.7088158502581176e-06, "loss": 16.2188, "step": 8184 }, { "epoch": 0.5436009829315268, "grad_norm": 297.962158203125, "learning_rate": 1.7087399796998804e-06, "loss": 19.2656, "step": 8185 }, { "epoch": 0.5436673972238826, "grad_norm": 211.3730010986328, "learning_rate": 1.7086641009433476e-06, "loss": 20.6875, "step": 8186 }, { "epoch": 0.5437338115162383, "grad_norm": 352.96624755859375, "learning_rate": 1.7085882139893967e-06, "loss": 20.5469, "step": 8187 }, { "epoch": 0.543800225808594, "grad_norm": 477.5261535644531, "learning_rate": 1.7085123188389065e-06, "loss": 18.2031, "step": 8188 }, { "epoch": 0.5438666401009498, "grad_norm": 500.0946350097656, "learning_rate": 1.708436415492754e-06, "loss": 21.5938, "step": 8189 }, { "epoch": 0.5439330543933054, "grad_norm": 111.39971923828125, "learning_rate": 1.708360503951818e-06, "loss": 15.3281, "step": 8190 }, { "epoch": 0.5439994686856612, "grad_norm": 183.3170928955078, "learning_rate": 1.7082845842169755e-06, "loss": 20.8281, "step": 8191 }, { "epoch": 0.5440658829780168, "grad_norm": 273.3667297363281, "learning_rate": 1.7082086562891058e-06, "loss": 25.0156, "step": 8192 }, { "epoch": 0.5441322972703726, "grad_norm": 205.17648315429688, "learning_rate": 1.7081327201690865e-06, "loss": 18.8438, "step": 8193 }, { "epoch": 0.5441987115627283, "grad_norm": 272.57916259765625, "learning_rate": 1.7080567758577966e-06, "loss": 20.5312, "step": 8194 }, { "epoch": 0.544265125855084, "grad_norm": 405.9463195800781, "learning_rate": 1.707980823356114e-06, "loss": 17.5312, "step": 8195 }, { "epoch": 0.5443315401474397, "grad_norm": 257.8517150878906, "learning_rate": 1.7079048626649173e-06, "loss": 13.6094, "step": 8196 }, { "epoch": 0.5443979544397954, "grad_norm": 204.8469696044922, "learning_rate": 1.7078288937850855e-06, "loss": 20.7031, "step": 8197 }, { "epoch": 0.5444643687321512, "grad_norm": 213.7578125, "learning_rate": 1.7077529167174971e-06, "loss": 15.4375, "step": 8198 }, { "epoch": 0.5445307830245069, "grad_norm": 177.67803955078125, "learning_rate": 1.7076769314630312e-06, "loss": 22.1562, "step": 8199 }, { "epoch": 0.5445971973168626, "grad_norm": 272.28887939453125, "learning_rate": 1.7076009380225666e-06, "loss": 27.3438, "step": 8200 }, { "epoch": 0.5446636116092183, "grad_norm": 208.0372772216797, "learning_rate": 1.7075249363969823e-06, "loss": 18.5938, "step": 8201 }, { "epoch": 0.544730025901574, "grad_norm": 150.24447631835938, "learning_rate": 1.707448926587158e-06, "loss": 14.0625, "step": 8202 }, { "epoch": 0.5447964401939297, "grad_norm": 199.5411376953125, "learning_rate": 1.7073729085939718e-06, "loss": 15.9062, "step": 8203 }, { "epoch": 0.5448628544862855, "grad_norm": 207.02725219726562, "learning_rate": 1.7072968824183041e-06, "loss": 22.1875, "step": 8204 }, { "epoch": 0.5449292687786411, "grad_norm": 163.85409545898438, "learning_rate": 1.7072208480610338e-06, "loss": 15.9531, "step": 8205 }, { "epoch": 0.5449956830709969, "grad_norm": 136.7540740966797, "learning_rate": 1.7071448055230404e-06, "loss": 18.5781, "step": 8206 }, { "epoch": 0.5450620973633526, "grad_norm": 169.3341064453125, "learning_rate": 1.7070687548052036e-06, "loss": 16.9375, "step": 8207 }, { "epoch": 0.5451285116557083, "grad_norm": 196.5207977294922, "learning_rate": 1.7069926959084032e-06, "loss": 21.1875, "step": 8208 }, { "epoch": 0.5451949259480641, "grad_norm": 444.8858947753906, "learning_rate": 1.7069166288335193e-06, "loss": 17.125, "step": 8209 }, { "epoch": 0.5452613402404197, "grad_norm": 232.12240600585938, "learning_rate": 1.706840553581431e-06, "loss": 16.0156, "step": 8210 }, { "epoch": 0.5453277545327755, "grad_norm": 306.78265380859375, "learning_rate": 1.7067644701530192e-06, "loss": 22.6562, "step": 8211 }, { "epoch": 0.5453941688251311, "grad_norm": 450.2279968261719, "learning_rate": 1.7066883785491633e-06, "loss": 24.1562, "step": 8212 }, { "epoch": 0.5454605831174869, "grad_norm": 130.99969482421875, "learning_rate": 1.706612278770744e-06, "loss": 19.0, "step": 8213 }, { "epoch": 0.5455269974098426, "grad_norm": 301.8064270019531, "learning_rate": 1.706536170818641e-06, "loss": 15.6719, "step": 8214 }, { "epoch": 0.5455934117021983, "grad_norm": 181.0911407470703, "learning_rate": 1.706460054693735e-06, "loss": 17.875, "step": 8215 }, { "epoch": 0.545659825994554, "grad_norm": 336.80419921875, "learning_rate": 1.7063839303969067e-06, "loss": 20.25, "step": 8216 }, { "epoch": 0.5457262402869097, "grad_norm": 486.22723388671875, "learning_rate": 1.7063077979290364e-06, "loss": 17.9062, "step": 8217 }, { "epoch": 0.5457926545792655, "grad_norm": 310.24334716796875, "learning_rate": 1.7062316572910045e-06, "loss": 21.1875, "step": 8218 }, { "epoch": 0.5458590688716212, "grad_norm": 265.6780700683594, "learning_rate": 1.7061555084836923e-06, "loss": 14.6562, "step": 8219 }, { "epoch": 0.5459254831639769, "grad_norm": 441.8694763183594, "learning_rate": 1.7060793515079803e-06, "loss": 27.875, "step": 8220 }, { "epoch": 0.5459918974563326, "grad_norm": 207.8312530517578, "learning_rate": 1.7060031863647493e-06, "loss": 16.1094, "step": 8221 }, { "epoch": 0.5460583117486884, "grad_norm": 295.1557312011719, "learning_rate": 1.7059270130548808e-06, "loss": 19.625, "step": 8222 }, { "epoch": 0.546124726041044, "grad_norm": 227.81106567382812, "learning_rate": 1.7058508315792554e-06, "loss": 15.7344, "step": 8223 }, { "epoch": 0.5461911403333998, "grad_norm": 122.93488311767578, "learning_rate": 1.7057746419387548e-06, "loss": 15.375, "step": 8224 }, { "epoch": 0.5462575546257554, "grad_norm": 280.2543029785156, "learning_rate": 1.7056984441342603e-06, "loss": 17.7422, "step": 8225 }, { "epoch": 0.5463239689181112, "grad_norm": 362.8629455566406, "learning_rate": 1.705622238166653e-06, "loss": 14.4375, "step": 8226 }, { "epoch": 0.5463903832104668, "grad_norm": 228.56393432617188, "learning_rate": 1.7055460240368143e-06, "loss": 19.0938, "step": 8227 }, { "epoch": 0.5464567975028226, "grad_norm": 146.29612731933594, "learning_rate": 1.7054698017456262e-06, "loss": 19.1719, "step": 8228 }, { "epoch": 0.5465232117951784, "grad_norm": 222.3054656982422, "learning_rate": 1.7053935712939703e-06, "loss": 16.4688, "step": 8229 }, { "epoch": 0.546589626087534, "grad_norm": 180.63819885253906, "learning_rate": 1.7053173326827282e-06, "loss": 21.8438, "step": 8230 }, { "epoch": 0.5466560403798898, "grad_norm": 196.9541473388672, "learning_rate": 1.705241085912782e-06, "loss": 16.9219, "step": 8231 }, { "epoch": 0.5467224546722455, "grad_norm": 273.19610595703125, "learning_rate": 1.7051648309850135e-06, "loss": 24.4375, "step": 8232 }, { "epoch": 0.5467888689646012, "grad_norm": 216.86634826660156, "learning_rate": 1.7050885679003049e-06, "loss": 17.0938, "step": 8233 }, { "epoch": 0.5468552832569569, "grad_norm": 185.6127471923828, "learning_rate": 1.705012296659538e-06, "loss": 18.0781, "step": 8234 }, { "epoch": 0.5469216975493126, "grad_norm": 111.3628921508789, "learning_rate": 1.704936017263596e-06, "loss": 14.5469, "step": 8235 }, { "epoch": 0.5469881118416683, "grad_norm": 185.3664093017578, "learning_rate": 1.7048597297133602e-06, "loss": 20.1094, "step": 8236 }, { "epoch": 0.547054526134024, "grad_norm": 244.90599060058594, "learning_rate": 1.7047834340097138e-06, "loss": 13.2812, "step": 8237 }, { "epoch": 0.5471209404263797, "grad_norm": 220.7974395751953, "learning_rate": 1.704707130153539e-06, "loss": 15.3125, "step": 8238 }, { "epoch": 0.5471873547187355, "grad_norm": 157.83995056152344, "learning_rate": 1.704630818145718e-06, "loss": 20.2812, "step": 8239 }, { "epoch": 0.5472537690110912, "grad_norm": 173.982177734375, "learning_rate": 1.7045544979871345e-06, "loss": 14.2031, "step": 8240 }, { "epoch": 0.5473201833034469, "grad_norm": 144.81271362304688, "learning_rate": 1.7044781696786708e-06, "loss": 16.4688, "step": 8241 }, { "epoch": 0.5473865975958027, "grad_norm": 256.1658020019531, "learning_rate": 1.7044018332212096e-06, "loss": 17.0156, "step": 8242 }, { "epoch": 0.5474530118881583, "grad_norm": 154.8170623779297, "learning_rate": 1.7043254886156344e-06, "loss": 14.0, "step": 8243 }, { "epoch": 0.5475194261805141, "grad_norm": 672.5487060546875, "learning_rate": 1.704249135862828e-06, "loss": 19.8594, "step": 8244 }, { "epoch": 0.5475858404728697, "grad_norm": 173.1758270263672, "learning_rate": 1.7041727749636737e-06, "loss": 14.9531, "step": 8245 }, { "epoch": 0.5476522547652255, "grad_norm": 296.7630615234375, "learning_rate": 1.7040964059190546e-06, "loss": 14.0625, "step": 8246 }, { "epoch": 0.5477186690575812, "grad_norm": 228.9270477294922, "learning_rate": 1.7040200287298545e-06, "loss": 19.5625, "step": 8247 }, { "epoch": 0.5477850833499369, "grad_norm": 214.65728759765625, "learning_rate": 1.7039436433969568e-06, "loss": 16.9531, "step": 8248 }, { "epoch": 0.5478514976422926, "grad_norm": 330.57855224609375, "learning_rate": 1.7038672499212447e-06, "loss": 19.8594, "step": 8249 }, { "epoch": 0.5479179119346483, "grad_norm": 359.21649169921875, "learning_rate": 1.7037908483036022e-06, "loss": 20.7969, "step": 8250 }, { "epoch": 0.5479843262270041, "grad_norm": 149.12123107910156, "learning_rate": 1.703714438544913e-06, "loss": 20.1406, "step": 8251 }, { "epoch": 0.5480507405193598, "grad_norm": 430.98297119140625, "learning_rate": 1.703638020646061e-06, "loss": 16.1406, "step": 8252 }, { "epoch": 0.5481171548117155, "grad_norm": 149.1687469482422, "learning_rate": 1.70356159460793e-06, "loss": 15.7344, "step": 8253 }, { "epoch": 0.5481835691040712, "grad_norm": 278.3367614746094, "learning_rate": 1.7034851604314043e-06, "loss": 18.9531, "step": 8254 }, { "epoch": 0.5482499833964269, "grad_norm": 131.2284393310547, "learning_rate": 1.7034087181173678e-06, "loss": 16.8438, "step": 8255 }, { "epoch": 0.5483163976887826, "grad_norm": 116.1998291015625, "learning_rate": 1.703332267666705e-06, "loss": 17.9688, "step": 8256 }, { "epoch": 0.5483828119811384, "grad_norm": 261.8486328125, "learning_rate": 1.7032558090803e-06, "loss": 24.375, "step": 8257 }, { "epoch": 0.548449226273494, "grad_norm": 356.6650085449219, "learning_rate": 1.7031793423590374e-06, "loss": 18.8906, "step": 8258 }, { "epoch": 0.5485156405658498, "grad_norm": 206.96475219726562, "learning_rate": 1.7031028675038014e-06, "loss": 17.9062, "step": 8259 }, { "epoch": 0.5485820548582054, "grad_norm": 176.80880737304688, "learning_rate": 1.7030263845154772e-06, "loss": 22.1562, "step": 8260 }, { "epoch": 0.5486484691505612, "grad_norm": 221.27383422851562, "learning_rate": 1.702949893394949e-06, "loss": 23.2656, "step": 8261 }, { "epoch": 0.548714883442917, "grad_norm": 230.95066833496094, "learning_rate": 1.7028733941431019e-06, "loss": 17.1562, "step": 8262 }, { "epoch": 0.5487812977352726, "grad_norm": 406.57867431640625, "learning_rate": 1.7027968867608204e-06, "loss": 21.2188, "step": 8263 }, { "epoch": 0.5488477120276284, "grad_norm": 123.44685363769531, "learning_rate": 1.70272037124899e-06, "loss": 16.4688, "step": 8264 }, { "epoch": 0.548914126319984, "grad_norm": 641.2355346679688, "learning_rate": 1.7026438476084957e-06, "loss": 15.2969, "step": 8265 }, { "epoch": 0.5489805406123398, "grad_norm": 341.5929870605469, "learning_rate": 1.702567315840222e-06, "loss": 19.4062, "step": 8266 }, { "epoch": 0.5490469549046955, "grad_norm": 212.4317169189453, "learning_rate": 1.7024907759450553e-06, "loss": 14.2188, "step": 8267 }, { "epoch": 0.5491133691970512, "grad_norm": 174.4397735595703, "learning_rate": 1.7024142279238803e-06, "loss": 19.5312, "step": 8268 }, { "epoch": 0.5491797834894069, "grad_norm": 282.53497314453125, "learning_rate": 1.7023376717775823e-06, "loss": 19.625, "step": 8269 }, { "epoch": 0.5492461977817626, "grad_norm": 287.71661376953125, "learning_rate": 1.7022611075070473e-06, "loss": 21.8438, "step": 8270 }, { "epoch": 0.5493126120741183, "grad_norm": 481.6676940917969, "learning_rate": 1.7021845351131605e-06, "loss": 21.7188, "step": 8271 }, { "epoch": 0.5493790263664741, "grad_norm": 1376.9300537109375, "learning_rate": 1.702107954596808e-06, "loss": 14.75, "step": 8272 }, { "epoch": 0.5494454406588298, "grad_norm": 179.71646118164062, "learning_rate": 1.7020313659588757e-06, "loss": 18.9531, "step": 8273 }, { "epoch": 0.5495118549511855, "grad_norm": 300.88055419921875, "learning_rate": 1.701954769200249e-06, "loss": 19.3438, "step": 8274 }, { "epoch": 0.5495782692435413, "grad_norm": 155.20823669433594, "learning_rate": 1.7018781643218147e-06, "loss": 15.625, "step": 8275 }, { "epoch": 0.5496446835358969, "grad_norm": 325.1643371582031, "learning_rate": 1.7018015513244585e-06, "loss": 15.9219, "step": 8276 }, { "epoch": 0.5497110978282527, "grad_norm": 154.06175231933594, "learning_rate": 1.7017249302090663e-06, "loss": 19.125, "step": 8277 }, { "epoch": 0.5497775121206083, "grad_norm": 181.53138732910156, "learning_rate": 1.701648300976525e-06, "loss": 15.7969, "step": 8278 }, { "epoch": 0.5498439264129641, "grad_norm": 430.9660949707031, "learning_rate": 1.7015716636277206e-06, "loss": 15.5781, "step": 8279 }, { "epoch": 0.5499103407053197, "grad_norm": 90.78829956054688, "learning_rate": 1.70149501816354e-06, "loss": 13.6094, "step": 8280 }, { "epoch": 0.5499767549976755, "grad_norm": 220.08056640625, "learning_rate": 1.7014183645848693e-06, "loss": 30.2812, "step": 8281 }, { "epoch": 0.5500431692900312, "grad_norm": 535.3372802734375, "learning_rate": 1.7013417028925957e-06, "loss": 16.7812, "step": 8282 }, { "epoch": 0.5501095835823869, "grad_norm": 257.0946044921875, "learning_rate": 1.7012650330876053e-06, "loss": 15.5625, "step": 8283 }, { "epoch": 0.5501759978747427, "grad_norm": 163.31297302246094, "learning_rate": 1.7011883551707857e-06, "loss": 13.8281, "step": 8284 }, { "epoch": 0.5502424121670983, "grad_norm": 217.84117126464844, "learning_rate": 1.7011116691430232e-06, "loss": 18.4453, "step": 8285 }, { "epoch": 0.5503088264594541, "grad_norm": 114.58838653564453, "learning_rate": 1.7010349750052055e-06, "loss": 13.7188, "step": 8286 }, { "epoch": 0.5503752407518098, "grad_norm": 208.40725708007812, "learning_rate": 1.7009582727582194e-06, "loss": 21.3438, "step": 8287 }, { "epoch": 0.5504416550441655, "grad_norm": 136.2058563232422, "learning_rate": 1.7008815624029521e-06, "loss": 16.2344, "step": 8288 }, { "epoch": 0.5505080693365212, "grad_norm": 203.8498992919922, "learning_rate": 1.700804843940291e-06, "loss": 15.9375, "step": 8289 }, { "epoch": 0.550574483628877, "grad_norm": 172.46414184570312, "learning_rate": 1.7007281173711238e-06, "loss": 20.2031, "step": 8290 }, { "epoch": 0.5506408979212326, "grad_norm": 397.5384826660156, "learning_rate": 1.7006513826963379e-06, "loss": 24.6875, "step": 8291 }, { "epoch": 0.5507073122135884, "grad_norm": 288.66943359375, "learning_rate": 1.7005746399168203e-06, "loss": 22.0, "step": 8292 }, { "epoch": 0.550773726505944, "grad_norm": 334.8124694824219, "learning_rate": 1.7004978890334596e-06, "loss": 15.4375, "step": 8293 }, { "epoch": 0.5508401407982998, "grad_norm": 127.5551986694336, "learning_rate": 1.7004211300471432e-06, "loss": 16.6562, "step": 8294 }, { "epoch": 0.5509065550906556, "grad_norm": 171.52874755859375, "learning_rate": 1.700344362958759e-06, "loss": 18.1875, "step": 8295 }, { "epoch": 0.5509729693830112, "grad_norm": 212.98182678222656, "learning_rate": 1.7002675877691952e-06, "loss": 22.2188, "step": 8296 }, { "epoch": 0.551039383675367, "grad_norm": 189.97569274902344, "learning_rate": 1.7001908044793397e-06, "loss": 14.0938, "step": 8297 }, { "epoch": 0.5511057979677226, "grad_norm": 182.34642028808594, "learning_rate": 1.7001140130900806e-06, "loss": 18.4688, "step": 8298 }, { "epoch": 0.5511722122600784, "grad_norm": 286.41278076171875, "learning_rate": 1.7000372136023063e-06, "loss": 15.5156, "step": 8299 }, { "epoch": 0.551238626552434, "grad_norm": 297.9433288574219, "learning_rate": 1.6999604060169053e-06, "loss": 22.1094, "step": 8300 }, { "epoch": 0.5513050408447898, "grad_norm": 170.5718994140625, "learning_rate": 1.6998835903347656e-06, "loss": 13.8438, "step": 8301 }, { "epoch": 0.5513714551371455, "grad_norm": 147.97410583496094, "learning_rate": 1.6998067665567766e-06, "loss": 15.25, "step": 8302 }, { "epoch": 0.5514378694295012, "grad_norm": 187.75479125976562, "learning_rate": 1.6997299346838264e-06, "loss": 21.4531, "step": 8303 }, { "epoch": 0.5515042837218569, "grad_norm": 150.95748901367188, "learning_rate": 1.6996530947168034e-06, "loss": 15.3906, "step": 8304 }, { "epoch": 0.5515706980142127, "grad_norm": 137.86061096191406, "learning_rate": 1.6995762466565973e-06, "loss": 16.25, "step": 8305 }, { "epoch": 0.5516371123065684, "grad_norm": 174.94541931152344, "learning_rate": 1.699499390504096e-06, "loss": 15.0156, "step": 8306 }, { "epoch": 0.5517035265989241, "grad_norm": 93.50524139404297, "learning_rate": 1.6994225262601898e-06, "loss": 11.6719, "step": 8307 }, { "epoch": 0.5517699408912798, "grad_norm": 446.7752990722656, "learning_rate": 1.6993456539257667e-06, "loss": 25.2969, "step": 8308 }, { "epoch": 0.5518363551836355, "grad_norm": 203.05117797851562, "learning_rate": 1.6992687735017163e-06, "loss": 19.5, "step": 8309 }, { "epoch": 0.5519027694759913, "grad_norm": 325.0869445800781, "learning_rate": 1.699191884988928e-06, "loss": 17.2969, "step": 8310 }, { "epoch": 0.5519691837683469, "grad_norm": 262.5597229003906, "learning_rate": 1.6991149883882915e-06, "loss": 21.1875, "step": 8311 }, { "epoch": 0.5520355980607027, "grad_norm": 176.40313720703125, "learning_rate": 1.6990380837006954e-06, "loss": 15.4531, "step": 8312 }, { "epoch": 0.5521020123530583, "grad_norm": 144.5847930908203, "learning_rate": 1.6989611709270305e-06, "loss": 17.0625, "step": 8313 }, { "epoch": 0.5521684266454141, "grad_norm": 125.38157653808594, "learning_rate": 1.6988842500681853e-06, "loss": 13.1094, "step": 8314 }, { "epoch": 0.5522348409377698, "grad_norm": 165.25718688964844, "learning_rate": 1.6988073211250501e-06, "loss": 19.1875, "step": 8315 }, { "epoch": 0.5523012552301255, "grad_norm": 467.3982849121094, "learning_rate": 1.698730384098515e-06, "loss": 17.125, "step": 8316 }, { "epoch": 0.5523676695224813, "grad_norm": 196.7206573486328, "learning_rate": 1.6986534389894694e-06, "loss": 14.7031, "step": 8317 }, { "epoch": 0.5524340838148369, "grad_norm": 189.65980529785156, "learning_rate": 1.698576485798804e-06, "loss": 16.2656, "step": 8318 }, { "epoch": 0.5525004981071927, "grad_norm": 571.4657592773438, "learning_rate": 1.6984995245274085e-06, "loss": 25.3906, "step": 8319 }, { "epoch": 0.5525669123995484, "grad_norm": 395.0130920410156, "learning_rate": 1.6984225551761732e-06, "loss": 28.0312, "step": 8320 }, { "epoch": 0.5526333266919041, "grad_norm": 169.78453063964844, "learning_rate": 1.6983455777459886e-06, "loss": 16.1719, "step": 8321 }, { "epoch": 0.5526997409842598, "grad_norm": 344.59173583984375, "learning_rate": 1.6982685922377453e-06, "loss": 17.6562, "step": 8322 }, { "epoch": 0.5527661552766155, "grad_norm": 242.7510986328125, "learning_rate": 1.6981915986523334e-06, "loss": 21.25, "step": 8323 }, { "epoch": 0.5528325695689712, "grad_norm": 250.2704620361328, "learning_rate": 1.6981145969906437e-06, "loss": 20.1406, "step": 8324 }, { "epoch": 0.552898983861327, "grad_norm": 117.36846160888672, "learning_rate": 1.698037587253567e-06, "loss": 13.1562, "step": 8325 }, { "epoch": 0.5529653981536826, "grad_norm": 309.3221130371094, "learning_rate": 1.6979605694419935e-06, "loss": 19.7031, "step": 8326 }, { "epoch": 0.5530318124460384, "grad_norm": 281.51593017578125, "learning_rate": 1.697883543556815e-06, "loss": 17.5469, "step": 8327 }, { "epoch": 0.5530982267383941, "grad_norm": 131.0896453857422, "learning_rate": 1.697806509598922e-06, "loss": 19.125, "step": 8328 }, { "epoch": 0.5531646410307498, "grad_norm": 132.98524475097656, "learning_rate": 1.6977294675692058e-06, "loss": 15.7344, "step": 8329 }, { "epoch": 0.5532310553231056, "grad_norm": 243.0062255859375, "learning_rate": 1.6976524174685574e-06, "loss": 20.5469, "step": 8330 }, { "epoch": 0.5532974696154612, "grad_norm": 280.4720764160156, "learning_rate": 1.697575359297868e-06, "loss": 20.1562, "step": 8331 }, { "epoch": 0.553363883907817, "grad_norm": 167.97689819335938, "learning_rate": 1.6974982930580293e-06, "loss": 14.6562, "step": 8332 }, { "epoch": 0.5534302982001726, "grad_norm": 186.86947631835938, "learning_rate": 1.6974212187499322e-06, "loss": 18.3438, "step": 8333 }, { "epoch": 0.5534967124925284, "grad_norm": 278.6490478515625, "learning_rate": 1.6973441363744689e-06, "loss": 21.8438, "step": 8334 }, { "epoch": 0.5535631267848841, "grad_norm": 144.39462280273438, "learning_rate": 1.6972670459325306e-06, "loss": 17.3906, "step": 8335 }, { "epoch": 0.5536295410772398, "grad_norm": 209.6395721435547, "learning_rate": 1.6971899474250092e-06, "loss": 17.6719, "step": 8336 }, { "epoch": 0.5536959553695955, "grad_norm": 527.6506958007812, "learning_rate": 1.6971128408527968e-06, "loss": 21.625, "step": 8337 }, { "epoch": 0.5537623696619512, "grad_norm": 366.42095947265625, "learning_rate": 1.6970357262167848e-06, "loss": 27.2812, "step": 8338 }, { "epoch": 0.553828783954307, "grad_norm": 276.2654113769531, "learning_rate": 1.6969586035178654e-06, "loss": 15.2031, "step": 8339 }, { "epoch": 0.5538951982466627, "grad_norm": 141.010498046875, "learning_rate": 1.6968814727569308e-06, "loss": 14.5938, "step": 8340 }, { "epoch": 0.5539616125390184, "grad_norm": 563.5798950195312, "learning_rate": 1.6968043339348732e-06, "loss": 25.6094, "step": 8341 }, { "epoch": 0.5540280268313741, "grad_norm": 436.5030822753906, "learning_rate": 1.6967271870525847e-06, "loss": 24.1875, "step": 8342 }, { "epoch": 0.5540944411237299, "grad_norm": 157.691650390625, "learning_rate": 1.696650032110958e-06, "loss": 16.7188, "step": 8343 }, { "epoch": 0.5541608554160855, "grad_norm": 280.4017333984375, "learning_rate": 1.6965728691108856e-06, "loss": 25.9844, "step": 8344 }, { "epoch": 0.5542272697084413, "grad_norm": 188.36968994140625, "learning_rate": 1.6964956980532596e-06, "loss": 17.5781, "step": 8345 }, { "epoch": 0.5542936840007969, "grad_norm": 1113.06640625, "learning_rate": 1.696418518938973e-06, "loss": 25.5625, "step": 8346 }, { "epoch": 0.5543600982931527, "grad_norm": 205.37362670898438, "learning_rate": 1.6963413317689188e-06, "loss": 14.5781, "step": 8347 }, { "epoch": 0.5544265125855083, "grad_norm": 251.70140075683594, "learning_rate": 1.6962641365439896e-06, "loss": 20.4219, "step": 8348 }, { "epoch": 0.5544929268778641, "grad_norm": 316.23095703125, "learning_rate": 1.6961869332650782e-06, "loss": 24.0469, "step": 8349 }, { "epoch": 0.5545593411702199, "grad_norm": 219.4691162109375, "learning_rate": 1.6961097219330777e-06, "loss": 18.0156, "step": 8350 }, { "epoch": 0.5546257554625755, "grad_norm": 321.3006896972656, "learning_rate": 1.6960325025488817e-06, "loss": 17.0781, "step": 8351 }, { "epoch": 0.5546921697549313, "grad_norm": 143.4611053466797, "learning_rate": 1.6959552751133828e-06, "loss": 18.6719, "step": 8352 }, { "epoch": 0.554758584047287, "grad_norm": 100.05292510986328, "learning_rate": 1.6958780396274746e-06, "loss": 15.625, "step": 8353 }, { "epoch": 0.5548249983396427, "grad_norm": 109.36009216308594, "learning_rate": 1.6958007960920503e-06, "loss": 18.8281, "step": 8354 }, { "epoch": 0.5548914126319984, "grad_norm": 215.9959259033203, "learning_rate": 1.695723544508004e-06, "loss": 12.4844, "step": 8355 }, { "epoch": 0.5549578269243541, "grad_norm": 133.84335327148438, "learning_rate": 1.6956462848762288e-06, "loss": 14.6094, "step": 8356 }, { "epoch": 0.5550242412167098, "grad_norm": 271.9211730957031, "learning_rate": 1.6955690171976186e-06, "loss": 24.2812, "step": 8357 }, { "epoch": 0.5550906555090656, "grad_norm": 226.36276245117188, "learning_rate": 1.695491741473067e-06, "loss": 14.0, "step": 8358 }, { "epoch": 0.5551570698014213, "grad_norm": 308.0372314453125, "learning_rate": 1.695414457703468e-06, "loss": 17.0625, "step": 8359 }, { "epoch": 0.555223484093777, "grad_norm": 419.15374755859375, "learning_rate": 1.6953371658897153e-06, "loss": 13.9297, "step": 8360 }, { "epoch": 0.5552898983861327, "grad_norm": 316.91864013671875, "learning_rate": 1.6952598660327033e-06, "loss": 20.5625, "step": 8361 }, { "epoch": 0.5553563126784884, "grad_norm": 242.74295043945312, "learning_rate": 1.6951825581333263e-06, "loss": 19.8125, "step": 8362 }, { "epoch": 0.5554227269708442, "grad_norm": 222.96339416503906, "learning_rate": 1.6951052421924783e-06, "loss": 17.9531, "step": 8363 }, { "epoch": 0.5554891412631998, "grad_norm": 365.19659423828125, "learning_rate": 1.6950279182110536e-06, "loss": 22.1562, "step": 8364 }, { "epoch": 0.5555555555555556, "grad_norm": 450.92327880859375, "learning_rate": 1.6949505861899467e-06, "loss": 28.5312, "step": 8365 }, { "epoch": 0.5556219698479112, "grad_norm": 343.41839599609375, "learning_rate": 1.6948732461300524e-06, "loss": 25.4062, "step": 8366 }, { "epoch": 0.555688384140267, "grad_norm": 123.49320220947266, "learning_rate": 1.6947958980322648e-06, "loss": 15.8594, "step": 8367 }, { "epoch": 0.5557547984326227, "grad_norm": 176.9781036376953, "learning_rate": 1.6947185418974787e-06, "loss": 14.6875, "step": 8368 }, { "epoch": 0.5558212127249784, "grad_norm": 120.4733657836914, "learning_rate": 1.6946411777265898e-06, "loss": 17.7031, "step": 8369 }, { "epoch": 0.5558876270173342, "grad_norm": 550.3811645507812, "learning_rate": 1.6945638055204915e-06, "loss": 19.25, "step": 8370 }, { "epoch": 0.5559540413096898, "grad_norm": 288.5267639160156, "learning_rate": 1.6944864252800802e-06, "loss": 18.875, "step": 8371 }, { "epoch": 0.5560204556020456, "grad_norm": 275.0401306152344, "learning_rate": 1.6944090370062504e-06, "loss": 28.0312, "step": 8372 }, { "epoch": 0.5560868698944013, "grad_norm": 266.8800964355469, "learning_rate": 1.6943316406998966e-06, "loss": 26.3125, "step": 8373 }, { "epoch": 0.556153284186757, "grad_norm": 189.14210510253906, "learning_rate": 1.6942542363619152e-06, "loss": 15.7188, "step": 8374 }, { "epoch": 0.5562196984791127, "grad_norm": 117.70634460449219, "learning_rate": 1.6941768239932013e-06, "loss": 14.5156, "step": 8375 }, { "epoch": 0.5562861127714684, "grad_norm": 172.86392211914062, "learning_rate": 1.69409940359465e-06, "loss": 20.6562, "step": 8376 }, { "epoch": 0.5563525270638241, "grad_norm": 130.1219482421875, "learning_rate": 1.6940219751671569e-06, "loss": 17.1094, "step": 8377 }, { "epoch": 0.5564189413561799, "grad_norm": 198.7600555419922, "learning_rate": 1.693944538711618e-06, "loss": 20.9844, "step": 8378 }, { "epoch": 0.5564853556485355, "grad_norm": 180.78147888183594, "learning_rate": 1.6938670942289291e-06, "loss": 20.5781, "step": 8379 }, { "epoch": 0.5565517699408913, "grad_norm": 187.33444213867188, "learning_rate": 1.6937896417199852e-06, "loss": 18.2969, "step": 8380 }, { "epoch": 0.556618184233247, "grad_norm": 253.8456573486328, "learning_rate": 1.693712181185683e-06, "loss": 19.4219, "step": 8381 }, { "epoch": 0.5566845985256027, "grad_norm": 233.78489685058594, "learning_rate": 1.6936347126269184e-06, "loss": 18.3906, "step": 8382 }, { "epoch": 0.5567510128179585, "grad_norm": 240.52369689941406, "learning_rate": 1.6935572360445872e-06, "loss": 23.5, "step": 8383 }, { "epoch": 0.5568174271103141, "grad_norm": 233.82858276367188, "learning_rate": 1.693479751439586e-06, "loss": 16.0469, "step": 8384 }, { "epoch": 0.5568838414026699, "grad_norm": 462.6151123046875, "learning_rate": 1.693402258812811e-06, "loss": 19.7656, "step": 8385 }, { "epoch": 0.5569502556950255, "grad_norm": 265.6314392089844, "learning_rate": 1.6933247581651585e-06, "loss": 16.0156, "step": 8386 }, { "epoch": 0.5570166699873813, "grad_norm": 136.90380859375, "learning_rate": 1.6932472494975248e-06, "loss": 17.4062, "step": 8387 }, { "epoch": 0.557083084279737, "grad_norm": 435.796875, "learning_rate": 1.6931697328108069e-06, "loss": 21.2812, "step": 8388 }, { "epoch": 0.5571494985720927, "grad_norm": 263.93896484375, "learning_rate": 1.693092208105901e-06, "loss": 18.6094, "step": 8389 }, { "epoch": 0.5572159128644484, "grad_norm": 149.600830078125, "learning_rate": 1.6930146753837041e-06, "loss": 20.1719, "step": 8390 }, { "epoch": 0.5572823271568041, "grad_norm": 637.4230346679688, "learning_rate": 1.692937134645113e-06, "loss": 18.2031, "step": 8391 }, { "epoch": 0.5573487414491599, "grad_norm": 229.6481170654297, "learning_rate": 1.6928595858910248e-06, "loss": 20.375, "step": 8392 }, { "epoch": 0.5574151557415156, "grad_norm": 634.054931640625, "learning_rate": 1.6927820291223364e-06, "loss": 14.2656, "step": 8393 }, { "epoch": 0.5574815700338713, "grad_norm": 343.7788391113281, "learning_rate": 1.692704464339945e-06, "loss": 20.9375, "step": 8394 }, { "epoch": 0.557547984326227, "grad_norm": 241.80638122558594, "learning_rate": 1.692626891544748e-06, "loss": 19.0469, "step": 8395 }, { "epoch": 0.5576143986185828, "grad_norm": 390.2137756347656, "learning_rate": 1.6925493107376423e-06, "loss": 16.1562, "step": 8396 }, { "epoch": 0.5576808129109384, "grad_norm": 450.8624572753906, "learning_rate": 1.6924717219195257e-06, "loss": 23.4375, "step": 8397 }, { "epoch": 0.5577472272032942, "grad_norm": 160.52294921875, "learning_rate": 1.6923941250912952e-06, "loss": 16.9844, "step": 8398 }, { "epoch": 0.5578136414956498, "grad_norm": 204.10916137695312, "learning_rate": 1.6923165202538489e-06, "loss": 20.8594, "step": 8399 }, { "epoch": 0.5578800557880056, "grad_norm": 429.0186767578125, "learning_rate": 1.6922389074080842e-06, "loss": 19.4219, "step": 8400 }, { "epoch": 0.5579464700803612, "grad_norm": 270.6460876464844, "learning_rate": 1.6921612865548994e-06, "loss": 18.5781, "step": 8401 }, { "epoch": 0.558012884372717, "grad_norm": 165.3383331298828, "learning_rate": 1.6920836576951915e-06, "loss": 17.7969, "step": 8402 }, { "epoch": 0.5580792986650728, "grad_norm": 175.2013397216797, "learning_rate": 1.6920060208298592e-06, "loss": 17.75, "step": 8403 }, { "epoch": 0.5581457129574284, "grad_norm": 424.44970703125, "learning_rate": 1.6919283759597999e-06, "loss": 20.0312, "step": 8404 }, { "epoch": 0.5582121272497842, "grad_norm": 206.04946899414062, "learning_rate": 1.6918507230859126e-06, "loss": 14.75, "step": 8405 }, { "epoch": 0.5582785415421399, "grad_norm": 287.8841857910156, "learning_rate": 1.691773062209095e-06, "loss": 18.6406, "step": 8406 }, { "epoch": 0.5583449558344956, "grad_norm": 779.7168579101562, "learning_rate": 1.6916953933302454e-06, "loss": 15.6719, "step": 8407 }, { "epoch": 0.5584113701268513, "grad_norm": 144.9691925048828, "learning_rate": 1.691617716450262e-06, "loss": 22.75, "step": 8408 }, { "epoch": 0.558477784419207, "grad_norm": 173.93739318847656, "learning_rate": 1.6915400315700441e-06, "loss": 15.75, "step": 8409 }, { "epoch": 0.5585441987115627, "grad_norm": 100.7279281616211, "learning_rate": 1.6914623386904898e-06, "loss": 15.9062, "step": 8410 }, { "epoch": 0.5586106130039185, "grad_norm": 263.0087585449219, "learning_rate": 1.691384637812498e-06, "loss": 21.1406, "step": 8411 }, { "epoch": 0.5586770272962741, "grad_norm": 233.98110961914062, "learning_rate": 1.6913069289369672e-06, "loss": 14.4062, "step": 8412 }, { "epoch": 0.5587434415886299, "grad_norm": 267.74700927734375, "learning_rate": 1.6912292120647966e-06, "loss": 21.8438, "step": 8413 }, { "epoch": 0.5588098558809856, "grad_norm": 427.1363830566406, "learning_rate": 1.6911514871968849e-06, "loss": 26.9531, "step": 8414 }, { "epoch": 0.5588762701733413, "grad_norm": 194.8565673828125, "learning_rate": 1.6910737543341313e-06, "loss": 16.9141, "step": 8415 }, { "epoch": 0.5589426844656971, "grad_norm": 457.42327880859375, "learning_rate": 1.6909960134774352e-06, "loss": 22.5938, "step": 8416 }, { "epoch": 0.5590090987580527, "grad_norm": 387.17559814453125, "learning_rate": 1.6909182646276956e-06, "loss": 19.9062, "step": 8417 }, { "epoch": 0.5590755130504085, "grad_norm": 180.54891967773438, "learning_rate": 1.690840507785812e-06, "loss": 19.7031, "step": 8418 }, { "epoch": 0.5591419273427641, "grad_norm": 477.6244201660156, "learning_rate": 1.6907627429526837e-06, "loss": 17.1016, "step": 8419 }, { "epoch": 0.5592083416351199, "grad_norm": 182.14254760742188, "learning_rate": 1.6906849701292103e-06, "loss": 15.7656, "step": 8420 }, { "epoch": 0.5592747559274756, "grad_norm": 352.30596923828125, "learning_rate": 1.6906071893162915e-06, "loss": 22.0938, "step": 8421 }, { "epoch": 0.5593411702198313, "grad_norm": 169.8123779296875, "learning_rate": 1.6905294005148267e-06, "loss": 17.3594, "step": 8422 }, { "epoch": 0.559407584512187, "grad_norm": 97.3760986328125, "learning_rate": 1.6904516037257163e-06, "loss": 11.8594, "step": 8423 }, { "epoch": 0.5594739988045427, "grad_norm": 189.72186279296875, "learning_rate": 1.6903737989498598e-06, "loss": 12.5781, "step": 8424 }, { "epoch": 0.5595404130968985, "grad_norm": 184.60328674316406, "learning_rate": 1.6902959861881571e-06, "loss": 20.875, "step": 8425 }, { "epoch": 0.5596068273892542, "grad_norm": 146.09715270996094, "learning_rate": 1.6902181654415086e-06, "loss": 18.8281, "step": 8426 }, { "epoch": 0.5596732416816099, "grad_norm": 254.92042541503906, "learning_rate": 1.6901403367108145e-06, "loss": 18.7969, "step": 8427 }, { "epoch": 0.5597396559739656, "grad_norm": 365.3903503417969, "learning_rate": 1.6900624999969747e-06, "loss": 21.1562, "step": 8428 }, { "epoch": 0.5598060702663213, "grad_norm": 419.06036376953125, "learning_rate": 1.6899846553008899e-06, "loss": 14.7031, "step": 8429 }, { "epoch": 0.559872484558677, "grad_norm": 221.33380126953125, "learning_rate": 1.6899068026234607e-06, "loss": 16.8906, "step": 8430 }, { "epoch": 0.5599388988510328, "grad_norm": 218.81878662109375, "learning_rate": 1.689828941965587e-06, "loss": 18.7812, "step": 8431 }, { "epoch": 0.5600053131433884, "grad_norm": 248.9112548828125, "learning_rate": 1.6897510733281704e-06, "loss": 13.9375, "step": 8432 }, { "epoch": 0.5600717274357442, "grad_norm": 250.52264404296875, "learning_rate": 1.6896731967121108e-06, "loss": 19.3594, "step": 8433 }, { "epoch": 0.5601381417280998, "grad_norm": 229.40489196777344, "learning_rate": 1.6895953121183094e-06, "loss": 20.4375, "step": 8434 }, { "epoch": 0.5602045560204556, "grad_norm": 348.95269775390625, "learning_rate": 1.689517419547667e-06, "loss": 20.9062, "step": 8435 }, { "epoch": 0.5602709703128114, "grad_norm": 533.0465087890625, "learning_rate": 1.6894395190010848e-06, "loss": 19.0469, "step": 8436 }, { "epoch": 0.560337384605167, "grad_norm": 583.798095703125, "learning_rate": 1.6893616104794636e-06, "loss": 16.7656, "step": 8437 }, { "epoch": 0.5604037988975228, "grad_norm": 153.4120330810547, "learning_rate": 1.689283693983705e-06, "loss": 14.9375, "step": 8438 }, { "epoch": 0.5604702131898784, "grad_norm": 121.67192840576172, "learning_rate": 1.6892057695147099e-06, "loss": 17.0156, "step": 8439 }, { "epoch": 0.5605366274822342, "grad_norm": 443.4833068847656, "learning_rate": 1.68912783707338e-06, "loss": 22.0625, "step": 8440 }, { "epoch": 0.5606030417745899, "grad_norm": 255.47023010253906, "learning_rate": 1.689049896660617e-06, "loss": 19.5, "step": 8441 }, { "epoch": 0.5606694560669456, "grad_norm": 370.63397216796875, "learning_rate": 1.6889719482773215e-06, "loss": 15.4375, "step": 8442 }, { "epoch": 0.5607358703593013, "grad_norm": 280.2546691894531, "learning_rate": 1.6888939919243962e-06, "loss": 23.6562, "step": 8443 }, { "epoch": 0.560802284651657, "grad_norm": 107.88557434082031, "learning_rate": 1.6888160276027423e-06, "loss": 15.7656, "step": 8444 }, { "epoch": 0.5608686989440127, "grad_norm": 531.20068359375, "learning_rate": 1.6887380553132617e-06, "loss": 21.2031, "step": 8445 }, { "epoch": 0.5609351132363685, "grad_norm": 167.24061584472656, "learning_rate": 1.6886600750568566e-06, "loss": 16.2812, "step": 8446 }, { "epoch": 0.5610015275287242, "grad_norm": 153.48480224609375, "learning_rate": 1.688582086834429e-06, "loss": 14.5, "step": 8447 }, { "epoch": 0.5610679418210799, "grad_norm": 243.40127563476562, "learning_rate": 1.6885040906468806e-06, "loss": 18.4219, "step": 8448 }, { "epoch": 0.5611343561134357, "grad_norm": 125.15225219726562, "learning_rate": 1.6884260864951138e-06, "loss": 14.4141, "step": 8449 }, { "epoch": 0.5612007704057913, "grad_norm": 235.21734619140625, "learning_rate": 1.6883480743800314e-06, "loss": 22.1875, "step": 8450 }, { "epoch": 0.5612671846981471, "grad_norm": 431.98638916015625, "learning_rate": 1.688270054302535e-06, "loss": 22.6562, "step": 8451 }, { "epoch": 0.5613335989905027, "grad_norm": 279.50152587890625, "learning_rate": 1.6881920262635277e-06, "loss": 25.1875, "step": 8452 }, { "epoch": 0.5614000132828585, "grad_norm": 162.56874084472656, "learning_rate": 1.6881139902639119e-06, "loss": 15.5469, "step": 8453 }, { "epoch": 0.5614664275752141, "grad_norm": 201.77789306640625, "learning_rate": 1.6880359463045904e-06, "loss": 17.6406, "step": 8454 }, { "epoch": 0.5615328418675699, "grad_norm": 313.6011962890625, "learning_rate": 1.6879578943864653e-06, "loss": 22.0312, "step": 8455 }, { "epoch": 0.5615992561599256, "grad_norm": 266.8785705566406, "learning_rate": 1.6878798345104403e-06, "loss": 24.875, "step": 8456 }, { "epoch": 0.5616656704522813, "grad_norm": 198.2176055908203, "learning_rate": 1.687801766677418e-06, "loss": 16.2344, "step": 8457 }, { "epoch": 0.5617320847446371, "grad_norm": 110.02526092529297, "learning_rate": 1.6877236908883014e-06, "loss": 14.2812, "step": 8458 }, { "epoch": 0.5617984990369927, "grad_norm": 908.7457275390625, "learning_rate": 1.687645607143994e-06, "loss": 20.2656, "step": 8459 }, { "epoch": 0.5618649133293485, "grad_norm": 236.21029663085938, "learning_rate": 1.6875675154453982e-06, "loss": 25.75, "step": 8460 }, { "epoch": 0.5619313276217042, "grad_norm": 335.06671142578125, "learning_rate": 1.6874894157934185e-06, "loss": 12.375, "step": 8461 }, { "epoch": 0.5619977419140599, "grad_norm": 143.34469604492188, "learning_rate": 1.6874113081889575e-06, "loss": 15.75, "step": 8462 }, { "epoch": 0.5620641562064156, "grad_norm": 385.26409912109375, "learning_rate": 1.6873331926329185e-06, "loss": 20.25, "step": 8463 }, { "epoch": 0.5621305704987714, "grad_norm": 176.66424560546875, "learning_rate": 1.6872550691262055e-06, "loss": 23.4844, "step": 8464 }, { "epoch": 0.562196984791127, "grad_norm": 241.0428009033203, "learning_rate": 1.6871769376697223e-06, "loss": 24.0, "step": 8465 }, { "epoch": 0.5622633990834828, "grad_norm": 242.60105895996094, "learning_rate": 1.6870987982643725e-06, "loss": 17.1875, "step": 8466 }, { "epoch": 0.5623298133758384, "grad_norm": 175.69448852539062, "learning_rate": 1.68702065091106e-06, "loss": 18.7969, "step": 8467 }, { "epoch": 0.5623962276681942, "grad_norm": 151.02513122558594, "learning_rate": 1.686942495610689e-06, "loss": 15.0312, "step": 8468 }, { "epoch": 0.56246264196055, "grad_norm": 409.6418151855469, "learning_rate": 1.686864332364163e-06, "loss": 16.5312, "step": 8469 }, { "epoch": 0.5625290562529056, "grad_norm": 131.18182373046875, "learning_rate": 1.6867861611723869e-06, "loss": 22.9375, "step": 8470 }, { "epoch": 0.5625954705452614, "grad_norm": 277.2649230957031, "learning_rate": 1.6867079820362645e-06, "loss": 16.9688, "step": 8471 }, { "epoch": 0.562661884837617, "grad_norm": 294.75, "learning_rate": 1.6866297949566995e-06, "loss": 24.1406, "step": 8472 }, { "epoch": 0.5627282991299728, "grad_norm": 251.65919494628906, "learning_rate": 1.6865515999345977e-06, "loss": 17.2812, "step": 8473 }, { "epoch": 0.5627947134223285, "grad_norm": 253.57254028320312, "learning_rate": 1.6864733969708623e-06, "loss": 17.9219, "step": 8474 }, { "epoch": 0.5628611277146842, "grad_norm": 168.13877868652344, "learning_rate": 1.6863951860663987e-06, "loss": 15.0156, "step": 8475 }, { "epoch": 0.5629275420070399, "grad_norm": 295.26507568359375, "learning_rate": 1.6863169672221114e-06, "loss": 14.875, "step": 8476 }, { "epoch": 0.5629939562993956, "grad_norm": 188.90374755859375, "learning_rate": 1.6862387404389051e-06, "loss": 18.0625, "step": 8477 }, { "epoch": 0.5630603705917513, "grad_norm": 169.3590545654297, "learning_rate": 1.6861605057176851e-06, "loss": 17.0625, "step": 8478 }, { "epoch": 0.5631267848841071, "grad_norm": 3623.72412109375, "learning_rate": 1.6860822630593556e-06, "loss": 21.5938, "step": 8479 }, { "epoch": 0.5631931991764628, "grad_norm": 182.41070556640625, "learning_rate": 1.6860040124648222e-06, "loss": 19.2344, "step": 8480 }, { "epoch": 0.5632596134688185, "grad_norm": 1304.76513671875, "learning_rate": 1.68592575393499e-06, "loss": 21.5312, "step": 8481 }, { "epoch": 0.5633260277611742, "grad_norm": 136.17080688476562, "learning_rate": 1.6858474874707644e-06, "loss": 13.7344, "step": 8482 }, { "epoch": 0.5633924420535299, "grad_norm": 238.7117919921875, "learning_rate": 1.6857692130730503e-06, "loss": 15.0156, "step": 8483 }, { "epoch": 0.5634588563458857, "grad_norm": 385.0166320800781, "learning_rate": 1.6856909307427536e-06, "loss": 26.6875, "step": 8484 }, { "epoch": 0.5635252706382413, "grad_norm": 194.91183471679688, "learning_rate": 1.6856126404807794e-06, "loss": 17.0781, "step": 8485 }, { "epoch": 0.5635916849305971, "grad_norm": 102.4417495727539, "learning_rate": 1.6855343422880336e-06, "loss": 15.3594, "step": 8486 }, { "epoch": 0.5636580992229527, "grad_norm": 407.6731262207031, "learning_rate": 1.685456036165422e-06, "loss": 17.0156, "step": 8487 }, { "epoch": 0.5637245135153085, "grad_norm": 318.4230041503906, "learning_rate": 1.68537772211385e-06, "loss": 19.8125, "step": 8488 }, { "epoch": 0.5637909278076642, "grad_norm": 356.7344055175781, "learning_rate": 1.6852994001342235e-06, "loss": 20.1406, "step": 8489 }, { "epoch": 0.5638573421000199, "grad_norm": 522.6289672851562, "learning_rate": 1.6852210702274493e-06, "loss": 21.6406, "step": 8490 }, { "epoch": 0.5639237563923757, "grad_norm": 226.32252502441406, "learning_rate": 1.6851427323944325e-06, "loss": 19.8438, "step": 8491 }, { "epoch": 0.5639901706847313, "grad_norm": 770.7755737304688, "learning_rate": 1.6850643866360796e-06, "loss": 20.25, "step": 8492 }, { "epoch": 0.5640565849770871, "grad_norm": 158.1162872314453, "learning_rate": 1.6849860329532972e-06, "loss": 14.5938, "step": 8493 }, { "epoch": 0.5641229992694428, "grad_norm": 737.0011596679688, "learning_rate": 1.6849076713469912e-06, "loss": 22.3281, "step": 8494 }, { "epoch": 0.5641894135617985, "grad_norm": 249.56539916992188, "learning_rate": 1.6848293018180678e-06, "loss": 31.5625, "step": 8495 }, { "epoch": 0.5642558278541542, "grad_norm": 152.2261505126953, "learning_rate": 1.6847509243674344e-06, "loss": 17.7266, "step": 8496 }, { "epoch": 0.56432224214651, "grad_norm": 321.8133850097656, "learning_rate": 1.6846725389959973e-06, "loss": 17.2656, "step": 8497 }, { "epoch": 0.5643886564388656, "grad_norm": 278.07318115234375, "learning_rate": 1.6845941457046626e-06, "loss": 21.8125, "step": 8498 }, { "epoch": 0.5644550707312214, "grad_norm": 209.03219604492188, "learning_rate": 1.6845157444943378e-06, "loss": 18.3281, "step": 8499 }, { "epoch": 0.564521485023577, "grad_norm": 173.55718994140625, "learning_rate": 1.6844373353659296e-06, "loss": 18.1719, "step": 8500 }, { "epoch": 0.5645878993159328, "grad_norm": 236.88539123535156, "learning_rate": 1.684358918320345e-06, "loss": 23.6875, "step": 8501 }, { "epoch": 0.5646543136082885, "grad_norm": 512.152099609375, "learning_rate": 1.684280493358491e-06, "loss": 17.0625, "step": 8502 }, { "epoch": 0.5647207279006442, "grad_norm": 204.8983154296875, "learning_rate": 1.684202060481275e-06, "loss": 16.5312, "step": 8503 }, { "epoch": 0.564787142193, "grad_norm": 276.8426818847656, "learning_rate": 1.6841236196896037e-06, "loss": 27.2344, "step": 8504 }, { "epoch": 0.5648535564853556, "grad_norm": 226.26284790039062, "learning_rate": 1.6840451709843855e-06, "loss": 18.75, "step": 8505 }, { "epoch": 0.5649199707777114, "grad_norm": 356.9473571777344, "learning_rate": 1.6839667143665267e-06, "loss": 17.7344, "step": 8506 }, { "epoch": 0.564986385070067, "grad_norm": 229.57382202148438, "learning_rate": 1.6838882498369354e-06, "loss": 22.0, "step": 8507 }, { "epoch": 0.5650527993624228, "grad_norm": 156.40744018554688, "learning_rate": 1.6838097773965193e-06, "loss": 17.6875, "step": 8508 }, { "epoch": 0.5651192136547785, "grad_norm": 199.45204162597656, "learning_rate": 1.683731297046186e-06, "loss": 16.2656, "step": 8509 }, { "epoch": 0.5651856279471342, "grad_norm": 528.8180541992188, "learning_rate": 1.6836528087868432e-06, "loss": 17.125, "step": 8510 }, { "epoch": 0.56525204223949, "grad_norm": 260.1461486816406, "learning_rate": 1.6835743126193988e-06, "loss": 17.0469, "step": 8511 }, { "epoch": 0.5653184565318456, "grad_norm": 1589.164306640625, "learning_rate": 1.683495808544761e-06, "loss": 19.3906, "step": 8512 }, { "epoch": 0.5653848708242014, "grad_norm": 633.6231689453125, "learning_rate": 1.6834172965638382e-06, "loss": 15.6875, "step": 8513 }, { "epoch": 0.5654512851165571, "grad_norm": 322.4214172363281, "learning_rate": 1.6833387766775377e-06, "loss": 24.0312, "step": 8514 }, { "epoch": 0.5655176994089128, "grad_norm": 194.93019104003906, "learning_rate": 1.6832602488867686e-06, "loss": 20.9688, "step": 8515 }, { "epoch": 0.5655841137012685, "grad_norm": 471.4441223144531, "learning_rate": 1.683181713192439e-06, "loss": 15.9531, "step": 8516 }, { "epoch": 0.5656505279936243, "grad_norm": 244.42662048339844, "learning_rate": 1.683103169595457e-06, "loss": 18.9688, "step": 8517 }, { "epoch": 0.5657169422859799, "grad_norm": 268.3437805175781, "learning_rate": 1.6830246180967315e-06, "loss": 16.4375, "step": 8518 }, { "epoch": 0.5657833565783357, "grad_norm": 176.52125549316406, "learning_rate": 1.682946058697171e-06, "loss": 18.75, "step": 8519 }, { "epoch": 0.5658497708706913, "grad_norm": 384.5840148925781, "learning_rate": 1.6828674913976847e-06, "loss": 22.3125, "step": 8520 }, { "epoch": 0.5659161851630471, "grad_norm": 258.0597229003906, "learning_rate": 1.6827889161991807e-06, "loss": 17.5, "step": 8521 }, { "epoch": 0.5659825994554029, "grad_norm": 594.5595092773438, "learning_rate": 1.6827103331025682e-06, "loss": 19.0156, "step": 8522 }, { "epoch": 0.5660490137477585, "grad_norm": 220.30279541015625, "learning_rate": 1.6826317421087564e-06, "loss": 17.4219, "step": 8523 }, { "epoch": 0.5661154280401143, "grad_norm": 303.8847961425781, "learning_rate": 1.682553143218654e-06, "loss": 15.2812, "step": 8524 }, { "epoch": 0.5661818423324699, "grad_norm": 225.43157958984375, "learning_rate": 1.6824745364331707e-06, "loss": 19.2344, "step": 8525 }, { "epoch": 0.5662482566248257, "grad_norm": 264.37158203125, "learning_rate": 1.6823959217532153e-06, "loss": 19.9844, "step": 8526 }, { "epoch": 0.5663146709171814, "grad_norm": 312.8343811035156, "learning_rate": 1.6823172991796976e-06, "loss": 16.375, "step": 8527 }, { "epoch": 0.5663810852095371, "grad_norm": 141.86769104003906, "learning_rate": 1.6822386687135266e-06, "loss": 15.5469, "step": 8528 }, { "epoch": 0.5664474995018928, "grad_norm": 387.5712890625, "learning_rate": 1.682160030355612e-06, "loss": 21.9062, "step": 8529 }, { "epoch": 0.5665139137942485, "grad_norm": 711.7039794921875, "learning_rate": 1.6820813841068638e-06, "loss": 21.0, "step": 8530 }, { "epoch": 0.5665803280866042, "grad_norm": 263.3770751953125, "learning_rate": 1.6820027299681913e-06, "loss": 17.5781, "step": 8531 }, { "epoch": 0.56664674237896, "grad_norm": 209.50177001953125, "learning_rate": 1.6819240679405044e-06, "loss": 15.1562, "step": 8532 }, { "epoch": 0.5667131566713157, "grad_norm": 1690.096923828125, "learning_rate": 1.6818453980247133e-06, "loss": 28.8125, "step": 8533 }, { "epoch": 0.5667795709636714, "grad_norm": 193.44873046875, "learning_rate": 1.6817667202217277e-06, "loss": 16.6719, "step": 8534 }, { "epoch": 0.5668459852560271, "grad_norm": 232.33265686035156, "learning_rate": 1.681688034532458e-06, "loss": 18.9531, "step": 8535 }, { "epoch": 0.5669123995483828, "grad_norm": 112.60807800292969, "learning_rate": 1.6816093409578138e-06, "loss": 16.9688, "step": 8536 }, { "epoch": 0.5669788138407386, "grad_norm": 210.73402404785156, "learning_rate": 1.681530639498706e-06, "loss": 21.8125, "step": 8537 }, { "epoch": 0.5670452281330942, "grad_norm": 241.9856414794922, "learning_rate": 1.6814519301560449e-06, "loss": 18.375, "step": 8538 }, { "epoch": 0.56711164242545, "grad_norm": 454.4049072265625, "learning_rate": 1.6813732129307407e-06, "loss": 20.8125, "step": 8539 }, { "epoch": 0.5671780567178056, "grad_norm": 235.89166259765625, "learning_rate": 1.6812944878237039e-06, "loss": 13.625, "step": 8540 }, { "epoch": 0.5672444710101614, "grad_norm": 201.60671997070312, "learning_rate": 1.6812157548358456e-06, "loss": 16.6406, "step": 8541 }, { "epoch": 0.567310885302517, "grad_norm": 157.83372497558594, "learning_rate": 1.681137013968076e-06, "loss": 16.9531, "step": 8542 }, { "epoch": 0.5673772995948728, "grad_norm": 335.8416748046875, "learning_rate": 1.6810582652213066e-06, "loss": 19.375, "step": 8543 }, { "epoch": 0.5674437138872286, "grad_norm": 197.91912841796875, "learning_rate": 1.6809795085964476e-06, "loss": 19.0938, "step": 8544 }, { "epoch": 0.5675101281795842, "grad_norm": 178.84591674804688, "learning_rate": 1.6809007440944103e-06, "loss": 19.8125, "step": 8545 }, { "epoch": 0.56757654247194, "grad_norm": 346.68084716796875, "learning_rate": 1.680821971716106e-06, "loss": 19.8594, "step": 8546 }, { "epoch": 0.5676429567642957, "grad_norm": 495.1572570800781, "learning_rate": 1.6807431914624454e-06, "loss": 14.6406, "step": 8547 }, { "epoch": 0.5677093710566514, "grad_norm": 244.39834594726562, "learning_rate": 1.6806644033343404e-06, "loss": 23.9375, "step": 8548 }, { "epoch": 0.5677757853490071, "grad_norm": 225.47779846191406, "learning_rate": 1.680585607332702e-06, "loss": 17.4844, "step": 8549 }, { "epoch": 0.5678421996413628, "grad_norm": 184.7855224609375, "learning_rate": 1.6805068034584418e-06, "loss": 22.8281, "step": 8550 }, { "epoch": 0.5679086139337185, "grad_norm": 314.6033630371094, "learning_rate": 1.680427991712471e-06, "loss": 18.25, "step": 8551 }, { "epoch": 0.5679750282260743, "grad_norm": 266.6224365234375, "learning_rate": 1.6803491720957021e-06, "loss": 20.2031, "step": 8552 }, { "epoch": 0.5680414425184299, "grad_norm": 209.45733642578125, "learning_rate": 1.6802703446090457e-06, "loss": 23.2031, "step": 8553 }, { "epoch": 0.5681078568107857, "grad_norm": 359.46820068359375, "learning_rate": 1.6801915092534147e-06, "loss": 17.3125, "step": 8554 }, { "epoch": 0.5681742711031414, "grad_norm": 295.7317199707031, "learning_rate": 1.6801126660297205e-06, "loss": 17.6875, "step": 8555 }, { "epoch": 0.5682406853954971, "grad_norm": 369.90228271484375, "learning_rate": 1.6800338149388749e-06, "loss": 16.5, "step": 8556 }, { "epoch": 0.5683070996878529, "grad_norm": 172.99078369140625, "learning_rate": 1.6799549559817904e-06, "loss": 21.25, "step": 8557 }, { "epoch": 0.5683735139802085, "grad_norm": 213.2207794189453, "learning_rate": 1.6798760891593788e-06, "loss": 22.7031, "step": 8558 }, { "epoch": 0.5684399282725643, "grad_norm": 1396.592041015625, "learning_rate": 1.6797972144725527e-06, "loss": 15.6406, "step": 8559 }, { "epoch": 0.5685063425649199, "grad_norm": 263.489013671875, "learning_rate": 1.6797183319222246e-06, "loss": 20.3438, "step": 8560 }, { "epoch": 0.5685727568572757, "grad_norm": 220.76084899902344, "learning_rate": 1.679639441509307e-06, "loss": 26.5, "step": 8561 }, { "epoch": 0.5686391711496314, "grad_norm": 196.93788146972656, "learning_rate": 1.6795605432347117e-06, "loss": 19.0781, "step": 8562 }, { "epoch": 0.5687055854419871, "grad_norm": 206.3384246826172, "learning_rate": 1.6794816370993523e-06, "loss": 22.5625, "step": 8563 }, { "epoch": 0.5687719997343428, "grad_norm": 170.72061157226562, "learning_rate": 1.679402723104141e-06, "loss": 18.8438, "step": 8564 }, { "epoch": 0.5688384140266985, "grad_norm": 274.8193359375, "learning_rate": 1.6793238012499908e-06, "loss": 21.6406, "step": 8565 }, { "epoch": 0.5689048283190543, "grad_norm": 862.87451171875, "learning_rate": 1.6792448715378144e-06, "loss": 25.2344, "step": 8566 }, { "epoch": 0.56897124261141, "grad_norm": 103.55561065673828, "learning_rate": 1.6791659339685254e-06, "loss": 15.2344, "step": 8567 }, { "epoch": 0.5690376569037657, "grad_norm": 431.15997314453125, "learning_rate": 1.6790869885430362e-06, "loss": 12.0312, "step": 8568 }, { "epoch": 0.5691040711961214, "grad_norm": 189.0741729736328, "learning_rate": 1.6790080352622602e-06, "loss": 18.5781, "step": 8569 }, { "epoch": 0.5691704854884772, "grad_norm": 404.9203186035156, "learning_rate": 1.6789290741271112e-06, "loss": 15.6094, "step": 8570 }, { "epoch": 0.5692368997808328, "grad_norm": 680.5360717773438, "learning_rate": 1.6788501051385018e-06, "loss": 21.9375, "step": 8571 }, { "epoch": 0.5693033140731886, "grad_norm": 391.7449951171875, "learning_rate": 1.678771128297346e-06, "loss": 28.5312, "step": 8572 }, { "epoch": 0.5693697283655442, "grad_norm": 144.72691345214844, "learning_rate": 1.6786921436045572e-06, "loss": 19.2812, "step": 8573 }, { "epoch": 0.5694361426579, "grad_norm": 347.9396057128906, "learning_rate": 1.678613151061049e-06, "loss": 16.6875, "step": 8574 }, { "epoch": 0.5695025569502556, "grad_norm": 443.031494140625, "learning_rate": 1.678534150667735e-06, "loss": 22.5938, "step": 8575 }, { "epoch": 0.5695689712426114, "grad_norm": 175.938720703125, "learning_rate": 1.6784551424255293e-06, "loss": 17.4688, "step": 8576 }, { "epoch": 0.5696353855349672, "grad_norm": 460.8139953613281, "learning_rate": 1.6783761263353457e-06, "loss": 16.4688, "step": 8577 }, { "epoch": 0.5697017998273228, "grad_norm": 365.4779968261719, "learning_rate": 1.6782971023980985e-06, "loss": 28.0781, "step": 8578 }, { "epoch": 0.5697682141196786, "grad_norm": 174.5866241455078, "learning_rate": 1.6782180706147013e-06, "loss": 15.0938, "step": 8579 }, { "epoch": 0.5698346284120342, "grad_norm": 222.82785034179688, "learning_rate": 1.6781390309860685e-06, "loss": 15.7031, "step": 8580 }, { "epoch": 0.56990104270439, "grad_norm": 204.67803955078125, "learning_rate": 1.6780599835131146e-06, "loss": 23.5625, "step": 8581 }, { "epoch": 0.5699674569967457, "grad_norm": 201.25009155273438, "learning_rate": 1.6779809281967538e-06, "loss": 22.3125, "step": 8582 }, { "epoch": 0.5700338712891014, "grad_norm": 135.31785583496094, "learning_rate": 1.6779018650379006e-06, "loss": 16.7656, "step": 8583 }, { "epoch": 0.5701002855814571, "grad_norm": 211.14833068847656, "learning_rate": 1.6778227940374695e-06, "loss": 21.0156, "step": 8584 }, { "epoch": 0.5701666998738129, "grad_norm": 157.642578125, "learning_rate": 1.6777437151963752e-06, "loss": 21.1562, "step": 8585 }, { "epoch": 0.5702331141661685, "grad_norm": 192.3228302001953, "learning_rate": 1.6776646285155323e-06, "loss": 19.0625, "step": 8586 }, { "epoch": 0.5702995284585243, "grad_norm": 749.4480590820312, "learning_rate": 1.6775855339958558e-06, "loss": 20.6875, "step": 8587 }, { "epoch": 0.57036594275088, "grad_norm": 195.2369384765625, "learning_rate": 1.6775064316382605e-06, "loss": 19.4844, "step": 8588 }, { "epoch": 0.5704323570432357, "grad_norm": 283.2644958496094, "learning_rate": 1.6774273214436616e-06, "loss": 19.6406, "step": 8589 }, { "epoch": 0.5704987713355915, "grad_norm": 575.0831909179688, "learning_rate": 1.6773482034129737e-06, "loss": 17.5625, "step": 8590 }, { "epoch": 0.5705651856279471, "grad_norm": 375.8665466308594, "learning_rate": 1.677269077547113e-06, "loss": 12.0, "step": 8591 }, { "epoch": 0.5706315999203029, "grad_norm": 196.27931213378906, "learning_rate": 1.6771899438469936e-06, "loss": 18.0625, "step": 8592 }, { "epoch": 0.5706980142126585, "grad_norm": 230.14398193359375, "learning_rate": 1.6771108023135318e-06, "loss": 19.625, "step": 8593 }, { "epoch": 0.5707644285050143, "grad_norm": 304.2537536621094, "learning_rate": 1.6770316529476425e-06, "loss": 19.6719, "step": 8594 }, { "epoch": 0.57083084279737, "grad_norm": 214.3892059326172, "learning_rate": 1.6769524957502412e-06, "loss": 15.3594, "step": 8595 }, { "epoch": 0.5708972570897257, "grad_norm": 405.7820739746094, "learning_rate": 1.6768733307222443e-06, "loss": 18.7969, "step": 8596 }, { "epoch": 0.5709636713820814, "grad_norm": 634.7674560546875, "learning_rate": 1.6767941578645668e-06, "loss": 19.6094, "step": 8597 }, { "epoch": 0.5710300856744371, "grad_norm": 160.95928955078125, "learning_rate": 1.6767149771781247e-06, "loss": 18.3438, "step": 8598 }, { "epoch": 0.5710964999667929, "grad_norm": 412.92236328125, "learning_rate": 1.6766357886638339e-06, "loss": 23.9219, "step": 8599 }, { "epoch": 0.5711629142591486, "grad_norm": 132.8505096435547, "learning_rate": 1.6765565923226103e-06, "loss": 17.9219, "step": 8600 }, { "epoch": 0.5712293285515043, "grad_norm": 325.33868408203125, "learning_rate": 1.6764773881553705e-06, "loss": 16.0859, "step": 8601 }, { "epoch": 0.57129574284386, "grad_norm": 193.09637451171875, "learning_rate": 1.6763981761630302e-06, "loss": 20.3438, "step": 8602 }, { "epoch": 0.5713621571362157, "grad_norm": 142.75048828125, "learning_rate": 1.6763189563465058e-06, "loss": 14.25, "step": 8603 }, { "epoch": 0.5714285714285714, "grad_norm": 170.38040161132812, "learning_rate": 1.6762397287067137e-06, "loss": 16.1875, "step": 8604 }, { "epoch": 0.5714949857209272, "grad_norm": 137.9629669189453, "learning_rate": 1.6761604932445705e-06, "loss": 13.7031, "step": 8605 }, { "epoch": 0.5715614000132828, "grad_norm": 175.80551147460938, "learning_rate": 1.6760812499609927e-06, "loss": 11.7656, "step": 8606 }, { "epoch": 0.5716278143056386, "grad_norm": 131.73680114746094, "learning_rate": 1.6760019988568966e-06, "loss": 15.6875, "step": 8607 }, { "epoch": 0.5716942285979942, "grad_norm": 184.08135986328125, "learning_rate": 1.6759227399331992e-06, "loss": 17.3906, "step": 8608 }, { "epoch": 0.57176064289035, "grad_norm": 189.35365295410156, "learning_rate": 1.6758434731908176e-06, "loss": 19.3438, "step": 8609 }, { "epoch": 0.5718270571827058, "grad_norm": 159.3380584716797, "learning_rate": 1.675764198630668e-06, "loss": 18.0625, "step": 8610 }, { "epoch": 0.5718934714750614, "grad_norm": 180.43487548828125, "learning_rate": 1.675684916253668e-06, "loss": 16.4844, "step": 8611 }, { "epoch": 0.5719598857674172, "grad_norm": 356.699462890625, "learning_rate": 1.6756056260607343e-06, "loss": 23.4219, "step": 8612 }, { "epoch": 0.5720263000597728, "grad_norm": 223.72958374023438, "learning_rate": 1.6755263280527844e-06, "loss": 14.625, "step": 8613 }, { "epoch": 0.5720927143521286, "grad_norm": 2112.574462890625, "learning_rate": 1.675447022230736e-06, "loss": 18.125, "step": 8614 }, { "epoch": 0.5721591286444843, "grad_norm": 370.2272644042969, "learning_rate": 1.6753677085955053e-06, "loss": 14.6875, "step": 8615 }, { "epoch": 0.57222554293684, "grad_norm": 219.06068420410156, "learning_rate": 1.6752883871480105e-06, "loss": 14.9062, "step": 8616 }, { "epoch": 0.5722919572291957, "grad_norm": 221.7685089111328, "learning_rate": 1.675209057889169e-06, "loss": 16.2031, "step": 8617 }, { "epoch": 0.5723583715215514, "grad_norm": 572.4254760742188, "learning_rate": 1.675129720819899e-06, "loss": 15.625, "step": 8618 }, { "epoch": 0.5724247858139071, "grad_norm": 206.79315185546875, "learning_rate": 1.6750503759411173e-06, "loss": 13.3906, "step": 8619 }, { "epoch": 0.5724912001062629, "grad_norm": 164.2732391357422, "learning_rate": 1.674971023253742e-06, "loss": 13.4531, "step": 8620 }, { "epoch": 0.5725576143986186, "grad_norm": 287.2556457519531, "learning_rate": 1.674891662758691e-06, "loss": 17.9688, "step": 8621 }, { "epoch": 0.5726240286909743, "grad_norm": 217.06663513183594, "learning_rate": 1.6748122944568828e-06, "loss": 17.5469, "step": 8622 }, { "epoch": 0.57269044298333, "grad_norm": 177.46604919433594, "learning_rate": 1.6747329183492349e-06, "loss": 20.125, "step": 8623 }, { "epoch": 0.5727568572756857, "grad_norm": 286.3898010253906, "learning_rate": 1.6746535344366656e-06, "loss": 22.125, "step": 8624 }, { "epoch": 0.5728232715680415, "grad_norm": 213.06674194335938, "learning_rate": 1.6745741427200934e-06, "loss": 17.3125, "step": 8625 }, { "epoch": 0.5728896858603971, "grad_norm": 231.706298828125, "learning_rate": 1.6744947432004363e-06, "loss": 18.25, "step": 8626 }, { "epoch": 0.5729561001527529, "grad_norm": 324.19744873046875, "learning_rate": 1.6744153358786131e-06, "loss": 18.8906, "step": 8627 }, { "epoch": 0.5730225144451085, "grad_norm": 123.68049621582031, "learning_rate": 1.674335920755542e-06, "loss": 16.1875, "step": 8628 }, { "epoch": 0.5730889287374643, "grad_norm": 169.059814453125, "learning_rate": 1.6742564978321424e-06, "loss": 18.8594, "step": 8629 }, { "epoch": 0.57315534302982, "grad_norm": 464.158935546875, "learning_rate": 1.6741770671093318e-06, "loss": 26.6406, "step": 8630 }, { "epoch": 0.5732217573221757, "grad_norm": 193.8971405029297, "learning_rate": 1.6740976285880298e-06, "loss": 16.1562, "step": 8631 }, { "epoch": 0.5732881716145315, "grad_norm": 200.98060607910156, "learning_rate": 1.6740181822691552e-06, "loss": 19.7031, "step": 8632 }, { "epoch": 0.5733545859068871, "grad_norm": 168.6092071533203, "learning_rate": 1.6739387281536269e-06, "loss": 18.0938, "step": 8633 }, { "epoch": 0.5734210001992429, "grad_norm": 261.9024658203125, "learning_rate": 1.6738592662423643e-06, "loss": 17.3594, "step": 8634 }, { "epoch": 0.5734874144915986, "grad_norm": 153.07020568847656, "learning_rate": 1.673779796536286e-06, "loss": 16.1875, "step": 8635 }, { "epoch": 0.5735538287839543, "grad_norm": 130.44955444335938, "learning_rate": 1.6737003190363112e-06, "loss": 19.1719, "step": 8636 }, { "epoch": 0.57362024307631, "grad_norm": 3298.128173828125, "learning_rate": 1.6736208337433602e-06, "loss": 18.3906, "step": 8637 }, { "epoch": 0.5736866573686658, "grad_norm": 276.8927001953125, "learning_rate": 1.6735413406583515e-06, "loss": 22.375, "step": 8638 }, { "epoch": 0.5737530716610214, "grad_norm": 197.17449951171875, "learning_rate": 1.6734618397822052e-06, "loss": 17.3125, "step": 8639 }, { "epoch": 0.5738194859533772, "grad_norm": 355.83294677734375, "learning_rate": 1.6733823311158401e-06, "loss": 20.625, "step": 8640 }, { "epoch": 0.5738859002457328, "grad_norm": 143.3306121826172, "learning_rate": 1.673302814660177e-06, "loss": 14.3906, "step": 8641 }, { "epoch": 0.5739523145380886, "grad_norm": 198.32440185546875, "learning_rate": 1.6732232904161351e-06, "loss": 14.7188, "step": 8642 }, { "epoch": 0.5740187288304444, "grad_norm": 270.8286437988281, "learning_rate": 1.673143758384634e-06, "loss": 18.4844, "step": 8643 }, { "epoch": 0.5740851431228, "grad_norm": 246.3174591064453, "learning_rate": 1.6730642185665947e-06, "loss": 21.6562, "step": 8644 }, { "epoch": 0.5741515574151558, "grad_norm": 335.98583984375, "learning_rate": 1.6729846709629363e-06, "loss": 22.375, "step": 8645 }, { "epoch": 0.5742179717075114, "grad_norm": 269.0547180175781, "learning_rate": 1.6729051155745792e-06, "loss": 17.0781, "step": 8646 }, { "epoch": 0.5742843859998672, "grad_norm": 140.89671325683594, "learning_rate": 1.6728255524024442e-06, "loss": 16.3906, "step": 8647 }, { "epoch": 0.5743508002922229, "grad_norm": 182.81881713867188, "learning_rate": 1.6727459814474508e-06, "loss": 15.8984, "step": 8648 }, { "epoch": 0.5744172145845786, "grad_norm": 153.90664672851562, "learning_rate": 1.6726664027105196e-06, "loss": 19.4375, "step": 8649 }, { "epoch": 0.5744836288769343, "grad_norm": 356.609375, "learning_rate": 1.6725868161925716e-06, "loss": 26.6562, "step": 8650 }, { "epoch": 0.57455004316929, "grad_norm": 109.76419830322266, "learning_rate": 1.672507221894527e-06, "loss": 13.8906, "step": 8651 }, { "epoch": 0.5746164574616457, "grad_norm": 181.97129821777344, "learning_rate": 1.6724276198173072e-06, "loss": 14.7812, "step": 8652 }, { "epoch": 0.5746828717540015, "grad_norm": 278.6658020019531, "learning_rate": 1.6723480099618316e-06, "loss": 16.7344, "step": 8653 }, { "epoch": 0.5747492860463572, "grad_norm": 240.0582275390625, "learning_rate": 1.6722683923290223e-06, "loss": 18.1406, "step": 8654 }, { "epoch": 0.5748157003387129, "grad_norm": 417.1549987792969, "learning_rate": 1.6721887669198002e-06, "loss": 24.25, "step": 8655 }, { "epoch": 0.5748821146310686, "grad_norm": 153.07786560058594, "learning_rate": 1.672109133735086e-06, "loss": 14.5625, "step": 8656 }, { "epoch": 0.5749485289234243, "grad_norm": 369.6909484863281, "learning_rate": 1.6720294927758005e-06, "loss": 22.0, "step": 8657 }, { "epoch": 0.5750149432157801, "grad_norm": 246.358154296875, "learning_rate": 1.6719498440428654e-06, "loss": 21.125, "step": 8658 }, { "epoch": 0.5750813575081357, "grad_norm": 163.5251007080078, "learning_rate": 1.6718701875372023e-06, "loss": 14.0312, "step": 8659 }, { "epoch": 0.5751477718004915, "grad_norm": 194.4919891357422, "learning_rate": 1.6717905232597321e-06, "loss": 20.9219, "step": 8660 }, { "epoch": 0.5752141860928471, "grad_norm": 356.08428955078125, "learning_rate": 1.6717108512113765e-06, "loss": 14.5781, "step": 8661 }, { "epoch": 0.5752806003852029, "grad_norm": 234.311767578125, "learning_rate": 1.671631171393057e-06, "loss": 23.5625, "step": 8662 }, { "epoch": 0.5753470146775587, "grad_norm": 240.177490234375, "learning_rate": 1.6715514838056955e-06, "loss": 23.875, "step": 8663 }, { "epoch": 0.5754134289699143, "grad_norm": 174.9573516845703, "learning_rate": 1.6714717884502138e-06, "loss": 17.6719, "step": 8664 }, { "epoch": 0.5754798432622701, "grad_norm": 215.53167724609375, "learning_rate": 1.6713920853275334e-06, "loss": 15.4219, "step": 8665 }, { "epoch": 0.5755462575546257, "grad_norm": 281.6014099121094, "learning_rate": 1.6713123744385767e-06, "loss": 21.7344, "step": 8666 }, { "epoch": 0.5756126718469815, "grad_norm": 247.2423858642578, "learning_rate": 1.6712326557842652e-06, "loss": 22.8438, "step": 8667 }, { "epoch": 0.5756790861393372, "grad_norm": 683.9086303710938, "learning_rate": 1.671152929365522e-06, "loss": 16.6094, "step": 8668 }, { "epoch": 0.5757455004316929, "grad_norm": 225.66851806640625, "learning_rate": 1.6710731951832683e-06, "loss": 15.7344, "step": 8669 }, { "epoch": 0.5758119147240486, "grad_norm": 145.27117919921875, "learning_rate": 1.670993453238427e-06, "loss": 14.4062, "step": 8670 }, { "epoch": 0.5758783290164043, "grad_norm": 176.34130859375, "learning_rate": 1.6709137035319204e-06, "loss": 14.25, "step": 8671 }, { "epoch": 0.57594474330876, "grad_norm": 731.5258178710938, "learning_rate": 1.6708339460646706e-06, "loss": 17.5781, "step": 8672 }, { "epoch": 0.5760111576011158, "grad_norm": 160.77468872070312, "learning_rate": 1.670754180837601e-06, "loss": 16.3438, "step": 8673 }, { "epoch": 0.5760775718934715, "grad_norm": 199.64920043945312, "learning_rate": 1.6706744078516334e-06, "loss": 13.4141, "step": 8674 }, { "epoch": 0.5761439861858272, "grad_norm": 164.39283752441406, "learning_rate": 1.6705946271076912e-06, "loss": 17.5, "step": 8675 }, { "epoch": 0.576210400478183, "grad_norm": 146.03842163085938, "learning_rate": 1.670514838606697e-06, "loss": 17.375, "step": 8676 }, { "epoch": 0.5762768147705386, "grad_norm": 261.9482727050781, "learning_rate": 1.6704350423495739e-06, "loss": 13.3438, "step": 8677 }, { "epoch": 0.5763432290628944, "grad_norm": 149.6476593017578, "learning_rate": 1.6703552383372447e-06, "loss": 15.0781, "step": 8678 }, { "epoch": 0.57640964335525, "grad_norm": 292.172119140625, "learning_rate": 1.6702754265706327e-06, "loss": 17.1406, "step": 8679 }, { "epoch": 0.5764760576476058, "grad_norm": 117.69940948486328, "learning_rate": 1.6701956070506608e-06, "loss": 17.875, "step": 8680 }, { "epoch": 0.5765424719399614, "grad_norm": 281.3235778808594, "learning_rate": 1.670115779778253e-06, "loss": 22.2031, "step": 8681 }, { "epoch": 0.5766088862323172, "grad_norm": 506.5976867675781, "learning_rate": 1.670035944754332e-06, "loss": 15.5625, "step": 8682 }, { "epoch": 0.5766753005246729, "grad_norm": 140.4879913330078, "learning_rate": 1.6699561019798217e-06, "loss": 19.0469, "step": 8683 }, { "epoch": 0.5767417148170286, "grad_norm": 313.7085266113281, "learning_rate": 1.6698762514556455e-06, "loss": 16.6719, "step": 8684 }, { "epoch": 0.5768081291093844, "grad_norm": 193.34605407714844, "learning_rate": 1.669796393182727e-06, "loss": 12.5312, "step": 8685 }, { "epoch": 0.57687454340174, "grad_norm": 240.45166015625, "learning_rate": 1.66971652716199e-06, "loss": 20.8906, "step": 8686 }, { "epoch": 0.5769409576940958, "grad_norm": 285.8781433105469, "learning_rate": 1.6696366533943584e-06, "loss": 20.8125, "step": 8687 }, { "epoch": 0.5770073719864515, "grad_norm": 286.6421203613281, "learning_rate": 1.6695567718807562e-06, "loss": 16.4219, "step": 8688 }, { "epoch": 0.5770737862788072, "grad_norm": 200.1189422607422, "learning_rate": 1.6694768826221074e-06, "loss": 16.1875, "step": 8689 }, { "epoch": 0.5771402005711629, "grad_norm": 301.6477355957031, "learning_rate": 1.6693969856193361e-06, "loss": 17.3438, "step": 8690 }, { "epoch": 0.5772066148635187, "grad_norm": 213.78860473632812, "learning_rate": 1.6693170808733665e-06, "loss": 14.7031, "step": 8691 }, { "epoch": 0.5772730291558743, "grad_norm": 221.1636505126953, "learning_rate": 1.669237168385123e-06, "loss": 17.4062, "step": 8692 }, { "epoch": 0.5773394434482301, "grad_norm": 216.34133911132812, "learning_rate": 1.6691572481555294e-06, "loss": 16.8281, "step": 8693 }, { "epoch": 0.5774058577405857, "grad_norm": 425.1416015625, "learning_rate": 1.669077320185511e-06, "loss": 25.75, "step": 8694 }, { "epoch": 0.5774722720329415, "grad_norm": 194.21939086914062, "learning_rate": 1.6689973844759915e-06, "loss": 17.2656, "step": 8695 }, { "epoch": 0.5775386863252973, "grad_norm": 279.89453125, "learning_rate": 1.6689174410278963e-06, "loss": 21.5, "step": 8696 }, { "epoch": 0.5776051006176529, "grad_norm": 345.0356750488281, "learning_rate": 1.66883748984215e-06, "loss": 19.8438, "step": 8697 }, { "epoch": 0.5776715149100087, "grad_norm": 116.12432861328125, "learning_rate": 1.6687575309196774e-06, "loss": 15.0938, "step": 8698 }, { "epoch": 0.5777379292023643, "grad_norm": 134.29449462890625, "learning_rate": 1.668677564261403e-06, "loss": 19.3906, "step": 8699 }, { "epoch": 0.5778043434947201, "grad_norm": 140.55767822265625, "learning_rate": 1.668597589868252e-06, "loss": 19.0938, "step": 8700 }, { "epoch": 0.5778707577870758, "grad_norm": 157.74147033691406, "learning_rate": 1.6685176077411498e-06, "loss": 14.3125, "step": 8701 }, { "epoch": 0.5779371720794315, "grad_norm": 192.1861114501953, "learning_rate": 1.6684376178810216e-06, "loss": 19.8906, "step": 8702 }, { "epoch": 0.5780035863717872, "grad_norm": 134.6557159423828, "learning_rate": 1.6683576202887923e-06, "loss": 19.6719, "step": 8703 }, { "epoch": 0.5780700006641429, "grad_norm": 199.55775451660156, "learning_rate": 1.6682776149653875e-06, "loss": 21.2344, "step": 8704 }, { "epoch": 0.5781364149564986, "grad_norm": 274.25701904296875, "learning_rate": 1.6681976019117326e-06, "loss": 20.6094, "step": 8705 }, { "epoch": 0.5782028292488544, "grad_norm": 130.44680786132812, "learning_rate": 1.6681175811287531e-06, "loss": 17.5625, "step": 8706 }, { "epoch": 0.5782692435412101, "grad_norm": 377.1177978515625, "learning_rate": 1.6680375526173746e-06, "loss": 27.5938, "step": 8707 }, { "epoch": 0.5783356578335658, "grad_norm": 91.56580352783203, "learning_rate": 1.667957516378523e-06, "loss": 12.0312, "step": 8708 }, { "epoch": 0.5784020721259215, "grad_norm": 118.49237060546875, "learning_rate": 1.667877472413124e-06, "loss": 13.9531, "step": 8709 }, { "epoch": 0.5784684864182772, "grad_norm": 254.2576904296875, "learning_rate": 1.6677974207221035e-06, "loss": 22.1875, "step": 8710 }, { "epoch": 0.578534900710633, "grad_norm": 1750.5465087890625, "learning_rate": 1.6677173613063876e-06, "loss": 20.4688, "step": 8711 }, { "epoch": 0.5786013150029886, "grad_norm": 194.59909057617188, "learning_rate": 1.6676372941669024e-06, "loss": 17.4844, "step": 8712 }, { "epoch": 0.5786677292953444, "grad_norm": 1153.88818359375, "learning_rate": 1.6675572193045734e-06, "loss": 28.6875, "step": 8713 }, { "epoch": 0.5787341435877, "grad_norm": 126.18401336669922, "learning_rate": 1.667477136720328e-06, "loss": 15.4688, "step": 8714 }, { "epoch": 0.5788005578800558, "grad_norm": 127.4302749633789, "learning_rate": 1.6673970464150917e-06, "loss": 16.6875, "step": 8715 }, { "epoch": 0.5788669721724115, "grad_norm": 273.8582458496094, "learning_rate": 1.6673169483897916e-06, "loss": 14.7969, "step": 8716 }, { "epoch": 0.5789333864647672, "grad_norm": 209.32188415527344, "learning_rate": 1.6672368426453533e-06, "loss": 19.8125, "step": 8717 }, { "epoch": 0.578999800757123, "grad_norm": 193.3042449951172, "learning_rate": 1.6671567291827043e-06, "loss": 15.6719, "step": 8718 }, { "epoch": 0.5790662150494786, "grad_norm": 120.57579803466797, "learning_rate": 1.6670766080027708e-06, "loss": 15.0156, "step": 8719 }, { "epoch": 0.5791326293418344, "grad_norm": 130.29080200195312, "learning_rate": 1.6669964791064798e-06, "loss": 17.5625, "step": 8720 }, { "epoch": 0.5791990436341901, "grad_norm": 159.4199981689453, "learning_rate": 1.666916342494758e-06, "loss": 18.4141, "step": 8721 }, { "epoch": 0.5792654579265458, "grad_norm": 105.94831085205078, "learning_rate": 1.6668361981685325e-06, "loss": 13.4219, "step": 8722 }, { "epoch": 0.5793318722189015, "grad_norm": 325.88946533203125, "learning_rate": 1.6667560461287307e-06, "loss": 15.2969, "step": 8723 }, { "epoch": 0.5793982865112572, "grad_norm": 309.2260437011719, "learning_rate": 1.6666758863762794e-06, "loss": 17.75, "step": 8724 }, { "epoch": 0.5794647008036129, "grad_norm": 232.92059326171875, "learning_rate": 1.6665957189121056e-06, "loss": 20.0312, "step": 8725 }, { "epoch": 0.5795311150959687, "grad_norm": 165.33541870117188, "learning_rate": 1.666515543737137e-06, "loss": 22.0625, "step": 8726 }, { "epoch": 0.5795975293883243, "grad_norm": 238.6648712158203, "learning_rate": 1.666435360852301e-06, "loss": 21.5781, "step": 8727 }, { "epoch": 0.5796639436806801, "grad_norm": 572.54345703125, "learning_rate": 1.666355170258525e-06, "loss": 28.0469, "step": 8728 }, { "epoch": 0.5797303579730358, "grad_norm": 139.73617553710938, "learning_rate": 1.6662749719567366e-06, "loss": 20.8438, "step": 8729 }, { "epoch": 0.5797967722653915, "grad_norm": 211.82089233398438, "learning_rate": 1.6661947659478637e-06, "loss": 18.125, "step": 8730 }, { "epoch": 0.5798631865577473, "grad_norm": 245.74264526367188, "learning_rate": 1.6661145522328338e-06, "loss": 18.6406, "step": 8731 }, { "epoch": 0.5799296008501029, "grad_norm": 198.83172607421875, "learning_rate": 1.6660343308125748e-06, "loss": 13.1875, "step": 8732 }, { "epoch": 0.5799960151424587, "grad_norm": 366.76849365234375, "learning_rate": 1.6659541016880144e-06, "loss": 28.8594, "step": 8733 }, { "epoch": 0.5800624294348143, "grad_norm": 280.3529052734375, "learning_rate": 1.6658738648600816e-06, "loss": 21.0781, "step": 8734 }, { "epoch": 0.5801288437271701, "grad_norm": 212.5980682373047, "learning_rate": 1.6657936203297037e-06, "loss": 18.7344, "step": 8735 }, { "epoch": 0.5801952580195258, "grad_norm": 509.671630859375, "learning_rate": 1.6657133680978088e-06, "loss": 20.3281, "step": 8736 }, { "epoch": 0.5802616723118815, "grad_norm": 161.46751403808594, "learning_rate": 1.6656331081653258e-06, "loss": 16.4375, "step": 8737 }, { "epoch": 0.5803280866042372, "grad_norm": 373.494140625, "learning_rate": 1.665552840533183e-06, "loss": 19.875, "step": 8738 }, { "epoch": 0.580394500896593, "grad_norm": 185.4688262939453, "learning_rate": 1.6654725652023082e-06, "loss": 17.4375, "step": 8739 }, { "epoch": 0.5804609151889487, "grad_norm": 180.5570831298828, "learning_rate": 1.6653922821736309e-06, "loss": 16.3125, "step": 8740 }, { "epoch": 0.5805273294813044, "grad_norm": 424.9340515136719, "learning_rate": 1.6653119914480793e-06, "loss": 22.0625, "step": 8741 }, { "epoch": 0.5805937437736601, "grad_norm": 169.051513671875, "learning_rate": 1.6652316930265823e-06, "loss": 16.1094, "step": 8742 }, { "epoch": 0.5806601580660158, "grad_norm": 137.47921752929688, "learning_rate": 1.6651513869100687e-06, "loss": 16.9062, "step": 8743 }, { "epoch": 0.5807265723583716, "grad_norm": 226.47682189941406, "learning_rate": 1.665071073099467e-06, "loss": 21.0, "step": 8744 }, { "epoch": 0.5807929866507272, "grad_norm": 475.2086181640625, "learning_rate": 1.664990751595707e-06, "loss": 22.75, "step": 8745 }, { "epoch": 0.580859400943083, "grad_norm": 160.07708740234375, "learning_rate": 1.6649104223997172e-06, "loss": 17.875, "step": 8746 }, { "epoch": 0.5809258152354386, "grad_norm": 216.55857849121094, "learning_rate": 1.664830085512427e-06, "loss": 14.7969, "step": 8747 }, { "epoch": 0.5809922295277944, "grad_norm": 264.34381103515625, "learning_rate": 1.6647497409347662e-06, "loss": 18.3281, "step": 8748 }, { "epoch": 0.58105864382015, "grad_norm": 126.16388702392578, "learning_rate": 1.6646693886676632e-06, "loss": 15.7812, "step": 8749 }, { "epoch": 0.5811250581125058, "grad_norm": 124.11613464355469, "learning_rate": 1.6645890287120486e-06, "loss": 16.6562, "step": 8750 }, { "epoch": 0.5811914724048616, "grad_norm": 178.5050506591797, "learning_rate": 1.6645086610688507e-06, "loss": 20.4375, "step": 8751 }, { "epoch": 0.5812578866972172, "grad_norm": 366.4710693359375, "learning_rate": 1.664428285739e-06, "loss": 22.4062, "step": 8752 }, { "epoch": 0.581324300989573, "grad_norm": 232.392822265625, "learning_rate": 1.664347902723426e-06, "loss": 14.4844, "step": 8753 }, { "epoch": 0.5813907152819286, "grad_norm": 140.2273406982422, "learning_rate": 1.6642675120230588e-06, "loss": 18.8125, "step": 8754 }, { "epoch": 0.5814571295742844, "grad_norm": 227.11610412597656, "learning_rate": 1.6641871136388278e-06, "loss": 23.2031, "step": 8755 }, { "epoch": 0.5815235438666401, "grad_norm": 935.5802001953125, "learning_rate": 1.6641067075716633e-06, "loss": 20.2812, "step": 8756 }, { "epoch": 0.5815899581589958, "grad_norm": 309.58251953125, "learning_rate": 1.6640262938224953e-06, "loss": 20.3281, "step": 8757 }, { "epoch": 0.5816563724513515, "grad_norm": 141.62841796875, "learning_rate": 1.6639458723922544e-06, "loss": 16.1562, "step": 8758 }, { "epoch": 0.5817227867437073, "grad_norm": 126.42320251464844, "learning_rate": 1.6638654432818699e-06, "loss": 16.375, "step": 8759 }, { "epoch": 0.5817892010360629, "grad_norm": 172.74038696289062, "learning_rate": 1.6637850064922732e-06, "loss": 15.3125, "step": 8760 }, { "epoch": 0.5818556153284187, "grad_norm": 171.40074157714844, "learning_rate": 1.6637045620243942e-06, "loss": 17.8281, "step": 8761 }, { "epoch": 0.5819220296207744, "grad_norm": 171.91946411132812, "learning_rate": 1.6636241098791634e-06, "loss": 16.375, "step": 8762 }, { "epoch": 0.5819884439131301, "grad_norm": 146.48875427246094, "learning_rate": 1.6635436500575116e-06, "loss": 16.1094, "step": 8763 }, { "epoch": 0.5820548582054859, "grad_norm": 148.6309051513672, "learning_rate": 1.6634631825603696e-06, "loss": 15.0781, "step": 8764 }, { "epoch": 0.5821212724978415, "grad_norm": 374.0087585449219, "learning_rate": 1.663382707388668e-06, "loss": 21.0156, "step": 8765 }, { "epoch": 0.5821876867901973, "grad_norm": 287.88824462890625, "learning_rate": 1.663302224543338e-06, "loss": 20.1094, "step": 8766 }, { "epoch": 0.5822541010825529, "grad_norm": 325.8296813964844, "learning_rate": 1.66322173402531e-06, "loss": 26.125, "step": 8767 }, { "epoch": 0.5823205153749087, "grad_norm": 200.47073364257812, "learning_rate": 1.6631412358355154e-06, "loss": 17.3906, "step": 8768 }, { "epoch": 0.5823869296672644, "grad_norm": 83.49834442138672, "learning_rate": 1.6630607299748858e-06, "loss": 11.0781, "step": 8769 }, { "epoch": 0.5824533439596201, "grad_norm": 178.86007690429688, "learning_rate": 1.6629802164443518e-06, "loss": 22.625, "step": 8770 }, { "epoch": 0.5825197582519758, "grad_norm": 205.25563049316406, "learning_rate": 1.6628996952448449e-06, "loss": 23.7656, "step": 8771 }, { "epoch": 0.5825861725443315, "grad_norm": 135.5181427001953, "learning_rate": 1.6628191663772965e-06, "loss": 16.9844, "step": 8772 }, { "epoch": 0.5826525868366873, "grad_norm": 333.8544921875, "learning_rate": 1.6627386298426386e-06, "loss": 15.1719, "step": 8773 }, { "epoch": 0.582719001129043, "grad_norm": 108.11213684082031, "learning_rate": 1.6626580856418018e-06, "loss": 14.7812, "step": 8774 }, { "epoch": 0.5827854154213987, "grad_norm": 259.8745422363281, "learning_rate": 1.6625775337757186e-06, "loss": 15.4062, "step": 8775 }, { "epoch": 0.5828518297137544, "grad_norm": 280.1911315917969, "learning_rate": 1.6624969742453209e-06, "loss": 26.625, "step": 8776 }, { "epoch": 0.5829182440061101, "grad_norm": 314.5093994140625, "learning_rate": 1.6624164070515397e-06, "loss": 16.6094, "step": 8777 }, { "epoch": 0.5829846582984658, "grad_norm": 248.63304138183594, "learning_rate": 1.6623358321953075e-06, "loss": 16.6094, "step": 8778 }, { "epoch": 0.5830510725908216, "grad_norm": 119.7342529296875, "learning_rate": 1.6622552496775566e-06, "loss": 18.1094, "step": 8779 }, { "epoch": 0.5831174868831772, "grad_norm": 199.1842803955078, "learning_rate": 1.6621746594992188e-06, "loss": 14.2188, "step": 8780 }, { "epoch": 0.583183901175533, "grad_norm": 158.13771057128906, "learning_rate": 1.6620940616612262e-06, "loss": 24.8594, "step": 8781 }, { "epoch": 0.5832503154678886, "grad_norm": 167.23336791992188, "learning_rate": 1.6620134561645114e-06, "loss": 18.75, "step": 8782 }, { "epoch": 0.5833167297602444, "grad_norm": 455.6575927734375, "learning_rate": 1.6619328430100068e-06, "loss": 28.5625, "step": 8783 }, { "epoch": 0.5833831440526002, "grad_norm": 223.43980407714844, "learning_rate": 1.6618522221986447e-06, "loss": 21.25, "step": 8784 }, { "epoch": 0.5834495583449558, "grad_norm": 250.12208557128906, "learning_rate": 1.6617715937313576e-06, "loss": 15.8594, "step": 8785 }, { "epoch": 0.5835159726373116, "grad_norm": 109.27053833007812, "learning_rate": 1.6616909576090783e-06, "loss": 18.9688, "step": 8786 }, { "epoch": 0.5835823869296672, "grad_norm": 312.98492431640625, "learning_rate": 1.6616103138327398e-06, "loss": 16.3906, "step": 8787 }, { "epoch": 0.583648801222023, "grad_norm": 290.67279052734375, "learning_rate": 1.6615296624032746e-06, "loss": 16.7344, "step": 8788 }, { "epoch": 0.5837152155143787, "grad_norm": 251.97689819335938, "learning_rate": 1.6614490033216155e-06, "loss": 21.0625, "step": 8789 }, { "epoch": 0.5837816298067344, "grad_norm": 279.0993957519531, "learning_rate": 1.661368336588696e-06, "loss": 15.6875, "step": 8790 }, { "epoch": 0.5838480440990901, "grad_norm": 200.4832763671875, "learning_rate": 1.6612876622054487e-06, "loss": 19.7031, "step": 8791 }, { "epoch": 0.5839144583914458, "grad_norm": 165.17294311523438, "learning_rate": 1.6612069801728072e-06, "loss": 17.625, "step": 8792 }, { "epoch": 0.5839808726838015, "grad_norm": 390.4054870605469, "learning_rate": 1.6611262904917047e-06, "loss": 20.5156, "step": 8793 }, { "epoch": 0.5840472869761573, "grad_norm": 341.0150146484375, "learning_rate": 1.6610455931630746e-06, "loss": 13.2656, "step": 8794 }, { "epoch": 0.584113701268513, "grad_norm": 147.5495147705078, "learning_rate": 1.6609648881878503e-06, "loss": 11.625, "step": 8795 }, { "epoch": 0.5841801155608687, "grad_norm": 257.321533203125, "learning_rate": 1.6608841755669646e-06, "loss": 16.25, "step": 8796 }, { "epoch": 0.5842465298532244, "grad_norm": 317.6314697265625, "learning_rate": 1.6608034553013523e-06, "loss": 20.9531, "step": 8797 }, { "epoch": 0.5843129441455801, "grad_norm": 555.045166015625, "learning_rate": 1.660722727391947e-06, "loss": 26.2812, "step": 8798 }, { "epoch": 0.5843793584379359, "grad_norm": 164.2300262451172, "learning_rate": 1.660641991839682e-06, "loss": 14.9062, "step": 8799 }, { "epoch": 0.5844457727302915, "grad_norm": 123.7072525024414, "learning_rate": 1.660561248645491e-06, "loss": 14.6094, "step": 8800 }, { "epoch": 0.5845121870226473, "grad_norm": 398.46240234375, "learning_rate": 1.6604804978103087e-06, "loss": 19.5938, "step": 8801 }, { "epoch": 0.5845786013150029, "grad_norm": 473.9273681640625, "learning_rate": 1.660399739335069e-06, "loss": 18.1406, "step": 8802 }, { "epoch": 0.5846450156073587, "grad_norm": 131.4593048095703, "learning_rate": 1.6603189732207056e-06, "loss": 16.8594, "step": 8803 }, { "epoch": 0.5847114298997144, "grad_norm": 176.27294921875, "learning_rate": 1.6602381994681532e-06, "loss": 18.0625, "step": 8804 }, { "epoch": 0.5847778441920701, "grad_norm": 287.5381164550781, "learning_rate": 1.660157418078346e-06, "loss": 22.375, "step": 8805 }, { "epoch": 0.5848442584844259, "grad_norm": 204.87338256835938, "learning_rate": 1.6600766290522183e-06, "loss": 14.9688, "step": 8806 }, { "epoch": 0.5849106727767815, "grad_norm": 192.3640899658203, "learning_rate": 1.659995832390705e-06, "loss": 17.5938, "step": 8807 }, { "epoch": 0.5849770870691373, "grad_norm": 331.94549560546875, "learning_rate": 1.6599150280947407e-06, "loss": 24.0, "step": 8808 }, { "epoch": 0.585043501361493, "grad_norm": 118.26351165771484, "learning_rate": 1.6598342161652595e-06, "loss": 13.625, "step": 8809 }, { "epoch": 0.5851099156538487, "grad_norm": 161.6853485107422, "learning_rate": 1.6597533966031966e-06, "loss": 16.6562, "step": 8810 }, { "epoch": 0.5851763299462044, "grad_norm": 253.0608673095703, "learning_rate": 1.6596725694094872e-06, "loss": 16.9062, "step": 8811 }, { "epoch": 0.5852427442385602, "grad_norm": 214.7534637451172, "learning_rate": 1.6595917345850656e-06, "loss": 11.5859, "step": 8812 }, { "epoch": 0.5853091585309158, "grad_norm": 194.9953155517578, "learning_rate": 1.6595108921308673e-06, "loss": 17.1875, "step": 8813 }, { "epoch": 0.5853755728232716, "grad_norm": 266.6230773925781, "learning_rate": 1.6594300420478274e-06, "loss": 21.0312, "step": 8814 }, { "epoch": 0.5854419871156273, "grad_norm": 461.3354797363281, "learning_rate": 1.6593491843368805e-06, "loss": 25.0156, "step": 8815 }, { "epoch": 0.585508401407983, "grad_norm": 202.73390197753906, "learning_rate": 1.659268318998963e-06, "loss": 21.9375, "step": 8816 }, { "epoch": 0.5855748157003388, "grad_norm": 604.940673828125, "learning_rate": 1.6591874460350095e-06, "loss": 16.9531, "step": 8817 }, { "epoch": 0.5856412299926944, "grad_norm": 171.07431030273438, "learning_rate": 1.6591065654459558e-06, "loss": 17.5781, "step": 8818 }, { "epoch": 0.5857076442850502, "grad_norm": 227.07394409179688, "learning_rate": 1.6590256772327371e-06, "loss": 16.0312, "step": 8819 }, { "epoch": 0.5857740585774058, "grad_norm": 336.1758117675781, "learning_rate": 1.6589447813962897e-06, "loss": 25.0312, "step": 8820 }, { "epoch": 0.5858404728697616, "grad_norm": 640.6559448242188, "learning_rate": 1.658863877937549e-06, "loss": 18.0625, "step": 8821 }, { "epoch": 0.5859068871621173, "grad_norm": 270.4573974609375, "learning_rate": 1.658782966857451e-06, "loss": 26.8281, "step": 8822 }, { "epoch": 0.585973301454473, "grad_norm": 145.7606658935547, "learning_rate": 1.6587020481569314e-06, "loss": 20.4844, "step": 8823 }, { "epoch": 0.5860397157468287, "grad_norm": 453.1196594238281, "learning_rate": 1.6586211218369263e-06, "loss": 22.7031, "step": 8824 }, { "epoch": 0.5861061300391844, "grad_norm": 146.4302520751953, "learning_rate": 1.658540187898372e-06, "loss": 23.0156, "step": 8825 }, { "epoch": 0.5861725443315402, "grad_norm": 295.2486877441406, "learning_rate": 1.6584592463422043e-06, "loss": 18.8281, "step": 8826 }, { "epoch": 0.5862389586238959, "grad_norm": 164.08055114746094, "learning_rate": 1.6583782971693596e-06, "loss": 17.0625, "step": 8827 }, { "epoch": 0.5863053729162516, "grad_norm": 171.638671875, "learning_rate": 1.6582973403807747e-06, "loss": 15.8281, "step": 8828 }, { "epoch": 0.5863717872086073, "grad_norm": 294.03216552734375, "learning_rate": 1.6582163759773859e-06, "loss": 18.0156, "step": 8829 }, { "epoch": 0.586438201500963, "grad_norm": 191.8470916748047, "learning_rate": 1.6581354039601295e-06, "loss": 22.3594, "step": 8830 }, { "epoch": 0.5865046157933187, "grad_norm": 327.6980285644531, "learning_rate": 1.658054424329942e-06, "loss": 23.7109, "step": 8831 }, { "epoch": 0.5865710300856745, "grad_norm": 441.0441589355469, "learning_rate": 1.6579734370877602e-06, "loss": 18.2812, "step": 8832 }, { "epoch": 0.5866374443780301, "grad_norm": 1202.606201171875, "learning_rate": 1.6578924422345218e-06, "loss": 17.8125, "step": 8833 }, { "epoch": 0.5867038586703859, "grad_norm": 575.1474609375, "learning_rate": 1.6578114397711624e-06, "loss": 26.0469, "step": 8834 }, { "epoch": 0.5867702729627415, "grad_norm": 183.27450561523438, "learning_rate": 1.6577304296986198e-06, "loss": 20.7188, "step": 8835 }, { "epoch": 0.5868366872550973, "grad_norm": 401.8778076171875, "learning_rate": 1.6576494120178308e-06, "loss": 22.1562, "step": 8836 }, { "epoch": 0.5869031015474531, "grad_norm": 170.21778869628906, "learning_rate": 1.6575683867297328e-06, "loss": 18.1875, "step": 8837 }, { "epoch": 0.5869695158398087, "grad_norm": 248.54347229003906, "learning_rate": 1.6574873538352628e-06, "loss": 25.875, "step": 8838 }, { "epoch": 0.5870359301321645, "grad_norm": 130.9971160888672, "learning_rate": 1.657406313335358e-06, "loss": 16.4844, "step": 8839 }, { "epoch": 0.5871023444245201, "grad_norm": 328.30230712890625, "learning_rate": 1.6573252652309563e-06, "loss": 22.7188, "step": 8840 }, { "epoch": 0.5871687587168759, "grad_norm": 206.10244750976562, "learning_rate": 1.6572442095229947e-06, "loss": 22.6094, "step": 8841 }, { "epoch": 0.5872351730092316, "grad_norm": 165.5025634765625, "learning_rate": 1.6571631462124116e-06, "loss": 15.25, "step": 8842 }, { "epoch": 0.5873015873015873, "grad_norm": 325.44000244140625, "learning_rate": 1.6570820753001439e-06, "loss": 17.8125, "step": 8843 }, { "epoch": 0.587368001593943, "grad_norm": 225.2393035888672, "learning_rate": 1.6570009967871295e-06, "loss": 22.0781, "step": 8844 }, { "epoch": 0.5874344158862987, "grad_norm": 98.52198791503906, "learning_rate": 1.6569199106743064e-06, "loss": 15.125, "step": 8845 }, { "epoch": 0.5875008301786544, "grad_norm": 241.74346923828125, "learning_rate": 1.6568388169626127e-06, "loss": 21.4844, "step": 8846 }, { "epoch": 0.5875672444710102, "grad_norm": 219.19557189941406, "learning_rate": 1.6567577156529864e-06, "loss": 19.2656, "step": 8847 }, { "epoch": 0.5876336587633659, "grad_norm": 193.9335479736328, "learning_rate": 1.6566766067463654e-06, "loss": 17.1562, "step": 8848 }, { "epoch": 0.5877000730557216, "grad_norm": 125.77014923095703, "learning_rate": 1.656595490243688e-06, "loss": 13.0156, "step": 8849 }, { "epoch": 0.5877664873480773, "grad_norm": 147.05303955078125, "learning_rate": 1.6565143661458927e-06, "loss": 17.4375, "step": 8850 }, { "epoch": 0.587832901640433, "grad_norm": 346.60345458984375, "learning_rate": 1.6564332344539178e-06, "loss": 32.2812, "step": 8851 }, { "epoch": 0.5878993159327888, "grad_norm": 169.54800415039062, "learning_rate": 1.6563520951687018e-06, "loss": 18.7969, "step": 8852 }, { "epoch": 0.5879657302251444, "grad_norm": 301.1312255859375, "learning_rate": 1.6562709482911834e-06, "loss": 17.7969, "step": 8853 }, { "epoch": 0.5880321445175002, "grad_norm": 185.7286376953125, "learning_rate": 1.6561897938223007e-06, "loss": 16.5625, "step": 8854 }, { "epoch": 0.5880985588098558, "grad_norm": 380.4928283691406, "learning_rate": 1.656108631762993e-06, "loss": 22.4375, "step": 8855 }, { "epoch": 0.5881649731022116, "grad_norm": 190.9799346923828, "learning_rate": 1.656027462114199e-06, "loss": 13.4531, "step": 8856 }, { "epoch": 0.5882313873945673, "grad_norm": 234.89515686035156, "learning_rate": 1.6559462848768577e-06, "loss": 26.5938, "step": 8857 }, { "epoch": 0.588297801686923, "grad_norm": 219.6302947998047, "learning_rate": 1.655865100051908e-06, "loss": 19.3438, "step": 8858 }, { "epoch": 0.5883642159792788, "grad_norm": 309.0606994628906, "learning_rate": 1.6557839076402888e-06, "loss": 19.75, "step": 8859 }, { "epoch": 0.5884306302716344, "grad_norm": 349.5477294921875, "learning_rate": 1.6557027076429398e-06, "loss": 17.0938, "step": 8860 }, { "epoch": 0.5884970445639902, "grad_norm": 767.8772583007812, "learning_rate": 1.6556215000607997e-06, "loss": 21.5312, "step": 8861 }, { "epoch": 0.5885634588563459, "grad_norm": 157.9774932861328, "learning_rate": 1.6555402848948084e-06, "loss": 21.4375, "step": 8862 }, { "epoch": 0.5886298731487016, "grad_norm": 184.48472595214844, "learning_rate": 1.6554590621459048e-06, "loss": 20.0, "step": 8863 }, { "epoch": 0.5886962874410573, "grad_norm": 425.4902038574219, "learning_rate": 1.6553778318150289e-06, "loss": 22.1484, "step": 8864 }, { "epoch": 0.588762701733413, "grad_norm": 266.2928161621094, "learning_rate": 1.6552965939031202e-06, "loss": 18.7344, "step": 8865 }, { "epoch": 0.5888291160257687, "grad_norm": 132.38705444335938, "learning_rate": 1.6552153484111185e-06, "loss": 18.1562, "step": 8866 }, { "epoch": 0.5888955303181245, "grad_norm": 395.754150390625, "learning_rate": 1.6551340953399632e-06, "loss": 20.1094, "step": 8867 }, { "epoch": 0.5889619446104801, "grad_norm": 196.36973571777344, "learning_rate": 1.6550528346905945e-06, "loss": 19.2344, "step": 8868 }, { "epoch": 0.5890283589028359, "grad_norm": 265.25372314453125, "learning_rate": 1.6549715664639524e-06, "loss": 17.0312, "step": 8869 }, { "epoch": 0.5890947731951917, "grad_norm": 243.22850036621094, "learning_rate": 1.6548902906609766e-06, "loss": 21.1406, "step": 8870 }, { "epoch": 0.5891611874875473, "grad_norm": 99.28752136230469, "learning_rate": 1.654809007282608e-06, "loss": 14.7344, "step": 8871 }, { "epoch": 0.5892276017799031, "grad_norm": 218.40455627441406, "learning_rate": 1.654727716329786e-06, "loss": 21.9844, "step": 8872 }, { "epoch": 0.5892940160722587, "grad_norm": 142.04676818847656, "learning_rate": 1.6546464178034518e-06, "loss": 16.1562, "step": 8873 }, { "epoch": 0.5893604303646145, "grad_norm": 204.83331298828125, "learning_rate": 1.654565111704545e-06, "loss": 18.75, "step": 8874 }, { "epoch": 0.5894268446569702, "grad_norm": 419.63037109375, "learning_rate": 1.6544837980340064e-06, "loss": 35.7188, "step": 8875 }, { "epoch": 0.5894932589493259, "grad_norm": 185.13421630859375, "learning_rate": 1.654402476792777e-06, "loss": 18.2812, "step": 8876 }, { "epoch": 0.5895596732416816, "grad_norm": 598.9248046875, "learning_rate": 1.6543211479817965e-06, "loss": 24.6562, "step": 8877 }, { "epoch": 0.5896260875340373, "grad_norm": 390.2762756347656, "learning_rate": 1.6542398116020067e-06, "loss": 19.0156, "step": 8878 }, { "epoch": 0.589692501826393, "grad_norm": 151.92835998535156, "learning_rate": 1.6541584676543478e-06, "loss": 13.4375, "step": 8879 }, { "epoch": 0.5897589161187488, "grad_norm": 207.76654052734375, "learning_rate": 1.654077116139761e-06, "loss": 15.7656, "step": 8880 }, { "epoch": 0.5898253304111045, "grad_norm": 227.15269470214844, "learning_rate": 1.6539957570591874e-06, "loss": 16.5781, "step": 8881 }, { "epoch": 0.5898917447034602, "grad_norm": 294.29547119140625, "learning_rate": 1.653914390413568e-06, "loss": 18.8281, "step": 8882 }, { "epoch": 0.5899581589958159, "grad_norm": 1042.7911376953125, "learning_rate": 1.6538330162038438e-06, "loss": 27.3438, "step": 8883 }, { "epoch": 0.5900245732881716, "grad_norm": 423.37567138671875, "learning_rate": 1.6537516344309564e-06, "loss": 15.4219, "step": 8884 }, { "epoch": 0.5900909875805274, "grad_norm": 288.5218811035156, "learning_rate": 1.6536702450958472e-06, "loss": 22.875, "step": 8885 }, { "epoch": 0.590157401872883, "grad_norm": 846.4689331054688, "learning_rate": 1.6535888481994573e-06, "loss": 18.7031, "step": 8886 }, { "epoch": 0.5902238161652388, "grad_norm": 383.655029296875, "learning_rate": 1.6535074437427286e-06, "loss": 26.2656, "step": 8887 }, { "epoch": 0.5902902304575944, "grad_norm": 150.2433319091797, "learning_rate": 1.6534260317266027e-06, "loss": 18.7344, "step": 8888 }, { "epoch": 0.5903566447499502, "grad_norm": 303.5056457519531, "learning_rate": 1.6533446121520212e-06, "loss": 22.875, "step": 8889 }, { "epoch": 0.5904230590423059, "grad_norm": 262.9789123535156, "learning_rate": 1.653263185019926e-06, "loss": 18.0312, "step": 8890 }, { "epoch": 0.5904894733346616, "grad_norm": 165.70875549316406, "learning_rate": 1.653181750331259e-06, "loss": 18.2344, "step": 8891 }, { "epoch": 0.5905558876270174, "grad_norm": 550.8589477539062, "learning_rate": 1.6531003080869619e-06, "loss": 16.2656, "step": 8892 }, { "epoch": 0.590622301919373, "grad_norm": 170.01315307617188, "learning_rate": 1.6530188582879771e-06, "loss": 14.2344, "step": 8893 }, { "epoch": 0.5906887162117288, "grad_norm": 151.60858154296875, "learning_rate": 1.6529374009352468e-06, "loss": 15.8125, "step": 8894 }, { "epoch": 0.5907551305040845, "grad_norm": 417.8448181152344, "learning_rate": 1.6528559360297132e-06, "loss": 22.4531, "step": 8895 }, { "epoch": 0.5908215447964402, "grad_norm": 126.81956481933594, "learning_rate": 1.6527744635723187e-06, "loss": 17.0312, "step": 8896 }, { "epoch": 0.5908879590887959, "grad_norm": 224.1820526123047, "learning_rate": 1.6526929835640053e-06, "loss": 16.5156, "step": 8897 }, { "epoch": 0.5909543733811516, "grad_norm": 209.6136474609375, "learning_rate": 1.652611496005716e-06, "loss": 16.875, "step": 8898 }, { "epoch": 0.5910207876735073, "grad_norm": 228.8277130126953, "learning_rate": 1.6525300008983933e-06, "loss": 16.4531, "step": 8899 }, { "epoch": 0.5910872019658631, "grad_norm": 137.54803466796875, "learning_rate": 1.6524484982429797e-06, "loss": 22.9531, "step": 8900 }, { "epoch": 0.5911536162582187, "grad_norm": 143.77926635742188, "learning_rate": 1.652366988040418e-06, "loss": 16.4219, "step": 8901 }, { "epoch": 0.5912200305505745, "grad_norm": 251.8296661376953, "learning_rate": 1.6522854702916513e-06, "loss": 19.4219, "step": 8902 }, { "epoch": 0.5912864448429302, "grad_norm": 204.80026245117188, "learning_rate": 1.6522039449976224e-06, "loss": 15.4688, "step": 8903 }, { "epoch": 0.5913528591352859, "grad_norm": 249.49732971191406, "learning_rate": 1.6521224121592746e-06, "loss": 13.6406, "step": 8904 }, { "epoch": 0.5914192734276417, "grad_norm": 191.56167602539062, "learning_rate": 1.6520408717775506e-06, "loss": 14.75, "step": 8905 }, { "epoch": 0.5914856877199973, "grad_norm": 192.89413452148438, "learning_rate": 1.6519593238533935e-06, "loss": 16.4375, "step": 8906 }, { "epoch": 0.5915521020123531, "grad_norm": 213.938720703125, "learning_rate": 1.651877768387747e-06, "loss": 18.7344, "step": 8907 }, { "epoch": 0.5916185163047087, "grad_norm": 199.71908569335938, "learning_rate": 1.6517962053815544e-06, "loss": 23.3125, "step": 8908 }, { "epoch": 0.5916849305970645, "grad_norm": 261.04852294921875, "learning_rate": 1.651714634835759e-06, "loss": 21.5938, "step": 8909 }, { "epoch": 0.5917513448894202, "grad_norm": 380.935546875, "learning_rate": 1.6516330567513047e-06, "loss": 19.7656, "step": 8910 }, { "epoch": 0.5918177591817759, "grad_norm": 160.3075408935547, "learning_rate": 1.651551471129135e-06, "loss": 15.3125, "step": 8911 }, { "epoch": 0.5918841734741316, "grad_norm": 134.0032196044922, "learning_rate": 1.651469877970194e-06, "loss": 14.75, "step": 8912 }, { "epoch": 0.5919505877664873, "grad_norm": 398.14306640625, "learning_rate": 1.6513882772754243e-06, "loss": 23.1875, "step": 8913 }, { "epoch": 0.5920170020588431, "grad_norm": 102.48194122314453, "learning_rate": 1.651306669045771e-06, "loss": 13.6562, "step": 8914 }, { "epoch": 0.5920834163511988, "grad_norm": 384.73504638671875, "learning_rate": 1.651225053282178e-06, "loss": 17.9375, "step": 8915 }, { "epoch": 0.5921498306435545, "grad_norm": 241.8061065673828, "learning_rate": 1.6511434299855887e-06, "loss": 18.4531, "step": 8916 }, { "epoch": 0.5922162449359102, "grad_norm": 245.2470245361328, "learning_rate": 1.6510617991569476e-06, "loss": 21.625, "step": 8917 }, { "epoch": 0.592282659228266, "grad_norm": 485.871826171875, "learning_rate": 1.6509801607971992e-06, "loss": 26.6719, "step": 8918 }, { "epoch": 0.5923490735206216, "grad_norm": 106.8482894897461, "learning_rate": 1.6508985149072875e-06, "loss": 15.0312, "step": 8919 }, { "epoch": 0.5924154878129774, "grad_norm": 243.387939453125, "learning_rate": 1.6508168614881574e-06, "loss": 22.7188, "step": 8920 }, { "epoch": 0.592481902105333, "grad_norm": 346.01544189453125, "learning_rate": 1.6507352005407532e-06, "loss": 26.6875, "step": 8921 }, { "epoch": 0.5925483163976888, "grad_norm": 269.4844665527344, "learning_rate": 1.6506535320660194e-06, "loss": 16.8438, "step": 8922 }, { "epoch": 0.5926147306900444, "grad_norm": 464.74847412109375, "learning_rate": 1.6505718560649004e-06, "loss": 17.3125, "step": 8923 }, { "epoch": 0.5926811449824002, "grad_norm": 121.13044738769531, "learning_rate": 1.6504901725383416e-06, "loss": 20.5781, "step": 8924 }, { "epoch": 0.592747559274756, "grad_norm": 561.1890869140625, "learning_rate": 1.6504084814872873e-06, "loss": 16.9375, "step": 8925 }, { "epoch": 0.5928139735671116, "grad_norm": 144.8138427734375, "learning_rate": 1.650326782912683e-06, "loss": 15.9219, "step": 8926 }, { "epoch": 0.5928803878594674, "grad_norm": 186.9105682373047, "learning_rate": 1.6502450768154735e-06, "loss": 21.7969, "step": 8927 }, { "epoch": 0.592946802151823, "grad_norm": 209.50511169433594, "learning_rate": 1.6501633631966036e-06, "loss": 16.3203, "step": 8928 }, { "epoch": 0.5930132164441788, "grad_norm": 151.02117919921875, "learning_rate": 1.650081642057019e-06, "loss": 16.5156, "step": 8929 }, { "epoch": 0.5930796307365345, "grad_norm": 232.93370056152344, "learning_rate": 1.6499999133976647e-06, "loss": 19.1562, "step": 8930 }, { "epoch": 0.5931460450288902, "grad_norm": 142.68466186523438, "learning_rate": 1.6499181772194866e-06, "loss": 13.5312, "step": 8931 }, { "epoch": 0.5932124593212459, "grad_norm": 451.69091796875, "learning_rate": 1.6498364335234293e-06, "loss": 20.5625, "step": 8932 }, { "epoch": 0.5932788736136017, "grad_norm": 148.27574157714844, "learning_rate": 1.649754682310439e-06, "loss": 16.0781, "step": 8933 }, { "epoch": 0.5933452879059573, "grad_norm": 260.65093994140625, "learning_rate": 1.6496729235814614e-06, "loss": 19.8906, "step": 8934 }, { "epoch": 0.5934117021983131, "grad_norm": 136.98745727539062, "learning_rate": 1.649591157337442e-06, "loss": 16.4219, "step": 8935 }, { "epoch": 0.5934781164906688, "grad_norm": 181.14788818359375, "learning_rate": 1.6495093835793266e-06, "loss": 20.1094, "step": 8936 }, { "epoch": 0.5935445307830245, "grad_norm": 204.5328826904297, "learning_rate": 1.6494276023080612e-06, "loss": 17.3125, "step": 8937 }, { "epoch": 0.5936109450753803, "grad_norm": 262.0688171386719, "learning_rate": 1.649345813524592e-06, "loss": 14.8594, "step": 8938 }, { "epoch": 0.5936773593677359, "grad_norm": 176.8131561279297, "learning_rate": 1.6492640172298645e-06, "loss": 16.7188, "step": 8939 }, { "epoch": 0.5937437736600917, "grad_norm": 168.76577758789062, "learning_rate": 1.6491822134248251e-06, "loss": 14.75, "step": 8940 }, { "epoch": 0.5938101879524473, "grad_norm": 247.9608154296875, "learning_rate": 1.6491004021104207e-06, "loss": 16.0469, "step": 8941 }, { "epoch": 0.5938766022448031, "grad_norm": 127.8580322265625, "learning_rate": 1.6490185832875967e-06, "loss": 22.7812, "step": 8942 }, { "epoch": 0.5939430165371588, "grad_norm": 320.94708251953125, "learning_rate": 1.6489367569573e-06, "loss": 22.75, "step": 8943 }, { "epoch": 0.5940094308295145, "grad_norm": 174.17706298828125, "learning_rate": 1.6488549231204774e-06, "loss": 21.6875, "step": 8944 }, { "epoch": 0.5940758451218702, "grad_norm": 216.11944580078125, "learning_rate": 1.6487730817780752e-06, "loss": 20.1719, "step": 8945 }, { "epoch": 0.5941422594142259, "grad_norm": 188.8852996826172, "learning_rate": 1.64869123293104e-06, "loss": 20.7812, "step": 8946 }, { "epoch": 0.5942086737065817, "grad_norm": 206.45840454101562, "learning_rate": 1.6486093765803185e-06, "loss": 16.9531, "step": 8947 }, { "epoch": 0.5942750879989374, "grad_norm": 117.85714721679688, "learning_rate": 1.6485275127268579e-06, "loss": 16.6562, "step": 8948 }, { "epoch": 0.5943415022912931, "grad_norm": 181.23251342773438, "learning_rate": 1.6484456413716048e-06, "loss": 20.5, "step": 8949 }, { "epoch": 0.5944079165836488, "grad_norm": 292.0658264160156, "learning_rate": 1.6483637625155066e-06, "loss": 21.8281, "step": 8950 }, { "epoch": 0.5944743308760045, "grad_norm": 188.8028106689453, "learning_rate": 1.6482818761595106e-06, "loss": 15.875, "step": 8951 }, { "epoch": 0.5945407451683602, "grad_norm": 173.7922821044922, "learning_rate": 1.6481999823045629e-06, "loss": 16.2812, "step": 8952 }, { "epoch": 0.594607159460716, "grad_norm": 1032.2965087890625, "learning_rate": 1.6481180809516122e-06, "loss": 17.0781, "step": 8953 }, { "epoch": 0.5946735737530716, "grad_norm": 147.93698120117188, "learning_rate": 1.6480361721016053e-06, "loss": 18.7812, "step": 8954 }, { "epoch": 0.5947399880454274, "grad_norm": 120.96943664550781, "learning_rate": 1.6479542557554893e-06, "loss": 19.1406, "step": 8955 }, { "epoch": 0.594806402337783, "grad_norm": 767.8881225585938, "learning_rate": 1.6478723319142122e-06, "loss": 28.6875, "step": 8956 }, { "epoch": 0.5948728166301388, "grad_norm": 360.62042236328125, "learning_rate": 1.6477904005787215e-06, "loss": 21.6562, "step": 8957 }, { "epoch": 0.5949392309224946, "grad_norm": 211.1556396484375, "learning_rate": 1.647708461749965e-06, "loss": 17.3125, "step": 8958 }, { "epoch": 0.5950056452148502, "grad_norm": 383.2297058105469, "learning_rate": 1.6476265154288907e-06, "loss": 19.3594, "step": 8959 }, { "epoch": 0.595072059507206, "grad_norm": 248.80899047851562, "learning_rate": 1.647544561616446e-06, "loss": 21.125, "step": 8960 }, { "epoch": 0.5951384737995616, "grad_norm": 620.8692626953125, "learning_rate": 1.6474626003135796e-06, "loss": 20.4844, "step": 8961 }, { "epoch": 0.5952048880919174, "grad_norm": 394.6729431152344, "learning_rate": 1.647380631521239e-06, "loss": 19.7188, "step": 8962 }, { "epoch": 0.5952713023842731, "grad_norm": 191.38290405273438, "learning_rate": 1.6472986552403723e-06, "loss": 21.3906, "step": 8963 }, { "epoch": 0.5953377166766288, "grad_norm": 365.4295959472656, "learning_rate": 1.6472166714719285e-06, "loss": 20.3125, "step": 8964 }, { "epoch": 0.5954041309689845, "grad_norm": 213.4550018310547, "learning_rate": 1.6471346802168551e-06, "loss": 15.1562, "step": 8965 }, { "epoch": 0.5954705452613402, "grad_norm": 209.9042510986328, "learning_rate": 1.647052681476101e-06, "loss": 17.2344, "step": 8966 }, { "epoch": 0.595536959553696, "grad_norm": 170.8582305908203, "learning_rate": 1.6469706752506145e-06, "loss": 15.0469, "step": 8967 }, { "epoch": 0.5956033738460517, "grad_norm": 93.31490325927734, "learning_rate": 1.6468886615413444e-06, "loss": 15.5156, "step": 8968 }, { "epoch": 0.5956697881384074, "grad_norm": 173.85617065429688, "learning_rate": 1.646806640349239e-06, "loss": 13.7031, "step": 8969 }, { "epoch": 0.5957362024307631, "grad_norm": 167.9942626953125, "learning_rate": 1.6467246116752477e-06, "loss": 16.5156, "step": 8970 }, { "epoch": 0.5958026167231188, "grad_norm": 152.5384979248047, "learning_rate": 1.646642575520319e-06, "loss": 19.0312, "step": 8971 }, { "epoch": 0.5958690310154745, "grad_norm": 134.4801483154297, "learning_rate": 1.646560531885402e-06, "loss": 19.6094, "step": 8972 }, { "epoch": 0.5959354453078303, "grad_norm": 116.59456634521484, "learning_rate": 1.646478480771445e-06, "loss": 15.4531, "step": 8973 }, { "epoch": 0.5960018596001859, "grad_norm": 145.82571411132812, "learning_rate": 1.6463964221793982e-06, "loss": 18.0625, "step": 8974 }, { "epoch": 0.5960682738925417, "grad_norm": 156.86102294921875, "learning_rate": 1.6463143561102102e-06, "loss": 16.0625, "step": 8975 }, { "epoch": 0.5961346881848973, "grad_norm": 158.1583709716797, "learning_rate": 1.6462322825648306e-06, "loss": 15.4531, "step": 8976 }, { "epoch": 0.5962011024772531, "grad_norm": 227.94097900390625, "learning_rate": 1.6461502015442084e-06, "loss": 15.2188, "step": 8977 }, { "epoch": 0.5962675167696089, "grad_norm": 414.1260070800781, "learning_rate": 1.6460681130492931e-06, "loss": 27.375, "step": 8978 }, { "epoch": 0.5963339310619645, "grad_norm": 222.6487274169922, "learning_rate": 1.6459860170810344e-06, "loss": 23.0781, "step": 8979 }, { "epoch": 0.5964003453543203, "grad_norm": 186.07586669921875, "learning_rate": 1.645903913640382e-06, "loss": 19.5312, "step": 8980 }, { "epoch": 0.596466759646676, "grad_norm": 340.69451904296875, "learning_rate": 1.6458218027282858e-06, "loss": 15.0625, "step": 8981 }, { "epoch": 0.5965331739390317, "grad_norm": 328.9167175292969, "learning_rate": 1.6457396843456955e-06, "loss": 21.0312, "step": 8982 }, { "epoch": 0.5965995882313874, "grad_norm": 301.88507080078125, "learning_rate": 1.6456575584935606e-06, "loss": 22.125, "step": 8983 }, { "epoch": 0.5966660025237431, "grad_norm": 267.7232666015625, "learning_rate": 1.6455754251728316e-06, "loss": 18.6094, "step": 8984 }, { "epoch": 0.5967324168160988, "grad_norm": 261.450439453125, "learning_rate": 1.645493284384458e-06, "loss": 15.6875, "step": 8985 }, { "epoch": 0.5967988311084546, "grad_norm": 261.3828125, "learning_rate": 1.6454111361293905e-06, "loss": 16.0625, "step": 8986 }, { "epoch": 0.5968652454008102, "grad_norm": 130.2188720703125, "learning_rate": 1.6453289804085793e-06, "loss": 16.1875, "step": 8987 }, { "epoch": 0.596931659693166, "grad_norm": 384.5728454589844, "learning_rate": 1.6452468172229743e-06, "loss": 18.7812, "step": 8988 }, { "epoch": 0.5969980739855217, "grad_norm": 158.25677490234375, "learning_rate": 1.6451646465735262e-06, "loss": 20.2656, "step": 8989 }, { "epoch": 0.5970644882778774, "grad_norm": 314.66510009765625, "learning_rate": 1.6450824684611858e-06, "loss": 16.5625, "step": 8990 }, { "epoch": 0.5971309025702332, "grad_norm": 156.1282958984375, "learning_rate": 1.6450002828869029e-06, "loss": 16.7812, "step": 8991 }, { "epoch": 0.5971973168625888, "grad_norm": 531.3334350585938, "learning_rate": 1.6449180898516292e-06, "loss": 23.6562, "step": 8992 }, { "epoch": 0.5972637311549446, "grad_norm": 139.065185546875, "learning_rate": 1.6448358893563143e-06, "loss": 12.5156, "step": 8993 }, { "epoch": 0.5973301454473002, "grad_norm": 319.5317077636719, "learning_rate": 1.64475368140191e-06, "loss": 22.8125, "step": 8994 }, { "epoch": 0.597396559739656, "grad_norm": 194.7849884033203, "learning_rate": 1.6446714659893671e-06, "loss": 20.2812, "step": 8995 }, { "epoch": 0.5974629740320117, "grad_norm": 263.5093688964844, "learning_rate": 1.6445892431196361e-06, "loss": 13.7344, "step": 8996 }, { "epoch": 0.5975293883243674, "grad_norm": 280.12030029296875, "learning_rate": 1.6445070127936687e-06, "loss": 16.9844, "step": 8997 }, { "epoch": 0.5975958026167231, "grad_norm": 258.60546875, "learning_rate": 1.6444247750124159e-06, "loss": 18.0938, "step": 8998 }, { "epoch": 0.5976622169090788, "grad_norm": 127.9474105834961, "learning_rate": 1.6443425297768286e-06, "loss": 21.1406, "step": 8999 }, { "epoch": 0.5977286312014346, "grad_norm": 220.51646423339844, "learning_rate": 1.6442602770878584e-06, "loss": 12.9531, "step": 9000 }, { "epoch": 0.5977950454937903, "grad_norm": 494.1928405761719, "learning_rate": 1.644178016946457e-06, "loss": 20.4375, "step": 9001 }, { "epoch": 0.597861459786146, "grad_norm": 173.19679260253906, "learning_rate": 1.6440957493535757e-06, "loss": 15.5156, "step": 9002 }, { "epoch": 0.5979278740785017, "grad_norm": 214.88955688476562, "learning_rate": 1.6440134743101661e-06, "loss": 20.0625, "step": 9003 }, { "epoch": 0.5979942883708574, "grad_norm": 161.77017211914062, "learning_rate": 1.6439311918171801e-06, "loss": 17.5938, "step": 9004 }, { "epoch": 0.5980607026632131, "grad_norm": 280.88970947265625, "learning_rate": 1.6438489018755694e-06, "loss": 22.3125, "step": 9005 }, { "epoch": 0.5981271169555689, "grad_norm": 208.84361267089844, "learning_rate": 1.6437666044862857e-06, "loss": 20.2656, "step": 9006 }, { "epoch": 0.5981935312479245, "grad_norm": 106.57958221435547, "learning_rate": 1.6436842996502813e-06, "loss": 14.625, "step": 9007 }, { "epoch": 0.5982599455402803, "grad_norm": 278.57708740234375, "learning_rate": 1.6436019873685082e-06, "loss": 16.25, "step": 9008 }, { "epoch": 0.5983263598326359, "grad_norm": 313.56732177734375, "learning_rate": 1.6435196676419183e-06, "loss": 17.7812, "step": 9009 }, { "epoch": 0.5983927741249917, "grad_norm": 232.4165496826172, "learning_rate": 1.6434373404714638e-06, "loss": 18.8438, "step": 9010 }, { "epoch": 0.5984591884173475, "grad_norm": 370.8321838378906, "learning_rate": 1.6433550058580975e-06, "loss": 26.7812, "step": 9011 }, { "epoch": 0.5985256027097031, "grad_norm": 276.3812561035156, "learning_rate": 1.6432726638027715e-06, "loss": 21.9062, "step": 9012 }, { "epoch": 0.5985920170020589, "grad_norm": 235.12979125976562, "learning_rate": 1.643190314306438e-06, "loss": 22.7969, "step": 9013 }, { "epoch": 0.5986584312944145, "grad_norm": 286.8552551269531, "learning_rate": 1.6431079573700498e-06, "loss": 21.9375, "step": 9014 }, { "epoch": 0.5987248455867703, "grad_norm": 352.12213134765625, "learning_rate": 1.6430255929945593e-06, "loss": 20.3594, "step": 9015 }, { "epoch": 0.598791259879126, "grad_norm": 308.1036071777344, "learning_rate": 1.64294322118092e-06, "loss": 16.3906, "step": 9016 }, { "epoch": 0.5988576741714817, "grad_norm": 167.42410278320312, "learning_rate": 1.642860841930084e-06, "loss": 16.25, "step": 9017 }, { "epoch": 0.5989240884638374, "grad_norm": 191.26202392578125, "learning_rate": 1.642778455243005e-06, "loss": 16.6875, "step": 9018 }, { "epoch": 0.5989905027561931, "grad_norm": 252.09449768066406, "learning_rate": 1.6426960611206348e-06, "loss": 24.2188, "step": 9019 }, { "epoch": 0.5990569170485488, "grad_norm": 203.39334106445312, "learning_rate": 1.6426136595639277e-06, "loss": 18.2188, "step": 9020 }, { "epoch": 0.5991233313409046, "grad_norm": 265.26434326171875, "learning_rate": 1.642531250573836e-06, "loss": 23.1562, "step": 9021 }, { "epoch": 0.5991897456332603, "grad_norm": 226.8737030029297, "learning_rate": 1.6424488341513134e-06, "loss": 14.7031, "step": 9022 }, { "epoch": 0.599256159925616, "grad_norm": 253.66326904296875, "learning_rate": 1.642366410297313e-06, "loss": 24.4688, "step": 9023 }, { "epoch": 0.5993225742179717, "grad_norm": 254.1387939453125, "learning_rate": 1.6422839790127887e-06, "loss": 21.9688, "step": 9024 }, { "epoch": 0.5993889885103274, "grad_norm": 101.44827270507812, "learning_rate": 1.6422015402986935e-06, "loss": 23.0469, "step": 9025 }, { "epoch": 0.5994554028026832, "grad_norm": 168.21238708496094, "learning_rate": 1.6421190941559813e-06, "loss": 19.7031, "step": 9026 }, { "epoch": 0.5995218170950388, "grad_norm": 143.3982391357422, "learning_rate": 1.6420366405856054e-06, "loss": 17.2344, "step": 9027 }, { "epoch": 0.5995882313873946, "grad_norm": 170.9564208984375, "learning_rate": 1.6419541795885202e-06, "loss": 15.8438, "step": 9028 }, { "epoch": 0.5996546456797502, "grad_norm": 274.2982482910156, "learning_rate": 1.6418717111656793e-06, "loss": 20.7812, "step": 9029 }, { "epoch": 0.599721059972106, "grad_norm": 576.3330078125, "learning_rate": 1.6417892353180362e-06, "loss": 16.5781, "step": 9030 }, { "epoch": 0.5997874742644617, "grad_norm": 418.9360656738281, "learning_rate": 1.6417067520465455e-06, "loss": 21.625, "step": 9031 }, { "epoch": 0.5998538885568174, "grad_norm": 154.8125762939453, "learning_rate": 1.6416242613521608e-06, "loss": 17.0781, "step": 9032 }, { "epoch": 0.5999203028491732, "grad_norm": 191.46490478515625, "learning_rate": 1.6415417632358373e-06, "loss": 19.3906, "step": 9033 }, { "epoch": 0.5999867171415288, "grad_norm": 139.04025268554688, "learning_rate": 1.641459257698528e-06, "loss": 16.7031, "step": 9034 }, { "epoch": 0.6000531314338846, "grad_norm": 247.47946166992188, "learning_rate": 1.6413767447411883e-06, "loss": 19.2031, "step": 9035 }, { "epoch": 0.6001195457262403, "grad_norm": 232.72459411621094, "learning_rate": 1.6412942243647719e-06, "loss": 15.9688, "step": 9036 }, { "epoch": 0.600185960018596, "grad_norm": 294.9817810058594, "learning_rate": 1.6412116965702341e-06, "loss": 21.1875, "step": 9037 }, { "epoch": 0.6002523743109517, "grad_norm": 185.08396911621094, "learning_rate": 1.6411291613585288e-06, "loss": 18.0625, "step": 9038 }, { "epoch": 0.6003187886033075, "grad_norm": 279.7582092285156, "learning_rate": 1.6410466187306112e-06, "loss": 15.9062, "step": 9039 }, { "epoch": 0.6003852028956631, "grad_norm": 585.0520629882812, "learning_rate": 1.640964068687436e-06, "loss": 19.7188, "step": 9040 }, { "epoch": 0.6004516171880189, "grad_norm": 183.8934326171875, "learning_rate": 1.6408815112299578e-06, "loss": 15.9062, "step": 9041 }, { "epoch": 0.6005180314803745, "grad_norm": 290.4090881347656, "learning_rate": 1.640798946359132e-06, "loss": 18.7344, "step": 9042 }, { "epoch": 0.6005844457727303, "grad_norm": 158.83636474609375, "learning_rate": 1.6407163740759135e-06, "loss": 15.3594, "step": 9043 }, { "epoch": 0.6006508600650861, "grad_norm": 579.3156127929688, "learning_rate": 1.6406337943812576e-06, "loss": 21.0938, "step": 9044 }, { "epoch": 0.6007172743574417, "grad_norm": 137.21580505371094, "learning_rate": 1.6405512072761191e-06, "loss": 15.4531, "step": 9045 }, { "epoch": 0.6007836886497975, "grad_norm": 123.8885269165039, "learning_rate": 1.6404686127614537e-06, "loss": 13.4219, "step": 9046 }, { "epoch": 0.6008501029421531, "grad_norm": 173.7136993408203, "learning_rate": 1.6403860108382169e-06, "loss": 23.0625, "step": 9047 }, { "epoch": 0.6009165172345089, "grad_norm": 189.2117919921875, "learning_rate": 1.640303401507364e-06, "loss": 15.4062, "step": 9048 }, { "epoch": 0.6009829315268645, "grad_norm": 166.5130157470703, "learning_rate": 1.6402207847698502e-06, "loss": 22.1875, "step": 9049 }, { "epoch": 0.6010493458192203, "grad_norm": 192.08937072753906, "learning_rate": 1.6401381606266317e-06, "loss": 19.1406, "step": 9050 }, { "epoch": 0.601115760111576, "grad_norm": 169.16778564453125, "learning_rate": 1.6400555290786643e-06, "loss": 14.8984, "step": 9051 }, { "epoch": 0.6011821744039317, "grad_norm": 227.39865112304688, "learning_rate": 1.6399728901269034e-06, "loss": 16.0781, "step": 9052 }, { "epoch": 0.6012485886962874, "grad_norm": 407.8515930175781, "learning_rate": 1.639890243772305e-06, "loss": 22.7031, "step": 9053 }, { "epoch": 0.6013150029886432, "grad_norm": 118.41741180419922, "learning_rate": 1.6398075900158255e-06, "loss": 13.4688, "step": 9054 }, { "epoch": 0.6013814172809989, "grad_norm": 189.3762969970703, "learning_rate": 1.6397249288584204e-06, "loss": 20.1406, "step": 9055 }, { "epoch": 0.6014478315733546, "grad_norm": 600.5050048828125, "learning_rate": 1.6396422603010467e-06, "loss": 23.9062, "step": 9056 }, { "epoch": 0.6015142458657103, "grad_norm": 310.012939453125, "learning_rate": 1.63955958434466e-06, "loss": 22.0938, "step": 9057 }, { "epoch": 0.601580660158066, "grad_norm": 1061.2320556640625, "learning_rate": 1.6394769009902167e-06, "loss": 17.8594, "step": 9058 }, { "epoch": 0.6016470744504218, "grad_norm": 687.0081176757812, "learning_rate": 1.6393942102386732e-06, "loss": 20.2188, "step": 9059 }, { "epoch": 0.6017134887427774, "grad_norm": 327.52972412109375, "learning_rate": 1.6393115120909865e-06, "loss": 16.5938, "step": 9060 }, { "epoch": 0.6017799030351332, "grad_norm": 205.6351318359375, "learning_rate": 1.6392288065481127e-06, "loss": 13.2656, "step": 9061 }, { "epoch": 0.6018463173274888, "grad_norm": 508.4967956542969, "learning_rate": 1.6391460936110088e-06, "loss": 24.0, "step": 9062 }, { "epoch": 0.6019127316198446, "grad_norm": 164.1044158935547, "learning_rate": 1.6390633732806315e-06, "loss": 19.0625, "step": 9063 }, { "epoch": 0.6019791459122003, "grad_norm": 118.80951690673828, "learning_rate": 1.6389806455579373e-06, "loss": 16.0156, "step": 9064 }, { "epoch": 0.602045560204556, "grad_norm": 297.0047607421875, "learning_rate": 1.6388979104438837e-06, "loss": 17.9688, "step": 9065 }, { "epoch": 0.6021119744969118, "grad_norm": 271.0590515136719, "learning_rate": 1.6388151679394272e-06, "loss": 17.2344, "step": 9066 }, { "epoch": 0.6021783887892674, "grad_norm": 170.8987579345703, "learning_rate": 1.6387324180455256e-06, "loss": 10.3594, "step": 9067 }, { "epoch": 0.6022448030816232, "grad_norm": 206.7340087890625, "learning_rate": 1.6386496607631352e-06, "loss": 14.5938, "step": 9068 }, { "epoch": 0.6023112173739789, "grad_norm": 244.76364135742188, "learning_rate": 1.6385668960932141e-06, "loss": 19.0938, "step": 9069 }, { "epoch": 0.6023776316663346, "grad_norm": 395.039306640625, "learning_rate": 1.6384841240367196e-06, "loss": 21.2969, "step": 9070 }, { "epoch": 0.6024440459586903, "grad_norm": 213.02479553222656, "learning_rate": 1.6384013445946087e-06, "loss": 15.5156, "step": 9071 }, { "epoch": 0.602510460251046, "grad_norm": 291.21270751953125, "learning_rate": 1.638318557767839e-06, "loss": 20.9062, "step": 9072 }, { "epoch": 0.6025768745434017, "grad_norm": 104.42967224121094, "learning_rate": 1.6382357635573687e-06, "loss": 14.3438, "step": 9073 }, { "epoch": 0.6026432888357575, "grad_norm": 771.8170776367188, "learning_rate": 1.6381529619641547e-06, "loss": 13.5781, "step": 9074 }, { "epoch": 0.6027097031281131, "grad_norm": 228.0747528076172, "learning_rate": 1.6380701529891553e-06, "loss": 24.4844, "step": 9075 }, { "epoch": 0.6027761174204689, "grad_norm": 221.0457000732422, "learning_rate": 1.6379873366333285e-06, "loss": 21.4375, "step": 9076 }, { "epoch": 0.6028425317128246, "grad_norm": 307.8956604003906, "learning_rate": 1.6379045128976318e-06, "loss": 20.9844, "step": 9077 }, { "epoch": 0.6029089460051803, "grad_norm": 157.9813995361328, "learning_rate": 1.637821681783024e-06, "loss": 14.9375, "step": 9078 }, { "epoch": 0.6029753602975361, "grad_norm": 273.5368957519531, "learning_rate": 1.6377388432904622e-06, "loss": 22.5312, "step": 9079 }, { "epoch": 0.6030417745898917, "grad_norm": 187.3298797607422, "learning_rate": 1.6376559974209054e-06, "loss": 18.3906, "step": 9080 }, { "epoch": 0.6031081888822475, "grad_norm": 128.5904083251953, "learning_rate": 1.637573144175312e-06, "loss": 25.2031, "step": 9081 }, { "epoch": 0.6031746031746031, "grad_norm": 157.87725830078125, "learning_rate": 1.6374902835546397e-06, "loss": 17.9062, "step": 9082 }, { "epoch": 0.6032410174669589, "grad_norm": 363.06689453125, "learning_rate": 1.6374074155598478e-06, "loss": 22.6094, "step": 9083 }, { "epoch": 0.6033074317593146, "grad_norm": 119.86865234375, "learning_rate": 1.6373245401918943e-06, "loss": 13.3906, "step": 9084 }, { "epoch": 0.6033738460516703, "grad_norm": 236.1234588623047, "learning_rate": 1.637241657451738e-06, "loss": 21.0156, "step": 9085 }, { "epoch": 0.603440260344026, "grad_norm": 205.21852111816406, "learning_rate": 1.6371587673403372e-06, "loss": 16.0469, "step": 9086 }, { "epoch": 0.6035066746363817, "grad_norm": 216.0463409423828, "learning_rate": 1.637075869858652e-06, "loss": 14.1562, "step": 9087 }, { "epoch": 0.6035730889287375, "grad_norm": 170.53488159179688, "learning_rate": 1.63699296500764e-06, "loss": 18.6406, "step": 9088 }, { "epoch": 0.6036395032210932, "grad_norm": 243.26458740234375, "learning_rate": 1.636910052788261e-06, "loss": 21.9062, "step": 9089 }, { "epoch": 0.6037059175134489, "grad_norm": 397.0836181640625, "learning_rate": 1.6368271332014731e-06, "loss": 15.2969, "step": 9090 }, { "epoch": 0.6037723318058046, "grad_norm": 212.8559112548828, "learning_rate": 1.6367442062482368e-06, "loss": 12.6172, "step": 9091 }, { "epoch": 0.6038387460981604, "grad_norm": 417.5660095214844, "learning_rate": 1.6366612719295106e-06, "loss": 18.8594, "step": 9092 }, { "epoch": 0.603905160390516, "grad_norm": 448.03363037109375, "learning_rate": 1.636578330246254e-06, "loss": 15.375, "step": 9093 }, { "epoch": 0.6039715746828718, "grad_norm": 203.00331115722656, "learning_rate": 1.636495381199426e-06, "loss": 15.5469, "step": 9094 }, { "epoch": 0.6040379889752274, "grad_norm": 253.9456329345703, "learning_rate": 1.6364124247899866e-06, "loss": 20.2031, "step": 9095 }, { "epoch": 0.6041044032675832, "grad_norm": 404.8042907714844, "learning_rate": 1.6363294610188953e-06, "loss": 24.0625, "step": 9096 }, { "epoch": 0.6041708175599388, "grad_norm": 246.9109344482422, "learning_rate": 1.6362464898871119e-06, "loss": 24.5, "step": 9097 }, { "epoch": 0.6042372318522946, "grad_norm": 285.73309326171875, "learning_rate": 1.6361635113955957e-06, "loss": 34.8125, "step": 9098 }, { "epoch": 0.6043036461446504, "grad_norm": 185.72792053222656, "learning_rate": 1.636080525545307e-06, "loss": 21.5781, "step": 9099 }, { "epoch": 0.604370060437006, "grad_norm": 83.50599670410156, "learning_rate": 1.6359975323372053e-06, "loss": 15.5156, "step": 9100 }, { "epoch": 0.6044364747293618, "grad_norm": 144.9356231689453, "learning_rate": 1.6359145317722511e-06, "loss": 13.2031, "step": 9101 }, { "epoch": 0.6045028890217174, "grad_norm": 193.5585174560547, "learning_rate": 1.6358315238514042e-06, "loss": 21.125, "step": 9102 }, { "epoch": 0.6045693033140732, "grad_norm": 174.65530395507812, "learning_rate": 1.635748508575625e-06, "loss": 18.375, "step": 9103 }, { "epoch": 0.6046357176064289, "grad_norm": 143.30499267578125, "learning_rate": 1.6356654859458736e-06, "loss": 15.25, "step": 9104 }, { "epoch": 0.6047021318987846, "grad_norm": 203.7557830810547, "learning_rate": 1.6355824559631104e-06, "loss": 18.4531, "step": 9105 }, { "epoch": 0.6047685461911403, "grad_norm": 139.54823303222656, "learning_rate": 1.6354994186282959e-06, "loss": 15.625, "step": 9106 }, { "epoch": 0.604834960483496, "grad_norm": 444.0420227050781, "learning_rate": 1.6354163739423905e-06, "loss": 16.0625, "step": 9107 }, { "epoch": 0.6049013747758517, "grad_norm": 285.3788757324219, "learning_rate": 1.635333321906355e-06, "loss": 19.375, "step": 9108 }, { "epoch": 0.6049677890682075, "grad_norm": 188.69834899902344, "learning_rate": 1.6352502625211498e-06, "loss": 16.0469, "step": 9109 }, { "epoch": 0.6050342033605632, "grad_norm": 242.50282287597656, "learning_rate": 1.6351671957877357e-06, "loss": 19.7188, "step": 9110 }, { "epoch": 0.6051006176529189, "grad_norm": 206.44534301757812, "learning_rate": 1.635084121707074e-06, "loss": 18.25, "step": 9111 }, { "epoch": 0.6051670319452747, "grad_norm": 247.99415588378906, "learning_rate": 1.6350010402801258e-06, "loss": 20.375, "step": 9112 }, { "epoch": 0.6052334462376303, "grad_norm": 185.62649536132812, "learning_rate": 1.6349179515078513e-06, "loss": 18.1719, "step": 9113 }, { "epoch": 0.6052998605299861, "grad_norm": 111.55756378173828, "learning_rate": 1.6348348553912118e-06, "loss": 19.1875, "step": 9114 }, { "epoch": 0.6053662748223417, "grad_norm": 219.8092041015625, "learning_rate": 1.6347517519311691e-06, "loss": 18.3906, "step": 9115 }, { "epoch": 0.6054326891146975, "grad_norm": 318.16729736328125, "learning_rate": 1.634668641128684e-06, "loss": 19.6406, "step": 9116 }, { "epoch": 0.6054991034070532, "grad_norm": 104.32415771484375, "learning_rate": 1.6345855229847181e-06, "loss": 13.4375, "step": 9117 }, { "epoch": 0.6055655176994089, "grad_norm": 130.62144470214844, "learning_rate": 1.634502397500233e-06, "loss": 16.4531, "step": 9118 }, { "epoch": 0.6056319319917647, "grad_norm": 473.2150573730469, "learning_rate": 1.6344192646761895e-06, "loss": 28.1562, "step": 9119 }, { "epoch": 0.6056983462841203, "grad_norm": 281.7564697265625, "learning_rate": 1.6343361245135503e-06, "loss": 16.3281, "step": 9120 }, { "epoch": 0.6057647605764761, "grad_norm": 142.81398010253906, "learning_rate": 1.6342529770132766e-06, "loss": 16.4062, "step": 9121 }, { "epoch": 0.6058311748688318, "grad_norm": 345.6133728027344, "learning_rate": 1.6341698221763298e-06, "loss": 19.9844, "step": 9122 }, { "epoch": 0.6058975891611875, "grad_norm": 233.36770629882812, "learning_rate": 1.6340866600036724e-06, "loss": 21.4219, "step": 9123 }, { "epoch": 0.6059640034535432, "grad_norm": 216.14906311035156, "learning_rate": 1.6340034904962661e-06, "loss": 19.625, "step": 9124 }, { "epoch": 0.6060304177458989, "grad_norm": 181.44061279296875, "learning_rate": 1.6339203136550733e-06, "loss": 15.8438, "step": 9125 }, { "epoch": 0.6060968320382546, "grad_norm": 210.2919464111328, "learning_rate": 1.6338371294810552e-06, "loss": 16.625, "step": 9126 }, { "epoch": 0.6061632463306104, "grad_norm": 155.0174102783203, "learning_rate": 1.6337539379751753e-06, "loss": 15.8125, "step": 9127 }, { "epoch": 0.606229660622966, "grad_norm": 175.39175415039062, "learning_rate": 1.6336707391383948e-06, "loss": 16.7188, "step": 9128 }, { "epoch": 0.6062960749153218, "grad_norm": 182.11009216308594, "learning_rate": 1.6335875329716768e-06, "loss": 15.6406, "step": 9129 }, { "epoch": 0.6063624892076775, "grad_norm": 254.87774658203125, "learning_rate": 1.6335043194759836e-06, "loss": 13.9688, "step": 9130 }, { "epoch": 0.6064289035000332, "grad_norm": 193.1158447265625, "learning_rate": 1.633421098652278e-06, "loss": 20.0, "step": 9131 }, { "epoch": 0.606495317792389, "grad_norm": 957.8463134765625, "learning_rate": 1.633337870501522e-06, "loss": 21.875, "step": 9132 }, { "epoch": 0.6065617320847446, "grad_norm": 197.00120544433594, "learning_rate": 1.633254635024679e-06, "loss": 14.7344, "step": 9133 }, { "epoch": 0.6066281463771004, "grad_norm": 105.86853790283203, "learning_rate": 1.633171392222711e-06, "loss": 13.4219, "step": 9134 }, { "epoch": 0.606694560669456, "grad_norm": 315.3501892089844, "learning_rate": 1.6330881420965821e-06, "loss": 20.2188, "step": 9135 }, { "epoch": 0.6067609749618118, "grad_norm": 249.5040740966797, "learning_rate": 1.6330048846472543e-06, "loss": 26.0, "step": 9136 }, { "epoch": 0.6068273892541675, "grad_norm": 247.1549530029297, "learning_rate": 1.6329216198756913e-06, "loss": 15.75, "step": 9137 }, { "epoch": 0.6068938035465232, "grad_norm": 339.1764221191406, "learning_rate": 1.6328383477828555e-06, "loss": 15.3281, "step": 9138 }, { "epoch": 0.6069602178388789, "grad_norm": 167.63079833984375, "learning_rate": 1.6327550683697107e-06, "loss": 16.0156, "step": 9139 }, { "epoch": 0.6070266321312346, "grad_norm": 292.898681640625, "learning_rate": 1.6326717816372204e-06, "loss": 17.3906, "step": 9140 }, { "epoch": 0.6070930464235904, "grad_norm": 212.1637420654297, "learning_rate": 1.6325884875863477e-06, "loss": 17.5625, "step": 9141 }, { "epoch": 0.6071594607159461, "grad_norm": 197.22885131835938, "learning_rate": 1.6325051862180561e-06, "loss": 19.6562, "step": 9142 }, { "epoch": 0.6072258750083018, "grad_norm": 208.6856231689453, "learning_rate": 1.632421877533309e-06, "loss": 26.9688, "step": 9143 }, { "epoch": 0.6072922893006575, "grad_norm": 315.06097412109375, "learning_rate": 1.6323385615330705e-06, "loss": 15.2656, "step": 9144 }, { "epoch": 0.6073587035930132, "grad_norm": 253.2114715576172, "learning_rate": 1.6322552382183044e-06, "loss": 18.3281, "step": 9145 }, { "epoch": 0.6074251178853689, "grad_norm": 163.279052734375, "learning_rate": 1.6321719075899738e-06, "loss": 16.1094, "step": 9146 }, { "epoch": 0.6074915321777247, "grad_norm": 600.7286987304688, "learning_rate": 1.6320885696490434e-06, "loss": 20.8594, "step": 9147 }, { "epoch": 0.6075579464700803, "grad_norm": 119.37345886230469, "learning_rate": 1.6320052243964767e-06, "loss": 13.1406, "step": 9148 }, { "epoch": 0.6076243607624361, "grad_norm": 229.65757751464844, "learning_rate": 1.6319218718332382e-06, "loss": 15.4688, "step": 9149 }, { "epoch": 0.6076907750547917, "grad_norm": 97.72085571289062, "learning_rate": 1.631838511960292e-06, "loss": 15.9688, "step": 9150 }, { "epoch": 0.6077571893471475, "grad_norm": 202.48580932617188, "learning_rate": 1.631755144778602e-06, "loss": 15.6875, "step": 9151 }, { "epoch": 0.6078236036395033, "grad_norm": 194.12257385253906, "learning_rate": 1.6316717702891326e-06, "loss": 14.2734, "step": 9152 }, { "epoch": 0.6078900179318589, "grad_norm": 159.39202880859375, "learning_rate": 1.6315883884928487e-06, "loss": 18.0859, "step": 9153 }, { "epoch": 0.6079564322242147, "grad_norm": 264.35009765625, "learning_rate": 1.6315049993907145e-06, "loss": 22.5938, "step": 9154 }, { "epoch": 0.6080228465165703, "grad_norm": 368.4433288574219, "learning_rate": 1.6314216029836945e-06, "loss": 19.2344, "step": 9155 }, { "epoch": 0.6080892608089261, "grad_norm": 175.07484436035156, "learning_rate": 1.6313381992727536e-06, "loss": 15.2656, "step": 9156 }, { "epoch": 0.6081556751012818, "grad_norm": 102.12113189697266, "learning_rate": 1.6312547882588565e-06, "loss": 17.6875, "step": 9157 }, { "epoch": 0.6082220893936375, "grad_norm": 249.0463409423828, "learning_rate": 1.6311713699429678e-06, "loss": 18.5938, "step": 9158 }, { "epoch": 0.6082885036859932, "grad_norm": 224.97640991210938, "learning_rate": 1.6310879443260529e-06, "loss": 19.25, "step": 9159 }, { "epoch": 0.608354917978349, "grad_norm": 174.7483367919922, "learning_rate": 1.6310045114090763e-06, "loss": 22.0938, "step": 9160 }, { "epoch": 0.6084213322707046, "grad_norm": 660.0400390625, "learning_rate": 1.6309210711930035e-06, "loss": 20.0625, "step": 9161 }, { "epoch": 0.6084877465630604, "grad_norm": 149.1517333984375, "learning_rate": 1.6308376236787998e-06, "loss": 16.1719, "step": 9162 }, { "epoch": 0.6085541608554161, "grad_norm": 176.98411560058594, "learning_rate": 1.6307541688674298e-06, "loss": 18.375, "step": 9163 }, { "epoch": 0.6086205751477718, "grad_norm": 157.70703125, "learning_rate": 1.6306707067598592e-06, "loss": 20.0781, "step": 9164 }, { "epoch": 0.6086869894401276, "grad_norm": 186.6322479248047, "learning_rate": 1.630587237357054e-06, "loss": 19.0312, "step": 9165 }, { "epoch": 0.6087534037324832, "grad_norm": 439.3670654296875, "learning_rate": 1.630503760659979e-06, "loss": 19.0625, "step": 9166 }, { "epoch": 0.608819818024839, "grad_norm": 143.42449951171875, "learning_rate": 1.6304202766696e-06, "loss": 19.8438, "step": 9167 }, { "epoch": 0.6088862323171946, "grad_norm": 352.3592224121094, "learning_rate": 1.6303367853868832e-06, "loss": 20.0938, "step": 9168 }, { "epoch": 0.6089526466095504, "grad_norm": 202.00030517578125, "learning_rate": 1.6302532868127936e-06, "loss": 15.8125, "step": 9169 }, { "epoch": 0.609019060901906, "grad_norm": 119.26911163330078, "learning_rate": 1.6301697809482976e-06, "loss": 13.0781, "step": 9170 }, { "epoch": 0.6090854751942618, "grad_norm": 264.66693115234375, "learning_rate": 1.6300862677943605e-06, "loss": 15.8438, "step": 9171 }, { "epoch": 0.6091518894866175, "grad_norm": 421.6661071777344, "learning_rate": 1.6300027473519491e-06, "loss": 23.8906, "step": 9172 }, { "epoch": 0.6092183037789732, "grad_norm": 500.10931396484375, "learning_rate": 1.629919219622029e-06, "loss": 36.7812, "step": 9173 }, { "epoch": 0.609284718071329, "grad_norm": 169.90863037109375, "learning_rate": 1.629835684605567e-06, "loss": 18.5312, "step": 9174 }, { "epoch": 0.6093511323636847, "grad_norm": 104.6340560913086, "learning_rate": 1.6297521423035288e-06, "loss": 16.0312, "step": 9175 }, { "epoch": 0.6094175466560404, "grad_norm": 188.71827697753906, "learning_rate": 1.6296685927168807e-06, "loss": 16.7578, "step": 9176 }, { "epoch": 0.6094839609483961, "grad_norm": 168.0282440185547, "learning_rate": 1.6295850358465896e-06, "loss": 15.4531, "step": 9177 }, { "epoch": 0.6095503752407518, "grad_norm": 833.0057983398438, "learning_rate": 1.629501471693622e-06, "loss": 20.9688, "step": 9178 }, { "epoch": 0.6096167895331075, "grad_norm": 580.6954956054688, "learning_rate": 1.6294179002589443e-06, "loss": 25.1719, "step": 9179 }, { "epoch": 0.6096832038254633, "grad_norm": 195.28758239746094, "learning_rate": 1.629334321543523e-06, "loss": 18.0469, "step": 9180 }, { "epoch": 0.6097496181178189, "grad_norm": 210.6452178955078, "learning_rate": 1.6292507355483256e-06, "loss": 22.4375, "step": 9181 }, { "epoch": 0.6098160324101747, "grad_norm": 331.8852844238281, "learning_rate": 1.6291671422743182e-06, "loss": 18.875, "step": 9182 }, { "epoch": 0.6098824467025303, "grad_norm": 218.5575714111328, "learning_rate": 1.6290835417224682e-06, "loss": 16.8125, "step": 9183 }, { "epoch": 0.6099488609948861, "grad_norm": 151.87095642089844, "learning_rate": 1.6289999338937426e-06, "loss": 22.7812, "step": 9184 }, { "epoch": 0.6100152752872419, "grad_norm": 291.4331359863281, "learning_rate": 1.6289163187891083e-06, "loss": 19.9375, "step": 9185 }, { "epoch": 0.6100816895795975, "grad_norm": 332.9796447753906, "learning_rate": 1.6288326964095326e-06, "loss": 20.1562, "step": 9186 }, { "epoch": 0.6101481038719533, "grad_norm": 393.5271301269531, "learning_rate": 1.6287490667559833e-06, "loss": 18.3438, "step": 9187 }, { "epoch": 0.6102145181643089, "grad_norm": 528.2221069335938, "learning_rate": 1.6286654298294272e-06, "loss": 22.4688, "step": 9188 }, { "epoch": 0.6102809324566647, "grad_norm": 174.49853515625, "learning_rate": 1.6285817856308314e-06, "loss": 17.9844, "step": 9189 }, { "epoch": 0.6103473467490204, "grad_norm": 182.28492736816406, "learning_rate": 1.6284981341611647e-06, "loss": 18.8438, "step": 9190 }, { "epoch": 0.6104137610413761, "grad_norm": 131.16497802734375, "learning_rate": 1.6284144754213936e-06, "loss": 15.4062, "step": 9191 }, { "epoch": 0.6104801753337318, "grad_norm": 416.32672119140625, "learning_rate": 1.6283308094124864e-06, "loss": 21.0, "step": 9192 }, { "epoch": 0.6105465896260875, "grad_norm": 226.41329956054688, "learning_rate": 1.6282471361354105e-06, "loss": 20.6719, "step": 9193 }, { "epoch": 0.6106130039184432, "grad_norm": 365.5603942871094, "learning_rate": 1.6281634555911344e-06, "loss": 21.7344, "step": 9194 }, { "epoch": 0.610679418210799, "grad_norm": 182.60113525390625, "learning_rate": 1.628079767780625e-06, "loss": 16.6719, "step": 9195 }, { "epoch": 0.6107458325031547, "grad_norm": 185.59092712402344, "learning_rate": 1.6279960727048513e-06, "loss": 20.8281, "step": 9196 }, { "epoch": 0.6108122467955104, "grad_norm": 233.1487579345703, "learning_rate": 1.6279123703647816e-06, "loss": 12.5391, "step": 9197 }, { "epoch": 0.6108786610878661, "grad_norm": 326.9505920410156, "learning_rate": 1.6278286607613834e-06, "loss": 14.9688, "step": 9198 }, { "epoch": 0.6109450753802218, "grad_norm": 244.31488037109375, "learning_rate": 1.627744943895625e-06, "loss": 24.2969, "step": 9199 }, { "epoch": 0.6110114896725776, "grad_norm": 148.12509155273438, "learning_rate": 1.6276612197684752e-06, "loss": 17.5938, "step": 9200 }, { "epoch": 0.6110779039649332, "grad_norm": 269.8463134765625, "learning_rate": 1.6275774883809024e-06, "loss": 21.8281, "step": 9201 }, { "epoch": 0.611144318257289, "grad_norm": 295.5538330078125, "learning_rate": 1.6274937497338752e-06, "loss": 16.7969, "step": 9202 }, { "epoch": 0.6112107325496446, "grad_norm": 228.8782196044922, "learning_rate": 1.6274100038283618e-06, "loss": 24.7812, "step": 9203 }, { "epoch": 0.6112771468420004, "grad_norm": 291.8970947265625, "learning_rate": 1.6273262506653317e-06, "loss": 19.0312, "step": 9204 }, { "epoch": 0.6113435611343561, "grad_norm": 310.88177490234375, "learning_rate": 1.627242490245753e-06, "loss": 14.375, "step": 9205 }, { "epoch": 0.6114099754267118, "grad_norm": 109.07907104492188, "learning_rate": 1.6271587225705949e-06, "loss": 20.4688, "step": 9206 }, { "epoch": 0.6114763897190676, "grad_norm": 184.3028106689453, "learning_rate": 1.6270749476408263e-06, "loss": 17.4062, "step": 9207 }, { "epoch": 0.6115428040114232, "grad_norm": 299.0027160644531, "learning_rate": 1.6269911654574165e-06, "loss": 19.5625, "step": 9208 }, { "epoch": 0.611609218303779, "grad_norm": 199.98997497558594, "learning_rate": 1.6269073760213343e-06, "loss": 16.4844, "step": 9209 }, { "epoch": 0.6116756325961347, "grad_norm": 1032.8642578125, "learning_rate": 1.626823579333549e-06, "loss": 16.9531, "step": 9210 }, { "epoch": 0.6117420468884904, "grad_norm": 408.75390625, "learning_rate": 1.6267397753950301e-06, "loss": 11.9375, "step": 9211 }, { "epoch": 0.6118084611808461, "grad_norm": 307.41180419921875, "learning_rate": 1.6266559642067467e-06, "loss": 20.3125, "step": 9212 }, { "epoch": 0.6118748754732019, "grad_norm": 128.34397888183594, "learning_rate": 1.6265721457696689e-06, "loss": 19.1562, "step": 9213 }, { "epoch": 0.6119412897655575, "grad_norm": 103.39549255371094, "learning_rate": 1.6264883200847655e-06, "loss": 14.7031, "step": 9214 }, { "epoch": 0.6120077040579133, "grad_norm": 356.8778076171875, "learning_rate": 1.6264044871530062e-06, "loss": 19.3281, "step": 9215 }, { "epoch": 0.6120741183502689, "grad_norm": 233.2626495361328, "learning_rate": 1.6263206469753613e-06, "loss": 17.9688, "step": 9216 }, { "epoch": 0.6121405326426247, "grad_norm": 158.49537658691406, "learning_rate": 1.6262367995528003e-06, "loss": 17.1719, "step": 9217 }, { "epoch": 0.6122069469349805, "grad_norm": 135.99044799804688, "learning_rate": 1.6261529448862931e-06, "loss": 17.6562, "step": 9218 }, { "epoch": 0.6122733612273361, "grad_norm": 208.96168518066406, "learning_rate": 1.62606908297681e-06, "loss": 13.3438, "step": 9219 }, { "epoch": 0.6123397755196919, "grad_norm": 404.6461486816406, "learning_rate": 1.6259852138253205e-06, "loss": 15.9375, "step": 9220 }, { "epoch": 0.6124061898120475, "grad_norm": 354.15679931640625, "learning_rate": 1.6259013374327947e-06, "loss": 17.6719, "step": 9221 }, { "epoch": 0.6124726041044033, "grad_norm": 307.1145935058594, "learning_rate": 1.6258174538002037e-06, "loss": 30.375, "step": 9222 }, { "epoch": 0.612539018396759, "grad_norm": 281.240966796875, "learning_rate": 1.6257335629285166e-06, "loss": 15.0312, "step": 9223 }, { "epoch": 0.6126054326891147, "grad_norm": 441.8105773925781, "learning_rate": 1.625649664818705e-06, "loss": 12.4844, "step": 9224 }, { "epoch": 0.6126718469814704, "grad_norm": 114.7209701538086, "learning_rate": 1.6255657594717386e-06, "loss": 12.9766, "step": 9225 }, { "epoch": 0.6127382612738261, "grad_norm": 345.6671447753906, "learning_rate": 1.6254818468885885e-06, "loss": 23.6562, "step": 9226 }, { "epoch": 0.6128046755661818, "grad_norm": 235.54539489746094, "learning_rate": 1.6253979270702247e-06, "loss": 19.5156, "step": 9227 }, { "epoch": 0.6128710898585376, "grad_norm": 318.0631103515625, "learning_rate": 1.6253140000176186e-06, "loss": 20.9688, "step": 9228 }, { "epoch": 0.6129375041508933, "grad_norm": 242.70816040039062, "learning_rate": 1.6252300657317406e-06, "loss": 14.1094, "step": 9229 }, { "epoch": 0.613003918443249, "grad_norm": 160.47137451171875, "learning_rate": 1.6251461242135615e-06, "loss": 19.4688, "step": 9230 }, { "epoch": 0.6130703327356047, "grad_norm": 198.5006561279297, "learning_rate": 1.625062175464053e-06, "loss": 15.7656, "step": 9231 }, { "epoch": 0.6131367470279604, "grad_norm": 434.9969177246094, "learning_rate": 1.6249782194841854e-06, "loss": 13.7031, "step": 9232 }, { "epoch": 0.6132031613203162, "grad_norm": 261.91162109375, "learning_rate": 1.6248942562749302e-06, "loss": 16.8594, "step": 9233 }, { "epoch": 0.6132695756126718, "grad_norm": 360.18621826171875, "learning_rate": 1.6248102858372582e-06, "loss": 17.4531, "step": 9234 }, { "epoch": 0.6133359899050276, "grad_norm": 206.8722686767578, "learning_rate": 1.6247263081721419e-06, "loss": 22.5, "step": 9235 }, { "epoch": 0.6134024041973832, "grad_norm": 191.1163330078125, "learning_rate": 1.6246423232805513e-06, "loss": 16.7969, "step": 9236 }, { "epoch": 0.613468818489739, "grad_norm": 119.47476959228516, "learning_rate": 1.6245583311634583e-06, "loss": 12.7188, "step": 9237 }, { "epoch": 0.6135352327820947, "grad_norm": 498.3365783691406, "learning_rate": 1.6244743318218352e-06, "loss": 21.4375, "step": 9238 }, { "epoch": 0.6136016470744504, "grad_norm": 136.90863037109375, "learning_rate": 1.624390325256653e-06, "loss": 18.4062, "step": 9239 }, { "epoch": 0.6136680613668062, "grad_norm": 196.43568420410156, "learning_rate": 1.6243063114688833e-06, "loss": 20.2969, "step": 9240 }, { "epoch": 0.6137344756591618, "grad_norm": 310.8731384277344, "learning_rate": 1.6242222904594987e-06, "loss": 17.7812, "step": 9241 }, { "epoch": 0.6138008899515176, "grad_norm": 393.85980224609375, "learning_rate": 1.6241382622294698e-06, "loss": 22.4219, "step": 9242 }, { "epoch": 0.6138673042438733, "grad_norm": 147.64743041992188, "learning_rate": 1.62405422677977e-06, "loss": 15.4844, "step": 9243 }, { "epoch": 0.613933718536229, "grad_norm": 410.74444580078125, "learning_rate": 1.623970184111371e-06, "loss": 17.875, "step": 9244 }, { "epoch": 0.6140001328285847, "grad_norm": 193.61143493652344, "learning_rate": 1.6238861342252442e-06, "loss": 18.1094, "step": 9245 }, { "epoch": 0.6140665471209404, "grad_norm": 271.9428405761719, "learning_rate": 1.6238020771223622e-06, "loss": 16.7656, "step": 9246 }, { "epoch": 0.6141329614132961, "grad_norm": 474.9273986816406, "learning_rate": 1.6237180128036978e-06, "loss": 14.6406, "step": 9247 }, { "epoch": 0.6141993757056519, "grad_norm": 134.3749542236328, "learning_rate": 1.6236339412702232e-06, "loss": 16.8281, "step": 9248 }, { "epoch": 0.6142657899980075, "grad_norm": 314.4081726074219, "learning_rate": 1.6235498625229105e-06, "loss": 21.5781, "step": 9249 }, { "epoch": 0.6143322042903633, "grad_norm": 500.71917724609375, "learning_rate": 1.6234657765627326e-06, "loss": 24.7031, "step": 9250 }, { "epoch": 0.614398618582719, "grad_norm": 234.75262451171875, "learning_rate": 1.6233816833906623e-06, "loss": 17.2031, "step": 9251 }, { "epoch": 0.6144650328750747, "grad_norm": 124.08118438720703, "learning_rate": 1.623297583007672e-06, "loss": 14.5, "step": 9252 }, { "epoch": 0.6145314471674305, "grad_norm": 270.103271484375, "learning_rate": 1.6232134754147347e-06, "loss": 23.4219, "step": 9253 }, { "epoch": 0.6145978614597861, "grad_norm": 178.133544921875, "learning_rate": 1.6231293606128232e-06, "loss": 20.3281, "step": 9254 }, { "epoch": 0.6146642757521419, "grad_norm": 313.2117614746094, "learning_rate": 1.6230452386029106e-06, "loss": 21.4688, "step": 9255 }, { "epoch": 0.6147306900444975, "grad_norm": 153.6293487548828, "learning_rate": 1.62296110938597e-06, "loss": 19.2344, "step": 9256 }, { "epoch": 0.6147971043368533, "grad_norm": 166.99781799316406, "learning_rate": 1.6228769729629745e-06, "loss": 15.4688, "step": 9257 }, { "epoch": 0.614863518629209, "grad_norm": 169.0693359375, "learning_rate": 1.6227928293348976e-06, "loss": 20.3438, "step": 9258 }, { "epoch": 0.6149299329215647, "grad_norm": 170.42547607421875, "learning_rate": 1.622708678502712e-06, "loss": 20.0938, "step": 9259 }, { "epoch": 0.6149963472139204, "grad_norm": 347.8380432128906, "learning_rate": 1.6226245204673917e-06, "loss": 20.5, "step": 9260 }, { "epoch": 0.6150627615062761, "grad_norm": 295.5372314453125, "learning_rate": 1.6225403552299098e-06, "loss": 19.8125, "step": 9261 }, { "epoch": 0.6151291757986319, "grad_norm": 217.3892364501953, "learning_rate": 1.62245618279124e-06, "loss": 24.5938, "step": 9262 }, { "epoch": 0.6151955900909876, "grad_norm": 242.75738525390625, "learning_rate": 1.6223720031523561e-06, "loss": 17.75, "step": 9263 }, { "epoch": 0.6152620043833433, "grad_norm": 209.69815063476562, "learning_rate": 1.6222878163142318e-06, "loss": 21.2344, "step": 9264 }, { "epoch": 0.615328418675699, "grad_norm": 289.7508239746094, "learning_rate": 1.6222036222778411e-06, "loss": 21.3906, "step": 9265 }, { "epoch": 0.6153948329680548, "grad_norm": 237.43502807617188, "learning_rate": 1.6221194210441572e-06, "loss": 14.7031, "step": 9266 }, { "epoch": 0.6154612472604104, "grad_norm": 276.6844177246094, "learning_rate": 1.622035212614155e-06, "loss": 16.9219, "step": 9267 }, { "epoch": 0.6155276615527662, "grad_norm": 117.78868103027344, "learning_rate": 1.6219509969888078e-06, "loss": 16.4062, "step": 9268 }, { "epoch": 0.6155940758451218, "grad_norm": 184.71583557128906, "learning_rate": 1.6218667741690903e-06, "loss": 21.6406, "step": 9269 }, { "epoch": 0.6156604901374776, "grad_norm": 191.19261169433594, "learning_rate": 1.6217825441559765e-06, "loss": 19.0, "step": 9270 }, { "epoch": 0.6157269044298334, "grad_norm": 183.3252410888672, "learning_rate": 1.621698306950441e-06, "loss": 18.5625, "step": 9271 }, { "epoch": 0.615793318722189, "grad_norm": 528.2846069335938, "learning_rate": 1.6216140625534575e-06, "loss": 26.1875, "step": 9272 }, { "epoch": 0.6158597330145448, "grad_norm": 157.28968811035156, "learning_rate": 1.6215298109660013e-06, "loss": 16.0312, "step": 9273 }, { "epoch": 0.6159261473069004, "grad_norm": 81.4987564086914, "learning_rate": 1.6214455521890464e-06, "loss": 14.2344, "step": 9274 }, { "epoch": 0.6159925615992562, "grad_norm": 333.4059143066406, "learning_rate": 1.6213612862235678e-06, "loss": 19.9531, "step": 9275 }, { "epoch": 0.6160589758916118, "grad_norm": 146.1719207763672, "learning_rate": 1.6212770130705403e-06, "loss": 15.6875, "step": 9276 }, { "epoch": 0.6161253901839676, "grad_norm": 330.71063232421875, "learning_rate": 1.6211927327309383e-06, "loss": 24.4688, "step": 9277 }, { "epoch": 0.6161918044763233, "grad_norm": 213.7183380126953, "learning_rate": 1.6211084452057372e-06, "loss": 17.375, "step": 9278 }, { "epoch": 0.616258218768679, "grad_norm": 320.2441101074219, "learning_rate": 1.6210241504959117e-06, "loss": 20.125, "step": 9279 }, { "epoch": 0.6163246330610347, "grad_norm": 365.2159118652344, "learning_rate": 1.6209398486024369e-06, "loss": 21.2812, "step": 9280 }, { "epoch": 0.6163910473533905, "grad_norm": 307.49505615234375, "learning_rate": 1.620855539526288e-06, "loss": 19.25, "step": 9281 }, { "epoch": 0.6164574616457462, "grad_norm": 155.78016662597656, "learning_rate": 1.6207712232684401e-06, "loss": 11.9688, "step": 9282 }, { "epoch": 0.6165238759381019, "grad_norm": 208.47647094726562, "learning_rate": 1.620686899829869e-06, "loss": 22.625, "step": 9283 }, { "epoch": 0.6165902902304576, "grad_norm": 388.2359619140625, "learning_rate": 1.6206025692115493e-06, "loss": 19.5625, "step": 9284 }, { "epoch": 0.6166567045228133, "grad_norm": 186.08245849609375, "learning_rate": 1.6205182314144572e-06, "loss": 16.6719, "step": 9285 }, { "epoch": 0.6167231188151691, "grad_norm": 252.97198486328125, "learning_rate": 1.620433886439568e-06, "loss": 26.3438, "step": 9286 }, { "epoch": 0.6167895331075247, "grad_norm": 525.7938842773438, "learning_rate": 1.6203495342878572e-06, "loss": 17.3906, "step": 9287 }, { "epoch": 0.6168559473998805, "grad_norm": 1319.177001953125, "learning_rate": 1.620265174960301e-06, "loss": 13.7656, "step": 9288 }, { "epoch": 0.6169223616922361, "grad_norm": 172.8104248046875, "learning_rate": 1.6201808084578747e-06, "loss": 22.125, "step": 9289 }, { "epoch": 0.6169887759845919, "grad_norm": 180.99627685546875, "learning_rate": 1.6200964347815545e-06, "loss": 20.0781, "step": 9290 }, { "epoch": 0.6170551902769476, "grad_norm": 121.92466735839844, "learning_rate": 1.6200120539323164e-06, "loss": 15.8906, "step": 9291 }, { "epoch": 0.6171216045693033, "grad_norm": 423.7875671386719, "learning_rate": 1.619927665911136e-06, "loss": 15.8125, "step": 9292 }, { "epoch": 0.6171880188616591, "grad_norm": 186.83274841308594, "learning_rate": 1.6198432707189901e-06, "loss": 18.2188, "step": 9293 }, { "epoch": 0.6172544331540147, "grad_norm": 518.5579833984375, "learning_rate": 1.6197588683568548e-06, "loss": 14.6484, "step": 9294 }, { "epoch": 0.6173208474463705, "grad_norm": 170.4759979248047, "learning_rate": 1.6196744588257062e-06, "loss": 22.5625, "step": 9295 }, { "epoch": 0.6173872617387262, "grad_norm": 217.64382934570312, "learning_rate": 1.6195900421265205e-06, "loss": 18.4844, "step": 9296 }, { "epoch": 0.6174536760310819, "grad_norm": 291.5281066894531, "learning_rate": 1.619505618260275e-06, "loss": 14.0469, "step": 9297 }, { "epoch": 0.6175200903234376, "grad_norm": 212.65609741210938, "learning_rate": 1.6194211872279452e-06, "loss": 17.6562, "step": 9298 }, { "epoch": 0.6175865046157933, "grad_norm": 167.38784790039062, "learning_rate": 1.6193367490305088e-06, "loss": 20.1562, "step": 9299 }, { "epoch": 0.617652918908149, "grad_norm": 219.94667053222656, "learning_rate": 1.6192523036689417e-06, "loss": 21.4375, "step": 9300 }, { "epoch": 0.6177193332005048, "grad_norm": 332.1939392089844, "learning_rate": 1.6191678511442208e-06, "loss": 18.2656, "step": 9301 }, { "epoch": 0.6177857474928604, "grad_norm": 880.5400390625, "learning_rate": 1.6190833914573236e-06, "loss": 15.3594, "step": 9302 }, { "epoch": 0.6178521617852162, "grad_norm": 265.00775146484375, "learning_rate": 1.6189989246092267e-06, "loss": 17.7656, "step": 9303 }, { "epoch": 0.617918576077572, "grad_norm": 178.06993103027344, "learning_rate": 1.6189144506009072e-06, "loss": 15.9062, "step": 9304 }, { "epoch": 0.6179849903699276, "grad_norm": 337.1888732910156, "learning_rate": 1.6188299694333419e-06, "loss": 15.4531, "step": 9305 }, { "epoch": 0.6180514046622834, "grad_norm": 229.0672149658203, "learning_rate": 1.6187454811075086e-06, "loss": 14.0625, "step": 9306 }, { "epoch": 0.618117818954639, "grad_norm": 231.59095764160156, "learning_rate": 1.6186609856243845e-06, "loss": 17.5, "step": 9307 }, { "epoch": 0.6181842332469948, "grad_norm": 208.45175170898438, "learning_rate": 1.6185764829849467e-06, "loss": 19.5, "step": 9308 }, { "epoch": 0.6182506475393504, "grad_norm": 281.484130859375, "learning_rate": 1.6184919731901727e-06, "loss": 20.3594, "step": 9309 }, { "epoch": 0.6183170618317062, "grad_norm": 152.7681121826172, "learning_rate": 1.6184074562410404e-06, "loss": 25.875, "step": 9310 }, { "epoch": 0.6183834761240619, "grad_norm": 119.7308349609375, "learning_rate": 1.618322932138527e-06, "loss": 13.6562, "step": 9311 }, { "epoch": 0.6184498904164176, "grad_norm": 296.8908996582031, "learning_rate": 1.6182384008836106e-06, "loss": 14.9375, "step": 9312 }, { "epoch": 0.6185163047087733, "grad_norm": 199.04710388183594, "learning_rate": 1.6181538624772691e-06, "loss": 16.75, "step": 9313 }, { "epoch": 0.618582719001129, "grad_norm": 208.2545928955078, "learning_rate": 1.6180693169204797e-06, "loss": 17.5312, "step": 9314 }, { "epoch": 0.6186491332934848, "grad_norm": 144.12686157226562, "learning_rate": 1.617984764214221e-06, "loss": 16.0156, "step": 9315 }, { "epoch": 0.6187155475858405, "grad_norm": 478.615966796875, "learning_rate": 1.6179002043594711e-06, "loss": 23.25, "step": 9316 }, { "epoch": 0.6187819618781962, "grad_norm": 387.42041015625, "learning_rate": 1.617815637357208e-06, "loss": 23.9062, "step": 9317 }, { "epoch": 0.6188483761705519, "grad_norm": 305.87591552734375, "learning_rate": 1.6177310632084096e-06, "loss": 22.5938, "step": 9318 }, { "epoch": 0.6189147904629076, "grad_norm": 332.59002685546875, "learning_rate": 1.6176464819140546e-06, "loss": 18.4844, "step": 9319 }, { "epoch": 0.6189812047552633, "grad_norm": 158.85597229003906, "learning_rate": 1.6175618934751212e-06, "loss": 13.75, "step": 9320 }, { "epoch": 0.6190476190476191, "grad_norm": 524.3280029296875, "learning_rate": 1.617477297892588e-06, "loss": 20.2188, "step": 9321 }, { "epoch": 0.6191140333399747, "grad_norm": 429.1229553222656, "learning_rate": 1.6173926951674335e-06, "loss": 18.875, "step": 9322 }, { "epoch": 0.6191804476323305, "grad_norm": 436.8974304199219, "learning_rate": 1.6173080853006363e-06, "loss": 12.8203, "step": 9323 }, { "epoch": 0.6192468619246861, "grad_norm": 253.17100524902344, "learning_rate": 1.617223468293175e-06, "loss": 18.6562, "step": 9324 }, { "epoch": 0.6193132762170419, "grad_norm": 237.45620727539062, "learning_rate": 1.6171388441460285e-06, "loss": 22.2812, "step": 9325 }, { "epoch": 0.6193796905093977, "grad_norm": 428.5759582519531, "learning_rate": 1.6170542128601758e-06, "loss": 19.4844, "step": 9326 }, { "epoch": 0.6194461048017533, "grad_norm": 287.3639221191406, "learning_rate": 1.6169695744365958e-06, "loss": 18.3281, "step": 9327 }, { "epoch": 0.6195125190941091, "grad_norm": 348.87506103515625, "learning_rate": 1.6168849288762675e-06, "loss": 16.375, "step": 9328 }, { "epoch": 0.6195789333864647, "grad_norm": 333.9768981933594, "learning_rate": 1.6168002761801703e-06, "loss": 19.1875, "step": 9329 }, { "epoch": 0.6196453476788205, "grad_norm": 323.3826599121094, "learning_rate": 1.6167156163492827e-06, "loss": 14.8438, "step": 9330 }, { "epoch": 0.6197117619711762, "grad_norm": 158.54803466796875, "learning_rate": 1.616630949384585e-06, "loss": 19.6719, "step": 9331 }, { "epoch": 0.6197781762635319, "grad_norm": 265.2891845703125, "learning_rate": 1.6165462752870554e-06, "loss": 25.2188, "step": 9332 }, { "epoch": 0.6198445905558876, "grad_norm": 152.064697265625, "learning_rate": 1.6164615940576744e-06, "loss": 12.8906, "step": 9333 }, { "epoch": 0.6199110048482434, "grad_norm": 121.30574798583984, "learning_rate": 1.6163769056974212e-06, "loss": 19.3281, "step": 9334 }, { "epoch": 0.619977419140599, "grad_norm": 303.2149353027344, "learning_rate": 1.6162922102072753e-06, "loss": 21.4062, "step": 9335 }, { "epoch": 0.6200438334329548, "grad_norm": 406.66107177734375, "learning_rate": 1.6162075075882164e-06, "loss": 17.8125, "step": 9336 }, { "epoch": 0.6201102477253105, "grad_norm": 244.06365966796875, "learning_rate": 1.6161227978412247e-06, "loss": 20.2344, "step": 9337 }, { "epoch": 0.6201766620176662, "grad_norm": 307.8075866699219, "learning_rate": 1.6160380809672796e-06, "loss": 27.0234, "step": 9338 }, { "epoch": 0.620243076310022, "grad_norm": 490.5261535644531, "learning_rate": 1.6159533569673608e-06, "loss": 20.6094, "step": 9339 }, { "epoch": 0.6203094906023776, "grad_norm": 228.61314392089844, "learning_rate": 1.6158686258424493e-06, "loss": 23.5625, "step": 9340 }, { "epoch": 0.6203759048947334, "grad_norm": 182.74476623535156, "learning_rate": 1.6157838875935244e-06, "loss": 19.6562, "step": 9341 }, { "epoch": 0.620442319187089, "grad_norm": 188.92869567871094, "learning_rate": 1.6156991422215666e-06, "loss": 16.6875, "step": 9342 }, { "epoch": 0.6205087334794448, "grad_norm": 197.84243774414062, "learning_rate": 1.615614389727556e-06, "loss": 18.6094, "step": 9343 }, { "epoch": 0.6205751477718005, "grad_norm": 224.9297332763672, "learning_rate": 1.6155296301124734e-06, "loss": 23.1094, "step": 9344 }, { "epoch": 0.6206415620641562, "grad_norm": 411.26104736328125, "learning_rate": 1.615444863377299e-06, "loss": 22.1562, "step": 9345 }, { "epoch": 0.6207079763565119, "grad_norm": 302.9114074707031, "learning_rate": 1.615360089523013e-06, "loss": 20.7188, "step": 9346 }, { "epoch": 0.6207743906488676, "grad_norm": 549.6847534179688, "learning_rate": 1.6152753085505964e-06, "loss": 21.4062, "step": 9347 }, { "epoch": 0.6208408049412234, "grad_norm": 225.6328887939453, "learning_rate": 1.6151905204610298e-06, "loss": 24.5625, "step": 9348 }, { "epoch": 0.620907219233579, "grad_norm": 272.0263366699219, "learning_rate": 1.6151057252552942e-06, "loss": 19.9531, "step": 9349 }, { "epoch": 0.6209736335259348, "grad_norm": 226.33665466308594, "learning_rate": 1.6150209229343702e-06, "loss": 17.0625, "step": 9350 }, { "epoch": 0.6210400478182905, "grad_norm": 220.73382568359375, "learning_rate": 1.6149361134992387e-06, "loss": 15.4219, "step": 9351 }, { "epoch": 0.6211064621106462, "grad_norm": 147.98123168945312, "learning_rate": 1.614851296950881e-06, "loss": 16.4688, "step": 9352 }, { "epoch": 0.6211728764030019, "grad_norm": 263.57147216796875, "learning_rate": 1.614766473290278e-06, "loss": 20.1406, "step": 9353 }, { "epoch": 0.6212392906953577, "grad_norm": 121.71478271484375, "learning_rate": 1.614681642518411e-06, "loss": 16.0156, "step": 9354 }, { "epoch": 0.6213057049877133, "grad_norm": 214.36756896972656, "learning_rate": 1.6145968046362609e-06, "loss": 18.4219, "step": 9355 }, { "epoch": 0.6213721192800691, "grad_norm": 151.0864715576172, "learning_rate": 1.6145119596448096e-06, "loss": 18.6406, "step": 9356 }, { "epoch": 0.6214385335724247, "grad_norm": 296.0901184082031, "learning_rate": 1.614427107545038e-06, "loss": 18.2344, "step": 9357 }, { "epoch": 0.6215049478647805, "grad_norm": 118.8132095336914, "learning_rate": 1.6143422483379281e-06, "loss": 15.3281, "step": 9358 }, { "epoch": 0.6215713621571363, "grad_norm": 895.0809326171875, "learning_rate": 1.6142573820244616e-06, "loss": 23.8438, "step": 9359 }, { "epoch": 0.6216377764494919, "grad_norm": 248.9422607421875, "learning_rate": 1.6141725086056196e-06, "loss": 18.2812, "step": 9360 }, { "epoch": 0.6217041907418477, "grad_norm": 296.3357849121094, "learning_rate": 1.6140876280823843e-06, "loss": 17.7344, "step": 9361 }, { "epoch": 0.6217706050342033, "grad_norm": 283.90252685546875, "learning_rate": 1.6140027404557373e-06, "loss": 17.5469, "step": 9362 }, { "epoch": 0.6218370193265591, "grad_norm": 235.04397583007812, "learning_rate": 1.6139178457266608e-06, "loss": 22.6562, "step": 9363 }, { "epoch": 0.6219034336189148, "grad_norm": 294.8955078125, "learning_rate": 1.6138329438961366e-06, "loss": 18.6172, "step": 9364 }, { "epoch": 0.6219698479112705, "grad_norm": 261.18511962890625, "learning_rate": 1.613748034965147e-06, "loss": 20.3906, "step": 9365 }, { "epoch": 0.6220362622036262, "grad_norm": 1033.6485595703125, "learning_rate": 1.613663118934674e-06, "loss": 24.5, "step": 9366 }, { "epoch": 0.6221026764959819, "grad_norm": 3324.981689453125, "learning_rate": 1.6135781958056995e-06, "loss": 23.2656, "step": 9367 }, { "epoch": 0.6221690907883376, "grad_norm": 158.71034240722656, "learning_rate": 1.6134932655792065e-06, "loss": 17.1875, "step": 9368 }, { "epoch": 0.6222355050806934, "grad_norm": 426.817138671875, "learning_rate": 1.6134083282561773e-06, "loss": 15.4531, "step": 9369 }, { "epoch": 0.6223019193730491, "grad_norm": 132.34425354003906, "learning_rate": 1.6133233838375941e-06, "loss": 21.5312, "step": 9370 }, { "epoch": 0.6223683336654048, "grad_norm": 115.67589569091797, "learning_rate": 1.6132384323244396e-06, "loss": 16.2031, "step": 9371 }, { "epoch": 0.6224347479577605, "grad_norm": 287.7967529296875, "learning_rate": 1.613153473717697e-06, "loss": 16.3438, "step": 9372 }, { "epoch": 0.6225011622501162, "grad_norm": 534.862548828125, "learning_rate": 1.613068508018348e-06, "loss": 22.8906, "step": 9373 }, { "epoch": 0.622567576542472, "grad_norm": 108.7054214477539, "learning_rate": 1.6129835352273764e-06, "loss": 12.4688, "step": 9374 }, { "epoch": 0.6226339908348276, "grad_norm": 274.1351318359375, "learning_rate": 1.6128985553457644e-06, "loss": 14.6875, "step": 9375 }, { "epoch": 0.6227004051271834, "grad_norm": 143.05775451660156, "learning_rate": 1.6128135683744956e-06, "loss": 13.3906, "step": 9376 }, { "epoch": 0.622766819419539, "grad_norm": 283.01568603515625, "learning_rate": 1.6127285743145528e-06, "loss": 17.5156, "step": 9377 }, { "epoch": 0.6228332337118948, "grad_norm": 157.95181274414062, "learning_rate": 1.612643573166919e-06, "loss": 21.3281, "step": 9378 }, { "epoch": 0.6228996480042505, "grad_norm": 222.92893981933594, "learning_rate": 1.6125585649325778e-06, "loss": 13.6719, "step": 9379 }, { "epoch": 0.6229660622966062, "grad_norm": 320.670654296875, "learning_rate": 1.612473549612512e-06, "loss": 15.6484, "step": 9380 }, { "epoch": 0.623032476588962, "grad_norm": 124.23709106445312, "learning_rate": 1.6123885272077058e-06, "loss": 19.75, "step": 9381 }, { "epoch": 0.6230988908813176, "grad_norm": 168.75448608398438, "learning_rate": 1.612303497719142e-06, "loss": 21.9219, "step": 9382 }, { "epoch": 0.6231653051736734, "grad_norm": 171.60861206054688, "learning_rate": 1.6122184611478047e-06, "loss": 19.3438, "step": 9383 }, { "epoch": 0.6232317194660291, "grad_norm": 207.9339141845703, "learning_rate": 1.6121334174946765e-06, "loss": 14.8281, "step": 9384 }, { "epoch": 0.6232981337583848, "grad_norm": 394.48760986328125, "learning_rate": 1.6120483667607428e-06, "loss": 17.3125, "step": 9385 }, { "epoch": 0.6233645480507405, "grad_norm": 163.9935760498047, "learning_rate": 1.6119633089469859e-06, "loss": 21.4531, "step": 9386 }, { "epoch": 0.6234309623430963, "grad_norm": 461.31103515625, "learning_rate": 1.6118782440543908e-06, "loss": 18.9062, "step": 9387 }, { "epoch": 0.6234973766354519, "grad_norm": 181.0987091064453, "learning_rate": 1.6117931720839405e-06, "loss": 20.8438, "step": 9388 }, { "epoch": 0.6235637909278077, "grad_norm": 271.72418212890625, "learning_rate": 1.61170809303662e-06, "loss": 25.4062, "step": 9389 }, { "epoch": 0.6236302052201633, "grad_norm": 216.14505004882812, "learning_rate": 1.6116230069134128e-06, "loss": 22.2031, "step": 9390 }, { "epoch": 0.6236966195125191, "grad_norm": 878.810546875, "learning_rate": 1.6115379137153036e-06, "loss": 21.6875, "step": 9391 }, { "epoch": 0.6237630338048749, "grad_norm": 383.3150634765625, "learning_rate": 1.611452813443276e-06, "loss": 23.0781, "step": 9392 }, { "epoch": 0.6238294480972305, "grad_norm": 159.0681915283203, "learning_rate": 1.611367706098315e-06, "loss": 16.7656, "step": 9393 }, { "epoch": 0.6238958623895863, "grad_norm": 235.75367736816406, "learning_rate": 1.611282591681405e-06, "loss": 33.0625, "step": 9394 }, { "epoch": 0.6239622766819419, "grad_norm": 394.28857421875, "learning_rate": 1.6111974701935306e-06, "loss": 23.7188, "step": 9395 }, { "epoch": 0.6240286909742977, "grad_norm": 591.204833984375, "learning_rate": 1.611112341635676e-06, "loss": 24.4375, "step": 9396 }, { "epoch": 0.6240951052666533, "grad_norm": 159.56980895996094, "learning_rate": 1.6110272060088264e-06, "loss": 17.1406, "step": 9397 }, { "epoch": 0.6241615195590091, "grad_norm": 447.04559326171875, "learning_rate": 1.6109420633139664e-06, "loss": 21.6094, "step": 9398 }, { "epoch": 0.6242279338513648, "grad_norm": 165.07342529296875, "learning_rate": 1.6108569135520808e-06, "loss": 13.7656, "step": 9399 }, { "epoch": 0.6242943481437205, "grad_norm": 222.96969604492188, "learning_rate": 1.610771756724155e-06, "loss": 19.2188, "step": 9400 }, { "epoch": 0.6243607624360762, "grad_norm": 349.6292724609375, "learning_rate": 1.6106865928311733e-06, "loss": 17.4219, "step": 9401 }, { "epoch": 0.624427176728432, "grad_norm": 655.3377075195312, "learning_rate": 1.6106014218741213e-06, "loss": 12.8125, "step": 9402 }, { "epoch": 0.6244935910207877, "grad_norm": 239.62026977539062, "learning_rate": 1.6105162438539844e-06, "loss": 21.7031, "step": 9403 }, { "epoch": 0.6245600053131434, "grad_norm": 191.40606689453125, "learning_rate": 1.6104310587717475e-06, "loss": 16.6406, "step": 9404 }, { "epoch": 0.6246264196054991, "grad_norm": 124.65821075439453, "learning_rate": 1.610345866628396e-06, "loss": 18.0312, "step": 9405 }, { "epoch": 0.6246928338978548, "grad_norm": 128.01329040527344, "learning_rate": 1.6102606674249156e-06, "loss": 15.1875, "step": 9406 }, { "epoch": 0.6247592481902106, "grad_norm": 345.0602111816406, "learning_rate": 1.6101754611622916e-06, "loss": 17.9219, "step": 9407 }, { "epoch": 0.6248256624825662, "grad_norm": 108.78302764892578, "learning_rate": 1.6100902478415098e-06, "loss": 14.2344, "step": 9408 }, { "epoch": 0.624892076774922, "grad_norm": 390.8831787109375, "learning_rate": 1.6100050274635557e-06, "loss": 20.7812, "step": 9409 }, { "epoch": 0.6249584910672776, "grad_norm": 187.9704132080078, "learning_rate": 1.6099198000294152e-06, "loss": 18.0938, "step": 9410 }, { "epoch": 0.6250249053596334, "grad_norm": 460.5272521972656, "learning_rate": 1.6098345655400745e-06, "loss": 19.0781, "step": 9411 }, { "epoch": 0.6250913196519892, "grad_norm": 229.98696899414062, "learning_rate": 1.6097493239965187e-06, "loss": 18.0156, "step": 9412 }, { "epoch": 0.6251577339443448, "grad_norm": 254.99913024902344, "learning_rate": 1.6096640753997346e-06, "loss": 19.375, "step": 9413 }, { "epoch": 0.6252241482367006, "grad_norm": 166.663818359375, "learning_rate": 1.6095788197507079e-06, "loss": 19.5312, "step": 9414 }, { "epoch": 0.6252905625290562, "grad_norm": 384.8300476074219, "learning_rate": 1.609493557050425e-06, "loss": 22.2188, "step": 9415 }, { "epoch": 0.625356976821412, "grad_norm": 152.7174530029297, "learning_rate": 1.609408287299872e-06, "loss": 14.3281, "step": 9416 }, { "epoch": 0.6254233911137677, "grad_norm": 277.5640563964844, "learning_rate": 1.6093230105000353e-06, "loss": 19.875, "step": 9417 }, { "epoch": 0.6254898054061234, "grad_norm": 155.49343872070312, "learning_rate": 1.6092377266519016e-06, "loss": 12.0469, "step": 9418 }, { "epoch": 0.6255562196984791, "grad_norm": 263.6170654296875, "learning_rate": 1.6091524357564569e-06, "loss": 17.4062, "step": 9419 }, { "epoch": 0.6256226339908348, "grad_norm": 149.9661102294922, "learning_rate": 1.6090671378146885e-06, "loss": 16.0, "step": 9420 }, { "epoch": 0.6256890482831905, "grad_norm": 347.5901794433594, "learning_rate": 1.6089818328275822e-06, "loss": 27.0625, "step": 9421 }, { "epoch": 0.6257554625755463, "grad_norm": 285.0367736816406, "learning_rate": 1.6088965207961255e-06, "loss": 18.8125, "step": 9422 }, { "epoch": 0.625821876867902, "grad_norm": 286.473388671875, "learning_rate": 1.6088112017213047e-06, "loss": 20.6406, "step": 9423 }, { "epoch": 0.6258882911602577, "grad_norm": 448.4151306152344, "learning_rate": 1.6087258756041072e-06, "loss": 21.625, "step": 9424 }, { "epoch": 0.6259547054526134, "grad_norm": 284.07080078125, "learning_rate": 1.6086405424455197e-06, "loss": 15.1094, "step": 9425 }, { "epoch": 0.6260211197449691, "grad_norm": 101.02718353271484, "learning_rate": 1.6085552022465292e-06, "loss": 14.6719, "step": 9426 }, { "epoch": 0.6260875340373249, "grad_norm": 155.1531219482422, "learning_rate": 1.6084698550081232e-06, "loss": 13.125, "step": 9427 }, { "epoch": 0.6261539483296805, "grad_norm": 207.93861389160156, "learning_rate": 1.6083845007312889e-06, "loss": 15.625, "step": 9428 }, { "epoch": 0.6262203626220363, "grad_norm": 156.05174255371094, "learning_rate": 1.6082991394170137e-06, "loss": 19.7188, "step": 9429 }, { "epoch": 0.6262867769143919, "grad_norm": 286.9539489746094, "learning_rate": 1.6082137710662841e-06, "loss": 21.2812, "step": 9430 }, { "epoch": 0.6263531912067477, "grad_norm": 149.89280700683594, "learning_rate": 1.6081283956800888e-06, "loss": 19.7969, "step": 9431 }, { "epoch": 0.6264196054991034, "grad_norm": 209.52488708496094, "learning_rate": 1.6080430132594152e-06, "loss": 15.0938, "step": 9432 }, { "epoch": 0.6264860197914591, "grad_norm": 189.457763671875, "learning_rate": 1.6079576238052499e-06, "loss": 14.6719, "step": 9433 }, { "epoch": 0.6265524340838149, "grad_norm": 174.73907470703125, "learning_rate": 1.607872227318582e-06, "loss": 21.0, "step": 9434 }, { "epoch": 0.6266188483761705, "grad_norm": 107.75126647949219, "learning_rate": 1.6077868238003985e-06, "loss": 15.9219, "step": 9435 }, { "epoch": 0.6266852626685263, "grad_norm": 466.0093994140625, "learning_rate": 1.6077014132516874e-06, "loss": 26.4219, "step": 9436 }, { "epoch": 0.626751676960882, "grad_norm": 171.60592651367188, "learning_rate": 1.6076159956734365e-06, "loss": 19.0156, "step": 9437 }, { "epoch": 0.6268180912532377, "grad_norm": 224.41067504882812, "learning_rate": 1.6075305710666347e-06, "loss": 16.2031, "step": 9438 }, { "epoch": 0.6268845055455934, "grad_norm": 193.07142639160156, "learning_rate": 1.6074451394322693e-06, "loss": 24.1562, "step": 9439 }, { "epoch": 0.6269509198379491, "grad_norm": 169.38272094726562, "learning_rate": 1.6073597007713287e-06, "loss": 17.625, "step": 9440 }, { "epoch": 0.6270173341303048, "grad_norm": 259.92425537109375, "learning_rate": 1.6072742550848015e-06, "loss": 14.4844, "step": 9441 }, { "epoch": 0.6270837484226606, "grad_norm": 176.46192932128906, "learning_rate": 1.6071888023736757e-06, "loss": 24.0312, "step": 9442 }, { "epoch": 0.6271501627150162, "grad_norm": 182.86280822753906, "learning_rate": 1.6071033426389401e-06, "loss": 21.375, "step": 9443 }, { "epoch": 0.627216577007372, "grad_norm": 303.88970947265625, "learning_rate": 1.6070178758815832e-06, "loss": 13.8906, "step": 9444 }, { "epoch": 0.6272829912997278, "grad_norm": 569.4445190429688, "learning_rate": 1.6069324021025933e-06, "loss": 26.0938, "step": 9445 }, { "epoch": 0.6273494055920834, "grad_norm": 184.24220275878906, "learning_rate": 1.6068469213029596e-06, "loss": 17.9219, "step": 9446 }, { "epoch": 0.6274158198844392, "grad_norm": 182.5476531982422, "learning_rate": 1.6067614334836706e-06, "loss": 19.7031, "step": 9447 }, { "epoch": 0.6274822341767948, "grad_norm": 207.58937072753906, "learning_rate": 1.606675938645715e-06, "loss": 18.25, "step": 9448 }, { "epoch": 0.6275486484691506, "grad_norm": 711.7501831054688, "learning_rate": 1.606590436790082e-06, "loss": 18.8594, "step": 9449 }, { "epoch": 0.6276150627615062, "grad_norm": 287.5324401855469, "learning_rate": 1.606504927917761e-06, "loss": 17.8125, "step": 9450 }, { "epoch": 0.627681477053862, "grad_norm": 171.63751220703125, "learning_rate": 1.6064194120297403e-06, "loss": 18.5312, "step": 9451 }, { "epoch": 0.6277478913462177, "grad_norm": 175.32211303710938, "learning_rate": 1.6063338891270096e-06, "loss": 16.9062, "step": 9452 }, { "epoch": 0.6278143056385734, "grad_norm": 130.81478881835938, "learning_rate": 1.6062483592105582e-06, "loss": 19.4375, "step": 9453 }, { "epoch": 0.6278807199309291, "grad_norm": 141.32005310058594, "learning_rate": 1.6061628222813752e-06, "loss": 14.2656, "step": 9454 }, { "epoch": 0.6279471342232849, "grad_norm": 470.71575927734375, "learning_rate": 1.6060772783404506e-06, "loss": 15.2969, "step": 9455 }, { "epoch": 0.6280135485156406, "grad_norm": 273.50823974609375, "learning_rate": 1.605991727388773e-06, "loss": 19.125, "step": 9456 }, { "epoch": 0.6280799628079963, "grad_norm": 256.297119140625, "learning_rate": 1.6059061694273328e-06, "loss": 16.9375, "step": 9457 }, { "epoch": 0.628146377100352, "grad_norm": 175.32176208496094, "learning_rate": 1.6058206044571195e-06, "loss": 12.5312, "step": 9458 }, { "epoch": 0.6282127913927077, "grad_norm": 196.8859405517578, "learning_rate": 1.6057350324791227e-06, "loss": 15.8359, "step": 9459 }, { "epoch": 0.6282792056850635, "grad_norm": 126.93058013916016, "learning_rate": 1.605649453494332e-06, "loss": 12.625, "step": 9460 }, { "epoch": 0.6283456199774191, "grad_norm": 178.26715087890625, "learning_rate": 1.6055638675037382e-06, "loss": 16.125, "step": 9461 }, { "epoch": 0.6284120342697749, "grad_norm": 256.65008544921875, "learning_rate": 1.6054782745083304e-06, "loss": 15.9062, "step": 9462 }, { "epoch": 0.6284784485621305, "grad_norm": 161.7789764404297, "learning_rate": 1.6053926745090992e-06, "loss": 19.3438, "step": 9463 }, { "epoch": 0.6285448628544863, "grad_norm": 155.78024291992188, "learning_rate": 1.6053070675070348e-06, "loss": 15.8125, "step": 9464 }, { "epoch": 0.628611277146842, "grad_norm": 294.5328063964844, "learning_rate": 1.6052214535031273e-06, "loss": 19.125, "step": 9465 }, { "epoch": 0.6286776914391977, "grad_norm": 351.7680969238281, "learning_rate": 1.6051358324983665e-06, "loss": 24.4844, "step": 9466 }, { "epoch": 0.6287441057315535, "grad_norm": 162.74365234375, "learning_rate": 1.6050502044937436e-06, "loss": 17.1406, "step": 9467 }, { "epoch": 0.6288105200239091, "grad_norm": 179.88961791992188, "learning_rate": 1.604964569490249e-06, "loss": 24.3125, "step": 9468 }, { "epoch": 0.6288769343162649, "grad_norm": 174.19509887695312, "learning_rate": 1.604878927488873e-06, "loss": 15.8125, "step": 9469 }, { "epoch": 0.6289433486086206, "grad_norm": 476.8846130371094, "learning_rate": 1.6047932784906064e-06, "loss": 22.5, "step": 9470 }, { "epoch": 0.6290097629009763, "grad_norm": 142.85736083984375, "learning_rate": 1.60470762249644e-06, "loss": 18.4219, "step": 9471 }, { "epoch": 0.629076177193332, "grad_norm": 303.9879150390625, "learning_rate": 1.6046219595073643e-06, "loss": 19.8125, "step": 9472 }, { "epoch": 0.6291425914856877, "grad_norm": 160.15914916992188, "learning_rate": 1.6045362895243707e-06, "loss": 17.2656, "step": 9473 }, { "epoch": 0.6292090057780434, "grad_norm": 185.61070251464844, "learning_rate": 1.6044506125484496e-06, "loss": 12.4531, "step": 9474 }, { "epoch": 0.6292754200703992, "grad_norm": 939.772705078125, "learning_rate": 1.6043649285805923e-06, "loss": 17.8906, "step": 9475 }, { "epoch": 0.6293418343627548, "grad_norm": 148.3353271484375, "learning_rate": 1.6042792376217902e-06, "loss": 11.3203, "step": 9476 }, { "epoch": 0.6294082486551106, "grad_norm": 193.21217346191406, "learning_rate": 1.6041935396730342e-06, "loss": 20.1094, "step": 9477 }, { "epoch": 0.6294746629474663, "grad_norm": 185.5853729248047, "learning_rate": 1.604107834735316e-06, "loss": 16.7969, "step": 9478 }, { "epoch": 0.629541077239822, "grad_norm": 204.3723602294922, "learning_rate": 1.6040221228096265e-06, "loss": 16.3281, "step": 9479 }, { "epoch": 0.6296074915321778, "grad_norm": 224.5890350341797, "learning_rate": 1.603936403896957e-06, "loss": 24.9531, "step": 9480 }, { "epoch": 0.6296739058245334, "grad_norm": 155.71434020996094, "learning_rate": 1.6038506779983e-06, "loss": 15.5391, "step": 9481 }, { "epoch": 0.6297403201168892, "grad_norm": 273.3056335449219, "learning_rate": 1.6037649451146465e-06, "loss": 22.7188, "step": 9482 }, { "epoch": 0.6298067344092448, "grad_norm": 240.1740264892578, "learning_rate": 1.6036792052469883e-06, "loss": 15.6406, "step": 9483 }, { "epoch": 0.6298731487016006, "grad_norm": 209.98330688476562, "learning_rate": 1.603593458396317e-06, "loss": 11.4062, "step": 9484 }, { "epoch": 0.6299395629939563, "grad_norm": 226.23338317871094, "learning_rate": 1.6035077045636246e-06, "loss": 19.7812, "step": 9485 }, { "epoch": 0.630005977286312, "grad_norm": 700.1453857421875, "learning_rate": 1.603421943749903e-06, "loss": 19.4062, "step": 9486 }, { "epoch": 0.6300723915786677, "grad_norm": 666.2794799804688, "learning_rate": 1.6033361759561443e-06, "loss": 18.9219, "step": 9487 }, { "epoch": 0.6301388058710234, "grad_norm": 182.45452880859375, "learning_rate": 1.603250401183341e-06, "loss": 20.2188, "step": 9488 }, { "epoch": 0.6302052201633792, "grad_norm": 247.41017150878906, "learning_rate": 1.6031646194324847e-06, "loss": 12.5781, "step": 9489 }, { "epoch": 0.6302716344557349, "grad_norm": 310.75347900390625, "learning_rate": 1.6030788307045677e-06, "loss": 18.375, "step": 9490 }, { "epoch": 0.6303380487480906, "grad_norm": 155.64454650878906, "learning_rate": 1.6029930350005827e-06, "loss": 16.4219, "step": 9491 }, { "epoch": 0.6304044630404463, "grad_norm": 273.2087097167969, "learning_rate": 1.602907232321522e-06, "loss": 16.6406, "step": 9492 }, { "epoch": 0.630470877332802, "grad_norm": 675.1847534179688, "learning_rate": 1.6028214226683782e-06, "loss": 16.8594, "step": 9493 }, { "epoch": 0.6305372916251577, "grad_norm": 556.885986328125, "learning_rate": 1.6027356060421436e-06, "loss": 23.6094, "step": 9494 }, { "epoch": 0.6306037059175135, "grad_norm": 384.96087646484375, "learning_rate": 1.602649782443811e-06, "loss": 17.6719, "step": 9495 }, { "epoch": 0.6306701202098691, "grad_norm": 138.30728149414062, "learning_rate": 1.6025639518743734e-06, "loss": 17.0312, "step": 9496 }, { "epoch": 0.6307365345022249, "grad_norm": 270.7886047363281, "learning_rate": 1.6024781143348234e-06, "loss": 19.6406, "step": 9497 }, { "epoch": 0.6308029487945805, "grad_norm": 188.6880645751953, "learning_rate": 1.602392269826154e-06, "loss": 19.1406, "step": 9498 }, { "epoch": 0.6308693630869363, "grad_norm": 156.36585998535156, "learning_rate": 1.602306418349358e-06, "loss": 16.7656, "step": 9499 }, { "epoch": 0.6309357773792921, "grad_norm": 243.0670166015625, "learning_rate": 1.602220559905429e-06, "loss": 15.8125, "step": 9500 }, { "epoch": 0.6310021916716477, "grad_norm": 456.8387145996094, "learning_rate": 1.6021346944953594e-06, "loss": 22.2031, "step": 9501 }, { "epoch": 0.6310686059640035, "grad_norm": 176.53627014160156, "learning_rate": 1.6020488221201433e-06, "loss": 18.7812, "step": 9502 }, { "epoch": 0.6311350202563591, "grad_norm": 300.51605224609375, "learning_rate": 1.6019629427807733e-06, "loss": 19.1406, "step": 9503 }, { "epoch": 0.6312014345487149, "grad_norm": 288.7936096191406, "learning_rate": 1.6018770564782433e-06, "loss": 20.6094, "step": 9504 }, { "epoch": 0.6312678488410706, "grad_norm": 138.9303436279297, "learning_rate": 1.6017911632135464e-06, "loss": 15.1875, "step": 9505 }, { "epoch": 0.6313342631334263, "grad_norm": 274.94122314453125, "learning_rate": 1.6017052629876763e-06, "loss": 25.9531, "step": 9506 }, { "epoch": 0.631400677425782, "grad_norm": 207.40093994140625, "learning_rate": 1.601619355801627e-06, "loss": 21.5312, "step": 9507 }, { "epoch": 0.6314670917181378, "grad_norm": 389.6383056640625, "learning_rate": 1.6015334416563916e-06, "loss": 12.8516, "step": 9508 }, { "epoch": 0.6315335060104934, "grad_norm": 206.23361206054688, "learning_rate": 1.6014475205529643e-06, "loss": 24.2812, "step": 9509 }, { "epoch": 0.6315999203028492, "grad_norm": 249.59576416015625, "learning_rate": 1.601361592492339e-06, "loss": 17.4844, "step": 9510 }, { "epoch": 0.6316663345952049, "grad_norm": 270.25732421875, "learning_rate": 1.6012756574755094e-06, "loss": 17.3438, "step": 9511 }, { "epoch": 0.6317327488875606, "grad_norm": 176.2714080810547, "learning_rate": 1.6011897155034695e-06, "loss": 13.0312, "step": 9512 }, { "epoch": 0.6317991631799164, "grad_norm": 261.8223876953125, "learning_rate": 1.6011037665772143e-06, "loss": 18.6094, "step": 9513 }, { "epoch": 0.631865577472272, "grad_norm": 300.29473876953125, "learning_rate": 1.6010178106977368e-06, "loss": 19.0625, "step": 9514 }, { "epoch": 0.6319319917646278, "grad_norm": 211.9186553955078, "learning_rate": 1.600931847866032e-06, "loss": 19.0156, "step": 9515 }, { "epoch": 0.6319984060569834, "grad_norm": 205.11399841308594, "learning_rate": 1.6008458780830941e-06, "loss": 14.7969, "step": 9516 }, { "epoch": 0.6320648203493392, "grad_norm": 349.7182922363281, "learning_rate": 1.6007599013499173e-06, "loss": 19.8281, "step": 9517 }, { "epoch": 0.6321312346416949, "grad_norm": 166.26171875, "learning_rate": 1.6006739176674968e-06, "loss": 19.5781, "step": 9518 }, { "epoch": 0.6321976489340506, "grad_norm": 143.52029418945312, "learning_rate": 1.6005879270368266e-06, "loss": 12.4844, "step": 9519 }, { "epoch": 0.6322640632264063, "grad_norm": 426.50469970703125, "learning_rate": 1.6005019294589016e-06, "loss": 18.8906, "step": 9520 }, { "epoch": 0.632330477518762, "grad_norm": 197.6848602294922, "learning_rate": 1.6004159249347164e-06, "loss": 17.8125, "step": 9521 }, { "epoch": 0.6323968918111178, "grad_norm": 265.46429443359375, "learning_rate": 1.6003299134652664e-06, "loss": 18.5156, "step": 9522 }, { "epoch": 0.6324633061034735, "grad_norm": 196.36593627929688, "learning_rate": 1.6002438950515456e-06, "loss": 21.0938, "step": 9523 }, { "epoch": 0.6325297203958292, "grad_norm": 145.677734375, "learning_rate": 1.60015786969455e-06, "loss": 14.4531, "step": 9524 }, { "epoch": 0.6325961346881849, "grad_norm": 481.7918395996094, "learning_rate": 1.600071837395274e-06, "loss": 17.9062, "step": 9525 }, { "epoch": 0.6326625489805406, "grad_norm": 303.8114318847656, "learning_rate": 1.599985798154713e-06, "loss": 23.5312, "step": 9526 }, { "epoch": 0.6327289632728963, "grad_norm": 160.15646362304688, "learning_rate": 1.5998997519738623e-06, "loss": 15.0781, "step": 9527 }, { "epoch": 0.6327953775652521, "grad_norm": 502.4113464355469, "learning_rate": 1.5998136988537171e-06, "loss": 30.4688, "step": 9528 }, { "epoch": 0.6328617918576077, "grad_norm": 252.10147094726562, "learning_rate": 1.599727638795273e-06, "loss": 19.1094, "step": 9529 }, { "epoch": 0.6329282061499635, "grad_norm": 234.43630981445312, "learning_rate": 1.5996415717995254e-06, "loss": 18.9844, "step": 9530 }, { "epoch": 0.6329946204423191, "grad_norm": 475.3739929199219, "learning_rate": 1.5995554978674699e-06, "loss": 20.6406, "step": 9531 }, { "epoch": 0.6330610347346749, "grad_norm": 198.0396270751953, "learning_rate": 1.5994694170001019e-06, "loss": 14.0938, "step": 9532 }, { "epoch": 0.6331274490270307, "grad_norm": 482.67626953125, "learning_rate": 1.5993833291984175e-06, "loss": 20.2031, "step": 9533 }, { "epoch": 0.6331938633193863, "grad_norm": 177.14671325683594, "learning_rate": 1.5992972344634126e-06, "loss": 17.7812, "step": 9534 }, { "epoch": 0.6332602776117421, "grad_norm": 188.9422149658203, "learning_rate": 1.5992111327960827e-06, "loss": 14.9531, "step": 9535 }, { "epoch": 0.6333266919040977, "grad_norm": 280.0331726074219, "learning_rate": 1.599125024197424e-06, "loss": 26.6875, "step": 9536 }, { "epoch": 0.6333931061964535, "grad_norm": 173.91761779785156, "learning_rate": 1.5990389086684322e-06, "loss": 20.7812, "step": 9537 }, { "epoch": 0.6334595204888092, "grad_norm": 227.72059631347656, "learning_rate": 1.598952786210104e-06, "loss": 18.9062, "step": 9538 }, { "epoch": 0.6335259347811649, "grad_norm": 202.49659729003906, "learning_rate": 1.5988666568234353e-06, "loss": 15.375, "step": 9539 }, { "epoch": 0.6335923490735206, "grad_norm": 308.25299072265625, "learning_rate": 1.5987805205094225e-06, "loss": 18.0, "step": 9540 }, { "epoch": 0.6336587633658763, "grad_norm": 260.8970642089844, "learning_rate": 1.5986943772690618e-06, "loss": 14.6562, "step": 9541 }, { "epoch": 0.633725177658232, "grad_norm": 102.17539978027344, "learning_rate": 1.5986082271033498e-06, "loss": 11.4531, "step": 9542 }, { "epoch": 0.6337915919505878, "grad_norm": 519.2128295898438, "learning_rate": 1.598522070013283e-06, "loss": 19.3906, "step": 9543 }, { "epoch": 0.6338580062429435, "grad_norm": 163.02830505371094, "learning_rate": 1.598435905999858e-06, "loss": 13.9531, "step": 9544 }, { "epoch": 0.6339244205352992, "grad_norm": 236.60401916503906, "learning_rate": 1.5983497350640716e-06, "loss": 13.2656, "step": 9545 }, { "epoch": 0.633990834827655, "grad_norm": 180.66909790039062, "learning_rate": 1.5982635572069207e-06, "loss": 18.9688, "step": 9546 }, { "epoch": 0.6340572491200106, "grad_norm": 196.0830078125, "learning_rate": 1.5981773724294017e-06, "loss": 18.7812, "step": 9547 }, { "epoch": 0.6341236634123664, "grad_norm": 344.5390930175781, "learning_rate": 1.5980911807325117e-06, "loss": 12.9062, "step": 9548 }, { "epoch": 0.634190077704722, "grad_norm": 143.04962158203125, "learning_rate": 1.5980049821172478e-06, "loss": 21.1562, "step": 9549 }, { "epoch": 0.6342564919970778, "grad_norm": 437.0001525878906, "learning_rate": 1.5979187765846072e-06, "loss": 17.8594, "step": 9550 }, { "epoch": 0.6343229062894334, "grad_norm": 184.48622131347656, "learning_rate": 1.597832564135587e-06, "loss": 24.625, "step": 9551 }, { "epoch": 0.6343893205817892, "grad_norm": 241.59170532226562, "learning_rate": 1.5977463447711843e-06, "loss": 18.8125, "step": 9552 }, { "epoch": 0.6344557348741449, "grad_norm": 201.5890655517578, "learning_rate": 1.5976601184923967e-06, "loss": 18.4844, "step": 9553 }, { "epoch": 0.6345221491665006, "grad_norm": 296.0127868652344, "learning_rate": 1.5975738853002217e-06, "loss": 18.7344, "step": 9554 }, { "epoch": 0.6345885634588564, "grad_norm": 205.21241760253906, "learning_rate": 1.597487645195656e-06, "loss": 16.875, "step": 9555 }, { "epoch": 0.634654977751212, "grad_norm": 243.32232666015625, "learning_rate": 1.5974013981796982e-06, "loss": 15.8281, "step": 9556 }, { "epoch": 0.6347213920435678, "grad_norm": 186.47802734375, "learning_rate": 1.5973151442533455e-06, "loss": 16.7031, "step": 9557 }, { "epoch": 0.6347878063359235, "grad_norm": 154.92938232421875, "learning_rate": 1.5972288834175953e-06, "loss": 17.9375, "step": 9558 }, { "epoch": 0.6348542206282792, "grad_norm": 374.6912536621094, "learning_rate": 1.5971426156734463e-06, "loss": 19.375, "step": 9559 }, { "epoch": 0.6349206349206349, "grad_norm": 365.2856750488281, "learning_rate": 1.5970563410218954e-06, "loss": 16.8906, "step": 9560 }, { "epoch": 0.6349870492129907, "grad_norm": 199.67172241210938, "learning_rate": 1.596970059463941e-06, "loss": 10.3438, "step": 9561 }, { "epoch": 0.6350534635053463, "grad_norm": 124.27667236328125, "learning_rate": 1.5968837710005813e-06, "loss": 18.25, "step": 9562 }, { "epoch": 0.6351198777977021, "grad_norm": 162.35525512695312, "learning_rate": 1.5967974756328145e-06, "loss": 15.0938, "step": 9563 }, { "epoch": 0.6351862920900578, "grad_norm": 155.37730407714844, "learning_rate": 1.5967111733616383e-06, "loss": 17.8125, "step": 9564 }, { "epoch": 0.6352527063824135, "grad_norm": 347.75775146484375, "learning_rate": 1.5966248641880516e-06, "loss": 17.8125, "step": 9565 }, { "epoch": 0.6353191206747693, "grad_norm": 307.1426696777344, "learning_rate": 1.5965385481130524e-06, "loss": 21.9531, "step": 9566 }, { "epoch": 0.6353855349671249, "grad_norm": 172.07164001464844, "learning_rate": 1.5964522251376394e-06, "loss": 17.125, "step": 9567 }, { "epoch": 0.6354519492594807, "grad_norm": 375.26641845703125, "learning_rate": 1.5963658952628107e-06, "loss": 21.5938, "step": 9568 }, { "epoch": 0.6355183635518363, "grad_norm": 475.49273681640625, "learning_rate": 1.5962795584895656e-06, "loss": 16.2812, "step": 9569 }, { "epoch": 0.6355847778441921, "grad_norm": 140.3875732421875, "learning_rate": 1.5961932148189021e-06, "loss": 16.4844, "step": 9570 }, { "epoch": 0.6356511921365477, "grad_norm": 166.02505493164062, "learning_rate": 1.5961068642518195e-06, "loss": 13.6406, "step": 9571 }, { "epoch": 0.6357176064289035, "grad_norm": 277.3589172363281, "learning_rate": 1.596020506789316e-06, "loss": 21.1719, "step": 9572 }, { "epoch": 0.6357840207212592, "grad_norm": 160.70156860351562, "learning_rate": 1.5959341424323913e-06, "loss": 16.5625, "step": 9573 }, { "epoch": 0.6358504350136149, "grad_norm": 129.94741821289062, "learning_rate": 1.595847771182044e-06, "loss": 14.0938, "step": 9574 }, { "epoch": 0.6359168493059707, "grad_norm": 538.6012573242188, "learning_rate": 1.5957613930392734e-06, "loss": 18.3438, "step": 9575 }, { "epoch": 0.6359832635983264, "grad_norm": 198.76390075683594, "learning_rate": 1.5956750080050784e-06, "loss": 15.9062, "step": 9576 }, { "epoch": 0.6360496778906821, "grad_norm": 278.5505065917969, "learning_rate": 1.5955886160804584e-06, "loss": 14.7656, "step": 9577 }, { "epoch": 0.6361160921830378, "grad_norm": 466.219970703125, "learning_rate": 1.595502217266413e-06, "loss": 17.2969, "step": 9578 }, { "epoch": 0.6361825064753935, "grad_norm": 243.1695098876953, "learning_rate": 1.5954158115639409e-06, "loss": 10.8203, "step": 9579 }, { "epoch": 0.6362489207677492, "grad_norm": 241.22021484375, "learning_rate": 1.5953293989740425e-06, "loss": 13.7031, "step": 9580 }, { "epoch": 0.636315335060105, "grad_norm": 210.252685546875, "learning_rate": 1.5952429794977165e-06, "loss": 15.75, "step": 9581 }, { "epoch": 0.6363817493524606, "grad_norm": 159.14617919921875, "learning_rate": 1.595156553135963e-06, "loss": 17.7656, "step": 9582 }, { "epoch": 0.6364481636448164, "grad_norm": 226.6067657470703, "learning_rate": 1.5950701198897818e-06, "loss": 18.5312, "step": 9583 }, { "epoch": 0.636514577937172, "grad_norm": 214.40933227539062, "learning_rate": 1.5949836797601726e-06, "loss": 16.2969, "step": 9584 }, { "epoch": 0.6365809922295278, "grad_norm": 176.0802001953125, "learning_rate": 1.5948972327481352e-06, "loss": 20.1875, "step": 9585 }, { "epoch": 0.6366474065218836, "grad_norm": 165.444091796875, "learning_rate": 1.5948107788546697e-06, "loss": 13.4688, "step": 9586 }, { "epoch": 0.6367138208142392, "grad_norm": 226.88002014160156, "learning_rate": 1.594724318080776e-06, "loss": 18.4844, "step": 9587 }, { "epoch": 0.636780235106595, "grad_norm": 234.44082641601562, "learning_rate": 1.5946378504274546e-06, "loss": 17.125, "step": 9588 }, { "epoch": 0.6368466493989506, "grad_norm": 278.5648193359375, "learning_rate": 1.5945513758957052e-06, "loss": 19.8281, "step": 9589 }, { "epoch": 0.6369130636913064, "grad_norm": 160.48709106445312, "learning_rate": 1.5944648944865282e-06, "loss": 27.3906, "step": 9590 }, { "epoch": 0.6369794779836621, "grad_norm": 250.2020721435547, "learning_rate": 1.5943784062009242e-06, "loss": 17.7344, "step": 9591 }, { "epoch": 0.6370458922760178, "grad_norm": 194.16256713867188, "learning_rate": 1.5942919110398937e-06, "loss": 16.5469, "step": 9592 }, { "epoch": 0.6371123065683735, "grad_norm": 298.8252868652344, "learning_rate": 1.594205409004437e-06, "loss": 17.7656, "step": 9593 }, { "epoch": 0.6371787208607292, "grad_norm": 309.8199462890625, "learning_rate": 1.5941189000955547e-06, "loss": 19.4062, "step": 9594 }, { "epoch": 0.6372451351530849, "grad_norm": 314.3068542480469, "learning_rate": 1.5940323843142474e-06, "loss": 18.0625, "step": 9595 }, { "epoch": 0.6373115494454407, "grad_norm": 189.66555786132812, "learning_rate": 1.5939458616615161e-06, "loss": 22.1406, "step": 9596 }, { "epoch": 0.6373779637377964, "grad_norm": 217.35000610351562, "learning_rate": 1.5938593321383617e-06, "loss": 17.3125, "step": 9597 }, { "epoch": 0.6374443780301521, "grad_norm": 320.3729248046875, "learning_rate": 1.5937727957457852e-06, "loss": 20.4062, "step": 9598 }, { "epoch": 0.6375107923225078, "grad_norm": 197.7127227783203, "learning_rate": 1.593686252484787e-06, "loss": 15.9844, "step": 9599 }, { "epoch": 0.6375772066148635, "grad_norm": 267.3204650878906, "learning_rate": 1.5935997023563688e-06, "loss": 19.7031, "step": 9600 }, { "epoch": 0.6376436209072193, "grad_norm": 171.11886596679688, "learning_rate": 1.5935131453615315e-06, "loss": 14.3047, "step": 9601 }, { "epoch": 0.6377100351995749, "grad_norm": 242.3330841064453, "learning_rate": 1.593426581501276e-06, "loss": 19.2188, "step": 9602 }, { "epoch": 0.6377764494919307, "grad_norm": 138.25991821289062, "learning_rate": 1.5933400107766044e-06, "loss": 14.875, "step": 9603 }, { "epoch": 0.6378428637842863, "grad_norm": 223.20001220703125, "learning_rate": 1.5932534331885178e-06, "loss": 18.5625, "step": 9604 }, { "epoch": 0.6379092780766421, "grad_norm": 158.57061767578125, "learning_rate": 1.593166848738017e-06, "loss": 17.7969, "step": 9605 }, { "epoch": 0.6379756923689978, "grad_norm": 269.9624328613281, "learning_rate": 1.5930802574261045e-06, "loss": 25.625, "step": 9606 }, { "epoch": 0.6380421066613535, "grad_norm": 506.1298522949219, "learning_rate": 1.5929936592537817e-06, "loss": 15.9062, "step": 9607 }, { "epoch": 0.6381085209537093, "grad_norm": 160.82969665527344, "learning_rate": 1.5929070542220497e-06, "loss": 17.5781, "step": 9608 }, { "epoch": 0.638174935246065, "grad_norm": 123.42565155029297, "learning_rate": 1.5928204423319112e-06, "loss": 15.6641, "step": 9609 }, { "epoch": 0.6382413495384207, "grad_norm": 287.28179931640625, "learning_rate": 1.5927338235843676e-06, "loss": 18.0938, "step": 9610 }, { "epoch": 0.6383077638307764, "grad_norm": 124.98283386230469, "learning_rate": 1.5926471979804207e-06, "loss": 16.4531, "step": 9611 }, { "epoch": 0.6383741781231321, "grad_norm": 829.19775390625, "learning_rate": 1.5925605655210733e-06, "loss": 22.2969, "step": 9612 }, { "epoch": 0.6384405924154878, "grad_norm": 173.30506896972656, "learning_rate": 1.5924739262073266e-06, "loss": 20.2031, "step": 9613 }, { "epoch": 0.6385070067078435, "grad_norm": 167.26243591308594, "learning_rate": 1.5923872800401833e-06, "loss": 13.1406, "step": 9614 }, { "epoch": 0.6385734210001992, "grad_norm": 430.0251770019531, "learning_rate": 1.5923006270206454e-06, "loss": 20.125, "step": 9615 }, { "epoch": 0.638639835292555, "grad_norm": 486.3058166503906, "learning_rate": 1.5922139671497155e-06, "loss": 28.0938, "step": 9616 }, { "epoch": 0.6387062495849106, "grad_norm": 226.84359741210938, "learning_rate": 1.5921273004283955e-06, "loss": 20.2969, "step": 9617 }, { "epoch": 0.6387726638772664, "grad_norm": 136.29580688476562, "learning_rate": 1.5920406268576888e-06, "loss": 15.7969, "step": 9618 }, { "epoch": 0.6388390781696222, "grad_norm": 137.16424560546875, "learning_rate": 1.5919539464385975e-06, "loss": 17.2031, "step": 9619 }, { "epoch": 0.6389054924619778, "grad_norm": 143.73846435546875, "learning_rate": 1.5918672591721242e-06, "loss": 21.0625, "step": 9620 }, { "epoch": 0.6389719067543336, "grad_norm": 201.2579345703125, "learning_rate": 1.5917805650592716e-06, "loss": 16.1875, "step": 9621 }, { "epoch": 0.6390383210466892, "grad_norm": 213.0261993408203, "learning_rate": 1.591693864101043e-06, "loss": 15.1562, "step": 9622 }, { "epoch": 0.639104735339045, "grad_norm": 615.1082763671875, "learning_rate": 1.5916071562984405e-06, "loss": 19.2812, "step": 9623 }, { "epoch": 0.6391711496314006, "grad_norm": 444.00299072265625, "learning_rate": 1.5915204416524678e-06, "loss": 22.0938, "step": 9624 }, { "epoch": 0.6392375639237564, "grad_norm": 214.0261688232422, "learning_rate": 1.5914337201641277e-06, "loss": 16.2812, "step": 9625 }, { "epoch": 0.6393039782161121, "grad_norm": 406.1964111328125, "learning_rate": 1.5913469918344236e-06, "loss": 22.5, "step": 9626 }, { "epoch": 0.6393703925084678, "grad_norm": 280.4315185546875, "learning_rate": 1.5912602566643582e-06, "loss": 24.7969, "step": 9627 }, { "epoch": 0.6394368068008235, "grad_norm": 117.95582580566406, "learning_rate": 1.591173514654935e-06, "loss": 12.1094, "step": 9628 }, { "epoch": 0.6395032210931793, "grad_norm": 167.23806762695312, "learning_rate": 1.5910867658071579e-06, "loss": 14.9688, "step": 9629 }, { "epoch": 0.639569635385535, "grad_norm": 155.67652893066406, "learning_rate": 1.5910000101220296e-06, "loss": 17.8281, "step": 9630 }, { "epoch": 0.6396360496778907, "grad_norm": 127.56229400634766, "learning_rate": 1.590913247600554e-06, "loss": 16.8594, "step": 9631 }, { "epoch": 0.6397024639702464, "grad_norm": 161.15573120117188, "learning_rate": 1.5908264782437346e-06, "loss": 23.8281, "step": 9632 }, { "epoch": 0.6397688782626021, "grad_norm": 312.43890380859375, "learning_rate": 1.5907397020525754e-06, "loss": 20.3281, "step": 9633 }, { "epoch": 0.6398352925549579, "grad_norm": 426.1686706542969, "learning_rate": 1.59065291902808e-06, "loss": 16.8281, "step": 9634 }, { "epoch": 0.6399017068473135, "grad_norm": 251.42738342285156, "learning_rate": 1.5905661291712522e-06, "loss": 21.5, "step": 9635 }, { "epoch": 0.6399681211396693, "grad_norm": 384.4183349609375, "learning_rate": 1.5904793324830958e-06, "loss": 27.3125, "step": 9636 }, { "epoch": 0.6400345354320249, "grad_norm": 445.3973083496094, "learning_rate": 1.590392528964615e-06, "loss": 25.8438, "step": 9637 }, { "epoch": 0.6401009497243807, "grad_norm": 180.07290649414062, "learning_rate": 1.5903057186168139e-06, "loss": 19.8125, "step": 9638 }, { "epoch": 0.6401673640167364, "grad_norm": 273.2376708984375, "learning_rate": 1.5902189014406967e-06, "loss": 15.2031, "step": 9639 }, { "epoch": 0.6402337783090921, "grad_norm": 210.1481170654297, "learning_rate": 1.5901320774372674e-06, "loss": 30.0, "step": 9640 }, { "epoch": 0.6403001926014479, "grad_norm": 216.69447326660156, "learning_rate": 1.5900452466075306e-06, "loss": 18.5781, "step": 9641 }, { "epoch": 0.6403666068938035, "grad_norm": 150.00186157226562, "learning_rate": 1.5899584089524906e-06, "loss": 18.6562, "step": 9642 }, { "epoch": 0.6404330211861593, "grad_norm": 187.72117614746094, "learning_rate": 1.5898715644731522e-06, "loss": 13.6094, "step": 9643 }, { "epoch": 0.640499435478515, "grad_norm": 146.14926147460938, "learning_rate": 1.5897847131705193e-06, "loss": 17.1094, "step": 9644 }, { "epoch": 0.6405658497708707, "grad_norm": 266.4130554199219, "learning_rate": 1.589697855045597e-06, "loss": 19.25, "step": 9645 }, { "epoch": 0.6406322640632264, "grad_norm": 362.2743835449219, "learning_rate": 1.5896109900993905e-06, "loss": 17.5781, "step": 9646 }, { "epoch": 0.6406986783555821, "grad_norm": 129.0749969482422, "learning_rate": 1.5895241183329032e-06, "loss": 16.1094, "step": 9647 }, { "epoch": 0.6407650926479378, "grad_norm": 118.57516479492188, "learning_rate": 1.5894372397471414e-06, "loss": 14.9375, "step": 9648 }, { "epoch": 0.6408315069402936, "grad_norm": 275.9895935058594, "learning_rate": 1.5893503543431094e-06, "loss": 16.5781, "step": 9649 }, { "epoch": 0.6408979212326492, "grad_norm": 223.5066375732422, "learning_rate": 1.5892634621218122e-06, "loss": 15.8594, "step": 9650 }, { "epoch": 0.640964335525005, "grad_norm": 361.5744323730469, "learning_rate": 1.5891765630842554e-06, "loss": 17.625, "step": 9651 }, { "epoch": 0.6410307498173607, "grad_norm": 318.2477722167969, "learning_rate": 1.5890896572314433e-06, "loss": 17.8281, "step": 9652 }, { "epoch": 0.6410971641097164, "grad_norm": 398.067138671875, "learning_rate": 1.5890027445643822e-06, "loss": 18.1406, "step": 9653 }, { "epoch": 0.6411635784020722, "grad_norm": 447.4839782714844, "learning_rate": 1.5889158250840766e-06, "loss": 22.2344, "step": 9654 }, { "epoch": 0.6412299926944278, "grad_norm": 315.5238037109375, "learning_rate": 1.5888288987915327e-06, "loss": 22.9688, "step": 9655 }, { "epoch": 0.6412964069867836, "grad_norm": 219.5970001220703, "learning_rate": 1.5887419656877556e-06, "loss": 14.3906, "step": 9656 }, { "epoch": 0.6413628212791392, "grad_norm": 125.8001708984375, "learning_rate": 1.5886550257737508e-06, "loss": 11.9844, "step": 9657 }, { "epoch": 0.641429235571495, "grad_norm": 338.59576416015625, "learning_rate": 1.588568079050524e-06, "loss": 16.7188, "step": 9658 }, { "epoch": 0.6414956498638507, "grad_norm": 301.48486328125, "learning_rate": 1.588481125519081e-06, "loss": 20.9688, "step": 9659 }, { "epoch": 0.6415620641562064, "grad_norm": 217.18856811523438, "learning_rate": 1.588394165180428e-06, "loss": 18.0625, "step": 9660 }, { "epoch": 0.6416284784485621, "grad_norm": 232.33372497558594, "learning_rate": 1.5883071980355705e-06, "loss": 22.4375, "step": 9661 }, { "epoch": 0.6416948927409178, "grad_norm": 173.13067626953125, "learning_rate": 1.5882202240855147e-06, "loss": 17.6406, "step": 9662 }, { "epoch": 0.6417613070332736, "grad_norm": 244.50686645507812, "learning_rate": 1.5881332433312662e-06, "loss": 21.625, "step": 9663 }, { "epoch": 0.6418277213256293, "grad_norm": 296.4388427734375, "learning_rate": 1.5880462557738316e-06, "loss": 15.4219, "step": 9664 }, { "epoch": 0.641894135617985, "grad_norm": 251.3617401123047, "learning_rate": 1.587959261414217e-06, "loss": 15.8438, "step": 9665 }, { "epoch": 0.6419605499103407, "grad_norm": 185.05799865722656, "learning_rate": 1.5878722602534287e-06, "loss": 17.6562, "step": 9666 }, { "epoch": 0.6420269642026964, "grad_norm": 216.62261962890625, "learning_rate": 1.587785252292473e-06, "loss": 19.3281, "step": 9667 }, { "epoch": 0.6420933784950521, "grad_norm": 143.6438446044922, "learning_rate": 1.5876982375323567e-06, "loss": 15.2969, "step": 9668 }, { "epoch": 0.6421597927874079, "grad_norm": 148.6148223876953, "learning_rate": 1.587611215974086e-06, "loss": 13.4062, "step": 9669 }, { "epoch": 0.6422262070797635, "grad_norm": 108.03854370117188, "learning_rate": 1.5875241876186674e-06, "loss": 16.3281, "step": 9670 }, { "epoch": 0.6422926213721193, "grad_norm": 111.70278930664062, "learning_rate": 1.5874371524671077e-06, "loss": 12.5156, "step": 9671 }, { "epoch": 0.6423590356644749, "grad_norm": 295.4832458496094, "learning_rate": 1.5873501105204142e-06, "loss": 22.8906, "step": 9672 }, { "epoch": 0.6424254499568307, "grad_norm": 254.546630859375, "learning_rate": 1.587263061779593e-06, "loss": 22.3906, "step": 9673 }, { "epoch": 0.6424918642491865, "grad_norm": 348.6564636230469, "learning_rate": 1.5871760062456514e-06, "loss": 18.5, "step": 9674 }, { "epoch": 0.6425582785415421, "grad_norm": 234.49880981445312, "learning_rate": 1.5870889439195962e-06, "loss": 25.9375, "step": 9675 }, { "epoch": 0.6426246928338979, "grad_norm": 197.38150024414062, "learning_rate": 1.5870018748024348e-06, "loss": 13.1797, "step": 9676 }, { "epoch": 0.6426911071262535, "grad_norm": 435.60211181640625, "learning_rate": 1.5869147988951742e-06, "loss": 19.6406, "step": 9677 }, { "epoch": 0.6427575214186093, "grad_norm": 237.99966430664062, "learning_rate": 1.5868277161988215e-06, "loss": 13.7266, "step": 9678 }, { "epoch": 0.642823935710965, "grad_norm": 222.9970703125, "learning_rate": 1.5867406267143842e-06, "loss": 16.4688, "step": 9679 }, { "epoch": 0.6428903500033207, "grad_norm": 719.115478515625, "learning_rate": 1.5866535304428697e-06, "loss": 16.4531, "step": 9680 }, { "epoch": 0.6429567642956764, "grad_norm": 410.8941345214844, "learning_rate": 1.5865664273852855e-06, "loss": 17.4688, "step": 9681 }, { "epoch": 0.6430231785880322, "grad_norm": 240.2062530517578, "learning_rate": 1.586479317542639e-06, "loss": 17.0, "step": 9682 }, { "epoch": 0.6430895928803878, "grad_norm": 327.48443603515625, "learning_rate": 1.586392200915938e-06, "loss": 16.9219, "step": 9683 }, { "epoch": 0.6431560071727436, "grad_norm": 351.8938293457031, "learning_rate": 1.5863050775061901e-06, "loss": 20.125, "step": 9684 }, { "epoch": 0.6432224214650993, "grad_norm": 180.9730224609375, "learning_rate": 1.5862179473144033e-06, "loss": 17.8281, "step": 9685 }, { "epoch": 0.643288835757455, "grad_norm": 106.18588256835938, "learning_rate": 1.5861308103415852e-06, "loss": 13.3047, "step": 9686 }, { "epoch": 0.6433552500498108, "grad_norm": 374.69671630859375, "learning_rate": 1.5860436665887437e-06, "loss": 19.5156, "step": 9687 }, { "epoch": 0.6434216643421664, "grad_norm": 164.70103454589844, "learning_rate": 1.5859565160568873e-06, "loss": 18.4375, "step": 9688 }, { "epoch": 0.6434880786345222, "grad_norm": 665.4820556640625, "learning_rate": 1.5858693587470237e-06, "loss": 19.5469, "step": 9689 }, { "epoch": 0.6435544929268778, "grad_norm": 238.49139404296875, "learning_rate": 1.5857821946601613e-06, "loss": 19.6094, "step": 9690 }, { "epoch": 0.6436209072192336, "grad_norm": 319.58056640625, "learning_rate": 1.585695023797308e-06, "loss": 14.9375, "step": 9691 }, { "epoch": 0.6436873215115892, "grad_norm": 331.11761474609375, "learning_rate": 1.5856078461594726e-06, "loss": 21.25, "step": 9692 }, { "epoch": 0.643753735803945, "grad_norm": 126.05677795410156, "learning_rate": 1.5855206617476633e-06, "loss": 16.25, "step": 9693 }, { "epoch": 0.6438201500963007, "grad_norm": 192.79591369628906, "learning_rate": 1.5854334705628887e-06, "loss": 17.0938, "step": 9694 }, { "epoch": 0.6438865643886564, "grad_norm": 129.85511779785156, "learning_rate": 1.5853462726061571e-06, "loss": 15.9375, "step": 9695 }, { "epoch": 0.6439529786810122, "grad_norm": 146.17564392089844, "learning_rate": 1.5852590678784775e-06, "loss": 16.1875, "step": 9696 }, { "epoch": 0.6440193929733679, "grad_norm": 165.26193237304688, "learning_rate": 1.585171856380858e-06, "loss": 19.75, "step": 9697 }, { "epoch": 0.6440858072657236, "grad_norm": 154.0568389892578, "learning_rate": 1.5850846381143086e-06, "loss": 16.75, "step": 9698 }, { "epoch": 0.6441522215580793, "grad_norm": 147.06532287597656, "learning_rate": 1.584997413079837e-06, "loss": 17.2812, "step": 9699 }, { "epoch": 0.644218635850435, "grad_norm": 338.55206298828125, "learning_rate": 1.5849101812784528e-06, "loss": 27.8359, "step": 9700 }, { "epoch": 0.6442850501427907, "grad_norm": 244.34561157226562, "learning_rate": 1.5848229427111651e-06, "loss": 12.0312, "step": 9701 }, { "epoch": 0.6443514644351465, "grad_norm": 98.8185806274414, "learning_rate": 1.5847356973789827e-06, "loss": 15.2188, "step": 9702 }, { "epoch": 0.6444178787275021, "grad_norm": 183.81910705566406, "learning_rate": 1.5846484452829148e-06, "loss": 21.8438, "step": 9703 }, { "epoch": 0.6444842930198579, "grad_norm": 123.49109649658203, "learning_rate": 1.5845611864239708e-06, "loss": 15.3125, "step": 9704 }, { "epoch": 0.6445507073122135, "grad_norm": 192.2173614501953, "learning_rate": 1.5844739208031603e-06, "loss": 19.0781, "step": 9705 }, { "epoch": 0.6446171216045693, "grad_norm": 219.16429138183594, "learning_rate": 1.5843866484214924e-06, "loss": 21.75, "step": 9706 }, { "epoch": 0.6446835358969251, "grad_norm": 146.3306121826172, "learning_rate": 1.5842993692799767e-06, "loss": 17.1719, "step": 9707 }, { "epoch": 0.6447499501892807, "grad_norm": 167.71116638183594, "learning_rate": 1.5842120833796227e-06, "loss": 18.1719, "step": 9708 }, { "epoch": 0.6448163644816365, "grad_norm": 268.23541259765625, "learning_rate": 1.5841247907214403e-06, "loss": 17.4062, "step": 9709 }, { "epoch": 0.6448827787739921, "grad_norm": 152.96824645996094, "learning_rate": 1.584037491306439e-06, "loss": 16.1875, "step": 9710 }, { "epoch": 0.6449491930663479, "grad_norm": 322.7377624511719, "learning_rate": 1.5839501851356293e-06, "loss": 15.2812, "step": 9711 }, { "epoch": 0.6450156073587036, "grad_norm": 761.5059204101562, "learning_rate": 1.5838628722100201e-06, "loss": 18.5156, "step": 9712 }, { "epoch": 0.6450820216510593, "grad_norm": 153.6328582763672, "learning_rate": 1.583775552530622e-06, "loss": 12.7578, "step": 9713 }, { "epoch": 0.645148435943415, "grad_norm": 537.648681640625, "learning_rate": 1.583688226098445e-06, "loss": 14.8125, "step": 9714 }, { "epoch": 0.6452148502357707, "grad_norm": 397.61322021484375, "learning_rate": 1.583600892914499e-06, "loss": 16.5781, "step": 9715 }, { "epoch": 0.6452812645281265, "grad_norm": 201.25613403320312, "learning_rate": 1.5835135529797943e-06, "loss": 14.4062, "step": 9716 }, { "epoch": 0.6453476788204822, "grad_norm": 278.91448974609375, "learning_rate": 1.5834262062953417e-06, "loss": 16.8906, "step": 9717 }, { "epoch": 0.6454140931128379, "grad_norm": 238.1356964111328, "learning_rate": 1.583338852862151e-06, "loss": 20.0469, "step": 9718 }, { "epoch": 0.6454805074051936, "grad_norm": 446.4309997558594, "learning_rate": 1.5832514926812328e-06, "loss": 22.125, "step": 9719 }, { "epoch": 0.6455469216975493, "grad_norm": 161.3932342529297, "learning_rate": 1.5831641257535975e-06, "loss": 14.75, "step": 9720 }, { "epoch": 0.645613335989905, "grad_norm": 280.0240478515625, "learning_rate": 1.5830767520802558e-06, "loss": 16.2812, "step": 9721 }, { "epoch": 0.6456797502822608, "grad_norm": 325.81280517578125, "learning_rate": 1.5829893716622186e-06, "loss": 18.8125, "step": 9722 }, { "epoch": 0.6457461645746164, "grad_norm": 130.9033966064453, "learning_rate": 1.5829019845004966e-06, "loss": 15.4844, "step": 9723 }, { "epoch": 0.6458125788669722, "grad_norm": 595.2844848632812, "learning_rate": 1.5828145905961008e-06, "loss": 21.5, "step": 9724 }, { "epoch": 0.6458789931593278, "grad_norm": 123.1045150756836, "learning_rate": 1.5827271899500416e-06, "loss": 16.4688, "step": 9725 }, { "epoch": 0.6459454074516836, "grad_norm": 167.38706970214844, "learning_rate": 1.5826397825633305e-06, "loss": 18.2656, "step": 9726 }, { "epoch": 0.6460118217440394, "grad_norm": 1484.208251953125, "learning_rate": 1.5825523684369778e-06, "loss": 16.4531, "step": 9727 }, { "epoch": 0.646078236036395, "grad_norm": 102.48328399658203, "learning_rate": 1.5824649475719957e-06, "loss": 17.8906, "step": 9728 }, { "epoch": 0.6461446503287508, "grad_norm": 154.06483459472656, "learning_rate": 1.5823775199693949e-06, "loss": 15.4219, "step": 9729 }, { "epoch": 0.6462110646211064, "grad_norm": 262.43634033203125, "learning_rate": 1.5822900856301868e-06, "loss": 22.5, "step": 9730 }, { "epoch": 0.6462774789134622, "grad_norm": 238.06149291992188, "learning_rate": 1.5822026445553827e-06, "loss": 22.4375, "step": 9731 }, { "epoch": 0.6463438932058179, "grad_norm": 253.60580444335938, "learning_rate": 1.5821151967459941e-06, "loss": 20.5, "step": 9732 }, { "epoch": 0.6464103074981736, "grad_norm": 283.17388916015625, "learning_rate": 1.5820277422030324e-06, "loss": 16.1875, "step": 9733 }, { "epoch": 0.6464767217905293, "grad_norm": 197.3978271484375, "learning_rate": 1.5819402809275096e-06, "loss": 15.6406, "step": 9734 }, { "epoch": 0.646543136082885, "grad_norm": 183.0987548828125, "learning_rate": 1.5818528129204372e-06, "loss": 13.2031, "step": 9735 }, { "epoch": 0.6466095503752407, "grad_norm": 212.1858367919922, "learning_rate": 1.5817653381828269e-06, "loss": 20.4688, "step": 9736 }, { "epoch": 0.6466759646675965, "grad_norm": 149.55953979492188, "learning_rate": 1.5816778567156907e-06, "loss": 12.8594, "step": 9737 }, { "epoch": 0.6467423789599522, "grad_norm": 395.8653259277344, "learning_rate": 1.5815903685200406e-06, "loss": 22.5625, "step": 9738 }, { "epoch": 0.6468087932523079, "grad_norm": 381.9820251464844, "learning_rate": 1.5815028735968882e-06, "loss": 27.0469, "step": 9739 }, { "epoch": 0.6468752075446637, "grad_norm": 194.04278564453125, "learning_rate": 1.581415371947246e-06, "loss": 15.5781, "step": 9740 }, { "epoch": 0.6469416218370193, "grad_norm": 303.77142333984375, "learning_rate": 1.581327863572126e-06, "loss": 18.6875, "step": 9741 }, { "epoch": 0.6470080361293751, "grad_norm": 235.1060028076172, "learning_rate": 1.5812403484725404e-06, "loss": 17.6094, "step": 9742 }, { "epoch": 0.6470744504217307, "grad_norm": 442.1924133300781, "learning_rate": 1.581152826649502e-06, "loss": 19.2656, "step": 9743 }, { "epoch": 0.6471408647140865, "grad_norm": 142.58285522460938, "learning_rate": 1.5810652981040225e-06, "loss": 20.2188, "step": 9744 }, { "epoch": 0.6472072790064421, "grad_norm": 238.9464111328125, "learning_rate": 1.5809777628371146e-06, "loss": 17.6719, "step": 9745 }, { "epoch": 0.6472736932987979, "grad_norm": 205.92044067382812, "learning_rate": 1.5808902208497912e-06, "loss": 17.3906, "step": 9746 }, { "epoch": 0.6473401075911536, "grad_norm": 171.62811279296875, "learning_rate": 1.5808026721430644e-06, "loss": 14.0625, "step": 9747 }, { "epoch": 0.6474065218835093, "grad_norm": 181.78672790527344, "learning_rate": 1.5807151167179476e-06, "loss": 16.1406, "step": 9748 }, { "epoch": 0.6474729361758651, "grad_norm": 207.96267700195312, "learning_rate": 1.5806275545754528e-06, "loss": 17.4531, "step": 9749 }, { "epoch": 0.6475393504682208, "grad_norm": 141.5652618408203, "learning_rate": 1.5805399857165935e-06, "loss": 18.4688, "step": 9750 }, { "epoch": 0.6476057647605765, "grad_norm": 169.8479766845703, "learning_rate": 1.5804524101423823e-06, "loss": 13.5781, "step": 9751 }, { "epoch": 0.6476721790529322, "grad_norm": 273.6025695800781, "learning_rate": 1.5803648278538324e-06, "loss": 14.9531, "step": 9752 }, { "epoch": 0.6477385933452879, "grad_norm": 165.1311798095703, "learning_rate": 1.5802772388519565e-06, "loss": 18.0625, "step": 9753 }, { "epoch": 0.6478050076376436, "grad_norm": 271.6081237792969, "learning_rate": 1.5801896431377686e-06, "loss": 14.3906, "step": 9754 }, { "epoch": 0.6478714219299994, "grad_norm": 376.6363830566406, "learning_rate": 1.5801020407122811e-06, "loss": 18.9531, "step": 9755 }, { "epoch": 0.647937836222355, "grad_norm": 185.06607055664062, "learning_rate": 1.580014431576508e-06, "loss": 15.5, "step": 9756 }, { "epoch": 0.6480042505147108, "grad_norm": 156.19737243652344, "learning_rate": 1.5799268157314622e-06, "loss": 17.8125, "step": 9757 }, { "epoch": 0.6480706648070664, "grad_norm": 289.1603698730469, "learning_rate": 1.5798391931781576e-06, "loss": 19.3906, "step": 9758 }, { "epoch": 0.6481370790994222, "grad_norm": 181.91256713867188, "learning_rate": 1.5797515639176074e-06, "loss": 15.4062, "step": 9759 }, { "epoch": 0.648203493391778, "grad_norm": 368.89599609375, "learning_rate": 1.5796639279508254e-06, "loss": 16.9531, "step": 9760 }, { "epoch": 0.6482699076841336, "grad_norm": 593.70654296875, "learning_rate": 1.5795762852788252e-06, "loss": 30.125, "step": 9761 }, { "epoch": 0.6483363219764894, "grad_norm": 173.14402770996094, "learning_rate": 1.5794886359026212e-06, "loss": 15.5781, "step": 9762 }, { "epoch": 0.648402736268845, "grad_norm": 138.79478454589844, "learning_rate": 1.5794009798232264e-06, "loss": 16.9219, "step": 9763 }, { "epoch": 0.6484691505612008, "grad_norm": 367.6025390625, "learning_rate": 1.5793133170416554e-06, "loss": 18.2891, "step": 9764 }, { "epoch": 0.6485355648535565, "grad_norm": 173.4796142578125, "learning_rate": 1.5792256475589217e-06, "loss": 19.2188, "step": 9765 }, { "epoch": 0.6486019791459122, "grad_norm": 1023.5480346679688, "learning_rate": 1.57913797137604e-06, "loss": 35.3906, "step": 9766 }, { "epoch": 0.6486683934382679, "grad_norm": 203.69607543945312, "learning_rate": 1.5790502884940243e-06, "loss": 14.6094, "step": 9767 }, { "epoch": 0.6487348077306236, "grad_norm": 380.4682922363281, "learning_rate": 1.5789625989138883e-06, "loss": 18.2031, "step": 9768 }, { "epoch": 0.6488012220229793, "grad_norm": 274.714599609375, "learning_rate": 1.578874902636647e-06, "loss": 12.8906, "step": 9769 }, { "epoch": 0.6488676363153351, "grad_norm": 259.9791259765625, "learning_rate": 1.578787199663315e-06, "loss": 18.875, "step": 9770 }, { "epoch": 0.6489340506076908, "grad_norm": 165.9247589111328, "learning_rate": 1.578699489994906e-06, "loss": 18.5156, "step": 9771 }, { "epoch": 0.6490004649000465, "grad_norm": 189.72364807128906, "learning_rate": 1.5786117736324349e-06, "loss": 16.6094, "step": 9772 }, { "epoch": 0.6490668791924022, "grad_norm": 121.20919036865234, "learning_rate": 1.578524050576917e-06, "loss": 17.5625, "step": 9773 }, { "epoch": 0.6491332934847579, "grad_norm": 287.75152587890625, "learning_rate": 1.578436320829366e-06, "loss": 15.0938, "step": 9774 }, { "epoch": 0.6491997077771137, "grad_norm": 640.722900390625, "learning_rate": 1.5783485843907973e-06, "loss": 22.6562, "step": 9775 }, { "epoch": 0.6492661220694693, "grad_norm": 354.88336181640625, "learning_rate": 1.5782608412622257e-06, "loss": 17.625, "step": 9776 }, { "epoch": 0.6493325363618251, "grad_norm": 125.71018981933594, "learning_rate": 1.5781730914446661e-06, "loss": 19.3125, "step": 9777 }, { "epoch": 0.6493989506541807, "grad_norm": 171.09310913085938, "learning_rate": 1.5780853349391337e-06, "loss": 18.125, "step": 9778 }, { "epoch": 0.6494653649465365, "grad_norm": 383.9112548828125, "learning_rate": 1.5779975717466432e-06, "loss": 13.4375, "step": 9779 }, { "epoch": 0.6495317792388922, "grad_norm": 461.63037109375, "learning_rate": 1.5779098018682105e-06, "loss": 27.6875, "step": 9780 }, { "epoch": 0.6495981935312479, "grad_norm": 510.0411682128906, "learning_rate": 1.57782202530485e-06, "loss": 22.4688, "step": 9781 }, { "epoch": 0.6496646078236037, "grad_norm": 201.11062622070312, "learning_rate": 1.577734242057578e-06, "loss": 14.3594, "step": 9782 }, { "epoch": 0.6497310221159593, "grad_norm": 197.15328979492188, "learning_rate": 1.5776464521274089e-06, "loss": 20.5938, "step": 9783 }, { "epoch": 0.6497974364083151, "grad_norm": 162.2890167236328, "learning_rate": 1.577558655515359e-06, "loss": 21.9844, "step": 9784 }, { "epoch": 0.6498638507006708, "grad_norm": 351.53485107421875, "learning_rate": 1.5774708522224436e-06, "loss": 19.6719, "step": 9785 }, { "epoch": 0.6499302649930265, "grad_norm": 246.04898071289062, "learning_rate": 1.5773830422496784e-06, "loss": 20.4688, "step": 9786 }, { "epoch": 0.6499966792853822, "grad_norm": 377.50347900390625, "learning_rate": 1.5772952255980792e-06, "loss": 21.2188, "step": 9787 }, { "epoch": 0.650063093577738, "grad_norm": 274.0904846191406, "learning_rate": 1.5772074022686614e-06, "loss": 19.5, "step": 9788 }, { "epoch": 0.6501295078700936, "grad_norm": 829.3387451171875, "learning_rate": 1.5771195722624415e-06, "loss": 27.125, "step": 9789 }, { "epoch": 0.6501959221624494, "grad_norm": 178.8704071044922, "learning_rate": 1.577031735580435e-06, "loss": 17.5156, "step": 9790 }, { "epoch": 0.650262336454805, "grad_norm": 172.2509307861328, "learning_rate": 1.576943892223658e-06, "loss": 17.5234, "step": 9791 }, { "epoch": 0.6503287507471608, "grad_norm": 124.38665008544922, "learning_rate": 1.576856042193127e-06, "loss": 17.75, "step": 9792 }, { "epoch": 0.6503951650395166, "grad_norm": 200.39654541015625, "learning_rate": 1.576768185489858e-06, "loss": 18.0625, "step": 9793 }, { "epoch": 0.6504615793318722, "grad_norm": 228.15406799316406, "learning_rate": 1.5766803221148673e-06, "loss": 22.2188, "step": 9794 }, { "epoch": 0.650527993624228, "grad_norm": 258.3771057128906, "learning_rate": 1.5765924520691706e-06, "loss": 19.7344, "step": 9795 }, { "epoch": 0.6505944079165836, "grad_norm": 351.54534912109375, "learning_rate": 1.5765045753537854e-06, "loss": 18.2188, "step": 9796 }, { "epoch": 0.6506608222089394, "grad_norm": 423.5411376953125, "learning_rate": 1.5764166919697275e-06, "loss": 20.0781, "step": 9797 }, { "epoch": 0.650727236501295, "grad_norm": 109.98111724853516, "learning_rate": 1.576328801918014e-06, "loss": 17.4688, "step": 9798 }, { "epoch": 0.6507936507936508, "grad_norm": 155.67459106445312, "learning_rate": 1.5762409051996608e-06, "loss": 18.4531, "step": 9799 }, { "epoch": 0.6508600650860065, "grad_norm": 241.97186279296875, "learning_rate": 1.5761530018156852e-06, "loss": 20.0156, "step": 9800 }, { "epoch": 0.6509264793783622, "grad_norm": 182.79576110839844, "learning_rate": 1.5760650917671039e-06, "loss": 18.0781, "step": 9801 }, { "epoch": 0.6509928936707179, "grad_norm": 305.449951171875, "learning_rate": 1.575977175054934e-06, "loss": 19.0469, "step": 9802 }, { "epoch": 0.6510593079630737, "grad_norm": 153.4467315673828, "learning_rate": 1.5758892516801923e-06, "loss": 14.3906, "step": 9803 }, { "epoch": 0.6511257222554294, "grad_norm": 162.19789123535156, "learning_rate": 1.5758013216438954e-06, "loss": 16.5938, "step": 9804 }, { "epoch": 0.6511921365477851, "grad_norm": 189.4874725341797, "learning_rate": 1.5757133849470613e-06, "loss": 16.6562, "step": 9805 }, { "epoch": 0.6512585508401408, "grad_norm": 175.18112182617188, "learning_rate": 1.5756254415907064e-06, "loss": 12.8125, "step": 9806 }, { "epoch": 0.6513249651324965, "grad_norm": 207.74134826660156, "learning_rate": 1.5755374915758485e-06, "loss": 20.9062, "step": 9807 }, { "epoch": 0.6513913794248523, "grad_norm": 129.65733337402344, "learning_rate": 1.5754495349035047e-06, "loss": 18.6094, "step": 9808 }, { "epoch": 0.6514577937172079, "grad_norm": 219.90115356445312, "learning_rate": 1.5753615715746924e-06, "loss": 18.9375, "step": 9809 }, { "epoch": 0.6515242080095637, "grad_norm": 243.8300323486328, "learning_rate": 1.5752736015904296e-06, "loss": 16.4062, "step": 9810 }, { "epoch": 0.6515906223019193, "grad_norm": 172.5012664794922, "learning_rate": 1.5751856249517333e-06, "loss": 15.8438, "step": 9811 }, { "epoch": 0.6516570365942751, "grad_norm": 536.76806640625, "learning_rate": 1.5750976416596212e-06, "loss": 19.5312, "step": 9812 }, { "epoch": 0.6517234508866308, "grad_norm": 174.5625762939453, "learning_rate": 1.5750096517151113e-06, "loss": 14.1562, "step": 9813 }, { "epoch": 0.6517898651789865, "grad_norm": 192.02561950683594, "learning_rate": 1.5749216551192212e-06, "loss": 17.3438, "step": 9814 }, { "epoch": 0.6518562794713423, "grad_norm": 246.4168243408203, "learning_rate": 1.574833651872969e-06, "loss": 18.1719, "step": 9815 }, { "epoch": 0.6519226937636979, "grad_norm": 176.81210327148438, "learning_rate": 1.5747456419773727e-06, "loss": 13.9375, "step": 9816 }, { "epoch": 0.6519891080560537, "grad_norm": 241.09898376464844, "learning_rate": 1.5746576254334501e-06, "loss": 13.25, "step": 9817 }, { "epoch": 0.6520555223484094, "grad_norm": 240.0722198486328, "learning_rate": 1.5745696022422194e-06, "loss": 11.4219, "step": 9818 }, { "epoch": 0.6521219366407651, "grad_norm": 235.08396911621094, "learning_rate": 1.5744815724046992e-06, "loss": 24.5625, "step": 9819 }, { "epoch": 0.6521883509331208, "grad_norm": 297.1187744140625, "learning_rate": 1.5743935359219071e-06, "loss": 19.9844, "step": 9820 }, { "epoch": 0.6522547652254765, "grad_norm": 200.64405822753906, "learning_rate": 1.574305492794862e-06, "loss": 19.125, "step": 9821 }, { "epoch": 0.6523211795178322, "grad_norm": 265.20904541015625, "learning_rate": 1.574217443024582e-06, "loss": 15.8281, "step": 9822 }, { "epoch": 0.652387593810188, "grad_norm": 338.16461181640625, "learning_rate": 1.5741293866120857e-06, "loss": 22.0156, "step": 9823 }, { "epoch": 0.6524540081025436, "grad_norm": 157.38262939453125, "learning_rate": 1.5740413235583918e-06, "loss": 17.8281, "step": 9824 }, { "epoch": 0.6525204223948994, "grad_norm": 296.6435852050781, "learning_rate": 1.5739532538645187e-06, "loss": 16.2188, "step": 9825 }, { "epoch": 0.6525868366872551, "grad_norm": 281.4869689941406, "learning_rate": 1.5738651775314855e-06, "loss": 17.0625, "step": 9826 }, { "epoch": 0.6526532509796108, "grad_norm": 191.2609405517578, "learning_rate": 1.5737770945603106e-06, "loss": 20.8438, "step": 9827 }, { "epoch": 0.6527196652719666, "grad_norm": 145.38792419433594, "learning_rate": 1.5736890049520134e-06, "loss": 14.7969, "step": 9828 }, { "epoch": 0.6527860795643222, "grad_norm": 186.9792938232422, "learning_rate": 1.5736009087076126e-06, "loss": 15.4688, "step": 9829 }, { "epoch": 0.652852493856678, "grad_norm": 257.29248046875, "learning_rate": 1.573512805828127e-06, "loss": 14.5391, "step": 9830 }, { "epoch": 0.6529189081490336, "grad_norm": 159.7154998779297, "learning_rate": 1.5734246963145762e-06, "loss": 16.3438, "step": 9831 }, { "epoch": 0.6529853224413894, "grad_norm": 192.77493286132812, "learning_rate": 1.5733365801679793e-06, "loss": 16.4688, "step": 9832 }, { "epoch": 0.6530517367337451, "grad_norm": 426.10394287109375, "learning_rate": 1.5732484573893552e-06, "loss": 18.5781, "step": 9833 }, { "epoch": 0.6531181510261008, "grad_norm": 137.33103942871094, "learning_rate": 1.5731603279797234e-06, "loss": 15.9844, "step": 9834 }, { "epoch": 0.6531845653184565, "grad_norm": 292.5760803222656, "learning_rate": 1.5730721919401035e-06, "loss": 15.7031, "step": 9835 }, { "epoch": 0.6532509796108122, "grad_norm": 118.18338775634766, "learning_rate": 1.5729840492715152e-06, "loss": 15.0, "step": 9836 }, { "epoch": 0.653317393903168, "grad_norm": 325.33428955078125, "learning_rate": 1.5728958999749775e-06, "loss": 21.2969, "step": 9837 }, { "epoch": 0.6533838081955237, "grad_norm": 302.8778991699219, "learning_rate": 1.5728077440515105e-06, "loss": 20.0859, "step": 9838 }, { "epoch": 0.6534502224878794, "grad_norm": 464.0259704589844, "learning_rate": 1.5727195815021338e-06, "loss": 33.7812, "step": 9839 }, { "epoch": 0.6535166367802351, "grad_norm": 325.7901916503906, "learning_rate": 1.5726314123278674e-06, "loss": 21.5625, "step": 9840 }, { "epoch": 0.6535830510725908, "grad_norm": 114.33917999267578, "learning_rate": 1.572543236529731e-06, "loss": 18.7812, "step": 9841 }, { "epoch": 0.6536494653649465, "grad_norm": 303.0911560058594, "learning_rate": 1.5724550541087442e-06, "loss": 18.375, "step": 9842 }, { "epoch": 0.6537158796573023, "grad_norm": 197.66879272460938, "learning_rate": 1.572366865065928e-06, "loss": 18.6562, "step": 9843 }, { "epoch": 0.6537822939496579, "grad_norm": 156.35464477539062, "learning_rate": 1.5722786694023015e-06, "loss": 15.0938, "step": 9844 }, { "epoch": 0.6538487082420137, "grad_norm": 314.769775390625, "learning_rate": 1.5721904671188855e-06, "loss": 23.125, "step": 9845 }, { "epoch": 0.6539151225343693, "grad_norm": 151.67034912109375, "learning_rate": 1.5721022582167001e-06, "loss": 14.4531, "step": 9846 }, { "epoch": 0.6539815368267251, "grad_norm": 303.9964599609375, "learning_rate": 1.5720140426967655e-06, "loss": 17.9219, "step": 9847 }, { "epoch": 0.6540479511190809, "grad_norm": 323.0395812988281, "learning_rate": 1.5719258205601027e-06, "loss": 24.3281, "step": 9848 }, { "epoch": 0.6541143654114365, "grad_norm": 237.64120483398438, "learning_rate": 1.5718375918077315e-06, "loss": 19.1562, "step": 9849 }, { "epoch": 0.6541807797037923, "grad_norm": 146.92007446289062, "learning_rate": 1.571749356440673e-06, "loss": 15.7969, "step": 9850 }, { "epoch": 0.654247193996148, "grad_norm": 564.2448120117188, "learning_rate": 1.5716611144599475e-06, "loss": 27.3438, "step": 9851 }, { "epoch": 0.6543136082885037, "grad_norm": 134.8175506591797, "learning_rate": 1.571572865866576e-06, "loss": 21.4062, "step": 9852 }, { "epoch": 0.6543800225808594, "grad_norm": 197.45159912109375, "learning_rate": 1.5714846106615789e-06, "loss": 14.625, "step": 9853 }, { "epoch": 0.6544464368732151, "grad_norm": 276.38671875, "learning_rate": 1.5713963488459773e-06, "loss": 17.9531, "step": 9854 }, { "epoch": 0.6545128511655708, "grad_norm": 250.24586486816406, "learning_rate": 1.5713080804207926e-06, "loss": 11.9219, "step": 9855 }, { "epoch": 0.6545792654579266, "grad_norm": 283.8553466796875, "learning_rate": 1.5712198053870452e-06, "loss": 22.9062, "step": 9856 }, { "epoch": 0.6546456797502822, "grad_norm": 271.5994567871094, "learning_rate": 1.5711315237457566e-06, "loss": 18.4062, "step": 9857 }, { "epoch": 0.654712094042638, "grad_norm": 175.79736328125, "learning_rate": 1.5710432354979477e-06, "loss": 17.25, "step": 9858 }, { "epoch": 0.6547785083349937, "grad_norm": 160.5143280029297, "learning_rate": 1.57095494064464e-06, "loss": 12.4219, "step": 9859 }, { "epoch": 0.6548449226273494, "grad_norm": 147.96253967285156, "learning_rate": 1.5708666391868551e-06, "loss": 15.8438, "step": 9860 }, { "epoch": 0.6549113369197052, "grad_norm": 356.1510314941406, "learning_rate": 1.5707783311256138e-06, "loss": 20.375, "step": 9861 }, { "epoch": 0.6549777512120608, "grad_norm": 262.3909912109375, "learning_rate": 1.5706900164619377e-06, "loss": 18.0781, "step": 9862 }, { "epoch": 0.6550441655044166, "grad_norm": 395.0989685058594, "learning_rate": 1.570601695196849e-06, "loss": 19.0312, "step": 9863 }, { "epoch": 0.6551105797967722, "grad_norm": 171.0846405029297, "learning_rate": 1.5705133673313683e-06, "loss": 15.7812, "step": 9864 }, { "epoch": 0.655176994089128, "grad_norm": 428.432373046875, "learning_rate": 1.5704250328665185e-06, "loss": 16.5, "step": 9865 }, { "epoch": 0.6552434083814836, "grad_norm": 291.89471435546875, "learning_rate": 1.5703366918033208e-06, "loss": 19.6094, "step": 9866 }, { "epoch": 0.6553098226738394, "grad_norm": 182.40463256835938, "learning_rate": 1.570248344142797e-06, "loss": 17.4375, "step": 9867 }, { "epoch": 0.6553762369661952, "grad_norm": 151.88002014160156, "learning_rate": 1.5701599898859692e-06, "loss": 16.7812, "step": 9868 }, { "epoch": 0.6554426512585508, "grad_norm": 199.4517364501953, "learning_rate": 1.5700716290338594e-06, "loss": 18.5312, "step": 9869 }, { "epoch": 0.6555090655509066, "grad_norm": 163.35879516601562, "learning_rate": 1.5699832615874897e-06, "loss": 18.1094, "step": 9870 }, { "epoch": 0.6555754798432623, "grad_norm": 228.1835479736328, "learning_rate": 1.5698948875478823e-06, "loss": 22.0938, "step": 9871 }, { "epoch": 0.655641894135618, "grad_norm": 259.3472900390625, "learning_rate": 1.5698065069160594e-06, "loss": 16.4531, "step": 9872 }, { "epoch": 0.6557083084279737, "grad_norm": 84.39105224609375, "learning_rate": 1.5697181196930438e-06, "loss": 15.0, "step": 9873 }, { "epoch": 0.6557747227203294, "grad_norm": 251.3291473388672, "learning_rate": 1.569629725879857e-06, "loss": 20.8438, "step": 9874 }, { "epoch": 0.6558411370126851, "grad_norm": 131.2504119873047, "learning_rate": 1.5695413254775223e-06, "loss": 17.5938, "step": 9875 }, { "epoch": 0.6559075513050409, "grad_norm": 208.8737030029297, "learning_rate": 1.569452918487062e-06, "loss": 12.8125, "step": 9876 }, { "epoch": 0.6559739655973965, "grad_norm": 257.12847900390625, "learning_rate": 1.5693645049094984e-06, "loss": 18.4688, "step": 9877 }, { "epoch": 0.6560403798897523, "grad_norm": 376.16510009765625, "learning_rate": 1.5692760847458547e-06, "loss": 19.625, "step": 9878 }, { "epoch": 0.656106794182108, "grad_norm": 288.52850341796875, "learning_rate": 1.5691876579971534e-06, "loss": 15.1562, "step": 9879 }, { "epoch": 0.6561732084744637, "grad_norm": 168.64682006835938, "learning_rate": 1.5690992246644176e-06, "loss": 17.3594, "step": 9880 }, { "epoch": 0.6562396227668195, "grad_norm": 178.73426818847656, "learning_rate": 1.5690107847486703e-06, "loss": 18.0938, "step": 9881 }, { "epoch": 0.6563060370591751, "grad_norm": 352.4501953125, "learning_rate": 1.568922338250934e-06, "loss": 16.5938, "step": 9882 }, { "epoch": 0.6563724513515309, "grad_norm": 192.6802978515625, "learning_rate": 1.5688338851722324e-06, "loss": 23.2812, "step": 9883 }, { "epoch": 0.6564388656438865, "grad_norm": 371.2537536621094, "learning_rate": 1.5687454255135883e-06, "loss": 22.75, "step": 9884 }, { "epoch": 0.6565052799362423, "grad_norm": 243.22714233398438, "learning_rate": 1.5686569592760253e-06, "loss": 18.7344, "step": 9885 }, { "epoch": 0.656571694228598, "grad_norm": 433.21337890625, "learning_rate": 1.5685684864605665e-06, "loss": 19.5625, "step": 9886 }, { "epoch": 0.6566381085209537, "grad_norm": 250.31216430664062, "learning_rate": 1.568480007068235e-06, "loss": 18.6562, "step": 9887 }, { "epoch": 0.6567045228133094, "grad_norm": 328.7663269042969, "learning_rate": 1.5683915211000546e-06, "loss": 20.2031, "step": 9888 }, { "epoch": 0.6567709371056651, "grad_norm": 143.5172119140625, "learning_rate": 1.568303028557049e-06, "loss": 14.4688, "step": 9889 }, { "epoch": 0.6568373513980209, "grad_norm": 270.1737365722656, "learning_rate": 1.5682145294402415e-06, "loss": 16.7656, "step": 9890 }, { "epoch": 0.6569037656903766, "grad_norm": 284.0362548828125, "learning_rate": 1.5681260237506562e-06, "loss": 20.8438, "step": 9891 }, { "epoch": 0.6569701799827323, "grad_norm": 235.59523010253906, "learning_rate": 1.5680375114893166e-06, "loss": 21.9844, "step": 9892 }, { "epoch": 0.657036594275088, "grad_norm": 266.3184509277344, "learning_rate": 1.5679489926572464e-06, "loss": 15.2188, "step": 9893 }, { "epoch": 0.6571030085674437, "grad_norm": 318.0582580566406, "learning_rate": 1.5678604672554701e-06, "loss": 13.1875, "step": 9894 }, { "epoch": 0.6571694228597994, "grad_norm": 290.4663391113281, "learning_rate": 1.5677719352850111e-06, "loss": 20.2812, "step": 9895 }, { "epoch": 0.6572358371521552, "grad_norm": 1398.05517578125, "learning_rate": 1.567683396746894e-06, "loss": 31.4062, "step": 9896 }, { "epoch": 0.6573022514445108, "grad_norm": 433.47479248046875, "learning_rate": 1.5675948516421421e-06, "loss": 15.3906, "step": 9897 }, { "epoch": 0.6573686657368666, "grad_norm": 318.6138000488281, "learning_rate": 1.567506299971781e-06, "loss": 19.3438, "step": 9898 }, { "epoch": 0.6574350800292222, "grad_norm": 449.0543212890625, "learning_rate": 1.5674177417368336e-06, "loss": 19.2188, "step": 9899 }, { "epoch": 0.657501494321578, "grad_norm": 390.66644287109375, "learning_rate": 1.5673291769383255e-06, "loss": 13.375, "step": 9900 }, { "epoch": 0.6575679086139338, "grad_norm": 152.90737915039062, "learning_rate": 1.5672406055772805e-06, "loss": 19.4219, "step": 9901 }, { "epoch": 0.6576343229062894, "grad_norm": 358.6860656738281, "learning_rate": 1.567152027654723e-06, "loss": 18.3594, "step": 9902 }, { "epoch": 0.6577007371986452, "grad_norm": 191.9142303466797, "learning_rate": 1.567063443171678e-06, "loss": 19.5938, "step": 9903 }, { "epoch": 0.6577671514910008, "grad_norm": 236.046142578125, "learning_rate": 1.5669748521291704e-06, "loss": 16.7656, "step": 9904 }, { "epoch": 0.6578335657833566, "grad_norm": 219.65443420410156, "learning_rate": 1.5668862545282245e-06, "loss": 14.4062, "step": 9905 }, { "epoch": 0.6578999800757123, "grad_norm": 201.20562744140625, "learning_rate": 1.5667976503698651e-06, "loss": 26.6875, "step": 9906 }, { "epoch": 0.657966394368068, "grad_norm": 121.05767822265625, "learning_rate": 1.5667090396551173e-06, "loss": 13.5938, "step": 9907 }, { "epoch": 0.6580328086604237, "grad_norm": 222.09585571289062, "learning_rate": 1.5666204223850063e-06, "loss": 20.0312, "step": 9908 }, { "epoch": 0.6580992229527795, "grad_norm": 203.34664916992188, "learning_rate": 1.5665317985605568e-06, "loss": 18.4531, "step": 9909 }, { "epoch": 0.6581656372451351, "grad_norm": 269.0628967285156, "learning_rate": 1.5664431681827944e-06, "loss": 17.25, "step": 9910 }, { "epoch": 0.6582320515374909, "grad_norm": 171.45204162597656, "learning_rate": 1.5663545312527437e-06, "loss": 12.4375, "step": 9911 }, { "epoch": 0.6582984658298466, "grad_norm": 425.8558349609375, "learning_rate": 1.5662658877714306e-06, "loss": 14.6562, "step": 9912 }, { "epoch": 0.6583648801222023, "grad_norm": 310.1818542480469, "learning_rate": 1.5661772377398804e-06, "loss": 16.75, "step": 9913 }, { "epoch": 0.658431294414558, "grad_norm": 233.69830322265625, "learning_rate": 1.5660885811591182e-06, "loss": 21.5625, "step": 9914 }, { "epoch": 0.6584977087069137, "grad_norm": 367.948974609375, "learning_rate": 1.5659999180301697e-06, "loss": 14.9375, "step": 9915 }, { "epoch": 0.6585641229992695, "grad_norm": 231.730712890625, "learning_rate": 1.5659112483540605e-06, "loss": 20.3125, "step": 9916 }, { "epoch": 0.6586305372916251, "grad_norm": 660.1256713867188, "learning_rate": 1.5658225721318164e-06, "loss": 25.125, "step": 9917 }, { "epoch": 0.6586969515839809, "grad_norm": 494.009033203125, "learning_rate": 1.5657338893644627e-06, "loss": 12.8125, "step": 9918 }, { "epoch": 0.6587633658763365, "grad_norm": 135.4127960205078, "learning_rate": 1.5656452000530261e-06, "loss": 12.2969, "step": 9919 }, { "epoch": 0.6588297801686923, "grad_norm": 963.2916870117188, "learning_rate": 1.5655565041985317e-06, "loss": 19.75, "step": 9920 }, { "epoch": 0.658896194461048, "grad_norm": 227.87600708007812, "learning_rate": 1.565467801802006e-06, "loss": 18.9844, "step": 9921 }, { "epoch": 0.6589626087534037, "grad_norm": 331.28363037109375, "learning_rate": 1.5653790928644745e-06, "loss": 20.8125, "step": 9922 }, { "epoch": 0.6590290230457595, "grad_norm": 1051.53955078125, "learning_rate": 1.5652903773869638e-06, "loss": 14.5625, "step": 9923 }, { "epoch": 0.6590954373381152, "grad_norm": 122.1436996459961, "learning_rate": 1.5652016553705e-06, "loss": 18.7656, "step": 9924 }, { "epoch": 0.6591618516304709, "grad_norm": 202.94923400878906, "learning_rate": 1.5651129268161096e-06, "loss": 13.6172, "step": 9925 }, { "epoch": 0.6592282659228266, "grad_norm": 545.5286254882812, "learning_rate": 1.5650241917248186e-06, "loss": 19.5938, "step": 9926 }, { "epoch": 0.6592946802151823, "grad_norm": 166.6086883544922, "learning_rate": 1.5649354500976535e-06, "loss": 16.375, "step": 9927 }, { "epoch": 0.659361094507538, "grad_norm": 125.03443145751953, "learning_rate": 1.5648467019356408e-06, "loss": 13.4219, "step": 9928 }, { "epoch": 0.6594275087998938, "grad_norm": 124.22410583496094, "learning_rate": 1.5647579472398072e-06, "loss": 14.9844, "step": 9929 }, { "epoch": 0.6594939230922494, "grad_norm": 314.27203369140625, "learning_rate": 1.5646691860111794e-06, "loss": 15.4609, "step": 9930 }, { "epoch": 0.6595603373846052, "grad_norm": 217.2017059326172, "learning_rate": 1.564580418250784e-06, "loss": 18.5469, "step": 9931 }, { "epoch": 0.6596267516769608, "grad_norm": 149.05322265625, "learning_rate": 1.564491643959648e-06, "loss": 20.5938, "step": 9932 }, { "epoch": 0.6596931659693166, "grad_norm": 225.8419189453125, "learning_rate": 1.564402863138798e-06, "loss": 16.7812, "step": 9933 }, { "epoch": 0.6597595802616724, "grad_norm": 260.7765808105469, "learning_rate": 1.5643140757892612e-06, "loss": 23.7812, "step": 9934 }, { "epoch": 0.659825994554028, "grad_norm": 211.59475708007812, "learning_rate": 1.5642252819120646e-06, "loss": 21.5156, "step": 9935 }, { "epoch": 0.6598924088463838, "grad_norm": 538.6500854492188, "learning_rate": 1.5641364815082349e-06, "loss": 16.75, "step": 9936 }, { "epoch": 0.6599588231387394, "grad_norm": 279.3988952636719, "learning_rate": 1.5640476745788002e-06, "loss": 17.9531, "step": 9937 }, { "epoch": 0.6600252374310952, "grad_norm": 239.22084045410156, "learning_rate": 1.5639588611247872e-06, "loss": 19.6562, "step": 9938 }, { "epoch": 0.6600916517234509, "grad_norm": 577.343994140625, "learning_rate": 1.563870041147223e-06, "loss": 18.3125, "step": 9939 }, { "epoch": 0.6601580660158066, "grad_norm": 253.08836364746094, "learning_rate": 1.5637812146471357e-06, "loss": 23.375, "step": 9940 }, { "epoch": 0.6602244803081623, "grad_norm": 240.3740997314453, "learning_rate": 1.563692381625552e-06, "loss": 21.2969, "step": 9941 }, { "epoch": 0.660290894600518, "grad_norm": 148.20901489257812, "learning_rate": 1.5636035420835002e-06, "loss": 22.1484, "step": 9942 }, { "epoch": 0.6603573088928737, "grad_norm": 156.81529235839844, "learning_rate": 1.5635146960220073e-06, "loss": 17.0625, "step": 9943 }, { "epoch": 0.6604237231852295, "grad_norm": 948.133056640625, "learning_rate": 1.5634258434421014e-06, "loss": 15.375, "step": 9944 }, { "epoch": 0.6604901374775852, "grad_norm": 191.0654296875, "learning_rate": 1.5633369843448106e-06, "loss": 23.0312, "step": 9945 }, { "epoch": 0.6605565517699409, "grad_norm": 413.32073974609375, "learning_rate": 1.5632481187311622e-06, "loss": 23.375, "step": 9946 }, { "epoch": 0.6606229660622966, "grad_norm": 215.9058074951172, "learning_rate": 1.5631592466021844e-06, "loss": 14.6719, "step": 9947 }, { "epoch": 0.6606893803546523, "grad_norm": 635.2412719726562, "learning_rate": 1.5630703679589047e-06, "loss": 22.4531, "step": 9948 }, { "epoch": 0.6607557946470081, "grad_norm": 124.35591125488281, "learning_rate": 1.5629814828023524e-06, "loss": 13.0625, "step": 9949 }, { "epoch": 0.6608222089393637, "grad_norm": 203.73724365234375, "learning_rate": 1.5628925911335545e-06, "loss": 18.2031, "step": 9950 }, { "epoch": 0.6608886232317195, "grad_norm": 394.2263488769531, "learning_rate": 1.5628036929535397e-06, "loss": 16.9688, "step": 9951 }, { "epoch": 0.6609550375240751, "grad_norm": 114.67505645751953, "learning_rate": 1.5627147882633366e-06, "loss": 13.8359, "step": 9952 }, { "epoch": 0.6610214518164309, "grad_norm": 183.89710998535156, "learning_rate": 1.562625877063973e-06, "loss": 17.0938, "step": 9953 }, { "epoch": 0.6610878661087866, "grad_norm": 415.3482666015625, "learning_rate": 1.5625369593564776e-06, "loss": 19.5, "step": 9954 }, { "epoch": 0.6611542804011423, "grad_norm": 181.49282836914062, "learning_rate": 1.5624480351418794e-06, "loss": 23.9531, "step": 9955 }, { "epoch": 0.6612206946934981, "grad_norm": 194.42234802246094, "learning_rate": 1.5623591044212066e-06, "loss": 21.0938, "step": 9956 }, { "epoch": 0.6612871089858537, "grad_norm": 231.63511657714844, "learning_rate": 1.5622701671954877e-06, "loss": 17.7188, "step": 9957 }, { "epoch": 0.6613535232782095, "grad_norm": 360.99786376953125, "learning_rate": 1.5621812234657519e-06, "loss": 28.3594, "step": 9958 }, { "epoch": 0.6614199375705652, "grad_norm": 181.36085510253906, "learning_rate": 1.5620922732330277e-06, "loss": 13.4531, "step": 9959 }, { "epoch": 0.6614863518629209, "grad_norm": 334.67523193359375, "learning_rate": 1.5620033164983444e-06, "loss": 23.5938, "step": 9960 }, { "epoch": 0.6615527661552766, "grad_norm": 157.58409118652344, "learning_rate": 1.5619143532627309e-06, "loss": 19.9688, "step": 9961 }, { "epoch": 0.6616191804476323, "grad_norm": 254.57460021972656, "learning_rate": 1.5618253835272158e-06, "loss": 16.7812, "step": 9962 }, { "epoch": 0.661685594739988, "grad_norm": 151.64598083496094, "learning_rate": 1.5617364072928289e-06, "loss": 23.5625, "step": 9963 }, { "epoch": 0.6617520090323438, "grad_norm": 142.0664520263672, "learning_rate": 1.561647424560599e-06, "loss": 15.2344, "step": 9964 }, { "epoch": 0.6618184233246994, "grad_norm": 160.15614318847656, "learning_rate": 1.561558435331556e-06, "loss": 17.9062, "step": 9965 }, { "epoch": 0.6618848376170552, "grad_norm": 624.0867919921875, "learning_rate": 1.5614694396067282e-06, "loss": 36.375, "step": 9966 }, { "epoch": 0.661951251909411, "grad_norm": 250.29405212402344, "learning_rate": 1.5613804373871462e-06, "loss": 16.4844, "step": 9967 }, { "epoch": 0.6620176662017666, "grad_norm": 186.27755737304688, "learning_rate": 1.5612914286738389e-06, "loss": 23.3594, "step": 9968 }, { "epoch": 0.6620840804941224, "grad_norm": 1142.595947265625, "learning_rate": 1.5612024134678357e-06, "loss": 15.2031, "step": 9969 }, { "epoch": 0.662150494786478, "grad_norm": 395.7835998535156, "learning_rate": 1.5611133917701666e-06, "loss": 15.3438, "step": 9970 }, { "epoch": 0.6622169090788338, "grad_norm": 213.9214324951172, "learning_rate": 1.5610243635818616e-06, "loss": 21.6406, "step": 9971 }, { "epoch": 0.6622833233711894, "grad_norm": 527.693359375, "learning_rate": 1.5609353289039504e-06, "loss": 25.9219, "step": 9972 }, { "epoch": 0.6623497376635452, "grad_norm": 132.62869262695312, "learning_rate": 1.5608462877374625e-06, "loss": 12.6094, "step": 9973 }, { "epoch": 0.6624161519559009, "grad_norm": 178.65701293945312, "learning_rate": 1.560757240083428e-06, "loss": 22.5156, "step": 9974 }, { "epoch": 0.6624825662482566, "grad_norm": 159.0954132080078, "learning_rate": 1.560668185942877e-06, "loss": 17.1719, "step": 9975 }, { "epoch": 0.6625489805406123, "grad_norm": 300.5513916015625, "learning_rate": 1.5605791253168404e-06, "loss": 21.8438, "step": 9976 }, { "epoch": 0.662615394832968, "grad_norm": 277.8545227050781, "learning_rate": 1.5604900582063475e-06, "loss": 20.2656, "step": 9977 }, { "epoch": 0.6626818091253238, "grad_norm": 292.7765197753906, "learning_rate": 1.5604009846124284e-06, "loss": 17.4688, "step": 9978 }, { "epoch": 0.6627482234176795, "grad_norm": 178.16091918945312, "learning_rate": 1.5603119045361144e-06, "loss": 13.7656, "step": 9979 }, { "epoch": 0.6628146377100352, "grad_norm": 228.07427978515625, "learning_rate": 1.5602228179784348e-06, "loss": 26.75, "step": 9980 }, { "epoch": 0.6628810520023909, "grad_norm": 387.3455810546875, "learning_rate": 1.5601337249404211e-06, "loss": 13.7188, "step": 9981 }, { "epoch": 0.6629474662947467, "grad_norm": 271.78076171875, "learning_rate": 1.5600446254231034e-06, "loss": 19.8906, "step": 9982 }, { "epoch": 0.6630138805871023, "grad_norm": 137.7449951171875, "learning_rate": 1.5599555194275126e-06, "loss": 16.4375, "step": 9983 }, { "epoch": 0.6630802948794581, "grad_norm": 475.55126953125, "learning_rate": 1.5598664069546788e-06, "loss": 23.125, "step": 9984 }, { "epoch": 0.6631467091718137, "grad_norm": 391.0137939453125, "learning_rate": 1.5597772880056335e-06, "loss": 31.2812, "step": 9985 }, { "epoch": 0.6632131234641695, "grad_norm": 192.0690155029297, "learning_rate": 1.5596881625814071e-06, "loss": 17.1406, "step": 9986 }, { "epoch": 0.6632795377565252, "grad_norm": 657.927978515625, "learning_rate": 1.5595990306830312e-06, "loss": 19.7188, "step": 9987 }, { "epoch": 0.6633459520488809, "grad_norm": 184.2012939453125, "learning_rate": 1.559509892311536e-06, "loss": 19.0938, "step": 9988 }, { "epoch": 0.6634123663412367, "grad_norm": 360.6007995605469, "learning_rate": 1.5594207474679531e-06, "loss": 20.1562, "step": 9989 }, { "epoch": 0.6634787806335923, "grad_norm": 320.859130859375, "learning_rate": 1.5593315961533135e-06, "loss": 26.1875, "step": 9990 }, { "epoch": 0.6635451949259481, "grad_norm": 143.329833984375, "learning_rate": 1.5592424383686487e-06, "loss": 18.2344, "step": 9991 }, { "epoch": 0.6636116092183038, "grad_norm": 162.22450256347656, "learning_rate": 1.55915327411499e-06, "loss": 18.1406, "step": 9992 }, { "epoch": 0.6636780235106595, "grad_norm": 166.4829559326172, "learning_rate": 1.5590641033933682e-06, "loss": 14.9844, "step": 9993 }, { "epoch": 0.6637444378030152, "grad_norm": 273.4130554199219, "learning_rate": 1.5589749262048154e-06, "loss": 17.0469, "step": 9994 }, { "epoch": 0.6638108520953709, "grad_norm": 476.0928955078125, "learning_rate": 1.558885742550363e-06, "loss": 35.4688, "step": 9995 }, { "epoch": 0.6638772663877266, "grad_norm": 88.8766860961914, "learning_rate": 1.5587965524310428e-06, "loss": 14.1406, "step": 9996 }, { "epoch": 0.6639436806800824, "grad_norm": 131.17666625976562, "learning_rate": 1.5587073558478862e-06, "loss": 15.5312, "step": 9997 }, { "epoch": 0.664010094972438, "grad_norm": 625.8451538085938, "learning_rate": 1.5586181528019248e-06, "loss": 13.0156, "step": 9998 }, { "epoch": 0.6640765092647938, "grad_norm": 218.75196838378906, "learning_rate": 1.5585289432941911e-06, "loss": 15.6406, "step": 9999 }, { "epoch": 0.6641429235571495, "grad_norm": 834.0540771484375, "learning_rate": 1.5584397273257163e-06, "loss": 19.1719, "step": 10000 }, { "epoch": 0.6642093378495052, "grad_norm": 191.55079650878906, "learning_rate": 1.5583505048975333e-06, "loss": 16.5312, "step": 10001 }, { "epoch": 0.664275752141861, "grad_norm": 171.58120727539062, "learning_rate": 1.5582612760106734e-06, "loss": 15.4062, "step": 10002 }, { "epoch": 0.6643421664342166, "grad_norm": 180.82522583007812, "learning_rate": 1.5581720406661685e-06, "loss": 16.3125, "step": 10003 }, { "epoch": 0.6644085807265724, "grad_norm": 241.57070922851562, "learning_rate": 1.5580827988650515e-06, "loss": 23.4062, "step": 10004 }, { "epoch": 0.664474995018928, "grad_norm": 336.3379211425781, "learning_rate": 1.5579935506083548e-06, "loss": 16.9219, "step": 10005 }, { "epoch": 0.6645414093112838, "grad_norm": 346.604736328125, "learning_rate": 1.5579042958971103e-06, "loss": 15.6719, "step": 10006 }, { "epoch": 0.6646078236036395, "grad_norm": 176.1680145263672, "learning_rate": 1.5578150347323507e-06, "loss": 14.5, "step": 10007 }, { "epoch": 0.6646742378959952, "grad_norm": 305.7244873046875, "learning_rate": 1.5577257671151083e-06, "loss": 15.7188, "step": 10008 }, { "epoch": 0.6647406521883509, "grad_norm": 362.8313903808594, "learning_rate": 1.5576364930464156e-06, "loss": 18.1406, "step": 10009 }, { "epoch": 0.6648070664807066, "grad_norm": 370.41845703125, "learning_rate": 1.5575472125273058e-06, "loss": 18.6094, "step": 10010 }, { "epoch": 0.6648734807730624, "grad_norm": 145.43128967285156, "learning_rate": 1.5574579255588114e-06, "loss": 16.5, "step": 10011 }, { "epoch": 0.6649398950654181, "grad_norm": 277.86126708984375, "learning_rate": 1.557368632141965e-06, "loss": 21.0469, "step": 10012 }, { "epoch": 0.6650063093577738, "grad_norm": 178.57708740234375, "learning_rate": 1.5572793322777998e-06, "loss": 15.4062, "step": 10013 }, { "epoch": 0.6650727236501295, "grad_norm": 199.7473907470703, "learning_rate": 1.5571900259673485e-06, "loss": 16.0156, "step": 10014 }, { "epoch": 0.6651391379424852, "grad_norm": 177.87774658203125, "learning_rate": 1.5571007132116446e-06, "loss": 17.9531, "step": 10015 }, { "epoch": 0.6652055522348409, "grad_norm": 187.69662475585938, "learning_rate": 1.5570113940117203e-06, "loss": 15.7969, "step": 10016 }, { "epoch": 0.6652719665271967, "grad_norm": 143.4358367919922, "learning_rate": 1.5569220683686098e-06, "loss": 20.0156, "step": 10017 }, { "epoch": 0.6653383808195523, "grad_norm": 379.6969909667969, "learning_rate": 1.5568327362833457e-06, "loss": 22.1875, "step": 10018 }, { "epoch": 0.6654047951119081, "grad_norm": 352.44439697265625, "learning_rate": 1.556743397756962e-06, "loss": 22.0, "step": 10019 }, { "epoch": 0.6654712094042639, "grad_norm": 181.8494415283203, "learning_rate": 1.5566540527904914e-06, "loss": 20.3438, "step": 10020 }, { "epoch": 0.6655376236966195, "grad_norm": 159.14303588867188, "learning_rate": 1.5565647013849676e-06, "loss": 28.625, "step": 10021 }, { "epoch": 0.6656040379889753, "grad_norm": 110.90147399902344, "learning_rate": 1.5564753435414246e-06, "loss": 20.1094, "step": 10022 }, { "epoch": 0.6656704522813309, "grad_norm": 151.2217254638672, "learning_rate": 1.5563859792608956e-06, "loss": 19.375, "step": 10023 }, { "epoch": 0.6657368665736867, "grad_norm": 272.6567077636719, "learning_rate": 1.5562966085444146e-06, "loss": 16.8594, "step": 10024 }, { "epoch": 0.6658032808660423, "grad_norm": 297.7215576171875, "learning_rate": 1.5562072313930145e-06, "loss": 18.7344, "step": 10025 }, { "epoch": 0.6658696951583981, "grad_norm": 332.0426940917969, "learning_rate": 1.5561178478077307e-06, "loss": 18.2656, "step": 10026 }, { "epoch": 0.6659361094507538, "grad_norm": 393.7131042480469, "learning_rate": 1.5560284577895959e-06, "loss": 18.7969, "step": 10027 }, { "epoch": 0.6660025237431095, "grad_norm": 154.45130920410156, "learning_rate": 1.5559390613396447e-06, "loss": 15.9219, "step": 10028 }, { "epoch": 0.6660689380354652, "grad_norm": 283.64849853515625, "learning_rate": 1.5558496584589107e-06, "loss": 25.6875, "step": 10029 }, { "epoch": 0.666135352327821, "grad_norm": 253.78353881835938, "learning_rate": 1.5557602491484287e-06, "loss": 19.0312, "step": 10030 }, { "epoch": 0.6662017666201767, "grad_norm": 170.148193359375, "learning_rate": 1.5556708334092323e-06, "loss": 16.9531, "step": 10031 }, { "epoch": 0.6662681809125324, "grad_norm": 296.504150390625, "learning_rate": 1.5555814112423564e-06, "loss": 21.9219, "step": 10032 }, { "epoch": 0.6663345952048881, "grad_norm": 183.7997283935547, "learning_rate": 1.5554919826488352e-06, "loss": 15.0156, "step": 10033 }, { "epoch": 0.6664010094972438, "grad_norm": 187.0491943359375, "learning_rate": 1.5554025476297027e-06, "loss": 15.8203, "step": 10034 }, { "epoch": 0.6664674237895996, "grad_norm": 123.61165618896484, "learning_rate": 1.555313106185994e-06, "loss": 17.1406, "step": 10035 }, { "epoch": 0.6665338380819552, "grad_norm": 279.3951721191406, "learning_rate": 1.5552236583187432e-06, "loss": 22.9844, "step": 10036 }, { "epoch": 0.666600252374311, "grad_norm": 215.1683807373047, "learning_rate": 1.5551342040289858e-06, "loss": 27.1094, "step": 10037 }, { "epoch": 0.6666666666666666, "grad_norm": 263.1885986328125, "learning_rate": 1.5550447433177559e-06, "loss": 22.7188, "step": 10038 }, { "epoch": 0.6667330809590224, "grad_norm": 221.2145233154297, "learning_rate": 1.5549552761860882e-06, "loss": 18.7188, "step": 10039 }, { "epoch": 0.666799495251378, "grad_norm": 392.9295654296875, "learning_rate": 1.5548658026350178e-06, "loss": 12.1875, "step": 10040 }, { "epoch": 0.6668659095437338, "grad_norm": 351.1698303222656, "learning_rate": 1.55477632266558e-06, "loss": 18.7031, "step": 10041 }, { "epoch": 0.6669323238360896, "grad_norm": 180.0093994140625, "learning_rate": 1.5546868362788096e-06, "loss": 19.9688, "step": 10042 }, { "epoch": 0.6669987381284452, "grad_norm": 141.8533935546875, "learning_rate": 1.5545973434757419e-06, "loss": 18.2188, "step": 10043 }, { "epoch": 0.667065152420801, "grad_norm": 141.3152313232422, "learning_rate": 1.5545078442574116e-06, "loss": 19.2344, "step": 10044 }, { "epoch": 0.6671315667131567, "grad_norm": 164.446533203125, "learning_rate": 1.5544183386248544e-06, "loss": 18.9062, "step": 10045 }, { "epoch": 0.6671979810055124, "grad_norm": 409.9600830078125, "learning_rate": 1.5543288265791055e-06, "loss": 19.9062, "step": 10046 }, { "epoch": 0.6672643952978681, "grad_norm": 208.81805419921875, "learning_rate": 1.5542393081212004e-06, "loss": 20.9688, "step": 10047 }, { "epoch": 0.6673308095902238, "grad_norm": 144.19775390625, "learning_rate": 1.5541497832521748e-06, "loss": 18.4531, "step": 10048 }, { "epoch": 0.6673972238825795, "grad_norm": 174.65982055664062, "learning_rate": 1.5540602519730638e-06, "loss": 15.2188, "step": 10049 }, { "epoch": 0.6674636381749353, "grad_norm": 244.5115509033203, "learning_rate": 1.5539707142849033e-06, "loss": 19.7812, "step": 10050 }, { "epoch": 0.6675300524672909, "grad_norm": 220.6749725341797, "learning_rate": 1.553881170188729e-06, "loss": 15.6719, "step": 10051 }, { "epoch": 0.6675964667596467, "grad_norm": 154.8092498779297, "learning_rate": 1.5537916196855769e-06, "loss": 14.2812, "step": 10052 }, { "epoch": 0.6676628810520024, "grad_norm": 131.817138671875, "learning_rate": 1.5537020627764824e-06, "loss": 19.9844, "step": 10053 }, { "epoch": 0.6677292953443581, "grad_norm": 141.06549072265625, "learning_rate": 1.5536124994624823e-06, "loss": 13.3906, "step": 10054 }, { "epoch": 0.6677957096367139, "grad_norm": 141.72337341308594, "learning_rate": 1.5535229297446115e-06, "loss": 14.2969, "step": 10055 }, { "epoch": 0.6678621239290695, "grad_norm": 241.24957275390625, "learning_rate": 1.5534333536239069e-06, "loss": 15.5156, "step": 10056 }, { "epoch": 0.6679285382214253, "grad_norm": 167.40408325195312, "learning_rate": 1.5533437711014041e-06, "loss": 16.125, "step": 10057 }, { "epoch": 0.6679949525137809, "grad_norm": 188.6153564453125, "learning_rate": 1.55325418217814e-06, "loss": 21.125, "step": 10058 }, { "epoch": 0.6680613668061367, "grad_norm": 94.54966735839844, "learning_rate": 1.5531645868551501e-06, "loss": 14.5469, "step": 10059 }, { "epoch": 0.6681277810984924, "grad_norm": 302.3052978515625, "learning_rate": 1.5530749851334716e-06, "loss": 21.5938, "step": 10060 }, { "epoch": 0.6681941953908481, "grad_norm": 214.9390411376953, "learning_rate": 1.5529853770141404e-06, "loss": 18.1875, "step": 10061 }, { "epoch": 0.6682606096832038, "grad_norm": 124.44916534423828, "learning_rate": 1.5528957624981933e-06, "loss": 13.3906, "step": 10062 }, { "epoch": 0.6683270239755595, "grad_norm": 170.14300537109375, "learning_rate": 1.5528061415866668e-06, "loss": 15.0312, "step": 10063 }, { "epoch": 0.6683934382679153, "grad_norm": 483.76080322265625, "learning_rate": 1.5527165142805978e-06, "loss": 26.9844, "step": 10064 }, { "epoch": 0.668459852560271, "grad_norm": 204.59909057617188, "learning_rate": 1.5526268805810227e-06, "loss": 22.4688, "step": 10065 }, { "epoch": 0.6685262668526267, "grad_norm": 234.5443572998047, "learning_rate": 1.5525372404889785e-06, "loss": 18.2812, "step": 10066 }, { "epoch": 0.6685926811449824, "grad_norm": 121.69280242919922, "learning_rate": 1.552447594005502e-06, "loss": 13.1094, "step": 10067 }, { "epoch": 0.6686590954373381, "grad_norm": 198.8877716064453, "learning_rate": 1.5523579411316306e-06, "loss": 19.1406, "step": 10068 }, { "epoch": 0.6687255097296938, "grad_norm": 210.18252563476562, "learning_rate": 1.5522682818684008e-06, "loss": 15.6094, "step": 10069 }, { "epoch": 0.6687919240220496, "grad_norm": 206.34872436523438, "learning_rate": 1.55217861621685e-06, "loss": 17.4688, "step": 10070 }, { "epoch": 0.6688583383144052, "grad_norm": 782.8905639648438, "learning_rate": 1.5520889441780157e-06, "loss": 27.0938, "step": 10071 }, { "epoch": 0.668924752606761, "grad_norm": 203.96725463867188, "learning_rate": 1.5519992657529347e-06, "loss": 18.3906, "step": 10072 }, { "epoch": 0.6689911668991166, "grad_norm": 422.5316467285156, "learning_rate": 1.5519095809426438e-06, "loss": 19.6875, "step": 10073 }, { "epoch": 0.6690575811914724, "grad_norm": 119.96308898925781, "learning_rate": 1.551819889748182e-06, "loss": 17.8906, "step": 10074 }, { "epoch": 0.6691239954838282, "grad_norm": 151.50428771972656, "learning_rate": 1.5517301921705856e-06, "loss": 19.3906, "step": 10075 }, { "epoch": 0.6691904097761838, "grad_norm": 255.8685760498047, "learning_rate": 1.5516404882108924e-06, "loss": 17.1406, "step": 10076 }, { "epoch": 0.6692568240685396, "grad_norm": 424.14752197265625, "learning_rate": 1.5515507778701402e-06, "loss": 19.4375, "step": 10077 }, { "epoch": 0.6693232383608952, "grad_norm": 413.8880310058594, "learning_rate": 1.5514610611493666e-06, "loss": 21.8594, "step": 10078 }, { "epoch": 0.669389652653251, "grad_norm": 269.800048828125, "learning_rate": 1.5513713380496095e-06, "loss": 16.5781, "step": 10079 }, { "epoch": 0.6694560669456067, "grad_norm": 196.1444091796875, "learning_rate": 1.551281608571907e-06, "loss": 22.6406, "step": 10080 }, { "epoch": 0.6695224812379624, "grad_norm": 308.9009094238281, "learning_rate": 1.5511918727172963e-06, "loss": 16.2812, "step": 10081 }, { "epoch": 0.6695888955303181, "grad_norm": 175.58218383789062, "learning_rate": 1.5511021304868158e-06, "loss": 13.5625, "step": 10082 }, { "epoch": 0.6696553098226738, "grad_norm": 208.62376403808594, "learning_rate": 1.5510123818815038e-06, "loss": 17.3594, "step": 10083 }, { "epoch": 0.6697217241150295, "grad_norm": 146.42088317871094, "learning_rate": 1.5509226269023984e-06, "loss": 14.375, "step": 10084 }, { "epoch": 0.6697881384073853, "grad_norm": 222.2330780029297, "learning_rate": 1.5508328655505377e-06, "loss": 16.0156, "step": 10085 }, { "epoch": 0.669854552699741, "grad_norm": 141.90528869628906, "learning_rate": 1.55074309782696e-06, "loss": 14.6562, "step": 10086 }, { "epoch": 0.6699209669920967, "grad_norm": 282.6688232421875, "learning_rate": 1.5506533237327034e-06, "loss": 18.375, "step": 10087 }, { "epoch": 0.6699873812844525, "grad_norm": 152.21144104003906, "learning_rate": 1.5505635432688072e-06, "loss": 16.5156, "step": 10088 }, { "epoch": 0.6700537955768081, "grad_norm": 152.2460174560547, "learning_rate": 1.5504737564363093e-06, "loss": 15.1562, "step": 10089 }, { "epoch": 0.6701202098691639, "grad_norm": 316.4474792480469, "learning_rate": 1.550383963236248e-06, "loss": 18.625, "step": 10090 }, { "epoch": 0.6701866241615195, "grad_norm": 194.89791870117188, "learning_rate": 1.5502941636696627e-06, "loss": 22.0234, "step": 10091 }, { "epoch": 0.6702530384538753, "grad_norm": 110.81127166748047, "learning_rate": 1.5502043577375919e-06, "loss": 14.7188, "step": 10092 }, { "epoch": 0.670319452746231, "grad_norm": 267.0482177734375, "learning_rate": 1.5501145454410743e-06, "loss": 16.0781, "step": 10093 }, { "epoch": 0.6703858670385867, "grad_norm": 132.1894989013672, "learning_rate": 1.550024726781149e-06, "loss": 18.0, "step": 10094 }, { "epoch": 0.6704522813309424, "grad_norm": 449.07275390625, "learning_rate": 1.5499349017588545e-06, "loss": 16.9062, "step": 10095 }, { "epoch": 0.6705186956232981, "grad_norm": 100.4688491821289, "learning_rate": 1.5498450703752303e-06, "loss": 16.2656, "step": 10096 }, { "epoch": 0.6705851099156539, "grad_norm": 252.56748962402344, "learning_rate": 1.5497552326313155e-06, "loss": 22.5, "step": 10097 }, { "epoch": 0.6706515242080096, "grad_norm": 128.5994415283203, "learning_rate": 1.549665388528149e-06, "loss": 14.5156, "step": 10098 }, { "epoch": 0.6707179385003653, "grad_norm": 99.72848510742188, "learning_rate": 1.5495755380667703e-06, "loss": 12.6094, "step": 10099 }, { "epoch": 0.670784352792721, "grad_norm": 227.90757751464844, "learning_rate": 1.5494856812482188e-06, "loss": 17.2656, "step": 10100 }, { "epoch": 0.6708507670850767, "grad_norm": 416.54022216796875, "learning_rate": 1.5493958180735335e-06, "loss": 25.1875, "step": 10101 }, { "epoch": 0.6709171813774324, "grad_norm": 229.33717346191406, "learning_rate": 1.5493059485437546e-06, "loss": 12.9688, "step": 10102 }, { "epoch": 0.6709835956697882, "grad_norm": 145.65379333496094, "learning_rate": 1.5492160726599208e-06, "loss": 16.4375, "step": 10103 }, { "epoch": 0.6710500099621438, "grad_norm": 219.62548828125, "learning_rate": 1.5491261904230727e-06, "loss": 27.5625, "step": 10104 }, { "epoch": 0.6711164242544996, "grad_norm": 688.4150390625, "learning_rate": 1.5490363018342487e-06, "loss": 20.25, "step": 10105 }, { "epoch": 0.6711828385468552, "grad_norm": 384.46142578125, "learning_rate": 1.54894640689449e-06, "loss": 18.5469, "step": 10106 }, { "epoch": 0.671249252839211, "grad_norm": 298.4068298339844, "learning_rate": 1.5488565056048355e-06, "loss": 19.4531, "step": 10107 }, { "epoch": 0.6713156671315668, "grad_norm": 277.5964660644531, "learning_rate": 1.5487665979663256e-06, "loss": 17.8984, "step": 10108 }, { "epoch": 0.6713820814239224, "grad_norm": 431.1894836425781, "learning_rate": 1.54867668398e-06, "loss": 20.9844, "step": 10109 }, { "epoch": 0.6714484957162782, "grad_norm": 577.0825805664062, "learning_rate": 1.548586763646899e-06, "loss": 20.5781, "step": 10110 }, { "epoch": 0.6715149100086338, "grad_norm": 581.399169921875, "learning_rate": 1.5484968369680623e-06, "loss": 15.8438, "step": 10111 }, { "epoch": 0.6715813243009896, "grad_norm": 356.9783630371094, "learning_rate": 1.5484069039445311e-06, "loss": 16.5781, "step": 10112 }, { "epoch": 0.6716477385933453, "grad_norm": 181.3880157470703, "learning_rate": 1.5483169645773448e-06, "loss": 21.1094, "step": 10113 }, { "epoch": 0.671714152885701, "grad_norm": 341.1371765136719, "learning_rate": 1.5482270188675439e-06, "loss": 18.3906, "step": 10114 }, { "epoch": 0.6717805671780567, "grad_norm": 274.9233093261719, "learning_rate": 1.548137066816169e-06, "loss": 15.3125, "step": 10115 }, { "epoch": 0.6718469814704124, "grad_norm": 218.79153442382812, "learning_rate": 1.5480471084242606e-06, "loss": 14.5312, "step": 10116 }, { "epoch": 0.6719133957627681, "grad_norm": 109.08780670166016, "learning_rate": 1.5479571436928595e-06, "loss": 12.0312, "step": 10117 }, { "epoch": 0.6719798100551239, "grad_norm": 411.94403076171875, "learning_rate": 1.5478671726230056e-06, "loss": 15.5781, "step": 10118 }, { "epoch": 0.6720462243474796, "grad_norm": 250.32058715820312, "learning_rate": 1.5477771952157408e-06, "loss": 23.2969, "step": 10119 }, { "epoch": 0.6721126386398353, "grad_norm": 224.302734375, "learning_rate": 1.547687211472105e-06, "loss": 20.9688, "step": 10120 }, { "epoch": 0.672179052932191, "grad_norm": 150.14865112304688, "learning_rate": 1.547597221393139e-06, "loss": 12.9453, "step": 10121 }, { "epoch": 0.6722454672245467, "grad_norm": 107.720947265625, "learning_rate": 1.5475072249798845e-06, "loss": 12.0469, "step": 10122 }, { "epoch": 0.6723118815169025, "grad_norm": 141.40492248535156, "learning_rate": 1.5474172222333821e-06, "loss": 16.75, "step": 10123 }, { "epoch": 0.6723782958092581, "grad_norm": 357.2862243652344, "learning_rate": 1.547327213154673e-06, "loss": 18.75, "step": 10124 }, { "epoch": 0.6724447101016139, "grad_norm": 212.27032470703125, "learning_rate": 1.5472371977447983e-06, "loss": 16.8125, "step": 10125 }, { "epoch": 0.6725111243939695, "grad_norm": 164.74501037597656, "learning_rate": 1.5471471760047989e-06, "loss": 15.4844, "step": 10126 }, { "epoch": 0.6725775386863253, "grad_norm": 505.2762145996094, "learning_rate": 1.5470571479357171e-06, "loss": 22.5781, "step": 10127 }, { "epoch": 0.672643952978681, "grad_norm": 277.0459289550781, "learning_rate": 1.546967113538593e-06, "loss": 21.0625, "step": 10128 }, { "epoch": 0.6727103672710367, "grad_norm": 307.44195556640625, "learning_rate": 1.5468770728144692e-06, "loss": 14.7344, "step": 10129 }, { "epoch": 0.6727767815633925, "grad_norm": 186.22256469726562, "learning_rate": 1.5467870257643865e-06, "loss": 18.2812, "step": 10130 }, { "epoch": 0.6728431958557481, "grad_norm": 291.7510681152344, "learning_rate": 1.546696972389387e-06, "loss": 17.0938, "step": 10131 }, { "epoch": 0.6729096101481039, "grad_norm": 218.33082580566406, "learning_rate": 1.5466069126905119e-06, "loss": 21.7656, "step": 10132 }, { "epoch": 0.6729760244404596, "grad_norm": 412.1130676269531, "learning_rate": 1.5465168466688035e-06, "loss": 18.6094, "step": 10133 }, { "epoch": 0.6730424387328153, "grad_norm": 291.0715637207031, "learning_rate": 1.5464267743253033e-06, "loss": 18.7656, "step": 10134 }, { "epoch": 0.673108853025171, "grad_norm": 294.8126220703125, "learning_rate": 1.5463366956610528e-06, "loss": 18.3125, "step": 10135 }, { "epoch": 0.6731752673175267, "grad_norm": 219.15260314941406, "learning_rate": 1.546246610677095e-06, "loss": 28.1406, "step": 10136 }, { "epoch": 0.6732416816098824, "grad_norm": 174.78726196289062, "learning_rate": 1.5461565193744712e-06, "loss": 14.3438, "step": 10137 }, { "epoch": 0.6733080959022382, "grad_norm": 827.0575561523438, "learning_rate": 1.546066421754224e-06, "loss": 12.7812, "step": 10138 }, { "epoch": 0.6733745101945938, "grad_norm": 230.60765075683594, "learning_rate": 1.5459763178173947e-06, "loss": 19.9375, "step": 10139 }, { "epoch": 0.6734409244869496, "grad_norm": 210.51980590820312, "learning_rate": 1.5458862075650268e-06, "loss": 12.1406, "step": 10140 }, { "epoch": 0.6735073387793054, "grad_norm": 347.55517578125, "learning_rate": 1.5457960909981615e-06, "loss": 15.3438, "step": 10141 }, { "epoch": 0.673573753071661, "grad_norm": 423.56439208984375, "learning_rate": 1.5457059681178421e-06, "loss": 12.0312, "step": 10142 }, { "epoch": 0.6736401673640168, "grad_norm": 154.8494415283203, "learning_rate": 1.5456158389251107e-06, "loss": 21.0781, "step": 10143 }, { "epoch": 0.6737065816563724, "grad_norm": 258.9452819824219, "learning_rate": 1.54552570342101e-06, "loss": 14.5312, "step": 10144 }, { "epoch": 0.6737729959487282, "grad_norm": 228.9175567626953, "learning_rate": 1.5454355616065819e-06, "loss": 19.75, "step": 10145 }, { "epoch": 0.6738394102410838, "grad_norm": 357.12860107421875, "learning_rate": 1.5453454134828705e-06, "loss": 18.5312, "step": 10146 }, { "epoch": 0.6739058245334396, "grad_norm": 190.5509490966797, "learning_rate": 1.5452552590509175e-06, "loss": 15.2969, "step": 10147 }, { "epoch": 0.6739722388257953, "grad_norm": 169.7257080078125, "learning_rate": 1.5451650983117662e-06, "loss": 17.8594, "step": 10148 }, { "epoch": 0.674038653118151, "grad_norm": 171.36024475097656, "learning_rate": 1.5450749312664593e-06, "loss": 14.3438, "step": 10149 }, { "epoch": 0.6741050674105067, "grad_norm": 214.779541015625, "learning_rate": 1.5449847579160398e-06, "loss": 19.2812, "step": 10150 }, { "epoch": 0.6741714817028625, "grad_norm": 197.36622619628906, "learning_rate": 1.544894578261551e-06, "loss": 17.5156, "step": 10151 }, { "epoch": 0.6742378959952182, "grad_norm": 250.17745971679688, "learning_rate": 1.544804392304036e-06, "loss": 22.5938, "step": 10152 }, { "epoch": 0.6743043102875739, "grad_norm": 186.94801330566406, "learning_rate": 1.5447142000445377e-06, "loss": 23.4375, "step": 10153 }, { "epoch": 0.6743707245799296, "grad_norm": 170.75672912597656, "learning_rate": 1.5446240014840996e-06, "loss": 19.5625, "step": 10154 }, { "epoch": 0.6744371388722853, "grad_norm": 192.88552856445312, "learning_rate": 1.5445337966237654e-06, "loss": 11.8281, "step": 10155 }, { "epoch": 0.6745035531646411, "grad_norm": 233.2757568359375, "learning_rate": 1.5444435854645783e-06, "loss": 14.2266, "step": 10156 }, { "epoch": 0.6745699674569967, "grad_norm": 235.15574645996094, "learning_rate": 1.5443533680075814e-06, "loss": 18.8047, "step": 10157 }, { "epoch": 0.6746363817493525, "grad_norm": 191.48715209960938, "learning_rate": 1.5442631442538188e-06, "loss": 15.0938, "step": 10158 }, { "epoch": 0.6747027960417081, "grad_norm": 144.97572326660156, "learning_rate": 1.544172914204334e-06, "loss": 15.0312, "step": 10159 }, { "epoch": 0.6747692103340639, "grad_norm": 425.4132995605469, "learning_rate": 1.5440826778601706e-06, "loss": 18.9219, "step": 10160 }, { "epoch": 0.6748356246264196, "grad_norm": 256.71136474609375, "learning_rate": 1.5439924352223726e-06, "loss": 18.1875, "step": 10161 }, { "epoch": 0.6749020389187753, "grad_norm": 309.0330810546875, "learning_rate": 1.5439021862919837e-06, "loss": 18.5781, "step": 10162 }, { "epoch": 0.6749684532111311, "grad_norm": 182.01951599121094, "learning_rate": 1.543811931070048e-06, "loss": 17.4062, "step": 10163 }, { "epoch": 0.6750348675034867, "grad_norm": 251.28512573242188, "learning_rate": 1.5437216695576097e-06, "loss": 14.2656, "step": 10164 }, { "epoch": 0.6751012817958425, "grad_norm": 147.65049743652344, "learning_rate": 1.5436314017557126e-06, "loss": 14.7969, "step": 10165 }, { "epoch": 0.6751676960881982, "grad_norm": 156.77459716796875, "learning_rate": 1.5435411276654006e-06, "loss": 14.3281, "step": 10166 }, { "epoch": 0.6752341103805539, "grad_norm": 185.61199951171875, "learning_rate": 1.5434508472877185e-06, "loss": 16.7656, "step": 10167 }, { "epoch": 0.6753005246729096, "grad_norm": 170.44192504882812, "learning_rate": 1.5433605606237104e-06, "loss": 19.375, "step": 10168 }, { "epoch": 0.6753669389652653, "grad_norm": 141.6604461669922, "learning_rate": 1.5432702676744205e-06, "loss": 16.9844, "step": 10169 }, { "epoch": 0.675433353257621, "grad_norm": 560.969482421875, "learning_rate": 1.5431799684408934e-06, "loss": 17.2188, "step": 10170 }, { "epoch": 0.6754997675499768, "grad_norm": 155.18441772460938, "learning_rate": 1.5430896629241738e-06, "loss": 16.4219, "step": 10171 }, { "epoch": 0.6755661818423325, "grad_norm": 303.9031066894531, "learning_rate": 1.5429993511253062e-06, "loss": 23.375, "step": 10172 }, { "epoch": 0.6756325961346882, "grad_norm": 198.37583923339844, "learning_rate": 1.542909033045335e-06, "loss": 20.4375, "step": 10173 }, { "epoch": 0.675699010427044, "grad_norm": 250.6036376953125, "learning_rate": 1.542818708685305e-06, "loss": 16.8906, "step": 10174 }, { "epoch": 0.6757654247193996, "grad_norm": 114.39898681640625, "learning_rate": 1.5427283780462616e-06, "loss": 17.1562, "step": 10175 }, { "epoch": 0.6758318390117554, "grad_norm": 364.3153076171875, "learning_rate": 1.542638041129249e-06, "loss": 16.625, "step": 10176 }, { "epoch": 0.675898253304111, "grad_norm": 283.6446838378906, "learning_rate": 1.5425476979353128e-06, "loss": 14.4375, "step": 10177 }, { "epoch": 0.6759646675964668, "grad_norm": 153.8675537109375, "learning_rate": 1.5424573484654975e-06, "loss": 15.0938, "step": 10178 }, { "epoch": 0.6760310818888224, "grad_norm": 198.349365234375, "learning_rate": 1.542366992720848e-06, "loss": 21.4531, "step": 10179 }, { "epoch": 0.6760974961811782, "grad_norm": 264.2647399902344, "learning_rate": 1.5422766307024105e-06, "loss": 26.7188, "step": 10180 }, { "epoch": 0.6761639104735339, "grad_norm": 156.64027404785156, "learning_rate": 1.5421862624112292e-06, "loss": 13.2109, "step": 10181 }, { "epoch": 0.6762303247658896, "grad_norm": 261.3200378417969, "learning_rate": 1.5420958878483501e-06, "loss": 13.7422, "step": 10182 }, { "epoch": 0.6762967390582454, "grad_norm": 825.5073852539062, "learning_rate": 1.5420055070148182e-06, "loss": 17.0938, "step": 10183 }, { "epoch": 0.676363153350601, "grad_norm": 102.82752990722656, "learning_rate": 1.541915119911679e-06, "loss": 13.0156, "step": 10184 }, { "epoch": 0.6764295676429568, "grad_norm": 746.7655029296875, "learning_rate": 1.5418247265399783e-06, "loss": 30.75, "step": 10185 }, { "epoch": 0.6764959819353125, "grad_norm": 359.6729736328125, "learning_rate": 1.541734326900762e-06, "loss": 21.25, "step": 10186 }, { "epoch": 0.6765623962276682, "grad_norm": 325.598876953125, "learning_rate": 1.5416439209950749e-06, "loss": 18.375, "step": 10187 }, { "epoch": 0.6766288105200239, "grad_norm": 197.28700256347656, "learning_rate": 1.5415535088239633e-06, "loss": 15.4062, "step": 10188 }, { "epoch": 0.6766952248123796, "grad_norm": 506.1547546386719, "learning_rate": 1.5414630903884734e-06, "loss": 14.0, "step": 10189 }, { "epoch": 0.6767616391047353, "grad_norm": 376.13916015625, "learning_rate": 1.5413726656896502e-06, "loss": 24.875, "step": 10190 }, { "epoch": 0.6768280533970911, "grad_norm": 136.2812042236328, "learning_rate": 1.5412822347285403e-06, "loss": 14.4531, "step": 10191 }, { "epoch": 0.6768944676894467, "grad_norm": 150.16677856445312, "learning_rate": 1.5411917975061898e-06, "loss": 15.3438, "step": 10192 }, { "epoch": 0.6769608819818025, "grad_norm": 193.59530639648438, "learning_rate": 1.5411013540236443e-06, "loss": 15.9531, "step": 10193 }, { "epoch": 0.6770272962741583, "grad_norm": 295.76361083984375, "learning_rate": 1.5410109042819505e-06, "loss": 13.7188, "step": 10194 }, { "epoch": 0.6770937105665139, "grad_norm": 300.02484130859375, "learning_rate": 1.5409204482821547e-06, "loss": 16.7344, "step": 10195 }, { "epoch": 0.6771601248588697, "grad_norm": 245.11463928222656, "learning_rate": 1.540829986025303e-06, "loss": 18.6094, "step": 10196 }, { "epoch": 0.6772265391512253, "grad_norm": 504.3264465332031, "learning_rate": 1.540739517512442e-06, "loss": 19.7812, "step": 10197 }, { "epoch": 0.6772929534435811, "grad_norm": 154.4155731201172, "learning_rate": 1.5406490427446177e-06, "loss": 14.7344, "step": 10198 }, { "epoch": 0.6773593677359367, "grad_norm": 142.152099609375, "learning_rate": 1.5405585617228774e-06, "loss": 13.5312, "step": 10199 }, { "epoch": 0.6774257820282925, "grad_norm": 220.43971252441406, "learning_rate": 1.5404680744482671e-06, "loss": 16.5469, "step": 10200 }, { "epoch": 0.6774921963206482, "grad_norm": 448.13739013671875, "learning_rate": 1.540377580921834e-06, "loss": 17.3438, "step": 10201 }, { "epoch": 0.6775586106130039, "grad_norm": 171.7911834716797, "learning_rate": 1.5402870811446245e-06, "loss": 26.6094, "step": 10202 }, { "epoch": 0.6776250249053596, "grad_norm": 296.8846130371094, "learning_rate": 1.5401965751176858e-06, "loss": 19.1562, "step": 10203 }, { "epoch": 0.6776914391977154, "grad_norm": 182.24533081054688, "learning_rate": 1.5401060628420645e-06, "loss": 14.9062, "step": 10204 }, { "epoch": 0.6777578534900711, "grad_norm": 312.5530700683594, "learning_rate": 1.5400155443188077e-06, "loss": 24.5, "step": 10205 }, { "epoch": 0.6778242677824268, "grad_norm": 201.90191650390625, "learning_rate": 1.5399250195489621e-06, "loss": 17.7969, "step": 10206 }, { "epoch": 0.6778906820747825, "grad_norm": 297.86541748046875, "learning_rate": 1.5398344885335757e-06, "loss": 17.2656, "step": 10207 }, { "epoch": 0.6779570963671382, "grad_norm": 292.1385192871094, "learning_rate": 1.5397439512736951e-06, "loss": 19.3906, "step": 10208 }, { "epoch": 0.678023510659494, "grad_norm": 320.2330322265625, "learning_rate": 1.5396534077703675e-06, "loss": 21.6094, "step": 10209 }, { "epoch": 0.6780899249518496, "grad_norm": 387.0292663574219, "learning_rate": 1.5395628580246408e-06, "loss": 23.25, "step": 10210 }, { "epoch": 0.6781563392442054, "grad_norm": 332.86724853515625, "learning_rate": 1.539472302037562e-06, "loss": 24.4531, "step": 10211 }, { "epoch": 0.678222753536561, "grad_norm": 174.392333984375, "learning_rate": 1.5393817398101785e-06, "loss": 13.3594, "step": 10212 }, { "epoch": 0.6782891678289168, "grad_norm": 159.9552001953125, "learning_rate": 1.5392911713435382e-06, "loss": 15.2812, "step": 10213 }, { "epoch": 0.6783555821212724, "grad_norm": 143.2604217529297, "learning_rate": 1.5392005966386887e-06, "loss": 14.4062, "step": 10214 }, { "epoch": 0.6784219964136282, "grad_norm": 207.0104522705078, "learning_rate": 1.5391100156966774e-06, "loss": 18.1719, "step": 10215 }, { "epoch": 0.678488410705984, "grad_norm": 572.4171142578125, "learning_rate": 1.5390194285185523e-06, "loss": 24.5625, "step": 10216 }, { "epoch": 0.6785548249983396, "grad_norm": 204.65504455566406, "learning_rate": 1.5389288351053615e-06, "loss": 14.5781, "step": 10217 }, { "epoch": 0.6786212392906954, "grad_norm": 246.48406982421875, "learning_rate": 1.5388382354581526e-06, "loss": 21.8438, "step": 10218 }, { "epoch": 0.678687653583051, "grad_norm": 534.32470703125, "learning_rate": 1.5387476295779736e-06, "loss": 15.4375, "step": 10219 }, { "epoch": 0.6787540678754068, "grad_norm": 161.1791534423828, "learning_rate": 1.5386570174658728e-06, "loss": 18.5312, "step": 10220 }, { "epoch": 0.6788204821677625, "grad_norm": 115.02339935302734, "learning_rate": 1.538566399122898e-06, "loss": 14.0625, "step": 10221 }, { "epoch": 0.6788868964601182, "grad_norm": 157.72007751464844, "learning_rate": 1.538475774550098e-06, "loss": 22.1719, "step": 10222 }, { "epoch": 0.6789533107524739, "grad_norm": 288.9508361816406, "learning_rate": 1.5383851437485204e-06, "loss": 16.7656, "step": 10223 }, { "epoch": 0.6790197250448297, "grad_norm": 205.28492736816406, "learning_rate": 1.5382945067192141e-06, "loss": 23.5, "step": 10224 }, { "epoch": 0.6790861393371853, "grad_norm": 137.95852661132812, "learning_rate": 1.5382038634632276e-06, "loss": 15.8594, "step": 10225 }, { "epoch": 0.6791525536295411, "grad_norm": 198.2830810546875, "learning_rate": 1.5381132139816088e-06, "loss": 15.625, "step": 10226 }, { "epoch": 0.6792189679218968, "grad_norm": 227.4158477783203, "learning_rate": 1.5380225582754064e-06, "loss": 16.4219, "step": 10227 }, { "epoch": 0.6792853822142525, "grad_norm": 331.52569580078125, "learning_rate": 1.5379318963456697e-06, "loss": 24.7812, "step": 10228 }, { "epoch": 0.6793517965066083, "grad_norm": 188.62057495117188, "learning_rate": 1.5378412281934473e-06, "loss": 15.6719, "step": 10229 }, { "epoch": 0.6794182107989639, "grad_norm": 280.0057067871094, "learning_rate": 1.5377505538197869e-06, "loss": 16.125, "step": 10230 }, { "epoch": 0.6794846250913197, "grad_norm": 186.85003662109375, "learning_rate": 1.5376598732257389e-06, "loss": 19.7969, "step": 10231 }, { "epoch": 0.6795510393836753, "grad_norm": 153.82151794433594, "learning_rate": 1.537569186412351e-06, "loss": 15.2656, "step": 10232 }, { "epoch": 0.6796174536760311, "grad_norm": 268.3845520019531, "learning_rate": 1.5374784933806728e-06, "loss": 19.5625, "step": 10233 }, { "epoch": 0.6796838679683868, "grad_norm": 105.87015533447266, "learning_rate": 1.5373877941317535e-06, "loss": 15.3906, "step": 10234 }, { "epoch": 0.6797502822607425, "grad_norm": 1916.2242431640625, "learning_rate": 1.5372970886666421e-06, "loss": 23.1875, "step": 10235 }, { "epoch": 0.6798166965530982, "grad_norm": 294.4922180175781, "learning_rate": 1.5372063769863878e-06, "loss": 18.1406, "step": 10236 }, { "epoch": 0.6798831108454539, "grad_norm": 85.0626449584961, "learning_rate": 1.53711565909204e-06, "loss": 10.7578, "step": 10237 }, { "epoch": 0.6799495251378097, "grad_norm": 182.46435546875, "learning_rate": 1.5370249349846475e-06, "loss": 13.7812, "step": 10238 }, { "epoch": 0.6800159394301654, "grad_norm": 169.86561584472656, "learning_rate": 1.5369342046652608e-06, "loss": 17.4062, "step": 10239 }, { "epoch": 0.6800823537225211, "grad_norm": 184.54808044433594, "learning_rate": 1.5368434681349285e-06, "loss": 17.3281, "step": 10240 }, { "epoch": 0.6801487680148768, "grad_norm": 187.9472198486328, "learning_rate": 1.5367527253947006e-06, "loss": 21.8203, "step": 10241 }, { "epoch": 0.6802151823072325, "grad_norm": 214.98480224609375, "learning_rate": 1.5366619764456268e-06, "loss": 12.8281, "step": 10242 }, { "epoch": 0.6802815965995882, "grad_norm": 187.47897338867188, "learning_rate": 1.5365712212887564e-06, "loss": 13.6719, "step": 10243 }, { "epoch": 0.680348010891944, "grad_norm": 293.17620849609375, "learning_rate": 1.5364804599251397e-06, "loss": 20.2812, "step": 10244 }, { "epoch": 0.6804144251842996, "grad_norm": 218.0859375, "learning_rate": 1.5363896923558264e-06, "loss": 17.5156, "step": 10245 }, { "epoch": 0.6804808394766554, "grad_norm": 252.126220703125, "learning_rate": 1.5362989185818664e-06, "loss": 15.0469, "step": 10246 }, { "epoch": 0.680547253769011, "grad_norm": 217.6980438232422, "learning_rate": 1.53620813860431e-06, "loss": 13.6562, "step": 10247 }, { "epoch": 0.6806136680613668, "grad_norm": 328.3011169433594, "learning_rate": 1.5361173524242066e-06, "loss": 16.8438, "step": 10248 }, { "epoch": 0.6806800823537226, "grad_norm": 344.9989318847656, "learning_rate": 1.536026560042607e-06, "loss": 15.1875, "step": 10249 }, { "epoch": 0.6807464966460782, "grad_norm": 199.794677734375, "learning_rate": 1.5359357614605615e-06, "loss": 20.7344, "step": 10250 }, { "epoch": 0.680812910938434, "grad_norm": 99.16400909423828, "learning_rate": 1.5358449566791199e-06, "loss": 16.5625, "step": 10251 }, { "epoch": 0.6808793252307896, "grad_norm": 110.59369659423828, "learning_rate": 1.5357541456993328e-06, "loss": 13.7812, "step": 10252 }, { "epoch": 0.6809457395231454, "grad_norm": 339.80322265625, "learning_rate": 1.5356633285222512e-06, "loss": 23.0312, "step": 10253 }, { "epoch": 0.6810121538155011, "grad_norm": 336.2764892578125, "learning_rate": 1.5355725051489243e-06, "loss": 20.5, "step": 10254 }, { "epoch": 0.6810785681078568, "grad_norm": 146.5393829345703, "learning_rate": 1.5354816755804038e-06, "loss": 13.25, "step": 10255 }, { "epoch": 0.6811449824002125, "grad_norm": 265.6423645019531, "learning_rate": 1.5353908398177403e-06, "loss": 19.2969, "step": 10256 }, { "epoch": 0.6812113966925682, "grad_norm": 190.89097595214844, "learning_rate": 1.535299997861984e-06, "loss": 25.6875, "step": 10257 }, { "epoch": 0.6812778109849239, "grad_norm": 195.38548278808594, "learning_rate": 1.535209149714186e-06, "loss": 13.5938, "step": 10258 }, { "epoch": 0.6813442252772797, "grad_norm": 158.1507110595703, "learning_rate": 1.535118295375397e-06, "loss": 19.3438, "step": 10259 }, { "epoch": 0.6814106395696354, "grad_norm": 626.2368774414062, "learning_rate": 1.5350274348466688e-06, "loss": 15.1406, "step": 10260 }, { "epoch": 0.6814770538619911, "grad_norm": 254.46287536621094, "learning_rate": 1.5349365681290514e-06, "loss": 22.7344, "step": 10261 }, { "epoch": 0.6815434681543469, "grad_norm": 225.72988891601562, "learning_rate": 1.534845695223596e-06, "loss": 20.2656, "step": 10262 }, { "epoch": 0.6816098824467025, "grad_norm": 118.698486328125, "learning_rate": 1.534754816131354e-06, "loss": 15.5938, "step": 10263 }, { "epoch": 0.6816762967390583, "grad_norm": 277.08984375, "learning_rate": 1.5346639308533771e-06, "loss": 24.2812, "step": 10264 }, { "epoch": 0.6817427110314139, "grad_norm": 293.1763916015625, "learning_rate": 1.534573039390716e-06, "loss": 21.4688, "step": 10265 }, { "epoch": 0.6818091253237697, "grad_norm": 156.26536560058594, "learning_rate": 1.5344821417444219e-06, "loss": 16.6875, "step": 10266 }, { "epoch": 0.6818755396161253, "grad_norm": 468.9638977050781, "learning_rate": 1.5343912379155466e-06, "loss": 14.4844, "step": 10267 }, { "epoch": 0.6819419539084811, "grad_norm": 462.4765625, "learning_rate": 1.5343003279051418e-06, "loss": 22.6406, "step": 10268 }, { "epoch": 0.6820083682008368, "grad_norm": 206.02487182617188, "learning_rate": 1.5342094117142588e-06, "loss": 20.9219, "step": 10269 }, { "epoch": 0.6820747824931925, "grad_norm": 458.7149658203125, "learning_rate": 1.5341184893439497e-06, "loss": 18.9844, "step": 10270 }, { "epoch": 0.6821411967855483, "grad_norm": 174.90530395507812, "learning_rate": 1.5340275607952658e-06, "loss": 21.2188, "step": 10271 }, { "epoch": 0.682207611077904, "grad_norm": 176.60459899902344, "learning_rate": 1.5339366260692588e-06, "loss": 18.2031, "step": 10272 }, { "epoch": 0.6822740253702597, "grad_norm": 168.6627197265625, "learning_rate": 1.5338456851669807e-06, "loss": 14.25, "step": 10273 }, { "epoch": 0.6823404396626154, "grad_norm": 342.5443420410156, "learning_rate": 1.5337547380894837e-06, "loss": 20.3125, "step": 10274 }, { "epoch": 0.6824068539549711, "grad_norm": 227.73031616210938, "learning_rate": 1.5336637848378197e-06, "loss": 21.7188, "step": 10275 }, { "epoch": 0.6824732682473268, "grad_norm": 371.961181640625, "learning_rate": 1.5335728254130408e-06, "loss": 19.1406, "step": 10276 }, { "epoch": 0.6825396825396826, "grad_norm": 291.025634765625, "learning_rate": 1.5334818598161992e-06, "loss": 16.1562, "step": 10277 }, { "epoch": 0.6826060968320382, "grad_norm": 153.56802368164062, "learning_rate": 1.5333908880483471e-06, "loss": 18.7969, "step": 10278 }, { "epoch": 0.682672511124394, "grad_norm": 299.3913879394531, "learning_rate": 1.5332999101105364e-06, "loss": 17.9062, "step": 10279 }, { "epoch": 0.6827389254167496, "grad_norm": 288.4456481933594, "learning_rate": 1.53320892600382e-06, "loss": 21.9375, "step": 10280 }, { "epoch": 0.6828053397091054, "grad_norm": 135.6194305419922, "learning_rate": 1.5331179357292504e-06, "loss": 16.3281, "step": 10281 }, { "epoch": 0.6828717540014612, "grad_norm": 140.1094207763672, "learning_rate": 1.5330269392878798e-06, "loss": 21.7344, "step": 10282 }, { "epoch": 0.6829381682938168, "grad_norm": 122.07479095458984, "learning_rate": 1.5329359366807611e-06, "loss": 14.0781, "step": 10283 }, { "epoch": 0.6830045825861726, "grad_norm": 445.5099182128906, "learning_rate": 1.5328449279089466e-06, "loss": 22.7812, "step": 10284 }, { "epoch": 0.6830709968785282, "grad_norm": 413.1791687011719, "learning_rate": 1.5327539129734895e-06, "loss": 18.7656, "step": 10285 }, { "epoch": 0.683137411170884, "grad_norm": 111.96309661865234, "learning_rate": 1.5326628918754421e-06, "loss": 14.2188, "step": 10286 }, { "epoch": 0.6832038254632397, "grad_norm": 915.2639770507812, "learning_rate": 1.5325718646158575e-06, "loss": 15.4062, "step": 10287 }, { "epoch": 0.6832702397555954, "grad_norm": 170.51702880859375, "learning_rate": 1.5324808311957887e-06, "loss": 15.125, "step": 10288 }, { "epoch": 0.6833366540479511, "grad_norm": 204.9369354248047, "learning_rate": 1.5323897916162887e-06, "loss": 19.0156, "step": 10289 }, { "epoch": 0.6834030683403068, "grad_norm": 209.544189453125, "learning_rate": 1.5322987458784104e-06, "loss": 22.8125, "step": 10290 }, { "epoch": 0.6834694826326625, "grad_norm": 750.0045166015625, "learning_rate": 1.5322076939832074e-06, "loss": 16.3672, "step": 10291 }, { "epoch": 0.6835358969250183, "grad_norm": 317.6976623535156, "learning_rate": 1.5321166359317326e-06, "loss": 13.9062, "step": 10292 }, { "epoch": 0.683602311217374, "grad_norm": 185.5758819580078, "learning_rate": 1.5320255717250397e-06, "loss": 29.375, "step": 10293 }, { "epoch": 0.6836687255097297, "grad_norm": 219.7129364013672, "learning_rate": 1.5319345013641814e-06, "loss": 24.0156, "step": 10294 }, { "epoch": 0.6837351398020854, "grad_norm": 101.7071533203125, "learning_rate": 1.5318434248502115e-06, "loss": 15.4219, "step": 10295 }, { "epoch": 0.6838015540944411, "grad_norm": 132.25515747070312, "learning_rate": 1.5317523421841835e-06, "loss": 21.7812, "step": 10296 }, { "epoch": 0.6838679683867969, "grad_norm": 163.63226318359375, "learning_rate": 1.5316612533671513e-06, "loss": 13.1016, "step": 10297 }, { "epoch": 0.6839343826791525, "grad_norm": 314.3456726074219, "learning_rate": 1.5315701584001683e-06, "loss": 19.5469, "step": 10298 }, { "epoch": 0.6840007969715083, "grad_norm": 142.35894775390625, "learning_rate": 1.531479057284288e-06, "loss": 15.4375, "step": 10299 }, { "epoch": 0.6840672112638639, "grad_norm": 272.9272766113281, "learning_rate": 1.5313879500205647e-06, "loss": 20.8125, "step": 10300 }, { "epoch": 0.6841336255562197, "grad_norm": 170.49595642089844, "learning_rate": 1.5312968366100516e-06, "loss": 18.4531, "step": 10301 }, { "epoch": 0.6842000398485754, "grad_norm": 214.31610107421875, "learning_rate": 1.5312057170538033e-06, "loss": 14.9062, "step": 10302 }, { "epoch": 0.6842664541409311, "grad_norm": 196.24961853027344, "learning_rate": 1.5311145913528739e-06, "loss": 18.0469, "step": 10303 }, { "epoch": 0.6843328684332869, "grad_norm": 273.3241271972656, "learning_rate": 1.5310234595083164e-06, "loss": 14.1719, "step": 10304 }, { "epoch": 0.6843992827256425, "grad_norm": 166.69842529296875, "learning_rate": 1.5309323215211862e-06, "loss": 15.0781, "step": 10305 }, { "epoch": 0.6844656970179983, "grad_norm": 662.8309326171875, "learning_rate": 1.530841177392537e-06, "loss": 16.7969, "step": 10306 }, { "epoch": 0.684532111310354, "grad_norm": 707.2380981445312, "learning_rate": 1.5307500271234232e-06, "loss": 16.0156, "step": 10307 }, { "epoch": 0.6845985256027097, "grad_norm": 723.9490966796875, "learning_rate": 1.530658870714899e-06, "loss": 21.1094, "step": 10308 }, { "epoch": 0.6846649398950654, "grad_norm": 328.0939025878906, "learning_rate": 1.5305677081680193e-06, "loss": 14.2188, "step": 10309 }, { "epoch": 0.6847313541874211, "grad_norm": 304.8825988769531, "learning_rate": 1.530476539483838e-06, "loss": 28.0938, "step": 10310 }, { "epoch": 0.6847977684797768, "grad_norm": 175.15780639648438, "learning_rate": 1.5303853646634102e-06, "loss": 16.5781, "step": 10311 }, { "epoch": 0.6848641827721326, "grad_norm": 250.31005859375, "learning_rate": 1.5302941837077902e-06, "loss": 16.4844, "step": 10312 }, { "epoch": 0.6849305970644882, "grad_norm": 168.8011474609375, "learning_rate": 1.5302029966180328e-06, "loss": 16.7656, "step": 10313 }, { "epoch": 0.684997011356844, "grad_norm": 140.11534118652344, "learning_rate": 1.5301118033951933e-06, "loss": 16.5781, "step": 10314 }, { "epoch": 0.6850634256491998, "grad_norm": 214.0883026123047, "learning_rate": 1.5300206040403257e-06, "loss": 19.6406, "step": 10315 }, { "epoch": 0.6851298399415554, "grad_norm": 217.1322021484375, "learning_rate": 1.5299293985544852e-06, "loss": 17.4219, "step": 10316 }, { "epoch": 0.6851962542339112, "grad_norm": 293.7062072753906, "learning_rate": 1.5298381869387274e-06, "loss": 15.3828, "step": 10317 }, { "epoch": 0.6852626685262668, "grad_norm": 203.16818237304688, "learning_rate": 1.5297469691941069e-06, "loss": 14.6016, "step": 10318 }, { "epoch": 0.6853290828186226, "grad_norm": 395.6341247558594, "learning_rate": 1.5296557453216788e-06, "loss": 20.9531, "step": 10319 }, { "epoch": 0.6853954971109782, "grad_norm": 143.0717315673828, "learning_rate": 1.5295645153224985e-06, "loss": 14.625, "step": 10320 }, { "epoch": 0.685461911403334, "grad_norm": 473.3367614746094, "learning_rate": 1.5294732791976213e-06, "loss": 16.1406, "step": 10321 }, { "epoch": 0.6855283256956897, "grad_norm": 193.06654357910156, "learning_rate": 1.5293820369481026e-06, "loss": 18.75, "step": 10322 }, { "epoch": 0.6855947399880454, "grad_norm": 601.8270874023438, "learning_rate": 1.5292907885749975e-06, "loss": 17.3281, "step": 10323 }, { "epoch": 0.6856611542804012, "grad_norm": 86.54644012451172, "learning_rate": 1.5291995340793618e-06, "loss": 13.2812, "step": 10324 }, { "epoch": 0.6857275685727569, "grad_norm": 123.6210708618164, "learning_rate": 1.5291082734622514e-06, "loss": 14.3281, "step": 10325 }, { "epoch": 0.6857939828651126, "grad_norm": 185.80299377441406, "learning_rate": 1.5290170067247211e-06, "loss": 20.8594, "step": 10326 }, { "epoch": 0.6858603971574683, "grad_norm": 150.5104522705078, "learning_rate": 1.5289257338678273e-06, "loss": 20.5938, "step": 10327 }, { "epoch": 0.685926811449824, "grad_norm": 209.55084228515625, "learning_rate": 1.528834454892626e-06, "loss": 18.1562, "step": 10328 }, { "epoch": 0.6859932257421797, "grad_norm": 163.27976989746094, "learning_rate": 1.5287431698001722e-06, "loss": 15.5469, "step": 10329 }, { "epoch": 0.6860596400345355, "grad_norm": 107.01873779296875, "learning_rate": 1.5286518785915225e-06, "loss": 15.5, "step": 10330 }, { "epoch": 0.6861260543268911, "grad_norm": 159.36495971679688, "learning_rate": 1.5285605812677326e-06, "loss": 18.6875, "step": 10331 }, { "epoch": 0.6861924686192469, "grad_norm": 215.7950897216797, "learning_rate": 1.5284692778298588e-06, "loss": 17.9844, "step": 10332 }, { "epoch": 0.6862588829116025, "grad_norm": 132.51226806640625, "learning_rate": 1.5283779682789571e-06, "loss": 16.875, "step": 10333 }, { "epoch": 0.6863252972039583, "grad_norm": 260.31475830078125, "learning_rate": 1.5282866526160836e-06, "loss": 12.0312, "step": 10334 }, { "epoch": 0.6863917114963141, "grad_norm": 121.82301330566406, "learning_rate": 1.5281953308422951e-06, "loss": 15.9531, "step": 10335 }, { "epoch": 0.6864581257886697, "grad_norm": 145.82749938964844, "learning_rate": 1.5281040029586473e-06, "loss": 18.0156, "step": 10336 }, { "epoch": 0.6865245400810255, "grad_norm": 300.7505187988281, "learning_rate": 1.5280126689661967e-06, "loss": 13.8281, "step": 10337 }, { "epoch": 0.6865909543733811, "grad_norm": 468.39837646484375, "learning_rate": 1.5279213288660002e-06, "loss": 14.2109, "step": 10338 }, { "epoch": 0.6866573686657369, "grad_norm": 540.4429321289062, "learning_rate": 1.5278299826591146e-06, "loss": 18.2188, "step": 10339 }, { "epoch": 0.6867237829580926, "grad_norm": 218.8403778076172, "learning_rate": 1.5277386303465955e-06, "loss": 15.7656, "step": 10340 }, { "epoch": 0.6867901972504483, "grad_norm": 135.40940856933594, "learning_rate": 1.5276472719295001e-06, "loss": 16.9062, "step": 10341 }, { "epoch": 0.686856611542804, "grad_norm": 567.0235595703125, "learning_rate": 1.5275559074088861e-06, "loss": 14.3594, "step": 10342 }, { "epoch": 0.6869230258351597, "grad_norm": 395.11968994140625, "learning_rate": 1.5274645367858093e-06, "loss": 22.4844, "step": 10343 }, { "epoch": 0.6869894401275154, "grad_norm": 165.5177001953125, "learning_rate": 1.5273731600613267e-06, "loss": 15.9062, "step": 10344 }, { "epoch": 0.6870558544198712, "grad_norm": 287.94464111328125, "learning_rate": 1.5272817772364955e-06, "loss": 18.9375, "step": 10345 }, { "epoch": 0.6871222687122269, "grad_norm": 236.4655303955078, "learning_rate": 1.5271903883123729e-06, "loss": 13.7188, "step": 10346 }, { "epoch": 0.6871886830045826, "grad_norm": 171.89707946777344, "learning_rate": 1.5270989932900156e-06, "loss": 17.3125, "step": 10347 }, { "epoch": 0.6872550972969383, "grad_norm": 182.04742431640625, "learning_rate": 1.5270075921704812e-06, "loss": 17.5156, "step": 10348 }, { "epoch": 0.687321511589294, "grad_norm": 320.21331787109375, "learning_rate": 1.5269161849548272e-06, "loss": 18.9844, "step": 10349 }, { "epoch": 0.6873879258816498, "grad_norm": 115.88239288330078, "learning_rate": 1.5268247716441102e-06, "loss": 13.875, "step": 10350 }, { "epoch": 0.6874543401740054, "grad_norm": 315.8597106933594, "learning_rate": 1.5267333522393882e-06, "loss": 25.875, "step": 10351 }, { "epoch": 0.6875207544663612, "grad_norm": 424.62408447265625, "learning_rate": 1.5266419267417186e-06, "loss": 18.2344, "step": 10352 }, { "epoch": 0.6875871687587168, "grad_norm": 236.33616638183594, "learning_rate": 1.5265504951521587e-06, "loss": 20.8125, "step": 10353 }, { "epoch": 0.6876535830510726, "grad_norm": 799.3893432617188, "learning_rate": 1.5264590574717664e-06, "loss": 15.4688, "step": 10354 }, { "epoch": 0.6877199973434283, "grad_norm": 423.6357116699219, "learning_rate": 1.5263676137015993e-06, "loss": 19.3125, "step": 10355 }, { "epoch": 0.687786411635784, "grad_norm": 139.11376953125, "learning_rate": 1.526276163842715e-06, "loss": 17.0625, "step": 10356 }, { "epoch": 0.6878528259281398, "grad_norm": 180.884521484375, "learning_rate": 1.5261847078961717e-06, "loss": 17.1562, "step": 10357 }, { "epoch": 0.6879192402204954, "grad_norm": 169.07492065429688, "learning_rate": 1.526093245863027e-06, "loss": 15.3281, "step": 10358 }, { "epoch": 0.6879856545128512, "grad_norm": 201.16619873046875, "learning_rate": 1.526001777744339e-06, "loss": 20.75, "step": 10359 }, { "epoch": 0.6880520688052069, "grad_norm": 170.2459716796875, "learning_rate": 1.525910303541166e-06, "loss": 15.5781, "step": 10360 }, { "epoch": 0.6881184830975626, "grad_norm": 274.5879821777344, "learning_rate": 1.5258188232545654e-06, "loss": 16.5312, "step": 10361 }, { "epoch": 0.6881848973899183, "grad_norm": 505.26092529296875, "learning_rate": 1.5257273368855961e-06, "loss": 21.5938, "step": 10362 }, { "epoch": 0.688251311682274, "grad_norm": 148.83131408691406, "learning_rate": 1.5256358444353161e-06, "loss": 19.6406, "step": 10363 }, { "epoch": 0.6883177259746297, "grad_norm": 145.7227325439453, "learning_rate": 1.525544345904784e-06, "loss": 14.25, "step": 10364 }, { "epoch": 0.6883841402669855, "grad_norm": 250.76275634765625, "learning_rate": 1.5254528412950575e-06, "loss": 20.2969, "step": 10365 }, { "epoch": 0.6884505545593411, "grad_norm": 130.49839782714844, "learning_rate": 1.525361330607196e-06, "loss": 16.9844, "step": 10366 }, { "epoch": 0.6885169688516969, "grad_norm": 174.46090698242188, "learning_rate": 1.5252698138422573e-06, "loss": 13.3125, "step": 10367 }, { "epoch": 0.6885833831440527, "grad_norm": 106.48828887939453, "learning_rate": 1.5251782910013001e-06, "loss": 14.0625, "step": 10368 }, { "epoch": 0.6886497974364083, "grad_norm": 118.26709747314453, "learning_rate": 1.5250867620853835e-06, "loss": 16.625, "step": 10369 }, { "epoch": 0.6887162117287641, "grad_norm": 913.007080078125, "learning_rate": 1.524995227095566e-06, "loss": 21.0625, "step": 10370 }, { "epoch": 0.6887826260211197, "grad_norm": 233.1459503173828, "learning_rate": 1.5249036860329066e-06, "loss": 22.2344, "step": 10371 }, { "epoch": 0.6888490403134755, "grad_norm": 535.0484008789062, "learning_rate": 1.524812138898464e-06, "loss": 16.9453, "step": 10372 }, { "epoch": 0.6889154546058311, "grad_norm": 126.6742935180664, "learning_rate": 1.5247205856932967e-06, "loss": 14.1875, "step": 10373 }, { "epoch": 0.6889818688981869, "grad_norm": 162.7362060546875, "learning_rate": 1.5246290264184648e-06, "loss": 16.6094, "step": 10374 }, { "epoch": 0.6890482831905426, "grad_norm": 294.5740051269531, "learning_rate": 1.5245374610750266e-06, "loss": 20.2969, "step": 10375 }, { "epoch": 0.6891146974828983, "grad_norm": 334.95013427734375, "learning_rate": 1.5244458896640415e-06, "loss": 14.7812, "step": 10376 }, { "epoch": 0.689181111775254, "grad_norm": 215.27252197265625, "learning_rate": 1.5243543121865688e-06, "loss": 16.875, "step": 10377 }, { "epoch": 0.6892475260676098, "grad_norm": 204.44943237304688, "learning_rate": 1.5242627286436678e-06, "loss": 16.2188, "step": 10378 }, { "epoch": 0.6893139403599655, "grad_norm": 361.1408386230469, "learning_rate": 1.5241711390363978e-06, "loss": 14.8281, "step": 10379 }, { "epoch": 0.6893803546523212, "grad_norm": 236.6713104248047, "learning_rate": 1.5240795433658185e-06, "loss": 16.4688, "step": 10380 }, { "epoch": 0.6894467689446769, "grad_norm": 282.0773010253906, "learning_rate": 1.5239879416329891e-06, "loss": 25.4062, "step": 10381 }, { "epoch": 0.6895131832370326, "grad_norm": 205.9153289794922, "learning_rate": 1.5238963338389695e-06, "loss": 11.6719, "step": 10382 }, { "epoch": 0.6895795975293884, "grad_norm": 154.2844696044922, "learning_rate": 1.523804719984819e-06, "loss": 13.0156, "step": 10383 }, { "epoch": 0.689646011821744, "grad_norm": 206.72686767578125, "learning_rate": 1.5237131000715976e-06, "loss": 20.625, "step": 10384 }, { "epoch": 0.6897124261140998, "grad_norm": 193.03395080566406, "learning_rate": 1.523621474100365e-06, "loss": 18.3281, "step": 10385 }, { "epoch": 0.6897788404064554, "grad_norm": 161.93675231933594, "learning_rate": 1.5235298420721811e-06, "loss": 14.0156, "step": 10386 }, { "epoch": 0.6898452546988112, "grad_norm": 224.74636840820312, "learning_rate": 1.523438203988106e-06, "loss": 25.6875, "step": 10387 }, { "epoch": 0.6899116689911668, "grad_norm": 292.21575927734375, "learning_rate": 1.5233465598491996e-06, "loss": 15.1094, "step": 10388 }, { "epoch": 0.6899780832835226, "grad_norm": 162.1011199951172, "learning_rate": 1.523254909656522e-06, "loss": 23.3438, "step": 10389 }, { "epoch": 0.6900444975758784, "grad_norm": 193.51588439941406, "learning_rate": 1.5231632534111332e-06, "loss": 15.2656, "step": 10390 }, { "epoch": 0.690110911868234, "grad_norm": 169.90444946289062, "learning_rate": 1.5230715911140935e-06, "loss": 18.0781, "step": 10391 }, { "epoch": 0.6901773261605898, "grad_norm": 1069.2171630859375, "learning_rate": 1.5229799227664635e-06, "loss": 17.7188, "step": 10392 }, { "epoch": 0.6902437404529455, "grad_norm": 125.80757141113281, "learning_rate": 1.522888248369303e-06, "loss": 14.5781, "step": 10393 }, { "epoch": 0.6903101547453012, "grad_norm": 199.67364501953125, "learning_rate": 1.522796567923673e-06, "loss": 17.8281, "step": 10394 }, { "epoch": 0.6903765690376569, "grad_norm": 228.9861602783203, "learning_rate": 1.5227048814306335e-06, "loss": 21.9375, "step": 10395 }, { "epoch": 0.6904429833300126, "grad_norm": 391.54571533203125, "learning_rate": 1.522613188891246e-06, "loss": 14.4688, "step": 10396 }, { "epoch": 0.6905093976223683, "grad_norm": 227.363525390625, "learning_rate": 1.5225214903065697e-06, "loss": 16.25, "step": 10397 }, { "epoch": 0.6905758119147241, "grad_norm": 192.5062255859375, "learning_rate": 1.5224297856776663e-06, "loss": 27.5938, "step": 10398 }, { "epoch": 0.6906422262070797, "grad_norm": 173.81231689453125, "learning_rate": 1.5223380750055967e-06, "loss": 16.5938, "step": 10399 }, { "epoch": 0.6907086404994355, "grad_norm": 132.6194610595703, "learning_rate": 1.5222463582914213e-06, "loss": 16.3594, "step": 10400 }, { "epoch": 0.6907750547917912, "grad_norm": 204.84207153320312, "learning_rate": 1.5221546355362012e-06, "loss": 18.3906, "step": 10401 }, { "epoch": 0.6908414690841469, "grad_norm": 125.66350555419922, "learning_rate": 1.5220629067409973e-06, "loss": 17.2969, "step": 10402 }, { "epoch": 0.6909078833765027, "grad_norm": 127.58906555175781, "learning_rate": 1.5219711719068709e-06, "loss": 13.8125, "step": 10403 }, { "epoch": 0.6909742976688583, "grad_norm": 190.75392150878906, "learning_rate": 1.5218794310348825e-06, "loss": 15.5, "step": 10404 }, { "epoch": 0.6910407119612141, "grad_norm": 346.60809326171875, "learning_rate": 1.5217876841260942e-06, "loss": 17.1875, "step": 10405 }, { "epoch": 0.6911071262535697, "grad_norm": 1032.826904296875, "learning_rate": 1.5216959311815667e-06, "loss": 27.5469, "step": 10406 }, { "epoch": 0.6911735405459255, "grad_norm": 93.63036346435547, "learning_rate": 1.5216041722023615e-06, "loss": 11.3203, "step": 10407 }, { "epoch": 0.6912399548382812, "grad_norm": 181.65478515625, "learning_rate": 1.5215124071895403e-06, "loss": 15.1406, "step": 10408 }, { "epoch": 0.6913063691306369, "grad_norm": 153.61204528808594, "learning_rate": 1.521420636144164e-06, "loss": 20.7188, "step": 10409 }, { "epoch": 0.6913727834229926, "grad_norm": 216.10365295410156, "learning_rate": 1.5213288590672946e-06, "loss": 21.8438, "step": 10410 }, { "epoch": 0.6914391977153483, "grad_norm": 203.6261444091797, "learning_rate": 1.5212370759599933e-06, "loss": 17.1094, "step": 10411 }, { "epoch": 0.6915056120077041, "grad_norm": 188.0611572265625, "learning_rate": 1.521145286823322e-06, "loss": 15.3438, "step": 10412 }, { "epoch": 0.6915720263000598, "grad_norm": 214.0611572265625, "learning_rate": 1.521053491658343e-06, "loss": 18.8906, "step": 10413 }, { "epoch": 0.6916384405924155, "grad_norm": 173.93771362304688, "learning_rate": 1.5209616904661172e-06, "loss": 19.6094, "step": 10414 }, { "epoch": 0.6917048548847712, "grad_norm": 177.5112762451172, "learning_rate": 1.5208698832477072e-06, "loss": 14.9062, "step": 10415 }, { "epoch": 0.691771269177127, "grad_norm": 304.2138366699219, "learning_rate": 1.5207780700041748e-06, "loss": 21.7344, "step": 10416 }, { "epoch": 0.6918376834694826, "grad_norm": 287.4450378417969, "learning_rate": 1.5206862507365822e-06, "loss": 16.9062, "step": 10417 }, { "epoch": 0.6919040977618384, "grad_norm": 396.22943115234375, "learning_rate": 1.520594425445991e-06, "loss": 19.5312, "step": 10418 }, { "epoch": 0.691970512054194, "grad_norm": 310.16961669921875, "learning_rate": 1.5205025941334634e-06, "loss": 19.2188, "step": 10419 }, { "epoch": 0.6920369263465498, "grad_norm": 147.61434936523438, "learning_rate": 1.5204107568000624e-06, "loss": 23.125, "step": 10420 }, { "epoch": 0.6921033406389054, "grad_norm": 194.30105590820312, "learning_rate": 1.5203189134468497e-06, "loss": 15.2344, "step": 10421 }, { "epoch": 0.6921697549312612, "grad_norm": 115.5793228149414, "learning_rate": 1.5202270640748879e-06, "loss": 19.2188, "step": 10422 }, { "epoch": 0.692236169223617, "grad_norm": 210.06085205078125, "learning_rate": 1.520135208685239e-06, "loss": 17.6406, "step": 10423 }, { "epoch": 0.6923025835159726, "grad_norm": 758.615966796875, "learning_rate": 1.5200433472789663e-06, "loss": 15.2969, "step": 10424 }, { "epoch": 0.6923689978083284, "grad_norm": 132.85708618164062, "learning_rate": 1.519951479857132e-06, "loss": 17.0469, "step": 10425 }, { "epoch": 0.692435412100684, "grad_norm": 286.01141357421875, "learning_rate": 1.5198596064207986e-06, "loss": 17.9688, "step": 10426 }, { "epoch": 0.6925018263930398, "grad_norm": 193.65359497070312, "learning_rate": 1.5197677269710293e-06, "loss": 14.8594, "step": 10427 }, { "epoch": 0.6925682406853955, "grad_norm": 228.98330688476562, "learning_rate": 1.5196758415088868e-06, "loss": 17.7188, "step": 10428 }, { "epoch": 0.6926346549777512, "grad_norm": 216.9939422607422, "learning_rate": 1.5195839500354335e-06, "loss": 16.8125, "step": 10429 }, { "epoch": 0.6927010692701069, "grad_norm": 156.7633819580078, "learning_rate": 1.5194920525517327e-06, "loss": 16.5781, "step": 10430 }, { "epoch": 0.6927674835624626, "grad_norm": 320.13555908203125, "learning_rate": 1.5194001490588476e-06, "loss": 16.8906, "step": 10431 }, { "epoch": 0.6928338978548183, "grad_norm": 201.68634033203125, "learning_rate": 1.5193082395578412e-06, "loss": 17.9844, "step": 10432 }, { "epoch": 0.6929003121471741, "grad_norm": 386.57269287109375, "learning_rate": 1.519216324049776e-06, "loss": 21.0625, "step": 10433 }, { "epoch": 0.6929667264395298, "grad_norm": 176.3898162841797, "learning_rate": 1.519124402535716e-06, "loss": 16.5156, "step": 10434 }, { "epoch": 0.6930331407318855, "grad_norm": 300.7463684082031, "learning_rate": 1.5190324750167244e-06, "loss": 22.2578, "step": 10435 }, { "epoch": 0.6930995550242413, "grad_norm": 137.2085723876953, "learning_rate": 1.5189405414938644e-06, "loss": 15.25, "step": 10436 }, { "epoch": 0.6931659693165969, "grad_norm": 172.38888549804688, "learning_rate": 1.5188486019681995e-06, "loss": 18.4531, "step": 10437 }, { "epoch": 0.6932323836089527, "grad_norm": 208.18565368652344, "learning_rate": 1.5187566564407928e-06, "loss": 14.3906, "step": 10438 }, { "epoch": 0.6932987979013083, "grad_norm": 252.4950714111328, "learning_rate": 1.5186647049127085e-06, "loss": 12.5, "step": 10439 }, { "epoch": 0.6933652121936641, "grad_norm": 166.42547607421875, "learning_rate": 1.51857274738501e-06, "loss": 14.8125, "step": 10440 }, { "epoch": 0.6934316264860197, "grad_norm": 154.82823181152344, "learning_rate": 1.5184807838587608e-06, "loss": 16.1406, "step": 10441 }, { "epoch": 0.6934980407783755, "grad_norm": 244.7970428466797, "learning_rate": 1.5183888143350253e-06, "loss": 20.9375, "step": 10442 }, { "epoch": 0.6935644550707312, "grad_norm": 133.9368438720703, "learning_rate": 1.5182968388148666e-06, "loss": 16.1875, "step": 10443 }, { "epoch": 0.6936308693630869, "grad_norm": 203.35272216796875, "learning_rate": 1.5182048572993487e-06, "loss": 17.9688, "step": 10444 }, { "epoch": 0.6936972836554427, "grad_norm": 220.2457275390625, "learning_rate": 1.518112869789536e-06, "loss": 14.3594, "step": 10445 }, { "epoch": 0.6937636979477984, "grad_norm": 100.4342041015625, "learning_rate": 1.5180208762864926e-06, "loss": 17.6875, "step": 10446 }, { "epoch": 0.6938301122401541, "grad_norm": 200.98724365234375, "learning_rate": 1.5179288767912821e-06, "loss": 17.8281, "step": 10447 }, { "epoch": 0.6938965265325098, "grad_norm": 184.0283966064453, "learning_rate": 1.5178368713049695e-06, "loss": 13.9844, "step": 10448 }, { "epoch": 0.6939629408248655, "grad_norm": 121.43354797363281, "learning_rate": 1.517744859828618e-06, "loss": 12.1562, "step": 10449 }, { "epoch": 0.6940293551172212, "grad_norm": 243.09375, "learning_rate": 1.5176528423632928e-06, "loss": 17.5, "step": 10450 }, { "epoch": 0.694095769409577, "grad_norm": 309.7833251953125, "learning_rate": 1.517560818910058e-06, "loss": 19.9844, "step": 10451 }, { "epoch": 0.6941621837019326, "grad_norm": 279.87786865234375, "learning_rate": 1.517468789469978e-06, "loss": 21.5938, "step": 10452 }, { "epoch": 0.6942285979942884, "grad_norm": 162.88998413085938, "learning_rate": 1.5173767540441173e-06, "loss": 16.75, "step": 10453 }, { "epoch": 0.694295012286644, "grad_norm": 300.3447265625, "learning_rate": 1.5172847126335411e-06, "loss": 25.75, "step": 10454 }, { "epoch": 0.6943614265789998, "grad_norm": 326.3660583496094, "learning_rate": 1.5171926652393133e-06, "loss": 24.4219, "step": 10455 }, { "epoch": 0.6944278408713556, "grad_norm": 619.5293579101562, "learning_rate": 1.517100611862499e-06, "loss": 23.1094, "step": 10456 }, { "epoch": 0.6944942551637112, "grad_norm": 192.22064208984375, "learning_rate": 1.5170085525041628e-06, "loss": 15.6719, "step": 10457 }, { "epoch": 0.694560669456067, "grad_norm": 134.3676300048828, "learning_rate": 1.5169164871653703e-06, "loss": 15.3906, "step": 10458 }, { "epoch": 0.6946270837484226, "grad_norm": 221.31434631347656, "learning_rate": 1.5168244158471856e-06, "loss": 16.0938, "step": 10459 }, { "epoch": 0.6946934980407784, "grad_norm": 212.6770782470703, "learning_rate": 1.5167323385506743e-06, "loss": 17.1875, "step": 10460 }, { "epoch": 0.694759912333134, "grad_norm": 245.1401824951172, "learning_rate": 1.516640255276901e-06, "loss": 17.6875, "step": 10461 }, { "epoch": 0.6948263266254898, "grad_norm": 276.3620300292969, "learning_rate": 1.5165481660269312e-06, "loss": 21.0781, "step": 10462 }, { "epoch": 0.6948927409178455, "grad_norm": 223.46405029296875, "learning_rate": 1.5164560708018303e-06, "loss": 20.2188, "step": 10463 }, { "epoch": 0.6949591552102012, "grad_norm": 439.4665832519531, "learning_rate": 1.5163639696026632e-06, "loss": 21.9688, "step": 10464 }, { "epoch": 0.695025569502557, "grad_norm": 305.1612854003906, "learning_rate": 1.5162718624304956e-06, "loss": 16.5469, "step": 10465 }, { "epoch": 0.6950919837949127, "grad_norm": 222.0346221923828, "learning_rate": 1.5161797492863925e-06, "loss": 18.3438, "step": 10466 }, { "epoch": 0.6951583980872684, "grad_norm": 182.06251525878906, "learning_rate": 1.51608763017142e-06, "loss": 15.125, "step": 10467 }, { "epoch": 0.6952248123796241, "grad_norm": 175.3001708984375, "learning_rate": 1.5159955050866433e-06, "loss": 14.8906, "step": 10468 }, { "epoch": 0.6952912266719798, "grad_norm": 174.6790771484375, "learning_rate": 1.515903374033128e-06, "loss": 19.6406, "step": 10469 }, { "epoch": 0.6953576409643355, "grad_norm": 199.04490661621094, "learning_rate": 1.51581123701194e-06, "loss": 19.7344, "step": 10470 }, { "epoch": 0.6954240552566913, "grad_norm": 165.0767364501953, "learning_rate": 1.515719094024145e-06, "loss": 14.7031, "step": 10471 }, { "epoch": 0.6954904695490469, "grad_norm": 180.398193359375, "learning_rate": 1.5156269450708092e-06, "loss": 17.0781, "step": 10472 }, { "epoch": 0.6955568838414027, "grad_norm": 120.12216186523438, "learning_rate": 1.5155347901529978e-06, "loss": 15.5, "step": 10473 }, { "epoch": 0.6956232981337583, "grad_norm": 496.71923828125, "learning_rate": 1.5154426292717776e-06, "loss": 25.3125, "step": 10474 }, { "epoch": 0.6956897124261141, "grad_norm": 534.7210693359375, "learning_rate": 1.5153504624282141e-06, "loss": 25.5156, "step": 10475 }, { "epoch": 0.6957561267184699, "grad_norm": 232.46304321289062, "learning_rate": 1.5152582896233736e-06, "loss": 15.6562, "step": 10476 }, { "epoch": 0.6958225410108255, "grad_norm": 205.8087615966797, "learning_rate": 1.5151661108583224e-06, "loss": 13.8906, "step": 10477 }, { "epoch": 0.6958889553031813, "grad_norm": 337.477294921875, "learning_rate": 1.5150739261341268e-06, "loss": 15.625, "step": 10478 }, { "epoch": 0.6959553695955369, "grad_norm": 450.5589599609375, "learning_rate": 1.5149817354518527e-06, "loss": 17.0469, "step": 10479 }, { "epoch": 0.6960217838878927, "grad_norm": 1025.21142578125, "learning_rate": 1.514889538812567e-06, "loss": 30.6406, "step": 10480 }, { "epoch": 0.6960881981802484, "grad_norm": 212.68544006347656, "learning_rate": 1.5147973362173363e-06, "loss": 19.9062, "step": 10481 }, { "epoch": 0.6961546124726041, "grad_norm": 212.16302490234375, "learning_rate": 1.5147051276672266e-06, "loss": 19.4844, "step": 10482 }, { "epoch": 0.6962210267649598, "grad_norm": 466.2159118652344, "learning_rate": 1.5146129131633048e-06, "loss": 22.875, "step": 10483 }, { "epoch": 0.6962874410573155, "grad_norm": 212.09068298339844, "learning_rate": 1.5145206927066374e-06, "loss": 13.2656, "step": 10484 }, { "epoch": 0.6963538553496712, "grad_norm": 366.0629577636719, "learning_rate": 1.5144284662982915e-06, "loss": 20.9531, "step": 10485 }, { "epoch": 0.696420269642027, "grad_norm": 169.51332092285156, "learning_rate": 1.5143362339393338e-06, "loss": 16.2656, "step": 10486 }, { "epoch": 0.6964866839343827, "grad_norm": 174.2707061767578, "learning_rate": 1.514243995630831e-06, "loss": 19.5156, "step": 10487 }, { "epoch": 0.6965530982267384, "grad_norm": 194.68548583984375, "learning_rate": 1.51415175137385e-06, "loss": 14.6719, "step": 10488 }, { "epoch": 0.6966195125190942, "grad_norm": 188.62554931640625, "learning_rate": 1.5140595011694585e-06, "loss": 17.0312, "step": 10489 }, { "epoch": 0.6966859268114498, "grad_norm": 226.3241729736328, "learning_rate": 1.5139672450187227e-06, "loss": 11.75, "step": 10490 }, { "epoch": 0.6967523411038056, "grad_norm": 449.6986389160156, "learning_rate": 1.5138749829227102e-06, "loss": 22.625, "step": 10491 }, { "epoch": 0.6968187553961612, "grad_norm": 177.03262329101562, "learning_rate": 1.5137827148824887e-06, "loss": 17.6719, "step": 10492 }, { "epoch": 0.696885169688517, "grad_norm": 151.15989685058594, "learning_rate": 1.5136904408991247e-06, "loss": 20.75, "step": 10493 }, { "epoch": 0.6969515839808726, "grad_norm": 366.7550048828125, "learning_rate": 1.5135981609736859e-06, "loss": 19.4531, "step": 10494 }, { "epoch": 0.6970179982732284, "grad_norm": 135.83047485351562, "learning_rate": 1.51350587510724e-06, "loss": 15.0625, "step": 10495 }, { "epoch": 0.6970844125655841, "grad_norm": 202.30499267578125, "learning_rate": 1.5134135833008536e-06, "loss": 16.7344, "step": 10496 }, { "epoch": 0.6971508268579398, "grad_norm": 95.76197052001953, "learning_rate": 1.513321285555595e-06, "loss": 20.625, "step": 10497 }, { "epoch": 0.6972172411502956, "grad_norm": 162.0401611328125, "learning_rate": 1.5132289818725322e-06, "loss": 19.4219, "step": 10498 }, { "epoch": 0.6972836554426513, "grad_norm": 256.9873352050781, "learning_rate": 1.5131366722527324e-06, "loss": 25.0312, "step": 10499 }, { "epoch": 0.697350069735007, "grad_norm": 229.43870544433594, "learning_rate": 1.5130443566972633e-06, "loss": 20.1875, "step": 10500 }, { "epoch": 0.6974164840273627, "grad_norm": 222.35194396972656, "learning_rate": 1.5129520352071929e-06, "loss": 21.0781, "step": 10501 }, { "epoch": 0.6974828983197184, "grad_norm": 229.52810668945312, "learning_rate": 1.5128597077835891e-06, "loss": 22.125, "step": 10502 }, { "epoch": 0.6975493126120741, "grad_norm": 158.31422424316406, "learning_rate": 1.5127673744275201e-06, "loss": 18.0156, "step": 10503 }, { "epoch": 0.6976157269044299, "grad_norm": 191.9716033935547, "learning_rate": 1.512675035140054e-06, "loss": 19.2812, "step": 10504 }, { "epoch": 0.6976821411967855, "grad_norm": 214.66371154785156, "learning_rate": 1.5125826899222585e-06, "loss": 13.2812, "step": 10505 }, { "epoch": 0.6977485554891413, "grad_norm": 251.3826904296875, "learning_rate": 1.5124903387752022e-06, "loss": 21.1875, "step": 10506 }, { "epoch": 0.6978149697814969, "grad_norm": 230.04486083984375, "learning_rate": 1.5123979816999528e-06, "loss": 18.7344, "step": 10507 }, { "epoch": 0.6978813840738527, "grad_norm": 120.1944808959961, "learning_rate": 1.512305618697579e-06, "loss": 13.2969, "step": 10508 }, { "epoch": 0.6979477983662085, "grad_norm": 304.7348937988281, "learning_rate": 1.5122132497691493e-06, "loss": 19.2656, "step": 10509 }, { "epoch": 0.6980142126585641, "grad_norm": 97.90092468261719, "learning_rate": 1.5121208749157326e-06, "loss": 16.2344, "step": 10510 }, { "epoch": 0.6980806269509199, "grad_norm": 151.20803833007812, "learning_rate": 1.5120284941383962e-06, "loss": 15.3906, "step": 10511 }, { "epoch": 0.6981470412432755, "grad_norm": 275.17401123046875, "learning_rate": 1.5119361074382098e-06, "loss": 21.0469, "step": 10512 }, { "epoch": 0.6982134555356313, "grad_norm": 391.81463623046875, "learning_rate": 1.5118437148162416e-06, "loss": 17.9062, "step": 10513 }, { "epoch": 0.698279869827987, "grad_norm": 890.5555419921875, "learning_rate": 1.5117513162735608e-06, "loss": 12.0625, "step": 10514 }, { "epoch": 0.6983462841203427, "grad_norm": 176.61196899414062, "learning_rate": 1.5116589118112354e-06, "loss": 11.0312, "step": 10515 }, { "epoch": 0.6984126984126984, "grad_norm": 312.67340087890625, "learning_rate": 1.5115665014303349e-06, "loss": 16.125, "step": 10516 }, { "epoch": 0.6984791127050541, "grad_norm": 448.9553527832031, "learning_rate": 1.5114740851319283e-06, "loss": 25.875, "step": 10517 }, { "epoch": 0.6985455269974098, "grad_norm": 240.8087158203125, "learning_rate": 1.511381662917084e-06, "loss": 16.3125, "step": 10518 }, { "epoch": 0.6986119412897656, "grad_norm": 350.3414001464844, "learning_rate": 1.5112892347868717e-06, "loss": 19.3594, "step": 10519 }, { "epoch": 0.6986783555821213, "grad_norm": 363.86395263671875, "learning_rate": 1.5111968007423605e-06, "loss": 17.8125, "step": 10520 }, { "epoch": 0.698744769874477, "grad_norm": 187.7853240966797, "learning_rate": 1.5111043607846195e-06, "loss": 15.4062, "step": 10521 }, { "epoch": 0.6988111841668327, "grad_norm": 251.82302856445312, "learning_rate": 1.511011914914718e-06, "loss": 22.1875, "step": 10522 }, { "epoch": 0.6988775984591884, "grad_norm": 512.4706420898438, "learning_rate": 1.5109194631337249e-06, "loss": 26.4375, "step": 10523 }, { "epoch": 0.6989440127515442, "grad_norm": 146.1150360107422, "learning_rate": 1.5108270054427103e-06, "loss": 18.0312, "step": 10524 }, { "epoch": 0.6990104270438998, "grad_norm": 293.29461669921875, "learning_rate": 1.5107345418427436e-06, "loss": 19.0, "step": 10525 }, { "epoch": 0.6990768413362556, "grad_norm": 364.0216369628906, "learning_rate": 1.510642072334894e-06, "loss": 19.1094, "step": 10526 }, { "epoch": 0.6991432556286112, "grad_norm": 223.32054138183594, "learning_rate": 1.5105495969202315e-06, "loss": 19.4688, "step": 10527 }, { "epoch": 0.699209669920967, "grad_norm": 496.1676025390625, "learning_rate": 1.510457115599826e-06, "loss": 16.8125, "step": 10528 }, { "epoch": 0.6992760842133227, "grad_norm": 513.3734130859375, "learning_rate": 1.5103646283747464e-06, "loss": 23.8125, "step": 10529 }, { "epoch": 0.6993424985056784, "grad_norm": 397.8069763183594, "learning_rate": 1.5102721352460633e-06, "loss": 20.0938, "step": 10530 }, { "epoch": 0.6994089127980342, "grad_norm": 392.3775329589844, "learning_rate": 1.5101796362148465e-06, "loss": 22.2031, "step": 10531 }, { "epoch": 0.6994753270903898, "grad_norm": 253.99905395507812, "learning_rate": 1.5100871312821655e-06, "loss": 17.7031, "step": 10532 }, { "epoch": 0.6995417413827456, "grad_norm": 179.2161407470703, "learning_rate": 1.509994620449091e-06, "loss": 16.6016, "step": 10533 }, { "epoch": 0.6996081556751013, "grad_norm": 275.1574401855469, "learning_rate": 1.5099021037166924e-06, "loss": 17.9375, "step": 10534 }, { "epoch": 0.699674569967457, "grad_norm": 260.01959228515625, "learning_rate": 1.5098095810860408e-06, "loss": 18.5156, "step": 10535 }, { "epoch": 0.6997409842598127, "grad_norm": 168.76370239257812, "learning_rate": 1.5097170525582057e-06, "loss": 13.2734, "step": 10536 }, { "epoch": 0.6998073985521684, "grad_norm": 320.25592041015625, "learning_rate": 1.5096245181342578e-06, "loss": 20.7188, "step": 10537 }, { "epoch": 0.6998738128445241, "grad_norm": 253.08148193359375, "learning_rate": 1.5095319778152673e-06, "loss": 17.625, "step": 10538 }, { "epoch": 0.6999402271368799, "grad_norm": 205.7589111328125, "learning_rate": 1.5094394316023045e-06, "loss": 21.6172, "step": 10539 }, { "epoch": 0.7000066414292355, "grad_norm": 230.79627990722656, "learning_rate": 1.5093468794964403e-06, "loss": 22.5312, "step": 10540 }, { "epoch": 0.7000730557215913, "grad_norm": 183.50099182128906, "learning_rate": 1.509254321498745e-06, "loss": 19.8516, "step": 10541 }, { "epoch": 0.700139470013947, "grad_norm": 170.29942321777344, "learning_rate": 1.5091617576102895e-06, "loss": 14.6562, "step": 10542 }, { "epoch": 0.7002058843063027, "grad_norm": 1022.9906005859375, "learning_rate": 1.5090691878321442e-06, "loss": 13.5, "step": 10543 }, { "epoch": 0.7002722985986585, "grad_norm": 207.69644165039062, "learning_rate": 1.5089766121653802e-06, "loss": 17.2812, "step": 10544 }, { "epoch": 0.7003387128910141, "grad_norm": 353.34735107421875, "learning_rate": 1.5088840306110682e-06, "loss": 15.1094, "step": 10545 }, { "epoch": 0.7004051271833699, "grad_norm": 209.1546630859375, "learning_rate": 1.5087914431702793e-06, "loss": 17.6562, "step": 10546 }, { "epoch": 0.7004715414757255, "grad_norm": 157.42198181152344, "learning_rate": 1.5086988498440842e-06, "loss": 19.3281, "step": 10547 }, { "epoch": 0.7005379557680813, "grad_norm": 589.6384887695312, "learning_rate": 1.5086062506335542e-06, "loss": 19.9531, "step": 10548 }, { "epoch": 0.700604370060437, "grad_norm": 140.67794799804688, "learning_rate": 1.5085136455397604e-06, "loss": 21.0938, "step": 10549 }, { "epoch": 0.7006707843527927, "grad_norm": 118.72273254394531, "learning_rate": 1.508421034563774e-06, "loss": 16.2656, "step": 10550 }, { "epoch": 0.7007371986451484, "grad_norm": 192.54623413085938, "learning_rate": 1.5083284177066661e-06, "loss": 14.625, "step": 10551 }, { "epoch": 0.7008036129375042, "grad_norm": 374.6380615234375, "learning_rate": 1.5082357949695085e-06, "loss": 17.6094, "step": 10552 }, { "epoch": 0.7008700272298599, "grad_norm": 195.94912719726562, "learning_rate": 1.5081431663533719e-06, "loss": 14.8906, "step": 10553 }, { "epoch": 0.7009364415222156, "grad_norm": 238.35238647460938, "learning_rate": 1.5080505318593285e-06, "loss": 21.3906, "step": 10554 }, { "epoch": 0.7010028558145713, "grad_norm": 174.7404327392578, "learning_rate": 1.507957891488449e-06, "loss": 16.0781, "step": 10555 }, { "epoch": 0.701069270106927, "grad_norm": 249.82998657226562, "learning_rate": 1.5078652452418061e-06, "loss": 19.2031, "step": 10556 }, { "epoch": 0.7011356843992828, "grad_norm": 159.96766662597656, "learning_rate": 1.5077725931204705e-06, "loss": 19.0, "step": 10557 }, { "epoch": 0.7012020986916384, "grad_norm": 641.6934204101562, "learning_rate": 1.5076799351255145e-06, "loss": 17.6406, "step": 10558 }, { "epoch": 0.7012685129839942, "grad_norm": 160.52406311035156, "learning_rate": 1.5075872712580097e-06, "loss": 18.375, "step": 10559 }, { "epoch": 0.7013349272763498, "grad_norm": 253.1974639892578, "learning_rate": 1.507494601519028e-06, "loss": 19.4688, "step": 10560 }, { "epoch": 0.7014013415687056, "grad_norm": 188.14559936523438, "learning_rate": 1.5074019259096414e-06, "loss": 17.0781, "step": 10561 }, { "epoch": 0.7014677558610612, "grad_norm": 253.5604705810547, "learning_rate": 1.507309244430922e-06, "loss": 20.875, "step": 10562 }, { "epoch": 0.701534170153417, "grad_norm": 242.72911071777344, "learning_rate": 1.507216557083942e-06, "loss": 23.7344, "step": 10563 }, { "epoch": 0.7016005844457728, "grad_norm": 550.4786376953125, "learning_rate": 1.507123863869773e-06, "loss": 17.6719, "step": 10564 }, { "epoch": 0.7016669987381284, "grad_norm": 108.82546997070312, "learning_rate": 1.5070311647894878e-06, "loss": 13.2344, "step": 10565 }, { "epoch": 0.7017334130304842, "grad_norm": 331.7522277832031, "learning_rate": 1.506938459844158e-06, "loss": 18.0, "step": 10566 }, { "epoch": 0.7017998273228399, "grad_norm": 328.1200866699219, "learning_rate": 1.506845749034857e-06, "loss": 22.3281, "step": 10567 }, { "epoch": 0.7018662416151956, "grad_norm": 207.28704833984375, "learning_rate": 1.5067530323626563e-06, "loss": 18.7969, "step": 10568 }, { "epoch": 0.7019326559075513, "grad_norm": 136.81040954589844, "learning_rate": 1.5066603098286289e-06, "loss": 13.9844, "step": 10569 }, { "epoch": 0.701999070199907, "grad_norm": 268.8230895996094, "learning_rate": 1.5065675814338472e-06, "loss": 15.9219, "step": 10570 }, { "epoch": 0.7020654844922627, "grad_norm": 116.10707092285156, "learning_rate": 1.5064748471793838e-06, "loss": 11.4688, "step": 10571 }, { "epoch": 0.7021318987846185, "grad_norm": 233.9385528564453, "learning_rate": 1.5063821070663111e-06, "loss": 16.5312, "step": 10572 }, { "epoch": 0.7021983130769741, "grad_norm": 199.6911163330078, "learning_rate": 1.5062893610957024e-06, "loss": 17.9375, "step": 10573 }, { "epoch": 0.7022647273693299, "grad_norm": 208.58065795898438, "learning_rate": 1.5061966092686307e-06, "loss": 18.4062, "step": 10574 }, { "epoch": 0.7023311416616856, "grad_norm": 335.20770263671875, "learning_rate": 1.5061038515861681e-06, "loss": 19.2266, "step": 10575 }, { "epoch": 0.7023975559540413, "grad_norm": 172.4426727294922, "learning_rate": 1.506011088049388e-06, "loss": 14.9609, "step": 10576 }, { "epoch": 0.7024639702463971, "grad_norm": 152.34107971191406, "learning_rate": 1.505918318659363e-06, "loss": 18.3281, "step": 10577 }, { "epoch": 0.7025303845387527, "grad_norm": 238.12960815429688, "learning_rate": 1.5058255434171674e-06, "loss": 20.3438, "step": 10578 }, { "epoch": 0.7025967988311085, "grad_norm": 362.1971130371094, "learning_rate": 1.5057327623238733e-06, "loss": 18.3281, "step": 10579 }, { "epoch": 0.7026632131234641, "grad_norm": 136.61285400390625, "learning_rate": 1.5056399753805542e-06, "loss": 13.5781, "step": 10580 }, { "epoch": 0.7027296274158199, "grad_norm": 234.56076049804688, "learning_rate": 1.5055471825882835e-06, "loss": 14.4375, "step": 10581 }, { "epoch": 0.7027960417081756, "grad_norm": 344.989013671875, "learning_rate": 1.5054543839481344e-06, "loss": 16.6719, "step": 10582 }, { "epoch": 0.7028624560005313, "grad_norm": 95.96487426757812, "learning_rate": 1.5053615794611805e-06, "loss": 14.8438, "step": 10583 }, { "epoch": 0.702928870292887, "grad_norm": 132.0398712158203, "learning_rate": 1.505268769128495e-06, "loss": 15.0312, "step": 10584 }, { "epoch": 0.7029952845852427, "grad_norm": 180.3923797607422, "learning_rate": 1.5051759529511524e-06, "loss": 16.7188, "step": 10585 }, { "epoch": 0.7030616988775985, "grad_norm": 193.08026123046875, "learning_rate": 1.5050831309302251e-06, "loss": 17.2031, "step": 10586 }, { "epoch": 0.7031281131699542, "grad_norm": 268.2498779296875, "learning_rate": 1.5049903030667873e-06, "loss": 13.3281, "step": 10587 }, { "epoch": 0.7031945274623099, "grad_norm": 297.59771728515625, "learning_rate": 1.5048974693619135e-06, "loss": 23.5625, "step": 10588 }, { "epoch": 0.7032609417546656, "grad_norm": 206.70150756835938, "learning_rate": 1.5048046298166765e-06, "loss": 17.2969, "step": 10589 }, { "epoch": 0.7033273560470213, "grad_norm": 254.0386505126953, "learning_rate": 1.5047117844321506e-06, "loss": 17.5, "step": 10590 }, { "epoch": 0.703393770339377, "grad_norm": 454.2064208984375, "learning_rate": 1.50461893320941e-06, "loss": 17.2812, "step": 10591 }, { "epoch": 0.7034601846317328, "grad_norm": 221.7898712158203, "learning_rate": 1.504526076149528e-06, "loss": 18.4688, "step": 10592 }, { "epoch": 0.7035265989240884, "grad_norm": 210.8260040283203, "learning_rate": 1.5044332132535799e-06, "loss": 17.5703, "step": 10593 }, { "epoch": 0.7035930132164442, "grad_norm": 518.7715454101562, "learning_rate": 1.5043403445226392e-06, "loss": 16.9062, "step": 10594 }, { "epoch": 0.7036594275087998, "grad_norm": 182.8852996826172, "learning_rate": 1.50424746995778e-06, "loss": 17.0312, "step": 10595 }, { "epoch": 0.7037258418011556, "grad_norm": 213.69554138183594, "learning_rate": 1.5041545895600769e-06, "loss": 17.6719, "step": 10596 }, { "epoch": 0.7037922560935114, "grad_norm": 198.50253295898438, "learning_rate": 1.5040617033306044e-06, "loss": 23.5312, "step": 10597 }, { "epoch": 0.703858670385867, "grad_norm": 202.0691375732422, "learning_rate": 1.5039688112704362e-06, "loss": 17.5156, "step": 10598 }, { "epoch": 0.7039250846782228, "grad_norm": 266.4459533691406, "learning_rate": 1.503875913380648e-06, "loss": 19.2656, "step": 10599 }, { "epoch": 0.7039914989705784, "grad_norm": 242.9009552001953, "learning_rate": 1.5037830096623135e-06, "loss": 18.0781, "step": 10600 }, { "epoch": 0.7040579132629342, "grad_norm": 108.09676361083984, "learning_rate": 1.5036901001165077e-06, "loss": 13.9531, "step": 10601 }, { "epoch": 0.7041243275552899, "grad_norm": 176.27731323242188, "learning_rate": 1.5035971847443057e-06, "loss": 15.3906, "step": 10602 }, { "epoch": 0.7041907418476456, "grad_norm": 308.13409423828125, "learning_rate": 1.5035042635467811e-06, "loss": 16.8125, "step": 10603 }, { "epoch": 0.7042571561400013, "grad_norm": 216.30340576171875, "learning_rate": 1.5034113365250098e-06, "loss": 18.8438, "step": 10604 }, { "epoch": 0.704323570432357, "grad_norm": 231.2771453857422, "learning_rate": 1.5033184036800665e-06, "loss": 19.9219, "step": 10605 }, { "epoch": 0.7043899847247127, "grad_norm": 255.24679565429688, "learning_rate": 1.5032254650130263e-06, "loss": 23.7188, "step": 10606 }, { "epoch": 0.7044563990170685, "grad_norm": 228.7524871826172, "learning_rate": 1.5031325205249638e-06, "loss": 15.2188, "step": 10607 }, { "epoch": 0.7045228133094242, "grad_norm": 310.75006103515625, "learning_rate": 1.5030395702169548e-06, "loss": 17.4688, "step": 10608 }, { "epoch": 0.7045892276017799, "grad_norm": 197.02508544921875, "learning_rate": 1.5029466140900736e-06, "loss": 21.0469, "step": 10609 }, { "epoch": 0.7046556418941357, "grad_norm": 211.3747100830078, "learning_rate": 1.5028536521453966e-06, "loss": 19.375, "step": 10610 }, { "epoch": 0.7047220561864913, "grad_norm": 173.1385498046875, "learning_rate": 1.502760684383998e-06, "loss": 17.9219, "step": 10611 }, { "epoch": 0.7047884704788471, "grad_norm": 166.4267578125, "learning_rate": 1.5026677108069539e-06, "loss": 20.0156, "step": 10612 }, { "epoch": 0.7048548847712027, "grad_norm": 227.88165283203125, "learning_rate": 1.5025747314153398e-06, "loss": 18.4219, "step": 10613 }, { "epoch": 0.7049212990635585, "grad_norm": 162.1963348388672, "learning_rate": 1.502481746210231e-06, "loss": 17.5, "step": 10614 }, { "epoch": 0.7049877133559141, "grad_norm": 212.75479125976562, "learning_rate": 1.5023887551927025e-06, "loss": 16.0, "step": 10615 }, { "epoch": 0.7050541276482699, "grad_norm": 297.16326904296875, "learning_rate": 1.502295758363831e-06, "loss": 21.3594, "step": 10616 }, { "epoch": 0.7051205419406257, "grad_norm": 499.68206787109375, "learning_rate": 1.5022027557246919e-06, "loss": 23.4375, "step": 10617 }, { "epoch": 0.7051869562329813, "grad_norm": 411.20538330078125, "learning_rate": 1.5021097472763607e-06, "loss": 19.4531, "step": 10618 }, { "epoch": 0.7052533705253371, "grad_norm": 318.0949401855469, "learning_rate": 1.5020167330199135e-06, "loss": 25.5, "step": 10619 }, { "epoch": 0.7053197848176928, "grad_norm": 259.70599365234375, "learning_rate": 1.5019237129564264e-06, "loss": 19.5781, "step": 10620 }, { "epoch": 0.7053861991100485, "grad_norm": 193.61997985839844, "learning_rate": 1.5018306870869752e-06, "loss": 14.5938, "step": 10621 }, { "epoch": 0.7054526134024042, "grad_norm": 260.35296630859375, "learning_rate": 1.5017376554126358e-06, "loss": 15.3672, "step": 10622 }, { "epoch": 0.7055190276947599, "grad_norm": 333.7824401855469, "learning_rate": 1.5016446179344846e-06, "loss": 23.1406, "step": 10623 }, { "epoch": 0.7055854419871156, "grad_norm": 340.469482421875, "learning_rate": 1.5015515746535978e-06, "loss": 14.5781, "step": 10624 }, { "epoch": 0.7056518562794714, "grad_norm": 178.0784912109375, "learning_rate": 1.5014585255710516e-06, "loss": 15.7812, "step": 10625 }, { "epoch": 0.705718270571827, "grad_norm": 134.7810516357422, "learning_rate": 1.5013654706879223e-06, "loss": 16.2188, "step": 10626 }, { "epoch": 0.7057846848641828, "grad_norm": 406.74578857421875, "learning_rate": 1.5012724100052861e-06, "loss": 22.6094, "step": 10627 }, { "epoch": 0.7058510991565385, "grad_norm": 299.2531433105469, "learning_rate": 1.5011793435242198e-06, "loss": 20.625, "step": 10628 }, { "epoch": 0.7059175134488942, "grad_norm": 347.9764099121094, "learning_rate": 1.5010862712457998e-06, "loss": 20.8594, "step": 10629 }, { "epoch": 0.70598392774125, "grad_norm": 232.96827697753906, "learning_rate": 1.500993193171103e-06, "loss": 16.3438, "step": 10630 }, { "epoch": 0.7060503420336056, "grad_norm": 233.17408752441406, "learning_rate": 1.500900109301206e-06, "loss": 20.6719, "step": 10631 }, { "epoch": 0.7061167563259614, "grad_norm": 352.4247741699219, "learning_rate": 1.5008070196371852e-06, "loss": 19.5938, "step": 10632 }, { "epoch": 0.706183170618317, "grad_norm": 112.69752502441406, "learning_rate": 1.5007139241801175e-06, "loss": 15.0625, "step": 10633 }, { "epoch": 0.7062495849106728, "grad_norm": 201.53213500976562, "learning_rate": 1.5006208229310798e-06, "loss": 18.8281, "step": 10634 }, { "epoch": 0.7063159992030285, "grad_norm": 293.50897216796875, "learning_rate": 1.5005277158911489e-06, "loss": 19.6094, "step": 10635 }, { "epoch": 0.7063824134953842, "grad_norm": 400.62982177734375, "learning_rate": 1.5004346030614022e-06, "loss": 18.4297, "step": 10636 }, { "epoch": 0.7064488277877399, "grad_norm": 187.813232421875, "learning_rate": 1.5003414844429165e-06, "loss": 16.8906, "step": 10637 }, { "epoch": 0.7065152420800956, "grad_norm": 111.78599548339844, "learning_rate": 1.5002483600367692e-06, "loss": 13.3438, "step": 10638 }, { "epoch": 0.7065816563724514, "grad_norm": 130.41964721679688, "learning_rate": 1.5001552298440374e-06, "loss": 16.4219, "step": 10639 }, { "epoch": 0.7066480706648071, "grad_norm": 188.8112335205078, "learning_rate": 1.500062093865798e-06, "loss": 14.0781, "step": 10640 }, { "epoch": 0.7067144849571628, "grad_norm": 167.32437133789062, "learning_rate": 1.4999689521031289e-06, "loss": 17.4688, "step": 10641 }, { "epoch": 0.7067808992495185, "grad_norm": 493.92034912109375, "learning_rate": 1.4998758045571075e-06, "loss": 15.6406, "step": 10642 }, { "epoch": 0.7068473135418742, "grad_norm": 189.18350219726562, "learning_rate": 1.4997826512288106e-06, "loss": 20.0312, "step": 10643 }, { "epoch": 0.7069137278342299, "grad_norm": 138.76332092285156, "learning_rate": 1.4996894921193163e-06, "loss": 13.5469, "step": 10644 }, { "epoch": 0.7069801421265857, "grad_norm": 139.99656677246094, "learning_rate": 1.4995963272297021e-06, "loss": 16.125, "step": 10645 }, { "epoch": 0.7070465564189413, "grad_norm": 284.2795715332031, "learning_rate": 1.4995031565610456e-06, "loss": 18.4844, "step": 10646 }, { "epoch": 0.7071129707112971, "grad_norm": 190.60508728027344, "learning_rate": 1.4994099801144246e-06, "loss": 15.0, "step": 10647 }, { "epoch": 0.7071793850036527, "grad_norm": 160.1535186767578, "learning_rate": 1.499316797890917e-06, "loss": 15.75, "step": 10648 }, { "epoch": 0.7072457992960085, "grad_norm": 226.28524780273438, "learning_rate": 1.4992236098916006e-06, "loss": 15.6484, "step": 10649 }, { "epoch": 0.7073122135883643, "grad_norm": 226.26467895507812, "learning_rate": 1.4991304161175533e-06, "loss": 14.9531, "step": 10650 }, { "epoch": 0.7073786278807199, "grad_norm": 149.84664916992188, "learning_rate": 1.4990372165698533e-06, "loss": 18.9844, "step": 10651 }, { "epoch": 0.7074450421730757, "grad_norm": 134.3569793701172, "learning_rate": 1.4989440112495785e-06, "loss": 19.2812, "step": 10652 }, { "epoch": 0.7075114564654313, "grad_norm": 132.00804138183594, "learning_rate": 1.498850800157807e-06, "loss": 14.4688, "step": 10653 }, { "epoch": 0.7075778707577871, "grad_norm": 100.93675231933594, "learning_rate": 1.4987575832956172e-06, "loss": 15.5938, "step": 10654 }, { "epoch": 0.7076442850501428, "grad_norm": 917.6132202148438, "learning_rate": 1.4986643606640872e-06, "loss": 16.4844, "step": 10655 }, { "epoch": 0.7077106993424985, "grad_norm": 347.8126525878906, "learning_rate": 1.4985711322642955e-06, "loss": 19.5, "step": 10656 }, { "epoch": 0.7077771136348542, "grad_norm": 321.51165771484375, "learning_rate": 1.4984778980973205e-06, "loss": 17.2031, "step": 10657 }, { "epoch": 0.70784352792721, "grad_norm": 143.38034057617188, "learning_rate": 1.4983846581642405e-06, "loss": 13.1562, "step": 10658 }, { "epoch": 0.7079099422195656, "grad_norm": 646.8802490234375, "learning_rate": 1.4982914124661342e-06, "loss": 17.375, "step": 10659 }, { "epoch": 0.7079763565119214, "grad_norm": 850.437255859375, "learning_rate": 1.49819816100408e-06, "loss": 30.1562, "step": 10660 }, { "epoch": 0.7080427708042771, "grad_norm": 207.89004516601562, "learning_rate": 1.4981049037791568e-06, "loss": 16.7344, "step": 10661 }, { "epoch": 0.7081091850966328, "grad_norm": 131.9641571044922, "learning_rate": 1.4980116407924436e-06, "loss": 12.7656, "step": 10662 }, { "epoch": 0.7081755993889886, "grad_norm": 180.5456085205078, "learning_rate": 1.4979183720450186e-06, "loss": 21.7031, "step": 10663 }, { "epoch": 0.7082420136813442, "grad_norm": 194.1867218017578, "learning_rate": 1.4978250975379612e-06, "loss": 19.9219, "step": 10664 }, { "epoch": 0.7083084279737, "grad_norm": 468.40802001953125, "learning_rate": 1.4977318172723499e-06, "loss": 27.4062, "step": 10665 }, { "epoch": 0.7083748422660556, "grad_norm": 270.06512451171875, "learning_rate": 1.4976385312492638e-06, "loss": 14.375, "step": 10666 }, { "epoch": 0.7084412565584114, "grad_norm": 1817.0780029296875, "learning_rate": 1.4975452394697826e-06, "loss": 21.5938, "step": 10667 }, { "epoch": 0.708507670850767, "grad_norm": 186.1110382080078, "learning_rate": 1.497451941934985e-06, "loss": 15.2969, "step": 10668 }, { "epoch": 0.7085740851431228, "grad_norm": 806.538330078125, "learning_rate": 1.4973586386459496e-06, "loss": 16.125, "step": 10669 }, { "epoch": 0.7086404994354785, "grad_norm": 221.76333618164062, "learning_rate": 1.4972653296037568e-06, "loss": 14.3438, "step": 10670 }, { "epoch": 0.7087069137278342, "grad_norm": 523.7637939453125, "learning_rate": 1.497172014809485e-06, "loss": 20.125, "step": 10671 }, { "epoch": 0.70877332802019, "grad_norm": 408.15985107421875, "learning_rate": 1.4970786942642142e-06, "loss": 22.5781, "step": 10672 }, { "epoch": 0.7088397423125457, "grad_norm": 131.77828979492188, "learning_rate": 1.4969853679690239e-06, "loss": 13.3438, "step": 10673 }, { "epoch": 0.7089061566049014, "grad_norm": 140.1720428466797, "learning_rate": 1.496892035924993e-06, "loss": 13.6562, "step": 10674 }, { "epoch": 0.7089725708972571, "grad_norm": 171.7752685546875, "learning_rate": 1.496798698133202e-06, "loss": 17.7969, "step": 10675 }, { "epoch": 0.7090389851896128, "grad_norm": 175.3634796142578, "learning_rate": 1.49670535459473e-06, "loss": 15.9688, "step": 10676 }, { "epoch": 0.7091053994819685, "grad_norm": 396.6622619628906, "learning_rate": 1.4966120053106568e-06, "loss": 17.0312, "step": 10677 }, { "epoch": 0.7091718137743243, "grad_norm": 216.79884338378906, "learning_rate": 1.496518650282062e-06, "loss": 16.6094, "step": 10678 }, { "epoch": 0.7092382280666799, "grad_norm": 195.12933349609375, "learning_rate": 1.4964252895100262e-06, "loss": 18.4219, "step": 10679 }, { "epoch": 0.7093046423590357, "grad_norm": 187.1980743408203, "learning_rate": 1.4963319229956287e-06, "loss": 20.3594, "step": 10680 }, { "epoch": 0.7093710566513913, "grad_norm": 385.3461608886719, "learning_rate": 1.49623855073995e-06, "loss": 18.1562, "step": 10681 }, { "epoch": 0.7094374709437471, "grad_norm": 326.93939208984375, "learning_rate": 1.4961451727440694e-06, "loss": 20.0625, "step": 10682 }, { "epoch": 0.7095038852361029, "grad_norm": 116.47844696044922, "learning_rate": 1.496051789009068e-06, "loss": 13.5938, "step": 10683 }, { "epoch": 0.7095702995284585, "grad_norm": 141.08856201171875, "learning_rate": 1.4959583995360255e-06, "loss": 14.1562, "step": 10684 }, { "epoch": 0.7096367138208143, "grad_norm": 233.2213134765625, "learning_rate": 1.495865004326022e-06, "loss": 13.7031, "step": 10685 }, { "epoch": 0.7097031281131699, "grad_norm": 478.35662841796875, "learning_rate": 1.4957716033801382e-06, "loss": 12.6719, "step": 10686 }, { "epoch": 0.7097695424055257, "grad_norm": 487.82232666015625, "learning_rate": 1.4956781966994542e-06, "loss": 18.1875, "step": 10687 }, { "epoch": 0.7098359566978814, "grad_norm": 339.6650085449219, "learning_rate": 1.495584784285051e-06, "loss": 27.2656, "step": 10688 }, { "epoch": 0.7099023709902371, "grad_norm": 352.4935302734375, "learning_rate": 1.4954913661380085e-06, "loss": 14.5312, "step": 10689 }, { "epoch": 0.7099687852825928, "grad_norm": 492.8771667480469, "learning_rate": 1.4953979422594075e-06, "loss": 23.4375, "step": 10690 }, { "epoch": 0.7100351995749485, "grad_norm": 170.60447692871094, "learning_rate": 1.4953045126503288e-06, "loss": 17.9688, "step": 10691 }, { "epoch": 0.7101016138673042, "grad_norm": 215.92013549804688, "learning_rate": 1.4952110773118533e-06, "loss": 22.9531, "step": 10692 }, { "epoch": 0.71016802815966, "grad_norm": 210.70042419433594, "learning_rate": 1.4951176362450616e-06, "loss": 17.5625, "step": 10693 }, { "epoch": 0.7102344424520157, "grad_norm": 298.7304382324219, "learning_rate": 1.4950241894510345e-06, "loss": 18.6875, "step": 10694 }, { "epoch": 0.7103008567443714, "grad_norm": 118.08859252929688, "learning_rate": 1.494930736930853e-06, "loss": 17.3281, "step": 10695 }, { "epoch": 0.7103672710367271, "grad_norm": 264.6043395996094, "learning_rate": 1.494837278685598e-06, "loss": 19.5625, "step": 10696 }, { "epoch": 0.7104336853290828, "grad_norm": 157.50843811035156, "learning_rate": 1.494743814716351e-06, "loss": 18.0938, "step": 10697 }, { "epoch": 0.7105000996214386, "grad_norm": 143.71913146972656, "learning_rate": 1.4946503450241928e-06, "loss": 14.9219, "step": 10698 }, { "epoch": 0.7105665139137942, "grad_norm": 224.88365173339844, "learning_rate": 1.4945568696102044e-06, "loss": 17.2188, "step": 10699 }, { "epoch": 0.71063292820615, "grad_norm": 191.67703247070312, "learning_rate": 1.4944633884754676e-06, "loss": 17.4688, "step": 10700 }, { "epoch": 0.7106993424985056, "grad_norm": 135.88218688964844, "learning_rate": 1.4943699016210632e-06, "loss": 13.2031, "step": 10701 }, { "epoch": 0.7107657567908614, "grad_norm": 181.4799041748047, "learning_rate": 1.4942764090480728e-06, "loss": 15.7344, "step": 10702 }, { "epoch": 0.7108321710832171, "grad_norm": 109.99715423583984, "learning_rate": 1.4941829107575783e-06, "loss": 18.4062, "step": 10703 }, { "epoch": 0.7108985853755728, "grad_norm": 102.5196304321289, "learning_rate": 1.4940894067506604e-06, "loss": 14.8906, "step": 10704 }, { "epoch": 0.7109649996679286, "grad_norm": 387.375732421875, "learning_rate": 1.4939958970284015e-06, "loss": 16.9375, "step": 10705 }, { "epoch": 0.7110314139602842, "grad_norm": 239.21600341796875, "learning_rate": 1.4939023815918828e-06, "loss": 16.375, "step": 10706 }, { "epoch": 0.71109782825264, "grad_norm": 88.2515869140625, "learning_rate": 1.493808860442186e-06, "loss": 17.8125, "step": 10707 }, { "epoch": 0.7111642425449957, "grad_norm": 186.27099609375, "learning_rate": 1.4937153335803934e-06, "loss": 21.0781, "step": 10708 }, { "epoch": 0.7112306568373514, "grad_norm": 185.159912109375, "learning_rate": 1.493621801007586e-06, "loss": 13.4844, "step": 10709 }, { "epoch": 0.7112970711297071, "grad_norm": 157.20965576171875, "learning_rate": 1.4935282627248468e-06, "loss": 15.6094, "step": 10710 }, { "epoch": 0.7113634854220628, "grad_norm": 760.7379760742188, "learning_rate": 1.4934347187332568e-06, "loss": 22.5, "step": 10711 }, { "epoch": 0.7114298997144185, "grad_norm": 439.0007629394531, "learning_rate": 1.4933411690338986e-06, "loss": 13.9844, "step": 10712 }, { "epoch": 0.7114963140067743, "grad_norm": 125.49678039550781, "learning_rate": 1.4932476136278545e-06, "loss": 14.25, "step": 10713 }, { "epoch": 0.7115627282991299, "grad_norm": 180.29513549804688, "learning_rate": 1.4931540525162062e-06, "loss": 16.5938, "step": 10714 }, { "epoch": 0.7116291425914857, "grad_norm": 279.19525146484375, "learning_rate": 1.4930604857000362e-06, "loss": 18.7031, "step": 10715 }, { "epoch": 0.7116955568838415, "grad_norm": 524.8045043945312, "learning_rate": 1.4929669131804268e-06, "loss": 24.3438, "step": 10716 }, { "epoch": 0.7117619711761971, "grad_norm": 225.28497314453125, "learning_rate": 1.4928733349584605e-06, "loss": 19.0312, "step": 10717 }, { "epoch": 0.7118283854685529, "grad_norm": 251.1799774169922, "learning_rate": 1.4927797510352197e-06, "loss": 19.1406, "step": 10718 }, { "epoch": 0.7118947997609085, "grad_norm": 182.17958068847656, "learning_rate": 1.4926861614117868e-06, "loss": 14.5469, "step": 10719 }, { "epoch": 0.7119612140532643, "grad_norm": 138.17913818359375, "learning_rate": 1.4925925660892445e-06, "loss": 12.7031, "step": 10720 }, { "epoch": 0.71202762834562, "grad_norm": 936.4984741210938, "learning_rate": 1.4924989650686754e-06, "loss": 30.9062, "step": 10721 }, { "epoch": 0.7120940426379757, "grad_norm": 264.1756286621094, "learning_rate": 1.4924053583511621e-06, "loss": 14.5156, "step": 10722 }, { "epoch": 0.7121604569303314, "grad_norm": 180.4110565185547, "learning_rate": 1.492311745937788e-06, "loss": 18.75, "step": 10723 }, { "epoch": 0.7122268712226871, "grad_norm": 162.84474182128906, "learning_rate": 1.4922181278296352e-06, "loss": 17.8125, "step": 10724 }, { "epoch": 0.7122932855150428, "grad_norm": 204.14500427246094, "learning_rate": 1.492124504027787e-06, "loss": 16.2812, "step": 10725 }, { "epoch": 0.7123596998073985, "grad_norm": 273.37811279296875, "learning_rate": 1.4920308745333263e-06, "loss": 18.5625, "step": 10726 }, { "epoch": 0.7124261140997543, "grad_norm": 152.0563507080078, "learning_rate": 1.4919372393473362e-06, "loss": 16.4688, "step": 10727 }, { "epoch": 0.71249252839211, "grad_norm": 173.50685119628906, "learning_rate": 1.4918435984708997e-06, "loss": 17.0, "step": 10728 }, { "epoch": 0.7125589426844657, "grad_norm": 236.82077026367188, "learning_rate": 1.4917499519051003e-06, "loss": 16.3906, "step": 10729 }, { "epoch": 0.7126253569768214, "grad_norm": 161.21334838867188, "learning_rate": 1.4916562996510207e-06, "loss": 15.25, "step": 10730 }, { "epoch": 0.7126917712691772, "grad_norm": 247.0641326904297, "learning_rate": 1.491562641709745e-06, "loss": 17.0469, "step": 10731 }, { "epoch": 0.7127581855615328, "grad_norm": 394.5417785644531, "learning_rate": 1.4914689780823555e-06, "loss": 25.1719, "step": 10732 }, { "epoch": 0.7128245998538886, "grad_norm": 304.5413513183594, "learning_rate": 1.4913753087699365e-06, "loss": 13.7188, "step": 10733 }, { "epoch": 0.7128910141462442, "grad_norm": 669.0170288085938, "learning_rate": 1.4912816337735713e-06, "loss": 17.6406, "step": 10734 }, { "epoch": 0.7129574284386, "grad_norm": 154.56166076660156, "learning_rate": 1.4911879530943434e-06, "loss": 21.7344, "step": 10735 }, { "epoch": 0.7130238427309556, "grad_norm": 229.18365478515625, "learning_rate": 1.4910942667333365e-06, "loss": 19.0625, "step": 10736 }, { "epoch": 0.7130902570233114, "grad_norm": 318.9759521484375, "learning_rate": 1.4910005746916346e-06, "loss": 20.75, "step": 10737 }, { "epoch": 0.7131566713156672, "grad_norm": 131.64132690429688, "learning_rate": 1.490906876970321e-06, "loss": 19.1562, "step": 10738 }, { "epoch": 0.7132230856080228, "grad_norm": 124.3873291015625, "learning_rate": 1.4908131735704795e-06, "loss": 13.9844, "step": 10739 }, { "epoch": 0.7132894999003786, "grad_norm": 361.7628173828125, "learning_rate": 1.4907194644931943e-06, "loss": 20.4688, "step": 10740 }, { "epoch": 0.7133559141927343, "grad_norm": 570.35986328125, "learning_rate": 1.4906257497395494e-06, "loss": 20.3906, "step": 10741 }, { "epoch": 0.71342232848509, "grad_norm": 748.100830078125, "learning_rate": 1.490532029310629e-06, "loss": 22.6875, "step": 10742 }, { "epoch": 0.7134887427774457, "grad_norm": 226.59188842773438, "learning_rate": 1.4904383032075166e-06, "loss": 16.0625, "step": 10743 }, { "epoch": 0.7135551570698014, "grad_norm": 216.05191040039062, "learning_rate": 1.4903445714312966e-06, "loss": 16.3906, "step": 10744 }, { "epoch": 0.7136215713621571, "grad_norm": 264.7319641113281, "learning_rate": 1.4902508339830535e-06, "loss": 24.1094, "step": 10745 }, { "epoch": 0.7136879856545129, "grad_norm": 341.5062255859375, "learning_rate": 1.4901570908638715e-06, "loss": 23.7344, "step": 10746 }, { "epoch": 0.7137543999468685, "grad_norm": 224.11373901367188, "learning_rate": 1.4900633420748347e-06, "loss": 19.3906, "step": 10747 }, { "epoch": 0.7138208142392243, "grad_norm": 284.60931396484375, "learning_rate": 1.4899695876170282e-06, "loss": 16.75, "step": 10748 }, { "epoch": 0.71388722853158, "grad_norm": 156.7825164794922, "learning_rate": 1.4898758274915358e-06, "loss": 15.4688, "step": 10749 }, { "epoch": 0.7139536428239357, "grad_norm": 273.7509460449219, "learning_rate": 1.489782061699442e-06, "loss": 16.7656, "step": 10750 }, { "epoch": 0.7140200571162915, "grad_norm": 171.28448486328125, "learning_rate": 1.489688290241832e-06, "loss": 19.75, "step": 10751 }, { "epoch": 0.7140864714086471, "grad_norm": 219.48367309570312, "learning_rate": 1.4895945131197904e-06, "loss": 14.6875, "step": 10752 }, { "epoch": 0.7141528857010029, "grad_norm": 297.7299499511719, "learning_rate": 1.4895007303344016e-06, "loss": 15.5156, "step": 10753 }, { "epoch": 0.7142192999933585, "grad_norm": 109.13818359375, "learning_rate": 1.4894069418867505e-06, "loss": 11.8125, "step": 10754 }, { "epoch": 0.7142857142857143, "grad_norm": 125.42398834228516, "learning_rate": 1.489313147777922e-06, "loss": 14.9062, "step": 10755 }, { "epoch": 0.71435212857807, "grad_norm": 453.5758361816406, "learning_rate": 1.4892193480090018e-06, "loss": 15.2969, "step": 10756 }, { "epoch": 0.7144185428704257, "grad_norm": 132.90957641601562, "learning_rate": 1.4891255425810739e-06, "loss": 11.6094, "step": 10757 }, { "epoch": 0.7144849571627814, "grad_norm": 155.5829315185547, "learning_rate": 1.4890317314952236e-06, "loss": 12.9375, "step": 10758 }, { "epoch": 0.7145513714551371, "grad_norm": 235.86740112304688, "learning_rate": 1.4889379147525364e-06, "loss": 17.0469, "step": 10759 }, { "epoch": 0.7146177857474929, "grad_norm": 128.5901641845703, "learning_rate": 1.4888440923540976e-06, "loss": 13.7969, "step": 10760 }, { "epoch": 0.7146842000398486, "grad_norm": 144.39630126953125, "learning_rate": 1.4887502643009917e-06, "loss": 18.2188, "step": 10761 }, { "epoch": 0.7147506143322043, "grad_norm": 204.25765991210938, "learning_rate": 1.4886564305943047e-06, "loss": 22.2812, "step": 10762 }, { "epoch": 0.71481702862456, "grad_norm": 263.8466491699219, "learning_rate": 1.4885625912351223e-06, "loss": 15.4062, "step": 10763 }, { "epoch": 0.7148834429169157, "grad_norm": 150.68072509765625, "learning_rate": 1.488468746224529e-06, "loss": 14.0938, "step": 10764 }, { "epoch": 0.7149498572092714, "grad_norm": 307.9772033691406, "learning_rate": 1.4883748955636114e-06, "loss": 23.9844, "step": 10765 }, { "epoch": 0.7150162715016272, "grad_norm": 451.5121765136719, "learning_rate": 1.4882810392534542e-06, "loss": 23.1875, "step": 10766 }, { "epoch": 0.7150826857939828, "grad_norm": 265.6407470703125, "learning_rate": 1.4881871772951437e-06, "loss": 16.8594, "step": 10767 }, { "epoch": 0.7151491000863386, "grad_norm": 387.7401428222656, "learning_rate": 1.4880933096897654e-06, "loss": 12.875, "step": 10768 }, { "epoch": 0.7152155143786944, "grad_norm": 303.51190185546875, "learning_rate": 1.4879994364384053e-06, "loss": 16.3906, "step": 10769 }, { "epoch": 0.71528192867105, "grad_norm": 356.7572326660156, "learning_rate": 1.487905557542149e-06, "loss": 20.875, "step": 10770 }, { "epoch": 0.7153483429634058, "grad_norm": 348.36907958984375, "learning_rate": 1.4878116730020827e-06, "loss": 14.4219, "step": 10771 }, { "epoch": 0.7154147572557614, "grad_norm": 335.408447265625, "learning_rate": 1.4877177828192919e-06, "loss": 23.5, "step": 10772 }, { "epoch": 0.7154811715481172, "grad_norm": 277.4454650878906, "learning_rate": 1.4876238869948633e-06, "loss": 28.0, "step": 10773 }, { "epoch": 0.7155475858404728, "grad_norm": 151.3734588623047, "learning_rate": 1.4875299855298828e-06, "loss": 17.375, "step": 10774 }, { "epoch": 0.7156140001328286, "grad_norm": 162.82057189941406, "learning_rate": 1.4874360784254364e-06, "loss": 22.5469, "step": 10775 }, { "epoch": 0.7156804144251843, "grad_norm": 253.44070434570312, "learning_rate": 1.4873421656826104e-06, "loss": 20.7188, "step": 10776 }, { "epoch": 0.71574682871754, "grad_norm": 219.12864685058594, "learning_rate": 1.4872482473024915e-06, "loss": 17.2031, "step": 10777 }, { "epoch": 0.7158132430098957, "grad_norm": 160.50979614257812, "learning_rate": 1.4871543232861658e-06, "loss": 16.5156, "step": 10778 }, { "epoch": 0.7158796573022514, "grad_norm": 257.2391662597656, "learning_rate": 1.4870603936347196e-06, "loss": 16.7812, "step": 10779 }, { "epoch": 0.7159460715946072, "grad_norm": 126.013427734375, "learning_rate": 1.48696645834924e-06, "loss": 12.9297, "step": 10780 }, { "epoch": 0.7160124858869629, "grad_norm": 264.97088623046875, "learning_rate": 1.4868725174308129e-06, "loss": 17.6562, "step": 10781 }, { "epoch": 0.7160789001793186, "grad_norm": 212.4232177734375, "learning_rate": 1.4867785708805251e-06, "loss": 15.5312, "step": 10782 }, { "epoch": 0.7161453144716743, "grad_norm": 276.3628845214844, "learning_rate": 1.4866846186994637e-06, "loss": 17.1562, "step": 10783 }, { "epoch": 0.71621172876403, "grad_norm": 909.97021484375, "learning_rate": 1.4865906608887153e-06, "loss": 30.4375, "step": 10784 }, { "epoch": 0.7162781430563857, "grad_norm": 345.4924621582031, "learning_rate": 1.4864966974493667e-06, "loss": 21.2812, "step": 10785 }, { "epoch": 0.7163445573487415, "grad_norm": 226.04193115234375, "learning_rate": 1.4864027283825049e-06, "loss": 19.4375, "step": 10786 }, { "epoch": 0.7164109716410971, "grad_norm": 271.9830017089844, "learning_rate": 1.4863087536892165e-06, "loss": 21.1719, "step": 10787 }, { "epoch": 0.7164773859334529, "grad_norm": 222.5939178466797, "learning_rate": 1.4862147733705894e-06, "loss": 19.875, "step": 10788 }, { "epoch": 0.7165438002258085, "grad_norm": 144.6584014892578, "learning_rate": 1.4861207874277097e-06, "loss": 14.6719, "step": 10789 }, { "epoch": 0.7166102145181643, "grad_norm": 460.5335998535156, "learning_rate": 1.4860267958616652e-06, "loss": 15.6328, "step": 10790 }, { "epoch": 0.7166766288105201, "grad_norm": 127.49623107910156, "learning_rate": 1.4859327986735432e-06, "loss": 16.1719, "step": 10791 }, { "epoch": 0.7167430431028757, "grad_norm": 1499.3741455078125, "learning_rate": 1.4858387958644304e-06, "loss": 13.4375, "step": 10792 }, { "epoch": 0.7168094573952315, "grad_norm": 233.76736450195312, "learning_rate": 1.485744787435415e-06, "loss": 19.7812, "step": 10793 }, { "epoch": 0.7168758716875872, "grad_norm": 273.392578125, "learning_rate": 1.4856507733875835e-06, "loss": 19.2812, "step": 10794 }, { "epoch": 0.7169422859799429, "grad_norm": 313.15631103515625, "learning_rate": 1.4855567537220244e-06, "loss": 20.3594, "step": 10795 }, { "epoch": 0.7170087002722986, "grad_norm": 258.1300354003906, "learning_rate": 1.4854627284398245e-06, "loss": 17.4844, "step": 10796 }, { "epoch": 0.7170751145646543, "grad_norm": 450.45355224609375, "learning_rate": 1.4853686975420717e-06, "loss": 26.1875, "step": 10797 }, { "epoch": 0.71714152885701, "grad_norm": 515.7175903320312, "learning_rate": 1.4852746610298533e-06, "loss": 27.9141, "step": 10798 }, { "epoch": 0.7172079431493658, "grad_norm": 359.9009704589844, "learning_rate": 1.4851806189042583e-06, "loss": 16.375, "step": 10799 }, { "epoch": 0.7172743574417214, "grad_norm": 281.06402587890625, "learning_rate": 1.4850865711663732e-06, "loss": 17.3438, "step": 10800 }, { "epoch": 0.7173407717340772, "grad_norm": 193.3817901611328, "learning_rate": 1.4849925178172862e-06, "loss": 14.7656, "step": 10801 }, { "epoch": 0.7174071860264329, "grad_norm": 870.6890258789062, "learning_rate": 1.4848984588580858e-06, "loss": 29.5, "step": 10802 }, { "epoch": 0.7174736003187886, "grad_norm": 153.29176330566406, "learning_rate": 1.4848043942898593e-06, "loss": 14.9062, "step": 10803 }, { "epoch": 0.7175400146111444, "grad_norm": 295.60858154296875, "learning_rate": 1.484710324113695e-06, "loss": 20.0312, "step": 10804 }, { "epoch": 0.7176064289035, "grad_norm": 204.74826049804688, "learning_rate": 1.4846162483306815e-06, "loss": 12.5625, "step": 10805 }, { "epoch": 0.7176728431958558, "grad_norm": 117.0429916381836, "learning_rate": 1.4845221669419069e-06, "loss": 18.75, "step": 10806 }, { "epoch": 0.7177392574882114, "grad_norm": 254.92440795898438, "learning_rate": 1.4844280799484588e-06, "loss": 17.4844, "step": 10807 }, { "epoch": 0.7178056717805672, "grad_norm": 462.9102783203125, "learning_rate": 1.484333987351426e-06, "loss": 14.0156, "step": 10808 }, { "epoch": 0.7178720860729229, "grad_norm": 358.22332763671875, "learning_rate": 1.4842398891518972e-06, "loss": 21.875, "step": 10809 }, { "epoch": 0.7179385003652786, "grad_norm": 188.9109649658203, "learning_rate": 1.4841457853509606e-06, "loss": 13.1719, "step": 10810 }, { "epoch": 0.7180049146576343, "grad_norm": 276.8013610839844, "learning_rate": 1.4840516759497046e-06, "loss": 17.25, "step": 10811 }, { "epoch": 0.71807132894999, "grad_norm": 398.83465576171875, "learning_rate": 1.4839575609492178e-06, "loss": 17.3438, "step": 10812 }, { "epoch": 0.7181377432423458, "grad_norm": 159.72927856445312, "learning_rate": 1.483863440350589e-06, "loss": 17.3125, "step": 10813 }, { "epoch": 0.7182041575347015, "grad_norm": 163.169189453125, "learning_rate": 1.4837693141549072e-06, "loss": 16.0469, "step": 10814 }, { "epoch": 0.7182705718270572, "grad_norm": 210.39810180664062, "learning_rate": 1.483675182363261e-06, "loss": 17.6094, "step": 10815 }, { "epoch": 0.7183369861194129, "grad_norm": 318.38092041015625, "learning_rate": 1.4835810449767388e-06, "loss": 15.6562, "step": 10816 }, { "epoch": 0.7184034004117686, "grad_norm": 212.07151794433594, "learning_rate": 1.4834869019964302e-06, "loss": 17.1406, "step": 10817 }, { "epoch": 0.7184698147041243, "grad_norm": 583.6727294921875, "learning_rate": 1.4833927534234239e-06, "loss": 22.2344, "step": 10818 }, { "epoch": 0.7185362289964801, "grad_norm": 411.4305725097656, "learning_rate": 1.4832985992588087e-06, "loss": 21.9531, "step": 10819 }, { "epoch": 0.7186026432888357, "grad_norm": 786.5626831054688, "learning_rate": 1.4832044395036743e-06, "loss": 14.6094, "step": 10820 }, { "epoch": 0.7186690575811915, "grad_norm": 210.68089294433594, "learning_rate": 1.4831102741591095e-06, "loss": 22.4062, "step": 10821 }, { "epoch": 0.7187354718735471, "grad_norm": 148.60374450683594, "learning_rate": 1.4830161032262034e-06, "loss": 14.7812, "step": 10822 }, { "epoch": 0.7188018861659029, "grad_norm": 184.3419189453125, "learning_rate": 1.4829219267060457e-06, "loss": 18.7812, "step": 10823 }, { "epoch": 0.7188683004582587, "grad_norm": 198.0956573486328, "learning_rate": 1.4828277445997257e-06, "loss": 16.0781, "step": 10824 }, { "epoch": 0.7189347147506143, "grad_norm": 279.26361083984375, "learning_rate": 1.4827335569083327e-06, "loss": 20.8438, "step": 10825 }, { "epoch": 0.7190011290429701, "grad_norm": 286.28582763671875, "learning_rate": 1.4826393636329563e-06, "loss": 15.2188, "step": 10826 }, { "epoch": 0.7190675433353257, "grad_norm": 313.505859375, "learning_rate": 1.4825451647746863e-06, "loss": 16.1094, "step": 10827 }, { "epoch": 0.7191339576276815, "grad_norm": 368.4177551269531, "learning_rate": 1.4824509603346117e-06, "loss": 20.9375, "step": 10828 }, { "epoch": 0.7192003719200372, "grad_norm": 196.578857421875, "learning_rate": 1.4823567503138225e-06, "loss": 20.5312, "step": 10829 }, { "epoch": 0.7192667862123929, "grad_norm": 215.8731231689453, "learning_rate": 1.4822625347134089e-06, "loss": 19.5625, "step": 10830 }, { "epoch": 0.7193332005047486, "grad_norm": 249.18934631347656, "learning_rate": 1.4821683135344603e-06, "loss": 23.8125, "step": 10831 }, { "epoch": 0.7193996147971043, "grad_norm": 424.5755310058594, "learning_rate": 1.4820740867780669e-06, "loss": 22.0781, "step": 10832 }, { "epoch": 0.71946602908946, "grad_norm": 305.75128173828125, "learning_rate": 1.481979854445318e-06, "loss": 18.7812, "step": 10833 }, { "epoch": 0.7195324433818158, "grad_norm": 315.2439880371094, "learning_rate": 1.4818856165373044e-06, "loss": 15.1719, "step": 10834 }, { "epoch": 0.7195988576741715, "grad_norm": 177.82235717773438, "learning_rate": 1.4817913730551155e-06, "loss": 17.2188, "step": 10835 }, { "epoch": 0.7196652719665272, "grad_norm": 488.2502746582031, "learning_rate": 1.481697123999842e-06, "loss": 21.0312, "step": 10836 }, { "epoch": 0.719731686258883, "grad_norm": 241.2085723876953, "learning_rate": 1.4816028693725739e-06, "loss": 15.8281, "step": 10837 }, { "epoch": 0.7197981005512386, "grad_norm": 161.1261749267578, "learning_rate": 1.4815086091744019e-06, "loss": 12.3906, "step": 10838 }, { "epoch": 0.7198645148435944, "grad_norm": 291.86993408203125, "learning_rate": 1.4814143434064155e-06, "loss": 15.7031, "step": 10839 }, { "epoch": 0.71993092913595, "grad_norm": 124.72647094726562, "learning_rate": 1.4813200720697054e-06, "loss": 15.7969, "step": 10840 }, { "epoch": 0.7199973434283058, "grad_norm": 167.44400024414062, "learning_rate": 1.4812257951653628e-06, "loss": 16.9844, "step": 10841 }, { "epoch": 0.7200637577206614, "grad_norm": 145.99977111816406, "learning_rate": 1.4811315126944775e-06, "loss": 16.0781, "step": 10842 }, { "epoch": 0.7201301720130172, "grad_norm": 222.69363403320312, "learning_rate": 1.4810372246581402e-06, "loss": 23.0, "step": 10843 }, { "epoch": 0.7201965863053729, "grad_norm": 143.77243041992188, "learning_rate": 1.4809429310574418e-06, "loss": 15.5, "step": 10844 }, { "epoch": 0.7202630005977286, "grad_norm": 204.9259490966797, "learning_rate": 1.4808486318934728e-06, "loss": 18.0, "step": 10845 }, { "epoch": 0.7203294148900844, "grad_norm": 174.54376220703125, "learning_rate": 1.480754327167324e-06, "loss": 13.2656, "step": 10846 }, { "epoch": 0.72039582918244, "grad_norm": 565.8750610351562, "learning_rate": 1.4806600168800864e-06, "loss": 19.4219, "step": 10847 }, { "epoch": 0.7204622434747958, "grad_norm": 359.7299499511719, "learning_rate": 1.4805657010328512e-06, "loss": 18.0469, "step": 10848 }, { "epoch": 0.7205286577671515, "grad_norm": 346.067626953125, "learning_rate": 1.4804713796267088e-06, "loss": 29.2188, "step": 10849 }, { "epoch": 0.7205950720595072, "grad_norm": 190.24615478515625, "learning_rate": 1.4803770526627506e-06, "loss": 18.5469, "step": 10850 }, { "epoch": 0.7206614863518629, "grad_norm": 551.0979614257812, "learning_rate": 1.4802827201420676e-06, "loss": 27.7188, "step": 10851 }, { "epoch": 0.7207279006442187, "grad_norm": 147.72515869140625, "learning_rate": 1.4801883820657512e-06, "loss": 17.5312, "step": 10852 }, { "epoch": 0.7207943149365743, "grad_norm": 281.38330078125, "learning_rate": 1.4800940384348923e-06, "loss": 20.25, "step": 10853 }, { "epoch": 0.7208607292289301, "grad_norm": 113.4882583618164, "learning_rate": 1.4799996892505825e-06, "loss": 13.5625, "step": 10854 }, { "epoch": 0.7209271435212857, "grad_norm": 233.67430114746094, "learning_rate": 1.479905334513913e-06, "loss": 19.2969, "step": 10855 }, { "epoch": 0.7209935578136415, "grad_norm": 244.8924102783203, "learning_rate": 1.4798109742259753e-06, "loss": 20.6797, "step": 10856 }, { "epoch": 0.7210599721059973, "grad_norm": 495.88330078125, "learning_rate": 1.479716608387861e-06, "loss": 26.2031, "step": 10857 }, { "epoch": 0.7211263863983529, "grad_norm": 287.7066650390625, "learning_rate": 1.4796222370006614e-06, "loss": 23.7188, "step": 10858 }, { "epoch": 0.7211928006907087, "grad_norm": 159.04815673828125, "learning_rate": 1.4795278600654687e-06, "loss": 18.1094, "step": 10859 }, { "epoch": 0.7212592149830643, "grad_norm": 269.2866516113281, "learning_rate": 1.4794334775833738e-06, "loss": 17.3906, "step": 10860 }, { "epoch": 0.7213256292754201, "grad_norm": 188.647216796875, "learning_rate": 1.4793390895554692e-06, "loss": 17.0156, "step": 10861 }, { "epoch": 0.7213920435677758, "grad_norm": 177.24769592285156, "learning_rate": 1.4792446959828464e-06, "loss": 11.6406, "step": 10862 }, { "epoch": 0.7214584578601315, "grad_norm": 258.3697814941406, "learning_rate": 1.4791502968665974e-06, "loss": 17.4219, "step": 10863 }, { "epoch": 0.7215248721524872, "grad_norm": 336.1249694824219, "learning_rate": 1.4790558922078139e-06, "loss": 16.875, "step": 10864 }, { "epoch": 0.7215912864448429, "grad_norm": 249.99539184570312, "learning_rate": 1.478961482007588e-06, "loss": 14.0312, "step": 10865 }, { "epoch": 0.7216577007371986, "grad_norm": 190.0615997314453, "learning_rate": 1.478867066267012e-06, "loss": 20.1875, "step": 10866 }, { "epoch": 0.7217241150295544, "grad_norm": 109.2402572631836, "learning_rate": 1.478772644987178e-06, "loss": 17.0625, "step": 10867 }, { "epoch": 0.7217905293219101, "grad_norm": 198.94723510742188, "learning_rate": 1.4786782181691778e-06, "loss": 20.1875, "step": 10868 }, { "epoch": 0.7218569436142658, "grad_norm": 191.0261993408203, "learning_rate": 1.4785837858141042e-06, "loss": 13.9531, "step": 10869 }, { "epoch": 0.7219233579066215, "grad_norm": 200.97232055664062, "learning_rate": 1.4784893479230494e-06, "loss": 22.2344, "step": 10870 }, { "epoch": 0.7219897721989772, "grad_norm": 190.45428466796875, "learning_rate": 1.4783949044971055e-06, "loss": 15.375, "step": 10871 }, { "epoch": 0.722056186491333, "grad_norm": 190.44317626953125, "learning_rate": 1.4783004555373655e-06, "loss": 14.1719, "step": 10872 }, { "epoch": 0.7221226007836886, "grad_norm": 323.2987365722656, "learning_rate": 1.4782060010449215e-06, "loss": 18.9688, "step": 10873 }, { "epoch": 0.7221890150760444, "grad_norm": 236.23899841308594, "learning_rate": 1.4781115410208662e-06, "loss": 14.5, "step": 10874 }, { "epoch": 0.7222554293684, "grad_norm": 436.46441650390625, "learning_rate": 1.4780170754662921e-06, "loss": 18.3438, "step": 10875 }, { "epoch": 0.7223218436607558, "grad_norm": 164.59909057617188, "learning_rate": 1.4779226043822923e-06, "loss": 19.5781, "step": 10876 }, { "epoch": 0.7223882579531115, "grad_norm": 259.4053039550781, "learning_rate": 1.4778281277699596e-06, "loss": 21.2188, "step": 10877 }, { "epoch": 0.7224546722454672, "grad_norm": 173.3037109375, "learning_rate": 1.4777336456303863e-06, "loss": 17.9219, "step": 10878 }, { "epoch": 0.722521086537823, "grad_norm": 154.35357666015625, "learning_rate": 1.4776391579646658e-06, "loss": 15.4375, "step": 10879 }, { "epoch": 0.7225875008301786, "grad_norm": 168.07557678222656, "learning_rate": 1.4775446647738909e-06, "loss": 17.9531, "step": 10880 }, { "epoch": 0.7226539151225344, "grad_norm": 166.6332550048828, "learning_rate": 1.477450166059155e-06, "loss": 17.0312, "step": 10881 }, { "epoch": 0.7227203294148901, "grad_norm": 156.9446258544922, "learning_rate": 1.4773556618215506e-06, "loss": 16.875, "step": 10882 }, { "epoch": 0.7227867437072458, "grad_norm": 119.47017669677734, "learning_rate": 1.477261152062171e-06, "loss": 18.9062, "step": 10883 }, { "epoch": 0.7228531579996015, "grad_norm": 124.36385345458984, "learning_rate": 1.4771666367821101e-06, "loss": 16.6406, "step": 10884 }, { "epoch": 0.7229195722919572, "grad_norm": 257.2664489746094, "learning_rate": 1.4770721159824604e-06, "loss": 23.3438, "step": 10885 }, { "epoch": 0.7229859865843129, "grad_norm": 169.18128967285156, "learning_rate": 1.4769775896643156e-06, "loss": 16.2969, "step": 10886 }, { "epoch": 0.7230524008766687, "grad_norm": 197.51507568359375, "learning_rate": 1.4768830578287689e-06, "loss": 18.3906, "step": 10887 }, { "epoch": 0.7231188151690243, "grad_norm": 410.3912353515625, "learning_rate": 1.4767885204769141e-06, "loss": 16.0625, "step": 10888 }, { "epoch": 0.7231852294613801, "grad_norm": 135.61151123046875, "learning_rate": 1.4766939776098445e-06, "loss": 17.0312, "step": 10889 }, { "epoch": 0.7232516437537359, "grad_norm": 151.2401123046875, "learning_rate": 1.4765994292286541e-06, "loss": 12.2188, "step": 10890 }, { "epoch": 0.7233180580460915, "grad_norm": 112.2914810180664, "learning_rate": 1.4765048753344363e-06, "loss": 18.2031, "step": 10891 }, { "epoch": 0.7233844723384473, "grad_norm": 133.69107055664062, "learning_rate": 1.4764103159282848e-06, "loss": 16.5312, "step": 10892 }, { "epoch": 0.7234508866308029, "grad_norm": 217.4771728515625, "learning_rate": 1.476315751011293e-06, "loss": 20.1094, "step": 10893 }, { "epoch": 0.7235173009231587, "grad_norm": 178.984130859375, "learning_rate": 1.476221180584556e-06, "loss": 13.5938, "step": 10894 }, { "epoch": 0.7235837152155143, "grad_norm": 184.4279022216797, "learning_rate": 1.4761266046491668e-06, "loss": 17.0938, "step": 10895 }, { "epoch": 0.7236501295078701, "grad_norm": 180.72479248046875, "learning_rate": 1.4760320232062196e-06, "loss": 13.7656, "step": 10896 }, { "epoch": 0.7237165438002258, "grad_norm": 222.05140686035156, "learning_rate": 1.475937436256808e-06, "loss": 13.7969, "step": 10897 }, { "epoch": 0.7237829580925815, "grad_norm": 623.8473510742188, "learning_rate": 1.4758428438020271e-06, "loss": 18.6562, "step": 10898 }, { "epoch": 0.7238493723849372, "grad_norm": 112.89977264404297, "learning_rate": 1.4757482458429705e-06, "loss": 14.4062, "step": 10899 }, { "epoch": 0.723915786677293, "grad_norm": 186.30636596679688, "learning_rate": 1.4756536423807323e-06, "loss": 13.6562, "step": 10900 }, { "epoch": 0.7239822009696487, "grad_norm": 481.8658447265625, "learning_rate": 1.475559033416407e-06, "loss": 12.6562, "step": 10901 }, { "epoch": 0.7240486152620044, "grad_norm": 414.5346374511719, "learning_rate": 1.4754644189510895e-06, "loss": 17.4062, "step": 10902 }, { "epoch": 0.7241150295543601, "grad_norm": 435.14093017578125, "learning_rate": 1.4753697989858733e-06, "loss": 24.3594, "step": 10903 }, { "epoch": 0.7241814438467158, "grad_norm": 192.33848571777344, "learning_rate": 1.4752751735218535e-06, "loss": 21.3125, "step": 10904 }, { "epoch": 0.7242478581390716, "grad_norm": 617.6685791015625, "learning_rate": 1.4751805425601247e-06, "loss": 17.0312, "step": 10905 }, { "epoch": 0.7243142724314272, "grad_norm": 144.9732666015625, "learning_rate": 1.4750859061017814e-06, "loss": 17.7812, "step": 10906 }, { "epoch": 0.724380686723783, "grad_norm": 298.4784240722656, "learning_rate": 1.474991264147918e-06, "loss": 22.8906, "step": 10907 }, { "epoch": 0.7244471010161386, "grad_norm": 249.14085388183594, "learning_rate": 1.4748966166996298e-06, "loss": 18.1562, "step": 10908 }, { "epoch": 0.7245135153084944, "grad_norm": 283.57940673828125, "learning_rate": 1.4748019637580113e-06, "loss": 16.8125, "step": 10909 }, { "epoch": 0.72457992960085, "grad_norm": 565.6939086914062, "learning_rate": 1.4747073053241574e-06, "loss": 16.5, "step": 10910 }, { "epoch": 0.7246463438932058, "grad_norm": 318.3634948730469, "learning_rate": 1.474612641399163e-06, "loss": 17.6719, "step": 10911 }, { "epoch": 0.7247127581855616, "grad_norm": 331.6219482421875, "learning_rate": 1.4745179719841234e-06, "loss": 21.1875, "step": 10912 }, { "epoch": 0.7247791724779172, "grad_norm": 402.58538818359375, "learning_rate": 1.4744232970801337e-06, "loss": 16.7031, "step": 10913 }, { "epoch": 0.724845586770273, "grad_norm": 633.4483032226562, "learning_rate": 1.4743286166882887e-06, "loss": 30.375, "step": 10914 }, { "epoch": 0.7249120010626287, "grad_norm": 335.9993591308594, "learning_rate": 1.474233930809684e-06, "loss": 16.8906, "step": 10915 }, { "epoch": 0.7249784153549844, "grad_norm": 177.9086151123047, "learning_rate": 1.4741392394454143e-06, "loss": 14.9844, "step": 10916 }, { "epoch": 0.7250448296473401, "grad_norm": 200.28892517089844, "learning_rate": 1.4740445425965753e-06, "loss": 15.9375, "step": 10917 }, { "epoch": 0.7251112439396958, "grad_norm": 224.66744995117188, "learning_rate": 1.4739498402642626e-06, "loss": 20.375, "step": 10918 }, { "epoch": 0.7251776582320515, "grad_norm": 270.4108581542969, "learning_rate": 1.473855132449571e-06, "loss": 15.6094, "step": 10919 }, { "epoch": 0.7252440725244073, "grad_norm": 266.36956787109375, "learning_rate": 1.4737604191535972e-06, "loss": 20.1562, "step": 10920 }, { "epoch": 0.725310486816763, "grad_norm": 182.59417724609375, "learning_rate": 1.4736657003774356e-06, "loss": 16.125, "step": 10921 }, { "epoch": 0.7253769011091187, "grad_norm": 755.0926513671875, "learning_rate": 1.4735709761221823e-06, "loss": 14.7812, "step": 10922 }, { "epoch": 0.7254433154014744, "grad_norm": 823.8045654296875, "learning_rate": 1.4734762463889331e-06, "loss": 18.7344, "step": 10923 }, { "epoch": 0.7255097296938301, "grad_norm": 293.0620422363281, "learning_rate": 1.473381511178784e-06, "loss": 17.4062, "step": 10924 }, { "epoch": 0.7255761439861859, "grad_norm": 5228.18115234375, "learning_rate": 1.47328677049283e-06, "loss": 11.4375, "step": 10925 }, { "epoch": 0.7256425582785415, "grad_norm": 146.022705078125, "learning_rate": 1.4731920243321678e-06, "loss": 13.2031, "step": 10926 }, { "epoch": 0.7257089725708973, "grad_norm": 125.69758605957031, "learning_rate": 1.4730972726978933e-06, "loss": 16.5938, "step": 10927 }, { "epoch": 0.7257753868632529, "grad_norm": 1293.5233154296875, "learning_rate": 1.4730025155911022e-06, "loss": 14.7812, "step": 10928 }, { "epoch": 0.7258418011556087, "grad_norm": 159.9552001953125, "learning_rate": 1.4729077530128903e-06, "loss": 17.2969, "step": 10929 }, { "epoch": 0.7259082154479644, "grad_norm": 213.6688995361328, "learning_rate": 1.4728129849643548e-06, "loss": 16.2969, "step": 10930 }, { "epoch": 0.7259746297403201, "grad_norm": 305.3029479980469, "learning_rate": 1.4727182114465913e-06, "loss": 15.9844, "step": 10931 }, { "epoch": 0.7260410440326759, "grad_norm": 182.15475463867188, "learning_rate": 1.472623432460696e-06, "loss": 20.3281, "step": 10932 }, { "epoch": 0.7261074583250315, "grad_norm": 217.9873504638672, "learning_rate": 1.4725286480077652e-06, "loss": 14.2188, "step": 10933 }, { "epoch": 0.7261738726173873, "grad_norm": 141.13192749023438, "learning_rate": 1.4724338580888958e-06, "loss": 17.0938, "step": 10934 }, { "epoch": 0.726240286909743, "grad_norm": 268.3170166015625, "learning_rate": 1.4723390627051836e-06, "loss": 17.8438, "step": 10935 }, { "epoch": 0.7263067012020987, "grad_norm": 166.25779724121094, "learning_rate": 1.4722442618577255e-06, "loss": 20.7812, "step": 10936 }, { "epoch": 0.7263731154944544, "grad_norm": 122.42266082763672, "learning_rate": 1.4721494555476187e-06, "loss": 19.4844, "step": 10937 }, { "epoch": 0.7264395297868101, "grad_norm": 177.6004180908203, "learning_rate": 1.4720546437759586e-06, "loss": 16.6719, "step": 10938 }, { "epoch": 0.7265059440791658, "grad_norm": 159.13995361328125, "learning_rate": 1.4719598265438426e-06, "loss": 15.2656, "step": 10939 }, { "epoch": 0.7265723583715216, "grad_norm": 330.8898620605469, "learning_rate": 1.4718650038523676e-06, "loss": 18.4062, "step": 10940 }, { "epoch": 0.7266387726638772, "grad_norm": 156.18368530273438, "learning_rate": 1.4717701757026307e-06, "loss": 11.7188, "step": 10941 }, { "epoch": 0.726705186956233, "grad_norm": 256.55755615234375, "learning_rate": 1.471675342095728e-06, "loss": 23.4375, "step": 10942 }, { "epoch": 0.7267716012485888, "grad_norm": 223.4319305419922, "learning_rate": 1.471580503032757e-06, "loss": 14.8906, "step": 10943 }, { "epoch": 0.7268380155409444, "grad_norm": 195.04501342773438, "learning_rate": 1.4714856585148148e-06, "loss": 15.1953, "step": 10944 }, { "epoch": 0.7269044298333002, "grad_norm": 427.0220031738281, "learning_rate": 1.4713908085429984e-06, "loss": 36.7812, "step": 10945 }, { "epoch": 0.7269708441256558, "grad_norm": 262.89874267578125, "learning_rate": 1.4712959531184049e-06, "loss": 18.5156, "step": 10946 }, { "epoch": 0.7270372584180116, "grad_norm": 319.5506896972656, "learning_rate": 1.4712010922421315e-06, "loss": 25.6719, "step": 10947 }, { "epoch": 0.7271036727103672, "grad_norm": 299.0372314453125, "learning_rate": 1.4711062259152757e-06, "loss": 21.9375, "step": 10948 }, { "epoch": 0.727170087002723, "grad_norm": 174.43267822265625, "learning_rate": 1.4710113541389347e-06, "loss": 16.8438, "step": 10949 }, { "epoch": 0.7272365012950787, "grad_norm": 291.30303955078125, "learning_rate": 1.4709164769142056e-06, "loss": 20.1562, "step": 10950 }, { "epoch": 0.7273029155874344, "grad_norm": 170.72738647460938, "learning_rate": 1.4708215942421866e-06, "loss": 19.1562, "step": 10951 }, { "epoch": 0.7273693298797901, "grad_norm": 142.6446990966797, "learning_rate": 1.470726706123975e-06, "loss": 12.3906, "step": 10952 }, { "epoch": 0.7274357441721458, "grad_norm": 438.8959655761719, "learning_rate": 1.470631812560668e-06, "loss": 23.8906, "step": 10953 }, { "epoch": 0.7275021584645016, "grad_norm": 128.803466796875, "learning_rate": 1.4705369135533636e-06, "loss": 14.0312, "step": 10954 }, { "epoch": 0.7275685727568573, "grad_norm": 157.9051513671875, "learning_rate": 1.4704420091031597e-06, "loss": 17.5, "step": 10955 }, { "epoch": 0.727634987049213, "grad_norm": 250.8037872314453, "learning_rate": 1.4703470992111539e-06, "loss": 19.3281, "step": 10956 }, { "epoch": 0.7277014013415687, "grad_norm": 420.9826354980469, "learning_rate": 1.4702521838784441e-06, "loss": 21.3906, "step": 10957 }, { "epoch": 0.7277678156339245, "grad_norm": 193.4832000732422, "learning_rate": 1.470157263106128e-06, "loss": 17.0, "step": 10958 }, { "epoch": 0.7278342299262801, "grad_norm": 404.54595947265625, "learning_rate": 1.470062336895304e-06, "loss": 13.0469, "step": 10959 }, { "epoch": 0.7279006442186359, "grad_norm": 197.71978759765625, "learning_rate": 1.4699674052470697e-06, "loss": 15.6094, "step": 10960 }, { "epoch": 0.7279670585109915, "grad_norm": 273.56304931640625, "learning_rate": 1.4698724681625236e-06, "loss": 13.5312, "step": 10961 }, { "epoch": 0.7280334728033473, "grad_norm": 173.4640655517578, "learning_rate": 1.4697775256427635e-06, "loss": 15.7031, "step": 10962 }, { "epoch": 0.728099887095703, "grad_norm": 327.9992980957031, "learning_rate": 1.4696825776888884e-06, "loss": 13.125, "step": 10963 }, { "epoch": 0.7281663013880587, "grad_norm": 214.0906524658203, "learning_rate": 1.4695876243019957e-06, "loss": 19.5469, "step": 10964 }, { "epoch": 0.7282327156804145, "grad_norm": 213.58204650878906, "learning_rate": 1.4694926654831843e-06, "loss": 16.8438, "step": 10965 }, { "epoch": 0.7282991299727701, "grad_norm": 175.3373260498047, "learning_rate": 1.469397701233552e-06, "loss": 19.5, "step": 10966 }, { "epoch": 0.7283655442651259, "grad_norm": 910.9044799804688, "learning_rate": 1.4693027315541983e-06, "loss": 14.9375, "step": 10967 }, { "epoch": 0.7284319585574816, "grad_norm": 306.9888916015625, "learning_rate": 1.469207756446221e-06, "loss": 19.4688, "step": 10968 }, { "epoch": 0.7284983728498373, "grad_norm": 185.1011199951172, "learning_rate": 1.4691127759107191e-06, "loss": 13.5078, "step": 10969 }, { "epoch": 0.728564787142193, "grad_norm": 135.17823791503906, "learning_rate": 1.469017789948791e-06, "loss": 14.8281, "step": 10970 }, { "epoch": 0.7286312014345487, "grad_norm": 399.60211181640625, "learning_rate": 1.4689227985615356e-06, "loss": 19.9375, "step": 10971 }, { "epoch": 0.7286976157269044, "grad_norm": 520.8013305664062, "learning_rate": 1.4688278017500515e-06, "loss": 17.8906, "step": 10972 }, { "epoch": 0.7287640300192602, "grad_norm": 125.99934387207031, "learning_rate": 1.4687327995154379e-06, "loss": 17.4531, "step": 10973 }, { "epoch": 0.7288304443116158, "grad_norm": 271.7379455566406, "learning_rate": 1.4686377918587936e-06, "loss": 19.5781, "step": 10974 }, { "epoch": 0.7288968586039716, "grad_norm": 296.54449462890625, "learning_rate": 1.4685427787812174e-06, "loss": 19.8906, "step": 10975 }, { "epoch": 0.7289632728963273, "grad_norm": 172.7583465576172, "learning_rate": 1.4684477602838086e-06, "loss": 17.7656, "step": 10976 }, { "epoch": 0.729029687188683, "grad_norm": 259.4151611328125, "learning_rate": 1.4683527363676662e-06, "loss": 17.375, "step": 10977 }, { "epoch": 0.7290961014810388, "grad_norm": 301.22857666015625, "learning_rate": 1.4682577070338896e-06, "loss": 16.25, "step": 10978 }, { "epoch": 0.7291625157733944, "grad_norm": 722.4583740234375, "learning_rate": 1.4681626722835776e-06, "loss": 18.4531, "step": 10979 }, { "epoch": 0.7292289300657502, "grad_norm": 181.65826416015625, "learning_rate": 1.4680676321178298e-06, "loss": 12.4844, "step": 10980 }, { "epoch": 0.7292953443581058, "grad_norm": 327.2948913574219, "learning_rate": 1.4679725865377455e-06, "loss": 19.7344, "step": 10981 }, { "epoch": 0.7293617586504616, "grad_norm": 365.0848388671875, "learning_rate": 1.4678775355444244e-06, "loss": 15.5938, "step": 10982 }, { "epoch": 0.7294281729428173, "grad_norm": 272.5102844238281, "learning_rate": 1.4677824791389655e-06, "loss": 21.3125, "step": 10983 }, { "epoch": 0.729494587235173, "grad_norm": 218.28631591796875, "learning_rate": 1.467687417322469e-06, "loss": 13.4062, "step": 10984 }, { "epoch": 0.7295610015275287, "grad_norm": 326.7148742675781, "learning_rate": 1.4675923500960338e-06, "loss": 27.0312, "step": 10985 }, { "epoch": 0.7296274158198844, "grad_norm": 212.8975067138672, "learning_rate": 1.46749727746076e-06, "loss": 12.6875, "step": 10986 }, { "epoch": 0.7296938301122402, "grad_norm": 238.55445861816406, "learning_rate": 1.467402199417747e-06, "loss": 15.1562, "step": 10987 }, { "epoch": 0.7297602444045959, "grad_norm": 125.88195037841797, "learning_rate": 1.4673071159680953e-06, "loss": 20.6875, "step": 10988 }, { "epoch": 0.7298266586969516, "grad_norm": 232.01609802246094, "learning_rate": 1.4672120271129042e-06, "loss": 13.1562, "step": 10989 }, { "epoch": 0.7298930729893073, "grad_norm": 225.09750366210938, "learning_rate": 1.4671169328532738e-06, "loss": 19.1094, "step": 10990 }, { "epoch": 0.729959487281663, "grad_norm": 242.08534240722656, "learning_rate": 1.4670218331903042e-06, "loss": 25.2188, "step": 10991 }, { "epoch": 0.7300259015740187, "grad_norm": 241.9068603515625, "learning_rate": 1.466926728125095e-06, "loss": 16.2969, "step": 10992 }, { "epoch": 0.7300923158663745, "grad_norm": 97.7287368774414, "learning_rate": 1.4668316176587467e-06, "loss": 15.4844, "step": 10993 }, { "epoch": 0.7301587301587301, "grad_norm": 388.5367431640625, "learning_rate": 1.4667365017923595e-06, "loss": 16.8125, "step": 10994 }, { "epoch": 0.7302251444510859, "grad_norm": 520.6586303710938, "learning_rate": 1.4666413805270339e-06, "loss": 26.8906, "step": 10995 }, { "epoch": 0.7302915587434415, "grad_norm": 195.79379272460938, "learning_rate": 1.4665462538638697e-06, "loss": 17.5781, "step": 10996 }, { "epoch": 0.7303579730357973, "grad_norm": 294.7816467285156, "learning_rate": 1.4664511218039676e-06, "loss": 23.4531, "step": 10997 }, { "epoch": 0.7304243873281531, "grad_norm": 344.1625061035156, "learning_rate": 1.4663559843484275e-06, "loss": 20.1719, "step": 10998 }, { "epoch": 0.7304908016205087, "grad_norm": 238.49819946289062, "learning_rate": 1.4662608414983505e-06, "loss": 18.625, "step": 10999 }, { "epoch": 0.7305572159128645, "grad_norm": 209.14564514160156, "learning_rate": 1.466165693254837e-06, "loss": 16.3438, "step": 11000 }, { "epoch": 0.7306236302052201, "grad_norm": 352.91583251953125, "learning_rate": 1.4660705396189873e-06, "loss": 17.0312, "step": 11001 }, { "epoch": 0.7306900444975759, "grad_norm": 236.2237091064453, "learning_rate": 1.4659753805919029e-06, "loss": 16.7969, "step": 11002 }, { "epoch": 0.7307564587899316, "grad_norm": 248.63330078125, "learning_rate": 1.4658802161746832e-06, "loss": 25.4688, "step": 11003 }, { "epoch": 0.7308228730822873, "grad_norm": 307.1005554199219, "learning_rate": 1.4657850463684303e-06, "loss": 19.7969, "step": 11004 }, { "epoch": 0.730889287374643, "grad_norm": 157.964111328125, "learning_rate": 1.4656898711742446e-06, "loss": 12.8438, "step": 11005 }, { "epoch": 0.7309557016669987, "grad_norm": 221.61614990234375, "learning_rate": 1.4655946905932267e-06, "loss": 15.9219, "step": 11006 }, { "epoch": 0.7310221159593544, "grad_norm": 111.99630737304688, "learning_rate": 1.4654995046264782e-06, "loss": 11.5625, "step": 11007 }, { "epoch": 0.7310885302517102, "grad_norm": 135.86122131347656, "learning_rate": 1.4654043132750998e-06, "loss": 13.7031, "step": 11008 }, { "epoch": 0.7311549445440659, "grad_norm": 131.2996368408203, "learning_rate": 1.465309116540193e-06, "loss": 15.9375, "step": 11009 }, { "epoch": 0.7312213588364216, "grad_norm": 268.0259094238281, "learning_rate": 1.4652139144228582e-06, "loss": 17.9062, "step": 11010 }, { "epoch": 0.7312877731287774, "grad_norm": 200.53271484375, "learning_rate": 1.465118706924197e-06, "loss": 15.7031, "step": 11011 }, { "epoch": 0.731354187421133, "grad_norm": 191.49171447753906, "learning_rate": 1.4650234940453112e-06, "loss": 13.0781, "step": 11012 }, { "epoch": 0.7314206017134888, "grad_norm": 221.60157775878906, "learning_rate": 1.4649282757873014e-06, "loss": 13.7812, "step": 11013 }, { "epoch": 0.7314870160058444, "grad_norm": 859.490478515625, "learning_rate": 1.4648330521512695e-06, "loss": 21.7656, "step": 11014 }, { "epoch": 0.7315534302982002, "grad_norm": 216.062744140625, "learning_rate": 1.464737823138317e-06, "loss": 13.7031, "step": 11015 }, { "epoch": 0.7316198445905558, "grad_norm": 168.1242218017578, "learning_rate": 1.4646425887495454e-06, "loss": 16.4219, "step": 11016 }, { "epoch": 0.7316862588829116, "grad_norm": 103.00238800048828, "learning_rate": 1.4645473489860562e-06, "loss": 13.0781, "step": 11017 }, { "epoch": 0.7317526731752673, "grad_norm": 185.50564575195312, "learning_rate": 1.464452103848951e-06, "loss": 17.0469, "step": 11018 }, { "epoch": 0.731819087467623, "grad_norm": 337.0030822753906, "learning_rate": 1.4643568533393318e-06, "loss": 18.0469, "step": 11019 }, { "epoch": 0.7318855017599788, "grad_norm": 240.4058380126953, "learning_rate": 1.4642615974583003e-06, "loss": 19.1406, "step": 11020 }, { "epoch": 0.7319519160523345, "grad_norm": 187.82550048828125, "learning_rate": 1.4641663362069583e-06, "loss": 16.2188, "step": 11021 }, { "epoch": 0.7320183303446902, "grad_norm": 186.66778564453125, "learning_rate": 1.4640710695864078e-06, "loss": 18.9375, "step": 11022 }, { "epoch": 0.7320847446370459, "grad_norm": 189.74948120117188, "learning_rate": 1.463975797597751e-06, "loss": 14.2656, "step": 11023 }, { "epoch": 0.7321511589294016, "grad_norm": 251.69989013671875, "learning_rate": 1.4638805202420894e-06, "loss": 20.0625, "step": 11024 }, { "epoch": 0.7322175732217573, "grad_norm": 109.3502197265625, "learning_rate": 1.4637852375205255e-06, "loss": 15.3438, "step": 11025 }, { "epoch": 0.732283987514113, "grad_norm": 154.95086669921875, "learning_rate": 1.4636899494341613e-06, "loss": 16.625, "step": 11026 }, { "epoch": 0.7323504018064687, "grad_norm": 164.04122924804688, "learning_rate": 1.4635946559840994e-06, "loss": 15.4531, "step": 11027 }, { "epoch": 0.7324168160988245, "grad_norm": 432.26287841796875, "learning_rate": 1.4634993571714415e-06, "loss": 24.5469, "step": 11028 }, { "epoch": 0.7324832303911801, "grad_norm": 283.7737731933594, "learning_rate": 1.4634040529972906e-06, "loss": 27.7969, "step": 11029 }, { "epoch": 0.7325496446835359, "grad_norm": 179.44190979003906, "learning_rate": 1.4633087434627486e-06, "loss": 18.6562, "step": 11030 }, { "epoch": 0.7326160589758917, "grad_norm": 344.40185546875, "learning_rate": 1.4632134285689186e-06, "loss": 15.8125, "step": 11031 }, { "epoch": 0.7326824732682473, "grad_norm": 410.8682861328125, "learning_rate": 1.4631181083169024e-06, "loss": 17.7656, "step": 11032 }, { "epoch": 0.7327488875606031, "grad_norm": 222.4490966796875, "learning_rate": 1.463022782707803e-06, "loss": 17.9688, "step": 11033 }, { "epoch": 0.7328153018529587, "grad_norm": 157.41583251953125, "learning_rate": 1.4629274517427233e-06, "loss": 15.5, "step": 11034 }, { "epoch": 0.7328817161453145, "grad_norm": 352.2274475097656, "learning_rate": 1.4628321154227657e-06, "loss": 22.7031, "step": 11035 }, { "epoch": 0.7329481304376702, "grad_norm": 208.9132537841797, "learning_rate": 1.4627367737490328e-06, "loss": 18.1875, "step": 11036 }, { "epoch": 0.7330145447300259, "grad_norm": 167.888916015625, "learning_rate": 1.4626414267226279e-06, "loss": 18.3125, "step": 11037 }, { "epoch": 0.7330809590223816, "grad_norm": 166.64564514160156, "learning_rate": 1.462546074344654e-06, "loss": 18.2656, "step": 11038 }, { "epoch": 0.7331473733147373, "grad_norm": 186.8219757080078, "learning_rate": 1.4624507166162134e-06, "loss": 16.4688, "step": 11039 }, { "epoch": 0.733213787607093, "grad_norm": 195.938720703125, "learning_rate": 1.4623553535384094e-06, "loss": 19.3125, "step": 11040 }, { "epoch": 0.7332802018994488, "grad_norm": 229.18264770507812, "learning_rate": 1.4622599851123458e-06, "loss": 22.9688, "step": 11041 }, { "epoch": 0.7333466161918045, "grad_norm": 167.75588989257812, "learning_rate": 1.4621646113391251e-06, "loss": 17.2188, "step": 11042 }, { "epoch": 0.7334130304841602, "grad_norm": 183.88937377929688, "learning_rate": 1.4620692322198507e-06, "loss": 15.8594, "step": 11043 }, { "epoch": 0.7334794447765159, "grad_norm": 263.9002380371094, "learning_rate": 1.461973847755626e-06, "loss": 22.3906, "step": 11044 }, { "epoch": 0.7335458590688716, "grad_norm": 139.5924530029297, "learning_rate": 1.461878457947554e-06, "loss": 14.7344, "step": 11045 }, { "epoch": 0.7336122733612274, "grad_norm": 247.9339141845703, "learning_rate": 1.4617830627967381e-06, "loss": 17.1562, "step": 11046 }, { "epoch": 0.733678687653583, "grad_norm": 302.62188720703125, "learning_rate": 1.4616876623042824e-06, "loss": 16.1719, "step": 11047 }, { "epoch": 0.7337451019459388, "grad_norm": 233.40545654296875, "learning_rate": 1.4615922564712902e-06, "loss": 13.1406, "step": 11048 }, { "epoch": 0.7338115162382944, "grad_norm": 1192.5015869140625, "learning_rate": 1.4614968452988646e-06, "loss": 27.4531, "step": 11049 }, { "epoch": 0.7338779305306502, "grad_norm": 312.9728698730469, "learning_rate": 1.4614014287881095e-06, "loss": 16.0625, "step": 11050 }, { "epoch": 0.7339443448230059, "grad_norm": 452.4162902832031, "learning_rate": 1.461306006940129e-06, "loss": 16.625, "step": 11051 }, { "epoch": 0.7340107591153616, "grad_norm": 167.1629180908203, "learning_rate": 1.4612105797560265e-06, "loss": 17.375, "step": 11052 }, { "epoch": 0.7340771734077174, "grad_norm": 244.597412109375, "learning_rate": 1.461115147236906e-06, "loss": 28.5312, "step": 11053 }, { "epoch": 0.734143587700073, "grad_norm": 247.48712158203125, "learning_rate": 1.4610197093838714e-06, "loss": 18.2031, "step": 11054 }, { "epoch": 0.7342100019924288, "grad_norm": 358.4862060546875, "learning_rate": 1.4609242661980268e-06, "loss": 18.0, "step": 11055 }, { "epoch": 0.7342764162847845, "grad_norm": 462.78765869140625, "learning_rate": 1.4608288176804759e-06, "loss": 20.9375, "step": 11056 }, { "epoch": 0.7343428305771402, "grad_norm": 166.0139617919922, "learning_rate": 1.4607333638323228e-06, "loss": 19.5938, "step": 11057 }, { "epoch": 0.7344092448694959, "grad_norm": 360.66302490234375, "learning_rate": 1.4606379046546719e-06, "loss": 18.7656, "step": 11058 }, { "epoch": 0.7344756591618516, "grad_norm": 194.79251098632812, "learning_rate": 1.4605424401486276e-06, "loss": 20.25, "step": 11059 }, { "epoch": 0.7345420734542073, "grad_norm": 467.3775939941406, "learning_rate": 1.4604469703152938e-06, "loss": 27.375, "step": 11060 }, { "epoch": 0.7346084877465631, "grad_norm": 575.6510009765625, "learning_rate": 1.4603514951557748e-06, "loss": 26.5938, "step": 11061 }, { "epoch": 0.7346749020389187, "grad_norm": 376.835693359375, "learning_rate": 1.4602560146711753e-06, "loss": 19.4531, "step": 11062 }, { "epoch": 0.7347413163312745, "grad_norm": 503.8837890625, "learning_rate": 1.4601605288625995e-06, "loss": 16.3906, "step": 11063 }, { "epoch": 0.7348077306236303, "grad_norm": 352.32281494140625, "learning_rate": 1.460065037731152e-06, "loss": 14.0312, "step": 11064 }, { "epoch": 0.7348741449159859, "grad_norm": 247.62799072265625, "learning_rate": 1.4599695412779375e-06, "loss": 14.2656, "step": 11065 }, { "epoch": 0.7349405592083417, "grad_norm": 206.42874145507812, "learning_rate": 1.459874039504061e-06, "loss": 16.7031, "step": 11066 }, { "epoch": 0.7350069735006973, "grad_norm": 298.7139892578125, "learning_rate": 1.4597785324106261e-06, "loss": 20.5, "step": 11067 }, { "epoch": 0.7350733877930531, "grad_norm": 360.83203125, "learning_rate": 1.4596830199987386e-06, "loss": 20.9531, "step": 11068 }, { "epoch": 0.7351398020854087, "grad_norm": 487.8641662597656, "learning_rate": 1.459587502269503e-06, "loss": 22.7812, "step": 11069 }, { "epoch": 0.7352062163777645, "grad_norm": 248.55142211914062, "learning_rate": 1.4594919792240243e-06, "loss": 18.4531, "step": 11070 }, { "epoch": 0.7352726306701202, "grad_norm": 179.40725708007812, "learning_rate": 1.4593964508634071e-06, "loss": 20.8125, "step": 11071 }, { "epoch": 0.7353390449624759, "grad_norm": 224.19937133789062, "learning_rate": 1.4593009171887566e-06, "loss": 13.9219, "step": 11072 }, { "epoch": 0.7354054592548317, "grad_norm": 166.71969604492188, "learning_rate": 1.4592053782011783e-06, "loss": 13.9844, "step": 11073 }, { "epoch": 0.7354718735471873, "grad_norm": 267.5677795410156, "learning_rate": 1.4591098339017768e-06, "loss": 17.2812, "step": 11074 }, { "epoch": 0.7355382878395431, "grad_norm": 199.7803497314453, "learning_rate": 1.4590142842916574e-06, "loss": 22.2969, "step": 11075 }, { "epoch": 0.7356047021318988, "grad_norm": 286.40777587890625, "learning_rate": 1.4589187293719254e-06, "loss": 20.375, "step": 11076 }, { "epoch": 0.7356711164242545, "grad_norm": 233.94371032714844, "learning_rate": 1.4588231691436862e-06, "loss": 19.3438, "step": 11077 }, { "epoch": 0.7357375307166102, "grad_norm": 330.59619140625, "learning_rate": 1.4587276036080453e-06, "loss": 21.1875, "step": 11078 }, { "epoch": 0.735803945008966, "grad_norm": 147.6966552734375, "learning_rate": 1.4586320327661077e-06, "loss": 13.875, "step": 11079 }, { "epoch": 0.7358703593013216, "grad_norm": 693.7396850585938, "learning_rate": 1.4585364566189797e-06, "loss": 24.1875, "step": 11080 }, { "epoch": 0.7359367735936774, "grad_norm": 263.29193115234375, "learning_rate": 1.458440875167766e-06, "loss": 25.0, "step": 11081 }, { "epoch": 0.736003187886033, "grad_norm": 247.8480987548828, "learning_rate": 1.4583452884135728e-06, "loss": 23.8281, "step": 11082 }, { "epoch": 0.7360696021783888, "grad_norm": 173.5769805908203, "learning_rate": 1.4582496963575052e-06, "loss": 17.1328, "step": 11083 }, { "epoch": 0.7361360164707446, "grad_norm": 82.58214569091797, "learning_rate": 1.45815409900067e-06, "loss": 11.5312, "step": 11084 }, { "epoch": 0.7362024307631002, "grad_norm": 433.48907470703125, "learning_rate": 1.4580584963441718e-06, "loss": 18.0312, "step": 11085 }, { "epoch": 0.736268845055456, "grad_norm": 280.105712890625, "learning_rate": 1.4579628883891173e-06, "loss": 18.7969, "step": 11086 }, { "epoch": 0.7363352593478116, "grad_norm": 225.03652954101562, "learning_rate": 1.4578672751366123e-06, "loss": 13.25, "step": 11087 }, { "epoch": 0.7364016736401674, "grad_norm": 239.52398681640625, "learning_rate": 1.4577716565877624e-06, "loss": 15.5156, "step": 11088 }, { "epoch": 0.736468087932523, "grad_norm": 251.99249267578125, "learning_rate": 1.457676032743674e-06, "loss": 13.6719, "step": 11089 }, { "epoch": 0.7365345022248788, "grad_norm": 447.4302978515625, "learning_rate": 1.4575804036054532e-06, "loss": 19.2969, "step": 11090 }, { "epoch": 0.7366009165172345, "grad_norm": 281.23138427734375, "learning_rate": 1.4574847691742064e-06, "loss": 20.9062, "step": 11091 }, { "epoch": 0.7366673308095902, "grad_norm": 171.76673889160156, "learning_rate": 1.4573891294510394e-06, "loss": 15.125, "step": 11092 }, { "epoch": 0.7367337451019459, "grad_norm": 207.27928161621094, "learning_rate": 1.4572934844370587e-06, "loss": 21.1406, "step": 11093 }, { "epoch": 0.7368001593943017, "grad_norm": 337.86761474609375, "learning_rate": 1.4571978341333708e-06, "loss": 22.0156, "step": 11094 }, { "epoch": 0.7368665736866574, "grad_norm": 1173.16064453125, "learning_rate": 1.4571021785410817e-06, "loss": 24.4688, "step": 11095 }, { "epoch": 0.7369329879790131, "grad_norm": 442.8369445800781, "learning_rate": 1.4570065176612983e-06, "loss": 21.0938, "step": 11096 }, { "epoch": 0.7369994022713688, "grad_norm": 137.35122680664062, "learning_rate": 1.456910851495127e-06, "loss": 15.9375, "step": 11097 }, { "epoch": 0.7370658165637245, "grad_norm": 356.11761474609375, "learning_rate": 1.4568151800436747e-06, "loss": 15.1562, "step": 11098 }, { "epoch": 0.7371322308560803, "grad_norm": 136.92904663085938, "learning_rate": 1.4567195033080475e-06, "loss": 16.5625, "step": 11099 }, { "epoch": 0.7371986451484359, "grad_norm": 113.07292938232422, "learning_rate": 1.4566238212893525e-06, "loss": 16.2812, "step": 11100 }, { "epoch": 0.7372650594407917, "grad_norm": 663.5532836914062, "learning_rate": 1.4565281339886963e-06, "loss": 20.5, "step": 11101 }, { "epoch": 0.7373314737331473, "grad_norm": 171.8426055908203, "learning_rate": 1.4564324414071863e-06, "loss": 16.2188, "step": 11102 }, { "epoch": 0.7373978880255031, "grad_norm": 208.47744750976562, "learning_rate": 1.4563367435459287e-06, "loss": 16.8594, "step": 11103 }, { "epoch": 0.7374643023178588, "grad_norm": 180.46090698242188, "learning_rate": 1.4562410404060309e-06, "loss": 28.75, "step": 11104 }, { "epoch": 0.7375307166102145, "grad_norm": 272.8990478515625, "learning_rate": 1.4561453319885998e-06, "loss": 20.1094, "step": 11105 }, { "epoch": 0.7375971309025703, "grad_norm": 196.01498413085938, "learning_rate": 1.4560496182947426e-06, "loss": 18.8281, "step": 11106 }, { "epoch": 0.7376635451949259, "grad_norm": 191.2445831298828, "learning_rate": 1.4559538993255663e-06, "loss": 13.5938, "step": 11107 }, { "epoch": 0.7377299594872817, "grad_norm": 260.8106384277344, "learning_rate": 1.4558581750821782e-06, "loss": 25.3594, "step": 11108 }, { "epoch": 0.7377963737796374, "grad_norm": 253.89007568359375, "learning_rate": 1.4557624455656856e-06, "loss": 18.4531, "step": 11109 }, { "epoch": 0.7378627880719931, "grad_norm": 274.2738342285156, "learning_rate": 1.455666710777196e-06, "loss": 19.2656, "step": 11110 }, { "epoch": 0.7379292023643488, "grad_norm": 147.66744995117188, "learning_rate": 1.4555709707178166e-06, "loss": 18.4531, "step": 11111 }, { "epoch": 0.7379956166567045, "grad_norm": 174.9722900390625, "learning_rate": 1.4554752253886546e-06, "loss": 17.5781, "step": 11112 }, { "epoch": 0.7380620309490602, "grad_norm": 242.11341857910156, "learning_rate": 1.4553794747908179e-06, "loss": 16.9766, "step": 11113 }, { "epoch": 0.738128445241416, "grad_norm": 492.52545166015625, "learning_rate": 1.455283718925414e-06, "loss": 16.1406, "step": 11114 }, { "epoch": 0.7381948595337716, "grad_norm": 163.95584106445312, "learning_rate": 1.4551879577935508e-06, "loss": 14.5312, "step": 11115 }, { "epoch": 0.7382612738261274, "grad_norm": 101.15542602539062, "learning_rate": 1.455092191396336e-06, "loss": 19.6406, "step": 11116 }, { "epoch": 0.7383276881184831, "grad_norm": 143.2543182373047, "learning_rate": 1.4549964197348768e-06, "loss": 15.0938, "step": 11117 }, { "epoch": 0.7383941024108388, "grad_norm": 223.18899536132812, "learning_rate": 1.4549006428102815e-06, "loss": 13.8281, "step": 11118 }, { "epoch": 0.7384605167031946, "grad_norm": 178.297607421875, "learning_rate": 1.4548048606236577e-06, "loss": 17.3125, "step": 11119 }, { "epoch": 0.7385269309955502, "grad_norm": 185.9589080810547, "learning_rate": 1.4547090731761138e-06, "loss": 19.9375, "step": 11120 }, { "epoch": 0.738593345287906, "grad_norm": 179.2346649169922, "learning_rate": 1.4546132804687573e-06, "loss": 16.4219, "step": 11121 }, { "epoch": 0.7386597595802616, "grad_norm": 138.06976318359375, "learning_rate": 1.4545174825026966e-06, "loss": 21.3125, "step": 11122 }, { "epoch": 0.7387261738726174, "grad_norm": 386.8233337402344, "learning_rate": 1.45442167927904e-06, "loss": 25.125, "step": 11123 }, { "epoch": 0.7387925881649731, "grad_norm": 187.48301696777344, "learning_rate": 1.454325870798895e-06, "loss": 22.5938, "step": 11124 }, { "epoch": 0.7388590024573288, "grad_norm": 164.42938232421875, "learning_rate": 1.4542300570633706e-06, "loss": 17.6875, "step": 11125 }, { "epoch": 0.7389254167496845, "grad_norm": 227.53970336914062, "learning_rate": 1.4541342380735748e-06, "loss": 19.5156, "step": 11126 }, { "epoch": 0.7389918310420402, "grad_norm": 233.69798278808594, "learning_rate": 1.4540384138306158e-06, "loss": 15.875, "step": 11127 }, { "epoch": 0.739058245334396, "grad_norm": 395.8743591308594, "learning_rate": 1.4539425843356026e-06, "loss": 18.1094, "step": 11128 }, { "epoch": 0.7391246596267517, "grad_norm": 107.94930267333984, "learning_rate": 1.453846749589643e-06, "loss": 14.4688, "step": 11129 }, { "epoch": 0.7391910739191074, "grad_norm": 232.3001251220703, "learning_rate": 1.4537509095938462e-06, "loss": 16.1875, "step": 11130 }, { "epoch": 0.7392574882114631, "grad_norm": 302.27886962890625, "learning_rate": 1.4536550643493204e-06, "loss": 20.2344, "step": 11131 }, { "epoch": 0.7393239025038189, "grad_norm": 247.74606323242188, "learning_rate": 1.4535592138571742e-06, "loss": 23.3281, "step": 11132 }, { "epoch": 0.7393903167961745, "grad_norm": 401.9045715332031, "learning_rate": 1.4534633581185166e-06, "loss": 21.5781, "step": 11133 }, { "epoch": 0.7394567310885303, "grad_norm": 154.1597900390625, "learning_rate": 1.4533674971344568e-06, "loss": 26.7812, "step": 11134 }, { "epoch": 0.7395231453808859, "grad_norm": 223.22677612304688, "learning_rate": 1.4532716309061028e-06, "loss": 23.9844, "step": 11135 }, { "epoch": 0.7395895596732417, "grad_norm": 116.83114624023438, "learning_rate": 1.4531757594345639e-06, "loss": 16.875, "step": 11136 }, { "epoch": 0.7396559739655973, "grad_norm": 970.5089111328125, "learning_rate": 1.4530798827209493e-06, "loss": 20.5, "step": 11137 }, { "epoch": 0.7397223882579531, "grad_norm": 255.70391845703125, "learning_rate": 1.452984000766368e-06, "loss": 24.25, "step": 11138 }, { "epoch": 0.7397888025503089, "grad_norm": 149.87501525878906, "learning_rate": 1.4528881135719288e-06, "loss": 16.7188, "step": 11139 }, { "epoch": 0.7398552168426645, "grad_norm": 130.58056640625, "learning_rate": 1.4527922211387412e-06, "loss": 16.0312, "step": 11140 }, { "epoch": 0.7399216311350203, "grad_norm": 171.75360107421875, "learning_rate": 1.4526963234679141e-06, "loss": 18.2969, "step": 11141 }, { "epoch": 0.739988045427376, "grad_norm": 167.01596069335938, "learning_rate": 1.452600420560557e-06, "loss": 16.0156, "step": 11142 }, { "epoch": 0.7400544597197317, "grad_norm": 212.35519409179688, "learning_rate": 1.4525045124177792e-06, "loss": 22.0625, "step": 11143 }, { "epoch": 0.7401208740120874, "grad_norm": 424.8717041015625, "learning_rate": 1.4524085990406902e-06, "loss": 14.75, "step": 11144 }, { "epoch": 0.7401872883044431, "grad_norm": 195.84828186035156, "learning_rate": 1.4523126804303996e-06, "loss": 14.8906, "step": 11145 }, { "epoch": 0.7402537025967988, "grad_norm": 137.31765747070312, "learning_rate": 1.4522167565880163e-06, "loss": 16.4531, "step": 11146 }, { "epoch": 0.7403201168891546, "grad_norm": 284.7680969238281, "learning_rate": 1.4521208275146505e-06, "loss": 14.0312, "step": 11147 }, { "epoch": 0.7403865311815102, "grad_norm": 277.65753173828125, "learning_rate": 1.4520248932114118e-06, "loss": 20.5312, "step": 11148 }, { "epoch": 0.740452945473866, "grad_norm": 265.5086669921875, "learning_rate": 1.4519289536794097e-06, "loss": 16.9688, "step": 11149 }, { "epoch": 0.7405193597662217, "grad_norm": 415.9654235839844, "learning_rate": 1.451833008919754e-06, "loss": 19.9531, "step": 11150 }, { "epoch": 0.7405857740585774, "grad_norm": 909.4015502929688, "learning_rate": 1.4517370589335548e-06, "loss": 15.7812, "step": 11151 }, { "epoch": 0.7406521883509332, "grad_norm": 212.4221954345703, "learning_rate": 1.4516411037219217e-06, "loss": 19.1094, "step": 11152 }, { "epoch": 0.7407186026432888, "grad_norm": 154.74365234375, "learning_rate": 1.4515451432859644e-06, "loss": 16.3125, "step": 11153 }, { "epoch": 0.7407850169356446, "grad_norm": 341.970703125, "learning_rate": 1.4514491776267936e-06, "loss": 23.375, "step": 11154 }, { "epoch": 0.7408514312280002, "grad_norm": 168.9257354736328, "learning_rate": 1.4513532067455189e-06, "loss": 26.2188, "step": 11155 }, { "epoch": 0.740917845520356, "grad_norm": 167.6292724609375, "learning_rate": 1.4512572306432508e-06, "loss": 13.25, "step": 11156 }, { "epoch": 0.7409842598127117, "grad_norm": 179.28964233398438, "learning_rate": 1.451161249321099e-06, "loss": 18.2969, "step": 11157 }, { "epoch": 0.7410506741050674, "grad_norm": 277.295166015625, "learning_rate": 1.4510652627801743e-06, "loss": 28.7188, "step": 11158 }, { "epoch": 0.7411170883974231, "grad_norm": 129.1345672607422, "learning_rate": 1.4509692710215866e-06, "loss": 12.5469, "step": 11159 }, { "epoch": 0.7411835026897788, "grad_norm": 189.24887084960938, "learning_rate": 1.4508732740464464e-06, "loss": 21.2188, "step": 11160 }, { "epoch": 0.7412499169821346, "grad_norm": 164.0281982421875, "learning_rate": 1.4507772718558639e-06, "loss": 15.9062, "step": 11161 }, { "epoch": 0.7413163312744903, "grad_norm": 193.79861450195312, "learning_rate": 1.4506812644509503e-06, "loss": 15.9219, "step": 11162 }, { "epoch": 0.741382745566846, "grad_norm": 107.21314239501953, "learning_rate": 1.4505852518328156e-06, "loss": 19.3906, "step": 11163 }, { "epoch": 0.7414491598592017, "grad_norm": 528.6544799804688, "learning_rate": 1.4504892340025704e-06, "loss": 22.6406, "step": 11164 }, { "epoch": 0.7415155741515574, "grad_norm": 144.17068481445312, "learning_rate": 1.4503932109613255e-06, "loss": 12.875, "step": 11165 }, { "epoch": 0.7415819884439131, "grad_norm": 645.791259765625, "learning_rate": 1.4502971827101917e-06, "loss": 18.3594, "step": 11166 }, { "epoch": 0.7416484027362689, "grad_norm": 277.9241943359375, "learning_rate": 1.45020114925028e-06, "loss": 25.1875, "step": 11167 }, { "epoch": 0.7417148170286245, "grad_norm": 255.94882202148438, "learning_rate": 1.4501051105827004e-06, "loss": 15.5312, "step": 11168 }, { "epoch": 0.7417812313209803, "grad_norm": 263.3934326171875, "learning_rate": 1.4500090667085652e-06, "loss": 13.375, "step": 11169 }, { "epoch": 0.7418476456133359, "grad_norm": 243.30332946777344, "learning_rate": 1.449913017628984e-06, "loss": 18.625, "step": 11170 }, { "epoch": 0.7419140599056917, "grad_norm": 158.5773468017578, "learning_rate": 1.4498169633450686e-06, "loss": 15.0312, "step": 11171 }, { "epoch": 0.7419804741980475, "grad_norm": 161.3467559814453, "learning_rate": 1.4497209038579298e-06, "loss": 17.0469, "step": 11172 }, { "epoch": 0.7420468884904031, "grad_norm": 252.11322021484375, "learning_rate": 1.4496248391686795e-06, "loss": 19.25, "step": 11173 }, { "epoch": 0.7421133027827589, "grad_norm": 275.48553466796875, "learning_rate": 1.4495287692784277e-06, "loss": 14.7344, "step": 11174 }, { "epoch": 0.7421797170751145, "grad_norm": 159.58558654785156, "learning_rate": 1.4494326941882866e-06, "loss": 20.7344, "step": 11175 }, { "epoch": 0.7422461313674703, "grad_norm": 166.42938232421875, "learning_rate": 1.449336613899367e-06, "loss": 15.8594, "step": 11176 }, { "epoch": 0.742312545659826, "grad_norm": 333.12115478515625, "learning_rate": 1.4492405284127808e-06, "loss": 19.2344, "step": 11177 }, { "epoch": 0.7423789599521817, "grad_norm": 181.4632568359375, "learning_rate": 1.4491444377296392e-06, "loss": 14.6172, "step": 11178 }, { "epoch": 0.7424453742445374, "grad_norm": 149.00888061523438, "learning_rate": 1.4490483418510537e-06, "loss": 14.0781, "step": 11179 }, { "epoch": 0.7425117885368931, "grad_norm": 494.6948547363281, "learning_rate": 1.4489522407781361e-06, "loss": 12.5625, "step": 11180 }, { "epoch": 0.7425782028292488, "grad_norm": 271.2200927734375, "learning_rate": 1.4488561345119977e-06, "loss": 20.25, "step": 11181 }, { "epoch": 0.7426446171216046, "grad_norm": 153.60289001464844, "learning_rate": 1.44876002305375e-06, "loss": 17.6094, "step": 11182 }, { "epoch": 0.7427110314139603, "grad_norm": 167.53640747070312, "learning_rate": 1.4486639064045054e-06, "loss": 20.1406, "step": 11183 }, { "epoch": 0.742777445706316, "grad_norm": 224.8329315185547, "learning_rate": 1.4485677845653757e-06, "loss": 18.3281, "step": 11184 }, { "epoch": 0.7428438599986718, "grad_norm": 243.15550231933594, "learning_rate": 1.4484716575374722e-06, "loss": 23.5312, "step": 11185 }, { "epoch": 0.7429102742910274, "grad_norm": 183.97019958496094, "learning_rate": 1.4483755253219073e-06, "loss": 25.4062, "step": 11186 }, { "epoch": 0.7429766885833832, "grad_norm": 254.12106323242188, "learning_rate": 1.4482793879197927e-06, "loss": 16.0781, "step": 11187 }, { "epoch": 0.7430431028757388, "grad_norm": 309.244384765625, "learning_rate": 1.448183245332241e-06, "loss": 23.25, "step": 11188 }, { "epoch": 0.7431095171680946, "grad_norm": 244.70294189453125, "learning_rate": 1.4480870975603636e-06, "loss": 18.5, "step": 11189 }, { "epoch": 0.7431759314604502, "grad_norm": 304.4116516113281, "learning_rate": 1.4479909446052734e-06, "loss": 19.6094, "step": 11190 }, { "epoch": 0.743242345752806, "grad_norm": 320.46600341796875, "learning_rate": 1.447894786468082e-06, "loss": 20.1406, "step": 11191 }, { "epoch": 0.7433087600451617, "grad_norm": 141.3367462158203, "learning_rate": 1.4477986231499019e-06, "loss": 17.2188, "step": 11192 }, { "epoch": 0.7433751743375174, "grad_norm": 231.73995971679688, "learning_rate": 1.4477024546518458e-06, "loss": 24.8438, "step": 11193 }, { "epoch": 0.7434415886298732, "grad_norm": 313.744140625, "learning_rate": 1.4476062809750258e-06, "loss": 15.625, "step": 11194 }, { "epoch": 0.7435080029222289, "grad_norm": 393.4567565917969, "learning_rate": 1.4475101021205543e-06, "loss": 15.3438, "step": 11195 }, { "epoch": 0.7435744172145846, "grad_norm": 245.97293090820312, "learning_rate": 1.4474139180895439e-06, "loss": 21.7031, "step": 11196 }, { "epoch": 0.7436408315069403, "grad_norm": 452.3081970214844, "learning_rate": 1.4473177288831075e-06, "loss": 22.0625, "step": 11197 }, { "epoch": 0.743707245799296, "grad_norm": 249.31703186035156, "learning_rate": 1.4472215345023577e-06, "loss": 20.0312, "step": 11198 }, { "epoch": 0.7437736600916517, "grad_norm": 220.31895446777344, "learning_rate": 1.447125334948407e-06, "loss": 20.3906, "step": 11199 }, { "epoch": 0.7438400743840075, "grad_norm": 198.2100830078125, "learning_rate": 1.447029130222368e-06, "loss": 21.4375, "step": 11200 }, { "epoch": 0.7439064886763631, "grad_norm": 174.90415954589844, "learning_rate": 1.4469329203253539e-06, "loss": 17.5156, "step": 11201 }, { "epoch": 0.7439729029687189, "grad_norm": 128.22850036621094, "learning_rate": 1.4468367052584775e-06, "loss": 14.3281, "step": 11202 }, { "epoch": 0.7440393172610745, "grad_norm": 391.3538513183594, "learning_rate": 1.446740485022852e-06, "loss": 17.1562, "step": 11203 }, { "epoch": 0.7441057315534303, "grad_norm": 307.63330078125, "learning_rate": 1.4466442596195898e-06, "loss": 21.125, "step": 11204 }, { "epoch": 0.7441721458457861, "grad_norm": 229.1699676513672, "learning_rate": 1.4465480290498046e-06, "loss": 21.1562, "step": 11205 }, { "epoch": 0.7442385601381417, "grad_norm": 217.39820861816406, "learning_rate": 1.4464517933146092e-06, "loss": 18.5938, "step": 11206 }, { "epoch": 0.7443049744304975, "grad_norm": 237.33392333984375, "learning_rate": 1.4463555524151168e-06, "loss": 14.3125, "step": 11207 }, { "epoch": 0.7443713887228531, "grad_norm": 189.62118530273438, "learning_rate": 1.4462593063524409e-06, "loss": 17.8125, "step": 11208 }, { "epoch": 0.7444378030152089, "grad_norm": 182.5901336669922, "learning_rate": 1.4461630551276946e-06, "loss": 17.9531, "step": 11209 }, { "epoch": 0.7445042173075646, "grad_norm": 213.53134155273438, "learning_rate": 1.4460667987419914e-06, "loss": 16.9375, "step": 11210 }, { "epoch": 0.7445706315999203, "grad_norm": 125.96320343017578, "learning_rate": 1.445970537196445e-06, "loss": 11.9375, "step": 11211 }, { "epoch": 0.744637045892276, "grad_norm": 108.19163513183594, "learning_rate": 1.4458742704921681e-06, "loss": 18.7344, "step": 11212 }, { "epoch": 0.7447034601846317, "grad_norm": 132.62742614746094, "learning_rate": 1.445777998630275e-06, "loss": 13.6094, "step": 11213 }, { "epoch": 0.7447698744769874, "grad_norm": 1005.7559204101562, "learning_rate": 1.4456817216118789e-06, "loss": 18.2656, "step": 11214 }, { "epoch": 0.7448362887693432, "grad_norm": 792.1834106445312, "learning_rate": 1.4455854394380935e-06, "loss": 16.9375, "step": 11215 }, { "epoch": 0.7449027030616989, "grad_norm": 360.716796875, "learning_rate": 1.4454891521100332e-06, "loss": 29.0938, "step": 11216 }, { "epoch": 0.7449691173540546, "grad_norm": 324.3837585449219, "learning_rate": 1.4453928596288108e-06, "loss": 20.8125, "step": 11217 }, { "epoch": 0.7450355316464103, "grad_norm": 163.08395385742188, "learning_rate": 1.4452965619955406e-06, "loss": 14.9062, "step": 11218 }, { "epoch": 0.745101945938766, "grad_norm": 354.81756591796875, "learning_rate": 1.4452002592113368e-06, "loss": 21.3906, "step": 11219 }, { "epoch": 0.7451683602311218, "grad_norm": 182.25469970703125, "learning_rate": 1.4451039512773133e-06, "loss": 17.3281, "step": 11220 }, { "epoch": 0.7452347745234774, "grad_norm": 181.5875701904297, "learning_rate": 1.4450076381945835e-06, "loss": 25.4062, "step": 11221 }, { "epoch": 0.7453011888158332, "grad_norm": 100.93574523925781, "learning_rate": 1.444911319964262e-06, "loss": 17.5156, "step": 11222 }, { "epoch": 0.7453676031081888, "grad_norm": 217.1322479248047, "learning_rate": 1.4448149965874632e-06, "loss": 23.625, "step": 11223 }, { "epoch": 0.7454340174005446, "grad_norm": 353.405517578125, "learning_rate": 1.4447186680653008e-06, "loss": 25.625, "step": 11224 }, { "epoch": 0.7455004316929004, "grad_norm": 597.1238403320312, "learning_rate": 1.4446223343988892e-06, "loss": 22.8438, "step": 11225 }, { "epoch": 0.745566845985256, "grad_norm": 184.3382568359375, "learning_rate": 1.4445259955893431e-06, "loss": 23.5625, "step": 11226 }, { "epoch": 0.7456332602776118, "grad_norm": 141.63250732421875, "learning_rate": 1.4444296516377766e-06, "loss": 18.3438, "step": 11227 }, { "epoch": 0.7456996745699674, "grad_norm": 143.11073303222656, "learning_rate": 1.444333302545304e-06, "loss": 14.5234, "step": 11228 }, { "epoch": 0.7457660888623232, "grad_norm": 124.76231384277344, "learning_rate": 1.44423694831304e-06, "loss": 14.5625, "step": 11229 }, { "epoch": 0.7458325031546789, "grad_norm": 242.89988708496094, "learning_rate": 1.4441405889420992e-06, "loss": 15.8125, "step": 11230 }, { "epoch": 0.7458989174470346, "grad_norm": 144.40850830078125, "learning_rate": 1.444044224433596e-06, "loss": 17.9531, "step": 11231 }, { "epoch": 0.7459653317393903, "grad_norm": 339.1640319824219, "learning_rate": 1.4439478547886455e-06, "loss": 18.9375, "step": 11232 }, { "epoch": 0.746031746031746, "grad_norm": 224.0801544189453, "learning_rate": 1.4438514800083621e-06, "loss": 18.5938, "step": 11233 }, { "epoch": 0.7460981603241017, "grad_norm": 193.0325469970703, "learning_rate": 1.4437551000938606e-06, "loss": 18.4219, "step": 11234 }, { "epoch": 0.7461645746164575, "grad_norm": 174.1920928955078, "learning_rate": 1.443658715046256e-06, "loss": 22.625, "step": 11235 }, { "epoch": 0.7462309889088132, "grad_norm": 177.20565795898438, "learning_rate": 1.4435623248666633e-06, "loss": 15.9375, "step": 11236 }, { "epoch": 0.7462974032011689, "grad_norm": 141.77601623535156, "learning_rate": 1.4434659295561978e-06, "loss": 16.8125, "step": 11237 }, { "epoch": 0.7463638174935247, "grad_norm": 247.94728088378906, "learning_rate": 1.4433695291159736e-06, "loss": 15.7188, "step": 11238 }, { "epoch": 0.7464302317858803, "grad_norm": 368.2116394042969, "learning_rate": 1.4432731235471064e-06, "loss": 17.6562, "step": 11239 }, { "epoch": 0.7464966460782361, "grad_norm": 125.5940933227539, "learning_rate": 1.4431767128507114e-06, "loss": 15.7031, "step": 11240 }, { "epoch": 0.7465630603705917, "grad_norm": 406.2734680175781, "learning_rate": 1.4430802970279038e-06, "loss": 18.3438, "step": 11241 }, { "epoch": 0.7466294746629475, "grad_norm": 268.4615478515625, "learning_rate": 1.4429838760797987e-06, "loss": 17.0312, "step": 11242 }, { "epoch": 0.7466958889553031, "grad_norm": 131.7454833984375, "learning_rate": 1.442887450007512e-06, "loss": 16.375, "step": 11243 }, { "epoch": 0.7467623032476589, "grad_norm": 129.6715545654297, "learning_rate": 1.442791018812158e-06, "loss": 21.3125, "step": 11244 }, { "epoch": 0.7468287175400146, "grad_norm": 156.20816040039062, "learning_rate": 1.4426945824948534e-06, "loss": 12.0938, "step": 11245 }, { "epoch": 0.7468951318323703, "grad_norm": 168.5193328857422, "learning_rate": 1.4425981410567128e-06, "loss": 18.1406, "step": 11246 }, { "epoch": 0.7469615461247261, "grad_norm": 260.18804931640625, "learning_rate": 1.4425016944988522e-06, "loss": 19.9844, "step": 11247 }, { "epoch": 0.7470279604170817, "grad_norm": 164.3672332763672, "learning_rate": 1.4424052428223872e-06, "loss": 18.1719, "step": 11248 }, { "epoch": 0.7470943747094375, "grad_norm": 358.332275390625, "learning_rate": 1.4423087860284332e-06, "loss": 16.3594, "step": 11249 }, { "epoch": 0.7471607890017932, "grad_norm": 235.84756469726562, "learning_rate": 1.4422123241181066e-06, "loss": 15.0469, "step": 11250 }, { "epoch": 0.7472272032941489, "grad_norm": 205.28717041015625, "learning_rate": 1.4421158570925224e-06, "loss": 21.3438, "step": 11251 }, { "epoch": 0.7472936175865046, "grad_norm": 621.72705078125, "learning_rate": 1.4420193849527972e-06, "loss": 19.5625, "step": 11252 }, { "epoch": 0.7473600318788604, "grad_norm": 169.40162658691406, "learning_rate": 1.4419229077000468e-06, "loss": 13.1406, "step": 11253 }, { "epoch": 0.747426446171216, "grad_norm": 340.1582336425781, "learning_rate": 1.4418264253353867e-06, "loss": 24.4062, "step": 11254 }, { "epoch": 0.7474928604635718, "grad_norm": 307.328857421875, "learning_rate": 1.4417299378599334e-06, "loss": 22.125, "step": 11255 }, { "epoch": 0.7475592747559274, "grad_norm": 132.9932861328125, "learning_rate": 1.4416334452748029e-06, "loss": 14.5938, "step": 11256 }, { "epoch": 0.7476256890482832, "grad_norm": 163.5732879638672, "learning_rate": 1.4415369475811114e-06, "loss": 15.8125, "step": 11257 }, { "epoch": 0.747692103340639, "grad_norm": 152.57272338867188, "learning_rate": 1.441440444779975e-06, "loss": 14.1641, "step": 11258 }, { "epoch": 0.7477585176329946, "grad_norm": 466.5670166015625, "learning_rate": 1.4413439368725104e-06, "loss": 20.4844, "step": 11259 }, { "epoch": 0.7478249319253504, "grad_norm": 123.97868347167969, "learning_rate": 1.4412474238598331e-06, "loss": 11.7188, "step": 11260 }, { "epoch": 0.747891346217706, "grad_norm": 327.3431396484375, "learning_rate": 1.4411509057430602e-06, "loss": 18.8594, "step": 11261 }, { "epoch": 0.7479577605100618, "grad_norm": 670.089599609375, "learning_rate": 1.4410543825233084e-06, "loss": 22.5156, "step": 11262 }, { "epoch": 0.7480241748024175, "grad_norm": 258.06402587890625, "learning_rate": 1.4409578542016934e-06, "loss": 15.2812, "step": 11263 }, { "epoch": 0.7480905890947732, "grad_norm": 170.2713623046875, "learning_rate": 1.4408613207793323e-06, "loss": 18.8672, "step": 11264 }, { "epoch": 0.7481570033871289, "grad_norm": 334.35211181640625, "learning_rate": 1.440764782257342e-06, "loss": 20.5625, "step": 11265 }, { "epoch": 0.7482234176794846, "grad_norm": 307.4074401855469, "learning_rate": 1.4406682386368383e-06, "loss": 29.9688, "step": 11266 }, { "epoch": 0.7482898319718403, "grad_norm": 263.63482666015625, "learning_rate": 1.4405716899189388e-06, "loss": 19.2656, "step": 11267 }, { "epoch": 0.7483562462641961, "grad_norm": 157.53189086914062, "learning_rate": 1.44047513610476e-06, "loss": 19.75, "step": 11268 }, { "epoch": 0.7484226605565518, "grad_norm": 402.6443786621094, "learning_rate": 1.4403785771954189e-06, "loss": 23.8281, "step": 11269 }, { "epoch": 0.7484890748489075, "grad_norm": 155.60548400878906, "learning_rate": 1.4402820131920324e-06, "loss": 13.5312, "step": 11270 }, { "epoch": 0.7485554891412632, "grad_norm": 150.9526824951172, "learning_rate": 1.4401854440957174e-06, "loss": 20.1406, "step": 11271 }, { "epoch": 0.7486219034336189, "grad_norm": 170.28855895996094, "learning_rate": 1.4400888699075909e-06, "loss": 15.7969, "step": 11272 }, { "epoch": 0.7486883177259747, "grad_norm": 318.6226501464844, "learning_rate": 1.4399922906287703e-06, "loss": 17.5312, "step": 11273 }, { "epoch": 0.7487547320183303, "grad_norm": 496.86846923828125, "learning_rate": 1.4398957062603723e-06, "loss": 25.3125, "step": 11274 }, { "epoch": 0.7488211463106861, "grad_norm": 207.76791381835938, "learning_rate": 1.4397991168035147e-06, "loss": 15.4531, "step": 11275 }, { "epoch": 0.7488875606030417, "grad_norm": 463.7041015625, "learning_rate": 1.4397025222593145e-06, "loss": 13.8125, "step": 11276 }, { "epoch": 0.7489539748953975, "grad_norm": 417.63458251953125, "learning_rate": 1.439605922628889e-06, "loss": 24.3438, "step": 11277 }, { "epoch": 0.7490203891877532, "grad_norm": 176.17196655273438, "learning_rate": 1.4395093179133556e-06, "loss": 14.4531, "step": 11278 }, { "epoch": 0.7490868034801089, "grad_norm": 598.5686645507812, "learning_rate": 1.4394127081138318e-06, "loss": 19.2031, "step": 11279 }, { "epoch": 0.7491532177724647, "grad_norm": 98.41580963134766, "learning_rate": 1.4393160932314354e-06, "loss": 13.8516, "step": 11280 }, { "epoch": 0.7492196320648203, "grad_norm": 213.61734008789062, "learning_rate": 1.4392194732672837e-06, "loss": 17.4688, "step": 11281 }, { "epoch": 0.7492860463571761, "grad_norm": 337.4973449707031, "learning_rate": 1.4391228482224942e-06, "loss": 16.9375, "step": 11282 }, { "epoch": 0.7493524606495318, "grad_norm": 300.9344177246094, "learning_rate": 1.439026218098185e-06, "loss": 13.3906, "step": 11283 }, { "epoch": 0.7494188749418875, "grad_norm": 436.42926025390625, "learning_rate": 1.438929582895474e-06, "loss": 19.9062, "step": 11284 }, { "epoch": 0.7494852892342432, "grad_norm": 614.8966674804688, "learning_rate": 1.4388329426154782e-06, "loss": 18.9062, "step": 11285 }, { "epoch": 0.749551703526599, "grad_norm": 221.57374572753906, "learning_rate": 1.438736297259316e-06, "loss": 14.5, "step": 11286 }, { "epoch": 0.7496181178189546, "grad_norm": 243.78367614746094, "learning_rate": 1.4386396468281055e-06, "loss": 17.3906, "step": 11287 }, { "epoch": 0.7496845321113104, "grad_norm": 290.010498046875, "learning_rate": 1.4385429913229644e-06, "loss": 18.3281, "step": 11288 }, { "epoch": 0.749750946403666, "grad_norm": 105.36943817138672, "learning_rate": 1.4384463307450108e-06, "loss": 14.1875, "step": 11289 }, { "epoch": 0.7498173606960218, "grad_norm": 133.09103393554688, "learning_rate": 1.438349665095363e-06, "loss": 19.1562, "step": 11290 }, { "epoch": 0.7498837749883775, "grad_norm": 263.1002502441406, "learning_rate": 1.4382529943751388e-06, "loss": 17.2969, "step": 11291 }, { "epoch": 0.7499501892807332, "grad_norm": 276.1827697753906, "learning_rate": 1.4381563185854569e-06, "loss": 11.8281, "step": 11292 }, { "epoch": 0.750016603573089, "grad_norm": 176.58221435546875, "learning_rate": 1.4380596377274351e-06, "loss": 14.7812, "step": 11293 }, { "epoch": 0.7500830178654446, "grad_norm": 196.0380401611328, "learning_rate": 1.4379629518021923e-06, "loss": 18.375, "step": 11294 }, { "epoch": 0.7501494321578004, "grad_norm": 809.9734497070312, "learning_rate": 1.4378662608108465e-06, "loss": 21.7188, "step": 11295 }, { "epoch": 0.750215846450156, "grad_norm": 323.6888122558594, "learning_rate": 1.4377695647545164e-06, "loss": 17.5312, "step": 11296 }, { "epoch": 0.7502822607425118, "grad_norm": 206.69403076171875, "learning_rate": 1.43767286363432e-06, "loss": 24.9219, "step": 11297 }, { "epoch": 0.7503486750348675, "grad_norm": 345.45538330078125, "learning_rate": 1.4375761574513767e-06, "loss": 21.1875, "step": 11298 }, { "epoch": 0.7504150893272232, "grad_norm": 240.73733520507812, "learning_rate": 1.4374794462068046e-06, "loss": 16.9844, "step": 11299 }, { "epoch": 0.7504815036195789, "grad_norm": 164.8260498046875, "learning_rate": 1.4373827299017224e-06, "loss": 14.2656, "step": 11300 }, { "epoch": 0.7505479179119346, "grad_norm": 286.97259521484375, "learning_rate": 1.4372860085372493e-06, "loss": 21.6562, "step": 11301 }, { "epoch": 0.7506143322042904, "grad_norm": 170.71144104003906, "learning_rate": 1.4371892821145037e-06, "loss": 16.6094, "step": 11302 }, { "epoch": 0.7506807464966461, "grad_norm": 130.01138305664062, "learning_rate": 1.4370925506346043e-06, "loss": 17.2344, "step": 11303 }, { "epoch": 0.7507471607890018, "grad_norm": 131.0606231689453, "learning_rate": 1.4369958140986707e-06, "loss": 12.8594, "step": 11304 }, { "epoch": 0.7508135750813575, "grad_norm": 127.24625396728516, "learning_rate": 1.4368990725078214e-06, "loss": 15.3438, "step": 11305 }, { "epoch": 0.7508799893737133, "grad_norm": 393.373046875, "learning_rate": 1.4368023258631757e-06, "loss": 9.7109, "step": 11306 }, { "epoch": 0.7509464036660689, "grad_norm": 726.9706420898438, "learning_rate": 1.4367055741658523e-06, "loss": 15.2969, "step": 11307 }, { "epoch": 0.7510128179584247, "grad_norm": 781.5476684570312, "learning_rate": 1.4366088174169709e-06, "loss": 27.4688, "step": 11308 }, { "epoch": 0.7510792322507803, "grad_norm": 278.7793884277344, "learning_rate": 1.43651205561765e-06, "loss": 12.0625, "step": 11309 }, { "epoch": 0.7511456465431361, "grad_norm": 187.72653198242188, "learning_rate": 1.4364152887690097e-06, "loss": 25.0938, "step": 11310 }, { "epoch": 0.7512120608354917, "grad_norm": 124.5443344116211, "learning_rate": 1.436318516872169e-06, "loss": 13.4922, "step": 11311 }, { "epoch": 0.7512784751278475, "grad_norm": 271.86602783203125, "learning_rate": 1.4362217399282472e-06, "loss": 16.8906, "step": 11312 }, { "epoch": 0.7513448894202033, "grad_norm": 278.48272705078125, "learning_rate": 1.4361249579383638e-06, "loss": 26.25, "step": 11313 }, { "epoch": 0.7514113037125589, "grad_norm": 527.046875, "learning_rate": 1.4360281709036385e-06, "loss": 18.625, "step": 11314 }, { "epoch": 0.7514777180049147, "grad_norm": 153.97679138183594, "learning_rate": 1.4359313788251905e-06, "loss": 18.9219, "step": 11315 }, { "epoch": 0.7515441322972704, "grad_norm": 196.00192260742188, "learning_rate": 1.4358345817041402e-06, "loss": 11.7344, "step": 11316 }, { "epoch": 0.7516105465896261, "grad_norm": 120.09199523925781, "learning_rate": 1.4357377795416064e-06, "loss": 15.125, "step": 11317 }, { "epoch": 0.7516769608819818, "grad_norm": 201.8821563720703, "learning_rate": 1.435640972338709e-06, "loss": 22.7188, "step": 11318 }, { "epoch": 0.7517433751743375, "grad_norm": 262.3896179199219, "learning_rate": 1.4355441600965685e-06, "loss": 22.4844, "step": 11319 }, { "epoch": 0.7518097894666932, "grad_norm": 284.6969299316406, "learning_rate": 1.4354473428163041e-06, "loss": 23.6875, "step": 11320 }, { "epoch": 0.751876203759049, "grad_norm": 210.24676513671875, "learning_rate": 1.435350520499036e-06, "loss": 14.6875, "step": 11321 }, { "epoch": 0.7519426180514046, "grad_norm": 167.8802947998047, "learning_rate": 1.4352536931458842e-06, "loss": 17.125, "step": 11322 }, { "epoch": 0.7520090323437604, "grad_norm": 370.7002868652344, "learning_rate": 1.4351568607579685e-06, "loss": 19.625, "step": 11323 }, { "epoch": 0.7520754466361161, "grad_norm": 247.5911407470703, "learning_rate": 1.4350600233364093e-06, "loss": 18.4375, "step": 11324 }, { "epoch": 0.7521418609284718, "grad_norm": 147.2194366455078, "learning_rate": 1.4349631808823261e-06, "loss": 16.3438, "step": 11325 }, { "epoch": 0.7522082752208276, "grad_norm": 345.5111999511719, "learning_rate": 1.4348663333968402e-06, "loss": 14.875, "step": 11326 }, { "epoch": 0.7522746895131832, "grad_norm": 370.6654968261719, "learning_rate": 1.434769480881071e-06, "loss": 23.7188, "step": 11327 }, { "epoch": 0.752341103805539, "grad_norm": 547.322998046875, "learning_rate": 1.4346726233361394e-06, "loss": 26.5625, "step": 11328 }, { "epoch": 0.7524075180978946, "grad_norm": 490.0865783691406, "learning_rate": 1.4345757607631654e-06, "loss": 19.0625, "step": 11329 }, { "epoch": 0.7524739323902504, "grad_norm": 177.1226043701172, "learning_rate": 1.4344788931632698e-06, "loss": 16.8594, "step": 11330 }, { "epoch": 0.752540346682606, "grad_norm": 358.8783874511719, "learning_rate": 1.4343820205375728e-06, "loss": 16.2344, "step": 11331 }, { "epoch": 0.7526067609749618, "grad_norm": 260.9453125, "learning_rate": 1.4342851428871949e-06, "loss": 21.125, "step": 11332 }, { "epoch": 0.7526731752673175, "grad_norm": 199.57803344726562, "learning_rate": 1.4341882602132568e-06, "loss": 16.2812, "step": 11333 }, { "epoch": 0.7527395895596732, "grad_norm": 134.64939880371094, "learning_rate": 1.4340913725168794e-06, "loss": 15.8438, "step": 11334 }, { "epoch": 0.752806003852029, "grad_norm": 181.53611755371094, "learning_rate": 1.4339944797991834e-06, "loss": 16.5, "step": 11335 }, { "epoch": 0.7528724181443847, "grad_norm": 221.9235382080078, "learning_rate": 1.4338975820612895e-06, "loss": 15.1406, "step": 11336 }, { "epoch": 0.7529388324367404, "grad_norm": 207.31312561035156, "learning_rate": 1.4338006793043188e-06, "loss": 18.4531, "step": 11337 }, { "epoch": 0.7530052467290961, "grad_norm": 162.97950744628906, "learning_rate": 1.4337037715293915e-06, "loss": 16.4062, "step": 11338 }, { "epoch": 0.7530716610214518, "grad_norm": 462.0454406738281, "learning_rate": 1.4336068587376293e-06, "loss": 21.125, "step": 11339 }, { "epoch": 0.7531380753138075, "grad_norm": 176.55238342285156, "learning_rate": 1.4335099409301531e-06, "loss": 16.1562, "step": 11340 }, { "epoch": 0.7532044896061633, "grad_norm": 1203.0150146484375, "learning_rate": 1.4334130181080836e-06, "loss": 14.6719, "step": 11341 }, { "epoch": 0.7532709038985189, "grad_norm": 346.1994934082031, "learning_rate": 1.4333160902725426e-06, "loss": 20.4297, "step": 11342 }, { "epoch": 0.7533373181908747, "grad_norm": 220.1724853515625, "learning_rate": 1.4332191574246507e-06, "loss": 13.7109, "step": 11343 }, { "epoch": 0.7534037324832303, "grad_norm": 294.23907470703125, "learning_rate": 1.4331222195655295e-06, "loss": 21.6875, "step": 11344 }, { "epoch": 0.7534701467755861, "grad_norm": 136.1506805419922, "learning_rate": 1.4330252766963003e-06, "loss": 18.8438, "step": 11345 }, { "epoch": 0.7535365610679419, "grad_norm": 174.10414123535156, "learning_rate": 1.432928328818084e-06, "loss": 14.1719, "step": 11346 }, { "epoch": 0.7536029753602975, "grad_norm": 359.7867736816406, "learning_rate": 1.4328313759320027e-06, "loss": 16.6562, "step": 11347 }, { "epoch": 0.7536693896526533, "grad_norm": 580.015380859375, "learning_rate": 1.4327344180391776e-06, "loss": 22.9219, "step": 11348 }, { "epoch": 0.7537358039450089, "grad_norm": 361.365478515625, "learning_rate": 1.4326374551407304e-06, "loss": 25.8594, "step": 11349 }, { "epoch": 0.7538022182373647, "grad_norm": 141.36676025390625, "learning_rate": 1.4325404872377823e-06, "loss": 19.5625, "step": 11350 }, { "epoch": 0.7538686325297204, "grad_norm": 404.9259338378906, "learning_rate": 1.4324435143314558e-06, "loss": 23.75, "step": 11351 }, { "epoch": 0.7539350468220761, "grad_norm": 177.01612854003906, "learning_rate": 1.4323465364228715e-06, "loss": 19.8438, "step": 11352 }, { "epoch": 0.7540014611144318, "grad_norm": 284.8478088378906, "learning_rate": 1.432249553513152e-06, "loss": 14.6562, "step": 11353 }, { "epoch": 0.7540678754067875, "grad_norm": 240.09127807617188, "learning_rate": 1.4321525656034189e-06, "loss": 21.2656, "step": 11354 }, { "epoch": 0.7541342896991432, "grad_norm": 151.22567749023438, "learning_rate": 1.4320555726947942e-06, "loss": 14.5781, "step": 11355 }, { "epoch": 0.754200703991499, "grad_norm": 488.4153747558594, "learning_rate": 1.4319585747883995e-06, "loss": 22.6719, "step": 11356 }, { "epoch": 0.7542671182838547, "grad_norm": 117.016845703125, "learning_rate": 1.4318615718853572e-06, "loss": 15.4844, "step": 11357 }, { "epoch": 0.7543335325762104, "grad_norm": 321.9716491699219, "learning_rate": 1.4317645639867897e-06, "loss": 16.7188, "step": 11358 }, { "epoch": 0.7543999468685662, "grad_norm": 2634.550048828125, "learning_rate": 1.4316675510938183e-06, "loss": 19.9531, "step": 11359 }, { "epoch": 0.7544663611609218, "grad_norm": 218.30625915527344, "learning_rate": 1.4315705332075656e-06, "loss": 14.3594, "step": 11360 }, { "epoch": 0.7545327754532776, "grad_norm": 253.4535675048828, "learning_rate": 1.4314735103291538e-06, "loss": 17.2656, "step": 11361 }, { "epoch": 0.7545991897456332, "grad_norm": 176.6495819091797, "learning_rate": 1.4313764824597055e-06, "loss": 18.1719, "step": 11362 }, { "epoch": 0.754665604037989, "grad_norm": 147.70648193359375, "learning_rate": 1.4312794496003425e-06, "loss": 14.5156, "step": 11363 }, { "epoch": 0.7547320183303446, "grad_norm": 363.6097106933594, "learning_rate": 1.4311824117521875e-06, "loss": 21.625, "step": 11364 }, { "epoch": 0.7547984326227004, "grad_norm": 174.6302490234375, "learning_rate": 1.431085368916363e-06, "loss": 20.4688, "step": 11365 }, { "epoch": 0.7548648469150561, "grad_norm": 433.9065856933594, "learning_rate": 1.4309883210939917e-06, "loss": 19.9688, "step": 11366 }, { "epoch": 0.7549312612074118, "grad_norm": 396.2280578613281, "learning_rate": 1.430891268286196e-06, "loss": 23.7188, "step": 11367 }, { "epoch": 0.7549976754997676, "grad_norm": 267.60986328125, "learning_rate": 1.4307942104940983e-06, "loss": 14.3281, "step": 11368 }, { "epoch": 0.7550640897921232, "grad_norm": 215.20523071289062, "learning_rate": 1.430697147718822e-06, "loss": 18.2188, "step": 11369 }, { "epoch": 0.755130504084479, "grad_norm": 175.4080047607422, "learning_rate": 1.4306000799614892e-06, "loss": 17.8438, "step": 11370 }, { "epoch": 0.7551969183768347, "grad_norm": 256.63055419921875, "learning_rate": 1.430503007223223e-06, "loss": 16.3594, "step": 11371 }, { "epoch": 0.7552633326691904, "grad_norm": 180.26577758789062, "learning_rate": 1.4304059295051462e-06, "loss": 17.0781, "step": 11372 }, { "epoch": 0.7553297469615461, "grad_norm": 230.23350524902344, "learning_rate": 1.430308846808382e-06, "loss": 15.3594, "step": 11373 }, { "epoch": 0.7553961612539019, "grad_norm": 116.33613586425781, "learning_rate": 1.430211759134053e-06, "loss": 16.8125, "step": 11374 }, { "epoch": 0.7554625755462575, "grad_norm": 202.6083221435547, "learning_rate": 1.4301146664832824e-06, "loss": 15.8281, "step": 11375 }, { "epoch": 0.7555289898386133, "grad_norm": 168.9649200439453, "learning_rate": 1.4300175688571936e-06, "loss": 21.5625, "step": 11376 }, { "epoch": 0.755595404130969, "grad_norm": 130.99241638183594, "learning_rate": 1.4299204662569093e-06, "loss": 12.4375, "step": 11377 }, { "epoch": 0.7556618184233247, "grad_norm": 328.9798583984375, "learning_rate": 1.4298233586835531e-06, "loss": 24.9219, "step": 11378 }, { "epoch": 0.7557282327156805, "grad_norm": 318.77069091796875, "learning_rate": 1.4297262461382482e-06, "loss": 21.1562, "step": 11379 }, { "epoch": 0.7557946470080361, "grad_norm": 171.930419921875, "learning_rate": 1.4296291286221178e-06, "loss": 19.7344, "step": 11380 }, { "epoch": 0.7558610613003919, "grad_norm": 194.3369903564453, "learning_rate": 1.4295320061362852e-06, "loss": 20.6406, "step": 11381 }, { "epoch": 0.7559274755927475, "grad_norm": 134.05859375, "learning_rate": 1.4294348786818741e-06, "loss": 14.25, "step": 11382 }, { "epoch": 0.7559938898851033, "grad_norm": 591.5350341796875, "learning_rate": 1.4293377462600083e-06, "loss": 22.9219, "step": 11383 }, { "epoch": 0.756060304177459, "grad_norm": 303.60223388671875, "learning_rate": 1.4292406088718107e-06, "loss": 17.4375, "step": 11384 }, { "epoch": 0.7561267184698147, "grad_norm": 237.5203399658203, "learning_rate": 1.4291434665184053e-06, "loss": 25.2031, "step": 11385 }, { "epoch": 0.7561931327621704, "grad_norm": 226.27427673339844, "learning_rate": 1.4290463192009154e-06, "loss": 14.4688, "step": 11386 }, { "epoch": 0.7562595470545261, "grad_norm": 195.9327850341797, "learning_rate": 1.4289491669204657e-06, "loss": 22.2969, "step": 11387 }, { "epoch": 0.7563259613468819, "grad_norm": 292.80938720703125, "learning_rate": 1.4288520096781791e-06, "loss": 20.5938, "step": 11388 }, { "epoch": 0.7563923756392376, "grad_norm": 161.35574340820312, "learning_rate": 1.4287548474751798e-06, "loss": 14.5, "step": 11389 }, { "epoch": 0.7564587899315933, "grad_norm": 276.05499267578125, "learning_rate": 1.4286576803125916e-06, "loss": 19.9375, "step": 11390 }, { "epoch": 0.756525204223949, "grad_norm": 299.57537841796875, "learning_rate": 1.4285605081915386e-06, "loss": 24.8438, "step": 11391 }, { "epoch": 0.7565916185163047, "grad_norm": 207.95106506347656, "learning_rate": 1.4284633311131446e-06, "loss": 21.2031, "step": 11392 }, { "epoch": 0.7566580328086604, "grad_norm": 351.99969482421875, "learning_rate": 1.428366149078534e-06, "loss": 21.8125, "step": 11393 }, { "epoch": 0.7567244471010162, "grad_norm": 137.1719512939453, "learning_rate": 1.428268962088831e-06, "loss": 15.9375, "step": 11394 }, { "epoch": 0.7567908613933718, "grad_norm": 286.21563720703125, "learning_rate": 1.428171770145159e-06, "loss": 20.7188, "step": 11395 }, { "epoch": 0.7568572756857276, "grad_norm": 235.59930419921875, "learning_rate": 1.4280745732486433e-06, "loss": 22.0312, "step": 11396 }, { "epoch": 0.7569236899780832, "grad_norm": 116.42137908935547, "learning_rate": 1.4279773714004076e-06, "loss": 16.8125, "step": 11397 }, { "epoch": 0.756990104270439, "grad_norm": 280.7526550292969, "learning_rate": 1.4278801646015767e-06, "loss": 18.75, "step": 11398 }, { "epoch": 0.7570565185627948, "grad_norm": 175.96304321289062, "learning_rate": 1.4277829528532746e-06, "loss": 21.2812, "step": 11399 }, { "epoch": 0.7571229328551504, "grad_norm": 302.13262939453125, "learning_rate": 1.427685736156626e-06, "loss": 19.5781, "step": 11400 }, { "epoch": 0.7571893471475062, "grad_norm": 332.01885986328125, "learning_rate": 1.4275885145127556e-06, "loss": 20.5781, "step": 11401 }, { "epoch": 0.7572557614398618, "grad_norm": 151.9922637939453, "learning_rate": 1.4274912879227877e-06, "loss": 16.4922, "step": 11402 }, { "epoch": 0.7573221757322176, "grad_norm": 362.2876281738281, "learning_rate": 1.4273940563878468e-06, "loss": 15.5469, "step": 11403 }, { "epoch": 0.7573885900245733, "grad_norm": 172.42767333984375, "learning_rate": 1.427296819909058e-06, "loss": 14.9062, "step": 11404 }, { "epoch": 0.757455004316929, "grad_norm": 242.51959228515625, "learning_rate": 1.4271995784875463e-06, "loss": 17.1719, "step": 11405 }, { "epoch": 0.7575214186092847, "grad_norm": 149.30398559570312, "learning_rate": 1.427102332124436e-06, "loss": 15.8906, "step": 11406 }, { "epoch": 0.7575878329016404, "grad_norm": 194.5740509033203, "learning_rate": 1.4270050808208523e-06, "loss": 21.3438, "step": 11407 }, { "epoch": 0.7576542471939961, "grad_norm": 188.92323303222656, "learning_rate": 1.42690782457792e-06, "loss": 18.4375, "step": 11408 }, { "epoch": 0.7577206614863519, "grad_norm": 189.84286499023438, "learning_rate": 1.426810563396764e-06, "loss": 13.6406, "step": 11409 }, { "epoch": 0.7577870757787076, "grad_norm": 510.6014099121094, "learning_rate": 1.4267132972785096e-06, "loss": 12.8359, "step": 11410 }, { "epoch": 0.7578534900710633, "grad_norm": 132.79530334472656, "learning_rate": 1.4266160262242822e-06, "loss": 16.9062, "step": 11411 }, { "epoch": 0.757919904363419, "grad_norm": 342.64874267578125, "learning_rate": 1.426518750235206e-06, "loss": 18.2969, "step": 11412 }, { "epoch": 0.7579863186557747, "grad_norm": 339.5273742675781, "learning_rate": 1.4264214693124072e-06, "loss": 18.375, "step": 11413 }, { "epoch": 0.7580527329481305, "grad_norm": 158.25149536132812, "learning_rate": 1.4263241834570107e-06, "loss": 17.0781, "step": 11414 }, { "epoch": 0.7581191472404861, "grad_norm": 139.49073791503906, "learning_rate": 1.426226892670142e-06, "loss": 17.0312, "step": 11415 }, { "epoch": 0.7581855615328419, "grad_norm": 254.82244873046875, "learning_rate": 1.426129596952926e-06, "loss": 20.9375, "step": 11416 }, { "epoch": 0.7582519758251975, "grad_norm": 221.73928833007812, "learning_rate": 1.4260322963064887e-06, "loss": 20.4844, "step": 11417 }, { "epoch": 0.7583183901175533, "grad_norm": 294.1198425292969, "learning_rate": 1.4259349907319557e-06, "loss": 11.3672, "step": 11418 }, { "epoch": 0.758384804409909, "grad_norm": 134.56947326660156, "learning_rate": 1.4258376802304523e-06, "loss": 13.0781, "step": 11419 }, { "epoch": 0.7584512187022647, "grad_norm": 121.80011749267578, "learning_rate": 1.425740364803104e-06, "loss": 14.2812, "step": 11420 }, { "epoch": 0.7585176329946205, "grad_norm": 515.1925048828125, "learning_rate": 1.4256430444510366e-06, "loss": 33.125, "step": 11421 }, { "epoch": 0.7585840472869761, "grad_norm": 397.0013732910156, "learning_rate": 1.4255457191753761e-06, "loss": 22.0, "step": 11422 }, { "epoch": 0.7586504615793319, "grad_norm": 233.4967803955078, "learning_rate": 1.4254483889772479e-06, "loss": 21.75, "step": 11423 }, { "epoch": 0.7587168758716876, "grad_norm": 287.0468444824219, "learning_rate": 1.4253510538577781e-06, "loss": 14.9531, "step": 11424 }, { "epoch": 0.7587832901640433, "grad_norm": 314.5189208984375, "learning_rate": 1.4252537138180926e-06, "loss": 17.2031, "step": 11425 }, { "epoch": 0.758849704456399, "grad_norm": 218.2420196533203, "learning_rate": 1.4251563688593177e-06, "loss": 14.9688, "step": 11426 }, { "epoch": 0.7589161187487548, "grad_norm": 146.91522216796875, "learning_rate": 1.4250590189825787e-06, "loss": 18.3281, "step": 11427 }, { "epoch": 0.7589825330411104, "grad_norm": 199.03195190429688, "learning_rate": 1.424961664189002e-06, "loss": 17.2656, "step": 11428 }, { "epoch": 0.7590489473334662, "grad_norm": 161.54669189453125, "learning_rate": 1.4248643044797142e-06, "loss": 14.25, "step": 11429 }, { "epoch": 0.7591153616258218, "grad_norm": 372.61517333984375, "learning_rate": 1.4247669398558412e-06, "loss": 25.9688, "step": 11430 }, { "epoch": 0.7591817759181776, "grad_norm": 152.48329162597656, "learning_rate": 1.4246695703185086e-06, "loss": 14.1875, "step": 11431 }, { "epoch": 0.7592481902105334, "grad_norm": 140.3401336669922, "learning_rate": 1.4245721958688435e-06, "loss": 15.9688, "step": 11432 }, { "epoch": 0.759314604502889, "grad_norm": 438.4024658203125, "learning_rate": 1.4244748165079722e-06, "loss": 18.2812, "step": 11433 }, { "epoch": 0.7593810187952448, "grad_norm": 187.4120330810547, "learning_rate": 1.424377432237021e-06, "loss": 20.5625, "step": 11434 }, { "epoch": 0.7594474330876004, "grad_norm": 130.64678955078125, "learning_rate": 1.424280043057116e-06, "loss": 19.6094, "step": 11435 }, { "epoch": 0.7595138473799562, "grad_norm": 190.08114624023438, "learning_rate": 1.4241826489693845e-06, "loss": 22.5781, "step": 11436 }, { "epoch": 0.7595802616723119, "grad_norm": 132.7705078125, "learning_rate": 1.4240852499749527e-06, "loss": 15.9219, "step": 11437 }, { "epoch": 0.7596466759646676, "grad_norm": 182.7642822265625, "learning_rate": 1.4239878460749472e-06, "loss": 17.875, "step": 11438 }, { "epoch": 0.7597130902570233, "grad_norm": 124.19676208496094, "learning_rate": 1.4238904372704946e-06, "loss": 14.9375, "step": 11439 }, { "epoch": 0.759779504549379, "grad_norm": 200.36158752441406, "learning_rate": 1.423793023562722e-06, "loss": 21.6406, "step": 11440 }, { "epoch": 0.7598459188417347, "grad_norm": 140.2877197265625, "learning_rate": 1.4236956049527562e-06, "loss": 12.1875, "step": 11441 }, { "epoch": 0.7599123331340905, "grad_norm": 158.72805786132812, "learning_rate": 1.423598181441724e-06, "loss": 16.3594, "step": 11442 }, { "epoch": 0.7599787474264462, "grad_norm": 303.4570007324219, "learning_rate": 1.423500753030752e-06, "loss": 17.2344, "step": 11443 }, { "epoch": 0.7600451617188019, "grad_norm": 279.0068359375, "learning_rate": 1.4234033197209676e-06, "loss": 21.5781, "step": 11444 }, { "epoch": 0.7601115760111576, "grad_norm": 289.48046875, "learning_rate": 1.4233058815134977e-06, "loss": 19.6719, "step": 11445 }, { "epoch": 0.7601779903035133, "grad_norm": 364.2298278808594, "learning_rate": 1.4232084384094696e-06, "loss": 21.375, "step": 11446 }, { "epoch": 0.7602444045958691, "grad_norm": 393.723876953125, "learning_rate": 1.4231109904100104e-06, "loss": 20.3438, "step": 11447 }, { "epoch": 0.7603108188882247, "grad_norm": 221.44235229492188, "learning_rate": 1.4230135375162472e-06, "loss": 21.7031, "step": 11448 }, { "epoch": 0.7603772331805805, "grad_norm": 1265.5550537109375, "learning_rate": 1.422916079729307e-06, "loss": 16.5312, "step": 11449 }, { "epoch": 0.7604436474729361, "grad_norm": 239.6180419921875, "learning_rate": 1.4228186170503176e-06, "loss": 19.2031, "step": 11450 }, { "epoch": 0.7605100617652919, "grad_norm": 234.3389892578125, "learning_rate": 1.4227211494804063e-06, "loss": 21.9062, "step": 11451 }, { "epoch": 0.7605764760576476, "grad_norm": 369.90240478515625, "learning_rate": 1.4226236770207006e-06, "loss": 29.2188, "step": 11452 }, { "epoch": 0.7606428903500033, "grad_norm": 258.3165588378906, "learning_rate": 1.422526199672328e-06, "loss": 22.2656, "step": 11453 }, { "epoch": 0.7607093046423591, "grad_norm": 342.3445129394531, "learning_rate": 1.4224287174364157e-06, "loss": 20.3438, "step": 11454 }, { "epoch": 0.7607757189347147, "grad_norm": 205.48712158203125, "learning_rate": 1.4223312303140918e-06, "loss": 18.4688, "step": 11455 }, { "epoch": 0.7608421332270705, "grad_norm": 311.854248046875, "learning_rate": 1.4222337383064835e-06, "loss": 20.8125, "step": 11456 }, { "epoch": 0.7609085475194262, "grad_norm": 203.0210723876953, "learning_rate": 1.422136241414719e-06, "loss": 18.1875, "step": 11457 }, { "epoch": 0.7609749618117819, "grad_norm": 379.6282043457031, "learning_rate": 1.422038739639926e-06, "loss": 24.8281, "step": 11458 }, { "epoch": 0.7610413761041376, "grad_norm": 543.1451416015625, "learning_rate": 1.4219412329832318e-06, "loss": 21.7344, "step": 11459 }, { "epoch": 0.7611077903964933, "grad_norm": 151.27764892578125, "learning_rate": 1.4218437214457648e-06, "loss": 16.5938, "step": 11460 }, { "epoch": 0.761174204688849, "grad_norm": 310.0318603515625, "learning_rate": 1.421746205028653e-06, "loss": 24.2344, "step": 11461 }, { "epoch": 0.7612406189812048, "grad_norm": 218.07456970214844, "learning_rate": 1.4216486837330246e-06, "loss": 19.1562, "step": 11462 }, { "epoch": 0.7613070332735604, "grad_norm": 147.14776611328125, "learning_rate": 1.421551157560007e-06, "loss": 14.375, "step": 11463 }, { "epoch": 0.7613734475659162, "grad_norm": 156.4718780517578, "learning_rate": 1.4214536265107287e-06, "loss": 18.7969, "step": 11464 }, { "epoch": 0.761439861858272, "grad_norm": 286.97589111328125, "learning_rate": 1.4213560905863181e-06, "loss": 20.2656, "step": 11465 }, { "epoch": 0.7615062761506276, "grad_norm": 156.01416015625, "learning_rate": 1.421258549787903e-06, "loss": 15.3906, "step": 11466 }, { "epoch": 0.7615726904429834, "grad_norm": 142.4091339111328, "learning_rate": 1.421161004116612e-06, "loss": 14.8594, "step": 11467 }, { "epoch": 0.761639104735339, "grad_norm": 404.9075012207031, "learning_rate": 1.4210634535735734e-06, "loss": 24.0, "step": 11468 }, { "epoch": 0.7617055190276948, "grad_norm": 144.06166076660156, "learning_rate": 1.4209658981599156e-06, "loss": 16.5469, "step": 11469 }, { "epoch": 0.7617719333200504, "grad_norm": 517.7322387695312, "learning_rate": 1.4208683378767668e-06, "loss": 21.2656, "step": 11470 }, { "epoch": 0.7618383476124062, "grad_norm": 156.52001953125, "learning_rate": 1.4207707727252557e-06, "loss": 15.5781, "step": 11471 }, { "epoch": 0.7619047619047619, "grad_norm": 190.35482788085938, "learning_rate": 1.4206732027065112e-06, "loss": 15.9844, "step": 11472 }, { "epoch": 0.7619711761971176, "grad_norm": 233.16461181640625, "learning_rate": 1.4205756278216616e-06, "loss": 15.0, "step": 11473 }, { "epoch": 0.7620375904894733, "grad_norm": 385.3000183105469, "learning_rate": 1.4204780480718358e-06, "loss": 19.1406, "step": 11474 }, { "epoch": 0.762104004781829, "grad_norm": 236.2679443359375, "learning_rate": 1.420380463458162e-06, "loss": 14.9062, "step": 11475 }, { "epoch": 0.7621704190741848, "grad_norm": 147.8197021484375, "learning_rate": 1.4202828739817698e-06, "loss": 14.5312, "step": 11476 }, { "epoch": 0.7622368333665405, "grad_norm": 218.61585998535156, "learning_rate": 1.4201852796437874e-06, "loss": 20.4688, "step": 11477 }, { "epoch": 0.7623032476588962, "grad_norm": 287.5067443847656, "learning_rate": 1.420087680445344e-06, "loss": 20.8125, "step": 11478 }, { "epoch": 0.7623696619512519, "grad_norm": 319.8008117675781, "learning_rate": 1.4199900763875688e-06, "loss": 28.7031, "step": 11479 }, { "epoch": 0.7624360762436077, "grad_norm": 292.6424865722656, "learning_rate": 1.4198924674715905e-06, "loss": 26.75, "step": 11480 }, { "epoch": 0.7625024905359633, "grad_norm": 169.40277099609375, "learning_rate": 1.4197948536985377e-06, "loss": 17.4375, "step": 11481 }, { "epoch": 0.7625689048283191, "grad_norm": 236.88803100585938, "learning_rate": 1.4196972350695407e-06, "loss": 18.0, "step": 11482 }, { "epoch": 0.7626353191206747, "grad_norm": 110.5543441772461, "learning_rate": 1.419599611585728e-06, "loss": 12.625, "step": 11483 }, { "epoch": 0.7627017334130305, "grad_norm": 156.3123321533203, "learning_rate": 1.419501983248229e-06, "loss": 18.8906, "step": 11484 }, { "epoch": 0.7627681477053861, "grad_norm": 325.9300537109375, "learning_rate": 1.4194043500581728e-06, "loss": 20.2188, "step": 11485 }, { "epoch": 0.7628345619977419, "grad_norm": 298.55291748046875, "learning_rate": 1.419306712016689e-06, "loss": 21.5312, "step": 11486 }, { "epoch": 0.7629009762900977, "grad_norm": 130.16116333007812, "learning_rate": 1.419209069124907e-06, "loss": 19.0625, "step": 11487 }, { "epoch": 0.7629673905824533, "grad_norm": 161.75950622558594, "learning_rate": 1.4191114213839561e-06, "loss": 15.1875, "step": 11488 }, { "epoch": 0.7630338048748091, "grad_norm": 230.54373168945312, "learning_rate": 1.4190137687949659e-06, "loss": 25.0156, "step": 11489 }, { "epoch": 0.7631002191671648, "grad_norm": 351.3522644042969, "learning_rate": 1.4189161113590661e-06, "loss": 19.3281, "step": 11490 }, { "epoch": 0.7631666334595205, "grad_norm": 984.8161010742188, "learning_rate": 1.4188184490773864e-06, "loss": 15.4688, "step": 11491 }, { "epoch": 0.7632330477518762, "grad_norm": 192.45103454589844, "learning_rate": 1.4187207819510562e-06, "loss": 17.0938, "step": 11492 }, { "epoch": 0.7632994620442319, "grad_norm": 181.81272888183594, "learning_rate": 1.4186231099812056e-06, "loss": 16.7188, "step": 11493 }, { "epoch": 0.7633658763365876, "grad_norm": 280.9609069824219, "learning_rate": 1.4185254331689645e-06, "loss": 16.4844, "step": 11494 }, { "epoch": 0.7634322906289434, "grad_norm": 299.7467956542969, "learning_rate": 1.4184277515154621e-06, "loss": 18.2344, "step": 11495 }, { "epoch": 0.763498704921299, "grad_norm": 1041.4200439453125, "learning_rate": 1.4183300650218288e-06, "loss": 20.1719, "step": 11496 }, { "epoch": 0.7635651192136548, "grad_norm": 307.52099609375, "learning_rate": 1.4182323736891948e-06, "loss": 18.0469, "step": 11497 }, { "epoch": 0.7636315335060105, "grad_norm": 153.32557678222656, "learning_rate": 1.4181346775186894e-06, "loss": 20.2812, "step": 11498 }, { "epoch": 0.7636979477983662, "grad_norm": 229.05201721191406, "learning_rate": 1.4180369765114435e-06, "loss": 24.3125, "step": 11499 }, { "epoch": 0.763764362090722, "grad_norm": 290.345458984375, "learning_rate": 1.4179392706685868e-06, "loss": 14.8906, "step": 11500 }, { "epoch": 0.7638307763830776, "grad_norm": 188.36175537109375, "learning_rate": 1.41784155999125e-06, "loss": 13.2812, "step": 11501 }, { "epoch": 0.7638971906754334, "grad_norm": 121.76856994628906, "learning_rate": 1.4177438444805625e-06, "loss": 17.9219, "step": 11502 }, { "epoch": 0.763963604967789, "grad_norm": 304.29449462890625, "learning_rate": 1.4176461241376551e-06, "loss": 12.1094, "step": 11503 }, { "epoch": 0.7640300192601448, "grad_norm": 196.10629272460938, "learning_rate": 1.4175483989636584e-06, "loss": 14.8281, "step": 11504 }, { "epoch": 0.7640964335525005, "grad_norm": 179.90267944335938, "learning_rate": 1.4174506689597025e-06, "loss": 17.9062, "step": 11505 }, { "epoch": 0.7641628478448562, "grad_norm": 386.384033203125, "learning_rate": 1.4173529341269179e-06, "loss": 18.8203, "step": 11506 }, { "epoch": 0.7642292621372119, "grad_norm": 131.6089324951172, "learning_rate": 1.4172551944664353e-06, "loss": 15.6875, "step": 11507 }, { "epoch": 0.7642956764295676, "grad_norm": 342.986572265625, "learning_rate": 1.4171574499793853e-06, "loss": 20.3203, "step": 11508 }, { "epoch": 0.7643620907219234, "grad_norm": 181.5123748779297, "learning_rate": 1.4170597006668983e-06, "loss": 15.0469, "step": 11509 }, { "epoch": 0.7644285050142791, "grad_norm": 253.0241241455078, "learning_rate": 1.4169619465301052e-06, "loss": 27.3438, "step": 11510 }, { "epoch": 0.7644949193066348, "grad_norm": 163.03121948242188, "learning_rate": 1.4168641875701365e-06, "loss": 13.5938, "step": 11511 }, { "epoch": 0.7645613335989905, "grad_norm": 435.08447265625, "learning_rate": 1.416766423788124e-06, "loss": 16.1094, "step": 11512 }, { "epoch": 0.7646277478913462, "grad_norm": 186.01243591308594, "learning_rate": 1.4166686551851972e-06, "loss": 17.6875, "step": 11513 }, { "epoch": 0.7646941621837019, "grad_norm": 126.77181243896484, "learning_rate": 1.4165708817624877e-06, "loss": 12.0, "step": 11514 }, { "epoch": 0.7647605764760577, "grad_norm": 410.2765197753906, "learning_rate": 1.4164731035211267e-06, "loss": 14.4375, "step": 11515 }, { "epoch": 0.7648269907684133, "grad_norm": 170.51597595214844, "learning_rate": 1.4163753204622449e-06, "loss": 19.0938, "step": 11516 }, { "epoch": 0.7648934050607691, "grad_norm": 238.36563110351562, "learning_rate": 1.4162775325869734e-06, "loss": 17.75, "step": 11517 }, { "epoch": 0.7649598193531247, "grad_norm": 185.479248046875, "learning_rate": 1.4161797398964435e-06, "loss": 22.1406, "step": 11518 }, { "epoch": 0.7650262336454805, "grad_norm": 202.43043518066406, "learning_rate": 1.4160819423917862e-06, "loss": 16.5469, "step": 11519 }, { "epoch": 0.7650926479378363, "grad_norm": 306.1792297363281, "learning_rate": 1.415984140074133e-06, "loss": 23.0625, "step": 11520 }, { "epoch": 0.7651590622301919, "grad_norm": 129.00537109375, "learning_rate": 1.4158863329446152e-06, "loss": 15.3438, "step": 11521 }, { "epoch": 0.7652254765225477, "grad_norm": 223.09901428222656, "learning_rate": 1.4157885210043644e-06, "loss": 20.9531, "step": 11522 }, { "epoch": 0.7652918908149033, "grad_norm": 652.7255859375, "learning_rate": 1.4156907042545112e-06, "loss": 25.5312, "step": 11523 }, { "epoch": 0.7653583051072591, "grad_norm": 308.8670959472656, "learning_rate": 1.4155928826961879e-06, "loss": 18.5938, "step": 11524 }, { "epoch": 0.7654247193996148, "grad_norm": 284.3728942871094, "learning_rate": 1.4154950563305257e-06, "loss": 17.3438, "step": 11525 }, { "epoch": 0.7654911336919705, "grad_norm": 308.7904357910156, "learning_rate": 1.4153972251586565e-06, "loss": 27.6875, "step": 11526 }, { "epoch": 0.7655575479843262, "grad_norm": 200.63412475585938, "learning_rate": 1.4152993891817115e-06, "loss": 17.875, "step": 11527 }, { "epoch": 0.765623962276682, "grad_norm": 175.36660766601562, "learning_rate": 1.4152015484008227e-06, "loss": 13.5469, "step": 11528 }, { "epoch": 0.7656903765690377, "grad_norm": 345.8276672363281, "learning_rate": 1.415103702817122e-06, "loss": 21.0625, "step": 11529 }, { "epoch": 0.7657567908613934, "grad_norm": 129.4594268798828, "learning_rate": 1.4150058524317408e-06, "loss": 17.8125, "step": 11530 }, { "epoch": 0.7658232051537491, "grad_norm": 167.61141967773438, "learning_rate": 1.4149079972458112e-06, "loss": 18.2812, "step": 11531 }, { "epoch": 0.7658896194461048, "grad_norm": 103.38980102539062, "learning_rate": 1.414810137260465e-06, "loss": 18.2344, "step": 11532 }, { "epoch": 0.7659560337384606, "grad_norm": 179.90560913085938, "learning_rate": 1.4147122724768347e-06, "loss": 19.1875, "step": 11533 }, { "epoch": 0.7660224480308162, "grad_norm": 306.96319580078125, "learning_rate": 1.414614402896052e-06, "loss": 20.5, "step": 11534 }, { "epoch": 0.766088862323172, "grad_norm": 341.3179626464844, "learning_rate": 1.4145165285192486e-06, "loss": 19.3281, "step": 11535 }, { "epoch": 0.7661552766155276, "grad_norm": 233.50437927246094, "learning_rate": 1.414418649347557e-06, "loss": 20.4375, "step": 11536 }, { "epoch": 0.7662216909078834, "grad_norm": 286.61358642578125, "learning_rate": 1.41432076538211e-06, "loss": 20.7656, "step": 11537 }, { "epoch": 0.766288105200239, "grad_norm": 219.03530883789062, "learning_rate": 1.4142228766240387e-06, "loss": 18.2188, "step": 11538 }, { "epoch": 0.7663545194925948, "grad_norm": 148.99464416503906, "learning_rate": 1.414124983074476e-06, "loss": 15.4688, "step": 11539 }, { "epoch": 0.7664209337849506, "grad_norm": 166.6622314453125, "learning_rate": 1.4140270847345547e-06, "loss": 16.0312, "step": 11540 }, { "epoch": 0.7664873480773062, "grad_norm": 344.4269104003906, "learning_rate": 1.4139291816054066e-06, "loss": 16.2656, "step": 11541 }, { "epoch": 0.766553762369662, "grad_norm": 425.9452209472656, "learning_rate": 1.4138312736881645e-06, "loss": 18.25, "step": 11542 }, { "epoch": 0.7666201766620176, "grad_norm": 311.7181701660156, "learning_rate": 1.4137333609839607e-06, "loss": 17.5156, "step": 11543 }, { "epoch": 0.7666865909543734, "grad_norm": 309.9884033203125, "learning_rate": 1.4136354434939285e-06, "loss": 22.0625, "step": 11544 }, { "epoch": 0.7667530052467291, "grad_norm": 137.8727569580078, "learning_rate": 1.4135375212191994e-06, "loss": 14.3594, "step": 11545 }, { "epoch": 0.7668194195390848, "grad_norm": 138.75352478027344, "learning_rate": 1.4134395941609068e-06, "loss": 16.4688, "step": 11546 }, { "epoch": 0.7668858338314405, "grad_norm": 221.5641326904297, "learning_rate": 1.4133416623201836e-06, "loss": 16.0781, "step": 11547 }, { "epoch": 0.7669522481237963, "grad_norm": 128.30389404296875, "learning_rate": 1.4132437256981622e-06, "loss": 14.125, "step": 11548 }, { "epoch": 0.7670186624161519, "grad_norm": 147.14024353027344, "learning_rate": 1.4131457842959757e-06, "loss": 18.0469, "step": 11549 }, { "epoch": 0.7670850767085077, "grad_norm": 420.1864318847656, "learning_rate": 1.413047838114757e-06, "loss": 19.0781, "step": 11550 }, { "epoch": 0.7671514910008634, "grad_norm": 172.1472625732422, "learning_rate": 1.4129498871556394e-06, "loss": 15.6562, "step": 11551 }, { "epoch": 0.7672179052932191, "grad_norm": 320.4477844238281, "learning_rate": 1.412851931419755e-06, "loss": 13.1875, "step": 11552 }, { "epoch": 0.7672843195855749, "grad_norm": 239.8662872314453, "learning_rate": 1.4127539709082378e-06, "loss": 17.5078, "step": 11553 }, { "epoch": 0.7673507338779305, "grad_norm": 144.27035522460938, "learning_rate": 1.4126560056222209e-06, "loss": 15.9062, "step": 11554 }, { "epoch": 0.7674171481702863, "grad_norm": 285.7259216308594, "learning_rate": 1.412558035562837e-06, "loss": 22.125, "step": 11555 }, { "epoch": 0.7674835624626419, "grad_norm": 629.6417846679688, "learning_rate": 1.4124600607312198e-06, "loss": 30.3438, "step": 11556 }, { "epoch": 0.7675499767549977, "grad_norm": 125.88321685791016, "learning_rate": 1.412362081128502e-06, "loss": 13.0, "step": 11557 }, { "epoch": 0.7676163910473534, "grad_norm": 236.7933807373047, "learning_rate": 1.4122640967558183e-06, "loss": 16.2188, "step": 11558 }, { "epoch": 0.7676828053397091, "grad_norm": 216.11395263671875, "learning_rate": 1.4121661076143007e-06, "loss": 15.8906, "step": 11559 }, { "epoch": 0.7677492196320648, "grad_norm": 313.1263122558594, "learning_rate": 1.4120681137050832e-06, "loss": 18.75, "step": 11560 }, { "epoch": 0.7678156339244205, "grad_norm": 164.5071258544922, "learning_rate": 1.4119701150292998e-06, "loss": 16.2188, "step": 11561 }, { "epoch": 0.7678820482167763, "grad_norm": 276.0098876953125, "learning_rate": 1.4118721115880833e-06, "loss": 18.0781, "step": 11562 }, { "epoch": 0.767948462509132, "grad_norm": 434.07373046875, "learning_rate": 1.4117741033825677e-06, "loss": 18.4375, "step": 11563 }, { "epoch": 0.7680148768014877, "grad_norm": 211.69349670410156, "learning_rate": 1.4116760904138866e-06, "loss": 19.9062, "step": 11564 }, { "epoch": 0.7680812910938434, "grad_norm": 235.38136291503906, "learning_rate": 1.4115780726831743e-06, "loss": 17.1406, "step": 11565 }, { "epoch": 0.7681477053861991, "grad_norm": 181.71905517578125, "learning_rate": 1.4114800501915637e-06, "loss": 27.6406, "step": 11566 }, { "epoch": 0.7682141196785548, "grad_norm": 187.0181427001953, "learning_rate": 1.4113820229401896e-06, "loss": 24.9219, "step": 11567 }, { "epoch": 0.7682805339709106, "grad_norm": 242.19261169433594, "learning_rate": 1.4112839909301852e-06, "loss": 21.625, "step": 11568 }, { "epoch": 0.7683469482632662, "grad_norm": 329.15704345703125, "learning_rate": 1.411185954162685e-06, "loss": 20.4844, "step": 11569 }, { "epoch": 0.768413362555622, "grad_norm": 309.2724609375, "learning_rate": 1.4110879126388224e-06, "loss": 18.0938, "step": 11570 }, { "epoch": 0.7684797768479776, "grad_norm": 285.88458251953125, "learning_rate": 1.4109898663597321e-06, "loss": 13.9688, "step": 11571 }, { "epoch": 0.7685461911403334, "grad_norm": 243.81585693359375, "learning_rate": 1.4108918153265483e-06, "loss": 15.1094, "step": 11572 }, { "epoch": 0.7686126054326892, "grad_norm": 163.25384521484375, "learning_rate": 1.4107937595404045e-06, "loss": 18.2344, "step": 11573 }, { "epoch": 0.7686790197250448, "grad_norm": 130.73336791992188, "learning_rate": 1.4106956990024355e-06, "loss": 14.75, "step": 11574 }, { "epoch": 0.7687454340174006, "grad_norm": 193.24282836914062, "learning_rate": 1.4105976337137753e-06, "loss": 16.1094, "step": 11575 }, { "epoch": 0.7688118483097562, "grad_norm": 152.2552947998047, "learning_rate": 1.4104995636755588e-06, "loss": 15.0938, "step": 11576 }, { "epoch": 0.768878262602112, "grad_norm": 241.2152862548828, "learning_rate": 1.4104014888889198e-06, "loss": 19.5469, "step": 11577 }, { "epoch": 0.7689446768944677, "grad_norm": 278.8390197753906, "learning_rate": 1.410303409354993e-06, "loss": 15.6094, "step": 11578 }, { "epoch": 0.7690110911868234, "grad_norm": 225.76231384277344, "learning_rate": 1.410205325074913e-06, "loss": 10.5625, "step": 11579 }, { "epoch": 0.7690775054791791, "grad_norm": 245.40985107421875, "learning_rate": 1.4101072360498145e-06, "loss": 13.0781, "step": 11580 }, { "epoch": 0.7691439197715348, "grad_norm": 306.4455261230469, "learning_rate": 1.4100091422808318e-06, "loss": 22.875, "step": 11581 }, { "epoch": 0.7692103340638905, "grad_norm": 220.9036102294922, "learning_rate": 1.4099110437690998e-06, "loss": 22.2812, "step": 11582 }, { "epoch": 0.7692767483562463, "grad_norm": 176.10739135742188, "learning_rate": 1.4098129405157532e-06, "loss": 13.6094, "step": 11583 }, { "epoch": 0.769343162648602, "grad_norm": 260.754638671875, "learning_rate": 1.409714832521927e-06, "loss": 16.5156, "step": 11584 }, { "epoch": 0.7694095769409577, "grad_norm": 226.1063232421875, "learning_rate": 1.4096167197887556e-06, "loss": 17.9219, "step": 11585 }, { "epoch": 0.7694759912333134, "grad_norm": 210.8856201171875, "learning_rate": 1.4095186023173745e-06, "loss": 17.9062, "step": 11586 }, { "epoch": 0.7695424055256691, "grad_norm": 157.5384063720703, "learning_rate": 1.409420480108918e-06, "loss": 14.8906, "step": 11587 }, { "epoch": 0.7696088198180249, "grad_norm": 123.77973175048828, "learning_rate": 1.4093223531645216e-06, "loss": 18.125, "step": 11588 }, { "epoch": 0.7696752341103805, "grad_norm": 340.21234130859375, "learning_rate": 1.4092242214853203e-06, "loss": 18.3594, "step": 11589 }, { "epoch": 0.7697416484027363, "grad_norm": 175.34999084472656, "learning_rate": 1.4091260850724493e-06, "loss": 13.2656, "step": 11590 }, { "epoch": 0.7698080626950919, "grad_norm": 267.7229919433594, "learning_rate": 1.4090279439270435e-06, "loss": 15.0781, "step": 11591 }, { "epoch": 0.7698744769874477, "grad_norm": 229.82595825195312, "learning_rate": 1.408929798050238e-06, "loss": 13.875, "step": 11592 }, { "epoch": 0.7699408912798034, "grad_norm": 197.93203735351562, "learning_rate": 1.408831647443169e-06, "loss": 14.2656, "step": 11593 }, { "epoch": 0.7700073055721591, "grad_norm": 141.1587371826172, "learning_rate": 1.4087334921069706e-06, "loss": 15.1875, "step": 11594 }, { "epoch": 0.7700737198645149, "grad_norm": 308.5074157714844, "learning_rate": 1.4086353320427792e-06, "loss": 18.25, "step": 11595 }, { "epoch": 0.7701401341568705, "grad_norm": 115.7659912109375, "learning_rate": 1.4085371672517301e-06, "loss": 18.4688, "step": 11596 }, { "epoch": 0.7702065484492263, "grad_norm": 145.0826416015625, "learning_rate": 1.4084389977349585e-06, "loss": 12.5, "step": 11597 }, { "epoch": 0.770272962741582, "grad_norm": 426.1162109375, "learning_rate": 1.4083408234936e-06, "loss": 17.7188, "step": 11598 }, { "epoch": 0.7703393770339377, "grad_norm": 120.24160766601562, "learning_rate": 1.4082426445287902e-06, "loss": 16.5469, "step": 11599 }, { "epoch": 0.7704057913262934, "grad_norm": 533.9953002929688, "learning_rate": 1.4081444608416653e-06, "loss": 20.4062, "step": 11600 }, { "epoch": 0.7704722056186492, "grad_norm": 120.96007537841797, "learning_rate": 1.4080462724333603e-06, "loss": 19.0781, "step": 11601 }, { "epoch": 0.7705386199110048, "grad_norm": 352.07666015625, "learning_rate": 1.4079480793050114e-06, "loss": 20.7969, "step": 11602 }, { "epoch": 0.7706050342033606, "grad_norm": 150.87185668945312, "learning_rate": 1.4078498814577545e-06, "loss": 12.625, "step": 11603 }, { "epoch": 0.7706714484957162, "grad_norm": 177.8325958251953, "learning_rate": 1.407751678892725e-06, "loss": 15.2656, "step": 11604 }, { "epoch": 0.770737862788072, "grad_norm": 218.4776611328125, "learning_rate": 1.4076534716110595e-06, "loss": 16.7188, "step": 11605 }, { "epoch": 0.7708042770804278, "grad_norm": 181.0694580078125, "learning_rate": 1.4075552596138936e-06, "loss": 19.625, "step": 11606 }, { "epoch": 0.7708706913727834, "grad_norm": 308.1063537597656, "learning_rate": 1.4074570429023634e-06, "loss": 18.0781, "step": 11607 }, { "epoch": 0.7709371056651392, "grad_norm": 293.0494689941406, "learning_rate": 1.4073588214776053e-06, "loss": 22.0938, "step": 11608 }, { "epoch": 0.7710035199574948, "grad_norm": 345.75848388671875, "learning_rate": 1.407260595340755e-06, "loss": 17.0156, "step": 11609 }, { "epoch": 0.7710699342498506, "grad_norm": 259.188232421875, "learning_rate": 1.4071623644929491e-06, "loss": 16.9531, "step": 11610 }, { "epoch": 0.7711363485422063, "grad_norm": 345.0445861816406, "learning_rate": 1.4070641289353238e-06, "loss": 20.5156, "step": 11611 }, { "epoch": 0.771202762834562, "grad_norm": 260.03778076171875, "learning_rate": 1.4069658886690152e-06, "loss": 18.0469, "step": 11612 }, { "epoch": 0.7712691771269177, "grad_norm": 145.68765258789062, "learning_rate": 1.40686764369516e-06, "loss": 16.2656, "step": 11613 }, { "epoch": 0.7713355914192734, "grad_norm": 221.14723205566406, "learning_rate": 1.4067693940148946e-06, "loss": 17.1562, "step": 11614 }, { "epoch": 0.7714020057116291, "grad_norm": 304.9795227050781, "learning_rate": 1.4066711396293553e-06, "loss": 23.8281, "step": 11615 }, { "epoch": 0.7714684200039849, "grad_norm": 420.17431640625, "learning_rate": 1.4065728805396789e-06, "loss": 15.9062, "step": 11616 }, { "epoch": 0.7715348342963406, "grad_norm": 141.46145629882812, "learning_rate": 1.4064746167470016e-06, "loss": 16.4375, "step": 11617 }, { "epoch": 0.7716012485886963, "grad_norm": 335.3525695800781, "learning_rate": 1.4063763482524605e-06, "loss": 18.6562, "step": 11618 }, { "epoch": 0.771667662881052, "grad_norm": 160.38201904296875, "learning_rate": 1.406278075057192e-06, "loss": 16.7812, "step": 11619 }, { "epoch": 0.7717340771734077, "grad_norm": 295.22247314453125, "learning_rate": 1.406179797162333e-06, "loss": 19.8594, "step": 11620 }, { "epoch": 0.7718004914657635, "grad_norm": 182.32125854492188, "learning_rate": 1.4060815145690203e-06, "loss": 22.125, "step": 11621 }, { "epoch": 0.7718669057581191, "grad_norm": 355.6936340332031, "learning_rate": 1.4059832272783911e-06, "loss": 23.3125, "step": 11622 }, { "epoch": 0.7719333200504749, "grad_norm": 263.3887634277344, "learning_rate": 1.4058849352915818e-06, "loss": 22.5, "step": 11623 }, { "epoch": 0.7719997343428305, "grad_norm": 812.5503540039062, "learning_rate": 1.4057866386097297e-06, "loss": 16.0312, "step": 11624 }, { "epoch": 0.7720661486351863, "grad_norm": 114.86048126220703, "learning_rate": 1.4056883372339716e-06, "loss": 13.6875, "step": 11625 }, { "epoch": 0.772132562927542, "grad_norm": 248.52328491210938, "learning_rate": 1.4055900311654449e-06, "loss": 14.1484, "step": 11626 }, { "epoch": 0.7721989772198977, "grad_norm": 543.3864135742188, "learning_rate": 1.4054917204052866e-06, "loss": 19.4219, "step": 11627 }, { "epoch": 0.7722653915122535, "grad_norm": 211.64187622070312, "learning_rate": 1.4053934049546334e-06, "loss": 12.0625, "step": 11628 }, { "epoch": 0.7723318058046091, "grad_norm": 244.58856201171875, "learning_rate": 1.4052950848146239e-06, "loss": 21.5, "step": 11629 }, { "epoch": 0.7723982200969649, "grad_norm": 815.7459106445312, "learning_rate": 1.405196759986394e-06, "loss": 18.9062, "step": 11630 }, { "epoch": 0.7724646343893206, "grad_norm": 147.43096923828125, "learning_rate": 1.4050984304710817e-06, "loss": 22.8594, "step": 11631 }, { "epoch": 0.7725310486816763, "grad_norm": 263.64361572265625, "learning_rate": 1.4050000962698247e-06, "loss": 13.7344, "step": 11632 }, { "epoch": 0.772597462974032, "grad_norm": 223.66400146484375, "learning_rate": 1.4049017573837597e-06, "loss": 13.6719, "step": 11633 }, { "epoch": 0.7726638772663877, "grad_norm": 237.25363159179688, "learning_rate": 1.4048034138140248e-06, "loss": 22.25, "step": 11634 }, { "epoch": 0.7727302915587434, "grad_norm": 169.25631713867188, "learning_rate": 1.4047050655617575e-06, "loss": 19.2656, "step": 11635 }, { "epoch": 0.7727967058510992, "grad_norm": 216.80072021484375, "learning_rate": 1.4046067126280953e-06, "loss": 21.9531, "step": 11636 }, { "epoch": 0.7728631201434548, "grad_norm": 331.81048583984375, "learning_rate": 1.4045083550141761e-06, "loss": 19.5, "step": 11637 }, { "epoch": 0.7729295344358106, "grad_norm": 217.55059814453125, "learning_rate": 1.4044099927211373e-06, "loss": 20.1094, "step": 11638 }, { "epoch": 0.7729959487281663, "grad_norm": 201.13232421875, "learning_rate": 1.4043116257501169e-06, "loss": 17.6875, "step": 11639 }, { "epoch": 0.773062363020522, "grad_norm": 179.59117126464844, "learning_rate": 1.4042132541022528e-06, "loss": 15.25, "step": 11640 }, { "epoch": 0.7731287773128778, "grad_norm": 188.22628784179688, "learning_rate": 1.4041148777786828e-06, "loss": 23.9844, "step": 11641 }, { "epoch": 0.7731951916052334, "grad_norm": 165.9556884765625, "learning_rate": 1.404016496780545e-06, "loss": 13.2578, "step": 11642 }, { "epoch": 0.7732616058975892, "grad_norm": 169.44094848632812, "learning_rate": 1.4039181111089775e-06, "loss": 20.4844, "step": 11643 }, { "epoch": 0.7733280201899448, "grad_norm": 192.4852294921875, "learning_rate": 1.403819720765118e-06, "loss": 13.0781, "step": 11644 }, { "epoch": 0.7733944344823006, "grad_norm": 236.5330047607422, "learning_rate": 1.4037213257501048e-06, "loss": 19.1406, "step": 11645 }, { "epoch": 0.7734608487746563, "grad_norm": 197.69580078125, "learning_rate": 1.4036229260650762e-06, "loss": 20.8125, "step": 11646 }, { "epoch": 0.773527263067012, "grad_norm": 357.31463623046875, "learning_rate": 1.4035245217111704e-06, "loss": 18.1875, "step": 11647 }, { "epoch": 0.7735936773593677, "grad_norm": 121.99169921875, "learning_rate": 1.4034261126895255e-06, "loss": 13.75, "step": 11648 }, { "epoch": 0.7736600916517234, "grad_norm": 308.255859375, "learning_rate": 1.4033276990012798e-06, "loss": 20.8594, "step": 11649 }, { "epoch": 0.7737265059440792, "grad_norm": 135.8463592529297, "learning_rate": 1.4032292806475722e-06, "loss": 15.5312, "step": 11650 }, { "epoch": 0.7737929202364349, "grad_norm": 98.0473861694336, "learning_rate": 1.4031308576295407e-06, "loss": 13.4688, "step": 11651 }, { "epoch": 0.7738593345287906, "grad_norm": 193.72718811035156, "learning_rate": 1.4030324299483239e-06, "loss": 15.875, "step": 11652 }, { "epoch": 0.7739257488211463, "grad_norm": 374.2673645019531, "learning_rate": 1.4029339976050603e-06, "loss": 14.0, "step": 11653 }, { "epoch": 0.773992163113502, "grad_norm": 138.44371032714844, "learning_rate": 1.4028355606008885e-06, "loss": 22.625, "step": 11654 }, { "epoch": 0.7740585774058577, "grad_norm": 382.1343994140625, "learning_rate": 1.4027371189369473e-06, "loss": 21.9688, "step": 11655 }, { "epoch": 0.7741249916982135, "grad_norm": 217.94473266601562, "learning_rate": 1.4026386726143752e-06, "loss": 18.9844, "step": 11656 }, { "epoch": 0.7741914059905691, "grad_norm": 306.83770751953125, "learning_rate": 1.4025402216343112e-06, "loss": 19.7188, "step": 11657 }, { "epoch": 0.7742578202829249, "grad_norm": 221.59852600097656, "learning_rate": 1.4024417659978943e-06, "loss": 15.0156, "step": 11658 }, { "epoch": 0.7743242345752805, "grad_norm": 365.0985412597656, "learning_rate": 1.4023433057062626e-06, "loss": 20.1875, "step": 11659 }, { "epoch": 0.7743906488676363, "grad_norm": 177.6779327392578, "learning_rate": 1.4022448407605559e-06, "loss": 16.2188, "step": 11660 }, { "epoch": 0.7744570631599921, "grad_norm": 203.71449279785156, "learning_rate": 1.4021463711619125e-06, "loss": 18.4375, "step": 11661 }, { "epoch": 0.7745234774523477, "grad_norm": 289.24920654296875, "learning_rate": 1.4020478969114722e-06, "loss": 22.625, "step": 11662 }, { "epoch": 0.7745898917447035, "grad_norm": 246.42828369140625, "learning_rate": 1.4019494180103734e-06, "loss": 18.5, "step": 11663 }, { "epoch": 0.7746563060370592, "grad_norm": 227.4944305419922, "learning_rate": 1.4018509344597555e-06, "loss": 21.9375, "step": 11664 }, { "epoch": 0.7747227203294149, "grad_norm": 293.443603515625, "learning_rate": 1.4017524462607578e-06, "loss": 19.4375, "step": 11665 }, { "epoch": 0.7747891346217706, "grad_norm": 255.37872314453125, "learning_rate": 1.4016539534145193e-06, "loss": 19.2812, "step": 11666 }, { "epoch": 0.7748555489141263, "grad_norm": 183.45860290527344, "learning_rate": 1.4015554559221795e-06, "loss": 16.9062, "step": 11667 }, { "epoch": 0.774921963206482, "grad_norm": 495.2870788574219, "learning_rate": 1.4014569537848782e-06, "loss": 13.25, "step": 11668 }, { "epoch": 0.7749883774988378, "grad_norm": 216.1665496826172, "learning_rate": 1.401358447003754e-06, "loss": 21.1406, "step": 11669 }, { "epoch": 0.7750547917911935, "grad_norm": 137.64405822753906, "learning_rate": 1.4012599355799466e-06, "loss": 14.6094, "step": 11670 }, { "epoch": 0.7751212060835492, "grad_norm": 214.35745239257812, "learning_rate": 1.4011614195145957e-06, "loss": 14.7969, "step": 11671 }, { "epoch": 0.7751876203759049, "grad_norm": 174.5756072998047, "learning_rate": 1.4010628988088412e-06, "loss": 14.5781, "step": 11672 }, { "epoch": 0.7752540346682606, "grad_norm": 209.9064178466797, "learning_rate": 1.400964373463822e-06, "loss": 17.875, "step": 11673 }, { "epoch": 0.7753204489606164, "grad_norm": 345.0979919433594, "learning_rate": 1.4008658434806782e-06, "loss": 23.0312, "step": 11674 }, { "epoch": 0.775386863252972, "grad_norm": 200.45587158203125, "learning_rate": 1.4007673088605495e-06, "loss": 20.75, "step": 11675 }, { "epoch": 0.7754532775453278, "grad_norm": 195.0234375, "learning_rate": 1.4006687696045756e-06, "loss": 16.4062, "step": 11676 }, { "epoch": 0.7755196918376834, "grad_norm": 241.70932006835938, "learning_rate": 1.4005702257138962e-06, "loss": 14.9219, "step": 11677 }, { "epoch": 0.7755861061300392, "grad_norm": 226.4474334716797, "learning_rate": 1.4004716771896516e-06, "loss": 18.4062, "step": 11678 }, { "epoch": 0.7756525204223949, "grad_norm": 109.36518859863281, "learning_rate": 1.4003731240329819e-06, "loss": 17.2031, "step": 11679 }, { "epoch": 0.7757189347147506, "grad_norm": 166.6238555908203, "learning_rate": 1.4002745662450263e-06, "loss": 16.5312, "step": 11680 }, { "epoch": 0.7757853490071064, "grad_norm": 168.70228576660156, "learning_rate": 1.4001760038269255e-06, "loss": 15.5156, "step": 11681 }, { "epoch": 0.775851763299462, "grad_norm": 176.32534790039062, "learning_rate": 1.4000774367798194e-06, "loss": 16.1406, "step": 11682 }, { "epoch": 0.7759181775918178, "grad_norm": 477.31170654296875, "learning_rate": 1.3999788651048485e-06, "loss": 20.3281, "step": 11683 }, { "epoch": 0.7759845918841735, "grad_norm": 335.79315185546875, "learning_rate": 1.3998802888031524e-06, "loss": 24.3906, "step": 11684 }, { "epoch": 0.7760510061765292, "grad_norm": 134.63858032226562, "learning_rate": 1.3997817078758718e-06, "loss": 18.75, "step": 11685 }, { "epoch": 0.7761174204688849, "grad_norm": 400.5762939453125, "learning_rate": 1.3996831223241471e-06, "loss": 24.2188, "step": 11686 }, { "epoch": 0.7761838347612406, "grad_norm": 232.9674835205078, "learning_rate": 1.3995845321491184e-06, "loss": 22.7031, "step": 11687 }, { "epoch": 0.7762502490535963, "grad_norm": 162.9443359375, "learning_rate": 1.3994859373519261e-06, "loss": 16.8438, "step": 11688 }, { "epoch": 0.7763166633459521, "grad_norm": 201.03738403320312, "learning_rate": 1.399387337933711e-06, "loss": 21.375, "step": 11689 }, { "epoch": 0.7763830776383077, "grad_norm": 258.2417907714844, "learning_rate": 1.3992887338956135e-06, "loss": 16.6719, "step": 11690 }, { "epoch": 0.7764494919306635, "grad_norm": 188.81671142578125, "learning_rate": 1.3991901252387739e-06, "loss": 19.5312, "step": 11691 }, { "epoch": 0.7765159062230192, "grad_norm": 210.95790100097656, "learning_rate": 1.3990915119643331e-06, "loss": 18.2031, "step": 11692 }, { "epoch": 0.7765823205153749, "grad_norm": 366.6536560058594, "learning_rate": 1.3989928940734322e-06, "loss": 21.2656, "step": 11693 }, { "epoch": 0.7766487348077307, "grad_norm": 141.7607879638672, "learning_rate": 1.3988942715672113e-06, "loss": 16.1406, "step": 11694 }, { "epoch": 0.7767151491000863, "grad_norm": 157.922119140625, "learning_rate": 1.3987956444468117e-06, "loss": 15.4375, "step": 11695 }, { "epoch": 0.7767815633924421, "grad_norm": 775.3341064453125, "learning_rate": 1.3986970127133737e-06, "loss": 18.8125, "step": 11696 }, { "epoch": 0.7768479776847977, "grad_norm": 175.56063842773438, "learning_rate": 1.3985983763680389e-06, "loss": 16.0469, "step": 11697 }, { "epoch": 0.7769143919771535, "grad_norm": 166.92803955078125, "learning_rate": 1.398499735411948e-06, "loss": 17.3906, "step": 11698 }, { "epoch": 0.7769808062695092, "grad_norm": 159.458251953125, "learning_rate": 1.3984010898462415e-06, "loss": 17.8594, "step": 11699 }, { "epoch": 0.7770472205618649, "grad_norm": 316.3876037597656, "learning_rate": 1.398302439672061e-06, "loss": 17.3438, "step": 11700 }, { "epoch": 0.7771136348542206, "grad_norm": 148.24923706054688, "learning_rate": 1.398203784890548e-06, "loss": 15.1562, "step": 11701 }, { "epoch": 0.7771800491465763, "grad_norm": 197.21774291992188, "learning_rate": 1.3981051255028427e-06, "loss": 25.7812, "step": 11702 }, { "epoch": 0.7772464634389321, "grad_norm": 155.36390686035156, "learning_rate": 1.398006461510087e-06, "loss": 16.1094, "step": 11703 }, { "epoch": 0.7773128777312878, "grad_norm": 484.9010925292969, "learning_rate": 1.3979077929134224e-06, "loss": 20.5312, "step": 11704 }, { "epoch": 0.7773792920236435, "grad_norm": 161.84249877929688, "learning_rate": 1.3978091197139896e-06, "loss": 15.3281, "step": 11705 }, { "epoch": 0.7774457063159992, "grad_norm": 689.2337036132812, "learning_rate": 1.3977104419129303e-06, "loss": 20.5547, "step": 11706 }, { "epoch": 0.777512120608355, "grad_norm": 237.773193359375, "learning_rate": 1.3976117595113861e-06, "loss": 23.125, "step": 11707 }, { "epoch": 0.7775785349007106, "grad_norm": 448.9534912109375, "learning_rate": 1.3975130725104982e-06, "loss": 12.1719, "step": 11708 }, { "epoch": 0.7776449491930664, "grad_norm": 262.49456787109375, "learning_rate": 1.3974143809114083e-06, "loss": 21.5312, "step": 11709 }, { "epoch": 0.777711363485422, "grad_norm": 346.4402160644531, "learning_rate": 1.397315684715258e-06, "loss": 17.7969, "step": 11710 }, { "epoch": 0.7777777777777778, "grad_norm": 290.0448913574219, "learning_rate": 1.3972169839231893e-06, "loss": 20.8125, "step": 11711 }, { "epoch": 0.7778441920701334, "grad_norm": 287.6690673828125, "learning_rate": 1.3971182785363431e-06, "loss": 18.5469, "step": 11712 }, { "epoch": 0.7779106063624892, "grad_norm": 167.69671630859375, "learning_rate": 1.3970195685558617e-06, "loss": 17.7969, "step": 11713 }, { "epoch": 0.777977020654845, "grad_norm": 394.910888671875, "learning_rate": 1.3969208539828871e-06, "loss": 18.2031, "step": 11714 }, { "epoch": 0.7780434349472006, "grad_norm": 182.1622314453125, "learning_rate": 1.396822134818561e-06, "loss": 19.9531, "step": 11715 }, { "epoch": 0.7781098492395564, "grad_norm": 171.07901000976562, "learning_rate": 1.3967234110640251e-06, "loss": 15.9062, "step": 11716 }, { "epoch": 0.778176263531912, "grad_norm": 249.35952758789062, "learning_rate": 1.3966246827204213e-06, "loss": 14.7188, "step": 11717 }, { "epoch": 0.7782426778242678, "grad_norm": 314.8117980957031, "learning_rate": 1.3965259497888923e-06, "loss": 16.9688, "step": 11718 }, { "epoch": 0.7783090921166235, "grad_norm": 179.97320556640625, "learning_rate": 1.3964272122705795e-06, "loss": 16.4844, "step": 11719 }, { "epoch": 0.7783755064089792, "grad_norm": 165.66537475585938, "learning_rate": 1.3963284701666256e-06, "loss": 17.9062, "step": 11720 }, { "epoch": 0.7784419207013349, "grad_norm": 136.7315216064453, "learning_rate": 1.3962297234781721e-06, "loss": 16.875, "step": 11721 }, { "epoch": 0.7785083349936907, "grad_norm": 178.54763793945312, "learning_rate": 1.396130972206362e-06, "loss": 25.1562, "step": 11722 }, { "epoch": 0.7785747492860463, "grad_norm": 332.577392578125, "learning_rate": 1.396032216352337e-06, "loss": 21.1406, "step": 11723 }, { "epoch": 0.7786411635784021, "grad_norm": 310.2960205078125, "learning_rate": 1.3959334559172397e-06, "loss": 20.9844, "step": 11724 }, { "epoch": 0.7787075778707578, "grad_norm": 139.6602783203125, "learning_rate": 1.3958346909022126e-06, "loss": 22.2969, "step": 11725 }, { "epoch": 0.7787739921631135, "grad_norm": 175.50721740722656, "learning_rate": 1.3957359213083981e-06, "loss": 16.0625, "step": 11726 }, { "epoch": 0.7788404064554693, "grad_norm": 193.26930236816406, "learning_rate": 1.3956371471369383e-06, "loss": 18.9219, "step": 11727 }, { "epoch": 0.7789068207478249, "grad_norm": 374.3938293457031, "learning_rate": 1.3955383683889762e-06, "loss": 20.5625, "step": 11728 }, { "epoch": 0.7789732350401807, "grad_norm": 406.7311706542969, "learning_rate": 1.395439585065655e-06, "loss": 25.8906, "step": 11729 }, { "epoch": 0.7790396493325363, "grad_norm": 180.1151885986328, "learning_rate": 1.395340797168116e-06, "loss": 19.0312, "step": 11730 }, { "epoch": 0.7791060636248921, "grad_norm": 99.693115234375, "learning_rate": 1.395242004697503e-06, "loss": 9.7031, "step": 11731 }, { "epoch": 0.7791724779172478, "grad_norm": 343.6158752441406, "learning_rate": 1.3951432076549578e-06, "loss": 27.4062, "step": 11732 }, { "epoch": 0.7792388922096035, "grad_norm": 332.7677307128906, "learning_rate": 1.3950444060416246e-06, "loss": 23.375, "step": 11733 }, { "epoch": 0.7793053065019592, "grad_norm": 128.08160400390625, "learning_rate": 1.394945599858645e-06, "loss": 16.3438, "step": 11734 }, { "epoch": 0.7793717207943149, "grad_norm": 219.40260314941406, "learning_rate": 1.3948467891071626e-06, "loss": 13.0938, "step": 11735 }, { "epoch": 0.7794381350866707, "grad_norm": 144.9630889892578, "learning_rate": 1.3947479737883202e-06, "loss": 20.7188, "step": 11736 }, { "epoch": 0.7795045493790264, "grad_norm": 710.3654174804688, "learning_rate": 1.394649153903261e-06, "loss": 17.7031, "step": 11737 }, { "epoch": 0.7795709636713821, "grad_norm": 150.58834838867188, "learning_rate": 1.3945503294531278e-06, "loss": 15.8281, "step": 11738 }, { "epoch": 0.7796373779637378, "grad_norm": 431.5530700683594, "learning_rate": 1.3944515004390637e-06, "loss": 23.1562, "step": 11739 }, { "epoch": 0.7797037922560935, "grad_norm": 354.9440002441406, "learning_rate": 1.3943526668622126e-06, "loss": 19.0625, "step": 11740 }, { "epoch": 0.7797702065484492, "grad_norm": 201.04434204101562, "learning_rate": 1.3942538287237168e-06, "loss": 16.9531, "step": 11741 }, { "epoch": 0.779836620840805, "grad_norm": 281.20721435546875, "learning_rate": 1.3941549860247201e-06, "loss": 16.4844, "step": 11742 }, { "epoch": 0.7799030351331606, "grad_norm": 216.14404296875, "learning_rate": 1.394056138766366e-06, "loss": 13.9375, "step": 11743 }, { "epoch": 0.7799694494255164, "grad_norm": 166.48828125, "learning_rate": 1.3939572869497977e-06, "loss": 17.8906, "step": 11744 }, { "epoch": 0.780035863717872, "grad_norm": 406.9518127441406, "learning_rate": 1.3938584305761585e-06, "loss": 20.5, "step": 11745 }, { "epoch": 0.7801022780102278, "grad_norm": 161.26792907714844, "learning_rate": 1.3937595696465922e-06, "loss": 21.5, "step": 11746 }, { "epoch": 0.7801686923025836, "grad_norm": 129.7807159423828, "learning_rate": 1.3936607041622425e-06, "loss": 14.8281, "step": 11747 }, { "epoch": 0.7802351065949392, "grad_norm": 383.40802001953125, "learning_rate": 1.3935618341242522e-06, "loss": 19.9062, "step": 11748 }, { "epoch": 0.780301520887295, "grad_norm": 111.03076934814453, "learning_rate": 1.3934629595337661e-06, "loss": 13.7188, "step": 11749 }, { "epoch": 0.7803679351796506, "grad_norm": 195.71873474121094, "learning_rate": 1.3933640803919272e-06, "loss": 11.3906, "step": 11750 }, { "epoch": 0.7804343494720064, "grad_norm": 267.8840026855469, "learning_rate": 1.3932651966998794e-06, "loss": 20.3125, "step": 11751 }, { "epoch": 0.7805007637643621, "grad_norm": 211.902587890625, "learning_rate": 1.3931663084587666e-06, "loss": 14.0312, "step": 11752 }, { "epoch": 0.7805671780567178, "grad_norm": 146.7529754638672, "learning_rate": 1.3930674156697323e-06, "loss": 19.1406, "step": 11753 }, { "epoch": 0.7806335923490735, "grad_norm": 276.85516357421875, "learning_rate": 1.3929685183339215e-06, "loss": 20.6094, "step": 11754 }, { "epoch": 0.7807000066414292, "grad_norm": 163.78579711914062, "learning_rate": 1.3928696164524769e-06, "loss": 16.0625, "step": 11755 }, { "epoch": 0.7807664209337849, "grad_norm": 254.31076049804688, "learning_rate": 1.3927707100265433e-06, "loss": 26.2188, "step": 11756 }, { "epoch": 0.7808328352261407, "grad_norm": 445.08050537109375, "learning_rate": 1.3926717990572648e-06, "loss": 14.4688, "step": 11757 }, { "epoch": 0.7808992495184964, "grad_norm": 485.45233154296875, "learning_rate": 1.392572883545785e-06, "loss": 15.1719, "step": 11758 }, { "epoch": 0.7809656638108521, "grad_norm": 107.85274505615234, "learning_rate": 1.3924739634932488e-06, "loss": 14.4844, "step": 11759 }, { "epoch": 0.7810320781032078, "grad_norm": 178.62295532226562, "learning_rate": 1.3923750389007997e-06, "loss": 17.0781, "step": 11760 }, { "epoch": 0.7810984923955635, "grad_norm": 161.4015350341797, "learning_rate": 1.392276109769583e-06, "loss": 18.9688, "step": 11761 }, { "epoch": 0.7811649066879193, "grad_norm": 130.6486053466797, "learning_rate": 1.3921771761007419e-06, "loss": 15.5625, "step": 11762 }, { "epoch": 0.7812313209802749, "grad_norm": 792.2445678710938, "learning_rate": 1.3920782378954217e-06, "loss": 19.6562, "step": 11763 }, { "epoch": 0.7812977352726307, "grad_norm": 193.39633178710938, "learning_rate": 1.3919792951547662e-06, "loss": 17.7656, "step": 11764 }, { "epoch": 0.7813641495649863, "grad_norm": 219.79833984375, "learning_rate": 1.3918803478799206e-06, "loss": 14.375, "step": 11765 }, { "epoch": 0.7814305638573421, "grad_norm": 230.24510192871094, "learning_rate": 1.3917813960720287e-06, "loss": 16.625, "step": 11766 }, { "epoch": 0.7814969781496978, "grad_norm": 137.69613647460938, "learning_rate": 1.391682439732236e-06, "loss": 14.125, "step": 11767 }, { "epoch": 0.7815633924420535, "grad_norm": 159.91111755371094, "learning_rate": 1.3915834788616866e-06, "loss": 13.9141, "step": 11768 }, { "epoch": 0.7816298067344093, "grad_norm": 134.0707244873047, "learning_rate": 1.391484513461525e-06, "loss": 18.9062, "step": 11769 }, { "epoch": 0.781696221026765, "grad_norm": 666.2200927734375, "learning_rate": 1.3913855435328965e-06, "loss": 20.0156, "step": 11770 }, { "epoch": 0.7817626353191207, "grad_norm": 302.83154296875, "learning_rate": 1.3912865690769455e-06, "loss": 22.25, "step": 11771 }, { "epoch": 0.7818290496114764, "grad_norm": 418.9787902832031, "learning_rate": 1.3911875900948175e-06, "loss": 18.6719, "step": 11772 }, { "epoch": 0.7818954639038321, "grad_norm": 258.1109619140625, "learning_rate": 1.3910886065876567e-06, "loss": 20.6406, "step": 11773 }, { "epoch": 0.7819618781961878, "grad_norm": 145.68975830078125, "learning_rate": 1.3909896185566086e-06, "loss": 14.6094, "step": 11774 }, { "epoch": 0.7820282924885436, "grad_norm": 199.2551727294922, "learning_rate": 1.3908906260028178e-06, "loss": 15.7969, "step": 11775 }, { "epoch": 0.7820947067808992, "grad_norm": 181.3436279296875, "learning_rate": 1.39079162892743e-06, "loss": 17.7656, "step": 11776 }, { "epoch": 0.782161121073255, "grad_norm": 179.81564331054688, "learning_rate": 1.3906926273315898e-06, "loss": 12.5156, "step": 11777 }, { "epoch": 0.7822275353656106, "grad_norm": 262.39190673828125, "learning_rate": 1.3905936212164425e-06, "loss": 13.6094, "step": 11778 }, { "epoch": 0.7822939496579664, "grad_norm": 224.79415893554688, "learning_rate": 1.3904946105831337e-06, "loss": 20.9531, "step": 11779 }, { "epoch": 0.7823603639503222, "grad_norm": 217.21371459960938, "learning_rate": 1.3903955954328079e-06, "loss": 17.9062, "step": 11780 }, { "epoch": 0.7824267782426778, "grad_norm": 246.47280883789062, "learning_rate": 1.3902965757666114e-06, "loss": 19.5781, "step": 11781 }, { "epoch": 0.7824931925350336, "grad_norm": 349.4269104003906, "learning_rate": 1.3901975515856891e-06, "loss": 20.375, "step": 11782 }, { "epoch": 0.7825596068273892, "grad_norm": 202.54812622070312, "learning_rate": 1.3900985228911864e-06, "loss": 20.25, "step": 11783 }, { "epoch": 0.782626021119745, "grad_norm": 325.0361022949219, "learning_rate": 1.389999489684249e-06, "loss": 20.25, "step": 11784 }, { "epoch": 0.7826924354121007, "grad_norm": 939.8933715820312, "learning_rate": 1.3899004519660223e-06, "loss": 14.2344, "step": 11785 }, { "epoch": 0.7827588497044564, "grad_norm": 196.24032592773438, "learning_rate": 1.3898014097376523e-06, "loss": 14.5625, "step": 11786 }, { "epoch": 0.7828252639968121, "grad_norm": 378.7793884277344, "learning_rate": 1.3897023630002842e-06, "loss": 22.6094, "step": 11787 }, { "epoch": 0.7828916782891678, "grad_norm": 248.84432983398438, "learning_rate": 1.3896033117550636e-06, "loss": 19.9062, "step": 11788 }, { "epoch": 0.7829580925815235, "grad_norm": 140.25552368164062, "learning_rate": 1.389504256003137e-06, "loss": 16.3594, "step": 11789 }, { "epoch": 0.7830245068738793, "grad_norm": 199.17153930664062, "learning_rate": 1.3894051957456493e-06, "loss": 18.5312, "step": 11790 }, { "epoch": 0.783090921166235, "grad_norm": 264.2889404296875, "learning_rate": 1.3893061309837472e-06, "loss": 17.2969, "step": 11791 }, { "epoch": 0.7831573354585907, "grad_norm": 248.442138671875, "learning_rate": 1.3892070617185762e-06, "loss": 15.2969, "step": 11792 }, { "epoch": 0.7832237497509464, "grad_norm": 182.8451690673828, "learning_rate": 1.3891079879512824e-06, "loss": 12.1094, "step": 11793 }, { "epoch": 0.7832901640433021, "grad_norm": 204.28985595703125, "learning_rate": 1.3890089096830116e-06, "loss": 16.7344, "step": 11794 }, { "epoch": 0.7833565783356579, "grad_norm": 211.64366149902344, "learning_rate": 1.3889098269149102e-06, "loss": 18.4688, "step": 11795 }, { "epoch": 0.7834229926280135, "grad_norm": 228.0899200439453, "learning_rate": 1.3888107396481238e-06, "loss": 19.2188, "step": 11796 }, { "epoch": 0.7834894069203693, "grad_norm": 173.42437744140625, "learning_rate": 1.3887116478837996e-06, "loss": 19.9688, "step": 11797 }, { "epoch": 0.7835558212127249, "grad_norm": 472.3020324707031, "learning_rate": 1.388612551623083e-06, "loss": 19.4375, "step": 11798 }, { "epoch": 0.7836222355050807, "grad_norm": 481.1529235839844, "learning_rate": 1.3885134508671201e-06, "loss": 19.2031, "step": 11799 }, { "epoch": 0.7836886497974364, "grad_norm": 140.46449279785156, "learning_rate": 1.388414345617058e-06, "loss": 13.2656, "step": 11800 }, { "epoch": 0.7837550640897921, "grad_norm": 141.69247436523438, "learning_rate": 1.3883152358740425e-06, "loss": 18.4219, "step": 11801 }, { "epoch": 0.7838214783821479, "grad_norm": 303.4072570800781, "learning_rate": 1.3882161216392202e-06, "loss": 19.0469, "step": 11802 }, { "epoch": 0.7838878926745035, "grad_norm": 185.74005126953125, "learning_rate": 1.388117002913738e-06, "loss": 18.2812, "step": 11803 }, { "epoch": 0.7839543069668593, "grad_norm": 116.30199432373047, "learning_rate": 1.388017879698742e-06, "loss": 13.1406, "step": 11804 }, { "epoch": 0.784020721259215, "grad_norm": 147.44126892089844, "learning_rate": 1.3879187519953787e-06, "loss": 14.4219, "step": 11805 }, { "epoch": 0.7840871355515707, "grad_norm": 326.1698303222656, "learning_rate": 1.3878196198047954e-06, "loss": 18.3594, "step": 11806 }, { "epoch": 0.7841535498439264, "grad_norm": 151.73782348632812, "learning_rate": 1.3877204831281378e-06, "loss": 10.7969, "step": 11807 }, { "epoch": 0.7842199641362821, "grad_norm": 272.46514892578125, "learning_rate": 1.3876213419665536e-06, "loss": 20.2188, "step": 11808 }, { "epoch": 0.7842863784286378, "grad_norm": 202.55152893066406, "learning_rate": 1.387522196321189e-06, "loss": 16.4688, "step": 11809 }, { "epoch": 0.7843527927209936, "grad_norm": 214.77593994140625, "learning_rate": 1.3874230461931915e-06, "loss": 16.0938, "step": 11810 }, { "epoch": 0.7844192070133492, "grad_norm": 90.5692367553711, "learning_rate": 1.3873238915837073e-06, "loss": 14.4141, "step": 11811 }, { "epoch": 0.784485621305705, "grad_norm": 172.04898071289062, "learning_rate": 1.3872247324938835e-06, "loss": 19.2188, "step": 11812 }, { "epoch": 0.7845520355980607, "grad_norm": 278.2585754394531, "learning_rate": 1.3871255689248673e-06, "loss": 18.2812, "step": 11813 }, { "epoch": 0.7846184498904164, "grad_norm": 228.90785217285156, "learning_rate": 1.3870264008778061e-06, "loss": 19.4531, "step": 11814 }, { "epoch": 0.7846848641827722, "grad_norm": 186.62803649902344, "learning_rate": 1.3869272283538464e-06, "loss": 14.3594, "step": 11815 }, { "epoch": 0.7847512784751278, "grad_norm": 103.52276611328125, "learning_rate": 1.386828051354136e-06, "loss": 15.9531, "step": 11816 }, { "epoch": 0.7848176927674836, "grad_norm": 209.6914825439453, "learning_rate": 1.3867288698798212e-06, "loss": 19.7812, "step": 11817 }, { "epoch": 0.7848841070598392, "grad_norm": 171.31707763671875, "learning_rate": 1.3866296839320505e-06, "loss": 16.5625, "step": 11818 }, { "epoch": 0.784950521352195, "grad_norm": 121.63468170166016, "learning_rate": 1.38653049351197e-06, "loss": 15.3125, "step": 11819 }, { "epoch": 0.7850169356445507, "grad_norm": 227.11190795898438, "learning_rate": 1.3864312986207278e-06, "loss": 16.7969, "step": 11820 }, { "epoch": 0.7850833499369064, "grad_norm": 1087.8843994140625, "learning_rate": 1.3863320992594712e-06, "loss": 20.0625, "step": 11821 }, { "epoch": 0.7851497642292622, "grad_norm": 287.00189208984375, "learning_rate": 1.3862328954293476e-06, "loss": 25.625, "step": 11822 }, { "epoch": 0.7852161785216178, "grad_norm": 163.2241668701172, "learning_rate": 1.3861336871315045e-06, "loss": 14.25, "step": 11823 }, { "epoch": 0.7852825928139736, "grad_norm": 508.98773193359375, "learning_rate": 1.3860344743670897e-06, "loss": 18.4062, "step": 11824 }, { "epoch": 0.7853490071063293, "grad_norm": 136.9077911376953, "learning_rate": 1.385935257137251e-06, "loss": 20.5, "step": 11825 }, { "epoch": 0.785415421398685, "grad_norm": 202.54696655273438, "learning_rate": 1.3858360354431353e-06, "loss": 22.3438, "step": 11826 }, { "epoch": 0.7854818356910407, "grad_norm": 579.695556640625, "learning_rate": 1.385736809285891e-06, "loss": 15.0312, "step": 11827 }, { "epoch": 0.7855482499833965, "grad_norm": 213.09742736816406, "learning_rate": 1.3856375786666656e-06, "loss": 22.4375, "step": 11828 }, { "epoch": 0.7856146642757521, "grad_norm": 205.94248962402344, "learning_rate": 1.3855383435866076e-06, "loss": 29.4375, "step": 11829 }, { "epoch": 0.7856810785681079, "grad_norm": 156.0457763671875, "learning_rate": 1.3854391040468639e-06, "loss": 15.6875, "step": 11830 }, { "epoch": 0.7857474928604635, "grad_norm": 1329.275634765625, "learning_rate": 1.3853398600485827e-06, "loss": 20.2656, "step": 11831 }, { "epoch": 0.7858139071528193, "grad_norm": 302.26617431640625, "learning_rate": 1.3852406115929124e-06, "loss": 18.6562, "step": 11832 }, { "epoch": 0.7858803214451751, "grad_norm": 157.6507568359375, "learning_rate": 1.385141358681001e-06, "loss": 15.0, "step": 11833 }, { "epoch": 0.7859467357375307, "grad_norm": 204.3308563232422, "learning_rate": 1.3850421013139964e-06, "loss": 13.4219, "step": 11834 }, { "epoch": 0.7860131500298865, "grad_norm": 184.66973876953125, "learning_rate": 1.3849428394930466e-06, "loss": 15.3594, "step": 11835 }, { "epoch": 0.7860795643222421, "grad_norm": 489.9434814453125, "learning_rate": 1.3848435732193003e-06, "loss": 17.5625, "step": 11836 }, { "epoch": 0.7861459786145979, "grad_norm": 145.17196655273438, "learning_rate": 1.3847443024939052e-06, "loss": 14.7969, "step": 11837 }, { "epoch": 0.7862123929069535, "grad_norm": 182.0762481689453, "learning_rate": 1.3846450273180099e-06, "loss": 15.8906, "step": 11838 }, { "epoch": 0.7862788071993093, "grad_norm": 279.30718994140625, "learning_rate": 1.3845457476927627e-06, "loss": 17.8438, "step": 11839 }, { "epoch": 0.786345221491665, "grad_norm": 792.441650390625, "learning_rate": 1.3844464636193123e-06, "loss": 19.6094, "step": 11840 }, { "epoch": 0.7864116357840207, "grad_norm": 568.2211303710938, "learning_rate": 1.3843471750988068e-06, "loss": 14.9375, "step": 11841 }, { "epoch": 0.7864780500763764, "grad_norm": 184.056396484375, "learning_rate": 1.3842478821323945e-06, "loss": 18.0469, "step": 11842 }, { "epoch": 0.7865444643687322, "grad_norm": 251.7788848876953, "learning_rate": 1.3841485847212247e-06, "loss": 15.0469, "step": 11843 }, { "epoch": 0.7866108786610879, "grad_norm": 177.6350860595703, "learning_rate": 1.3840492828664453e-06, "loss": 16.0781, "step": 11844 }, { "epoch": 0.7866772929534436, "grad_norm": 209.13865661621094, "learning_rate": 1.3839499765692053e-06, "loss": 17.8047, "step": 11845 }, { "epoch": 0.7867437072457993, "grad_norm": 196.6364288330078, "learning_rate": 1.3838506658306533e-06, "loss": 23.9531, "step": 11846 }, { "epoch": 0.786810121538155, "grad_norm": 234.79327392578125, "learning_rate": 1.3837513506519383e-06, "loss": 22.875, "step": 11847 }, { "epoch": 0.7868765358305108, "grad_norm": 176.10354614257812, "learning_rate": 1.383652031034209e-06, "loss": 19.6875, "step": 11848 }, { "epoch": 0.7869429501228664, "grad_norm": 264.9471740722656, "learning_rate": 1.3835527069786137e-06, "loss": 20.7344, "step": 11849 }, { "epoch": 0.7870093644152222, "grad_norm": 270.72320556640625, "learning_rate": 1.3834533784863024e-06, "loss": 17.5156, "step": 11850 }, { "epoch": 0.7870757787075778, "grad_norm": 147.81787109375, "learning_rate": 1.3833540455584234e-06, "loss": 21.3906, "step": 11851 }, { "epoch": 0.7871421929999336, "grad_norm": 130.4127655029297, "learning_rate": 1.3832547081961258e-06, "loss": 18.0156, "step": 11852 }, { "epoch": 0.7872086072922893, "grad_norm": 186.41021728515625, "learning_rate": 1.383155366400559e-06, "loss": 22.0312, "step": 11853 }, { "epoch": 0.787275021584645, "grad_norm": 179.9949493408203, "learning_rate": 1.3830560201728717e-06, "loss": 21.6562, "step": 11854 }, { "epoch": 0.7873414358770008, "grad_norm": 351.8603820800781, "learning_rate": 1.382956669514213e-06, "loss": 27.6875, "step": 11855 }, { "epoch": 0.7874078501693564, "grad_norm": 571.8582763671875, "learning_rate": 1.3828573144257324e-06, "loss": 15.875, "step": 11856 }, { "epoch": 0.7874742644617122, "grad_norm": 128.753173828125, "learning_rate": 1.3827579549085795e-06, "loss": 13.9844, "step": 11857 }, { "epoch": 0.7875406787540679, "grad_norm": 421.3908996582031, "learning_rate": 1.3826585909639032e-06, "loss": 17.8906, "step": 11858 }, { "epoch": 0.7876070930464236, "grad_norm": 249.63427734375, "learning_rate": 1.3825592225928528e-06, "loss": 13.0156, "step": 11859 }, { "epoch": 0.7876735073387793, "grad_norm": 73.7547607421875, "learning_rate": 1.382459849796578e-06, "loss": 10.375, "step": 11860 }, { "epoch": 0.787739921631135, "grad_norm": 366.2244873046875, "learning_rate": 1.3823604725762283e-06, "loss": 17.4219, "step": 11861 }, { "epoch": 0.7878063359234907, "grad_norm": 198.00631713867188, "learning_rate": 1.3822610909329528e-06, "loss": 17.1406, "step": 11862 }, { "epoch": 0.7878727502158465, "grad_norm": 254.4634246826172, "learning_rate": 1.3821617048679018e-06, "loss": 23.7656, "step": 11863 }, { "epoch": 0.7879391645082021, "grad_norm": 152.1367950439453, "learning_rate": 1.3820623143822246e-06, "loss": 16.3438, "step": 11864 }, { "epoch": 0.7880055788005579, "grad_norm": 165.18748474121094, "learning_rate": 1.3819629194770705e-06, "loss": 15.9766, "step": 11865 }, { "epoch": 0.7880719930929136, "grad_norm": 233.00303649902344, "learning_rate": 1.3818635201535898e-06, "loss": 14.9531, "step": 11866 }, { "epoch": 0.7881384073852693, "grad_norm": 184.0866241455078, "learning_rate": 1.381764116412932e-06, "loss": 17.9531, "step": 11867 }, { "epoch": 0.7882048216776251, "grad_norm": 308.1591491699219, "learning_rate": 1.381664708256247e-06, "loss": 17.8281, "step": 11868 }, { "epoch": 0.7882712359699807, "grad_norm": 314.5524597167969, "learning_rate": 1.3815652956846851e-06, "loss": 15.4531, "step": 11869 }, { "epoch": 0.7883376502623365, "grad_norm": 917.4020385742188, "learning_rate": 1.3814658786993957e-06, "loss": 15.4688, "step": 11870 }, { "epoch": 0.7884040645546921, "grad_norm": 116.71244812011719, "learning_rate": 1.3813664573015287e-06, "loss": 19.0, "step": 11871 }, { "epoch": 0.7884704788470479, "grad_norm": 295.2850341796875, "learning_rate": 1.3812670314922347e-06, "loss": 15.2188, "step": 11872 }, { "epoch": 0.7885368931394036, "grad_norm": 184.6157684326172, "learning_rate": 1.3811676012726634e-06, "loss": 12.375, "step": 11873 }, { "epoch": 0.7886033074317593, "grad_norm": 140.46115112304688, "learning_rate": 1.3810681666439652e-06, "loss": 18.4062, "step": 11874 }, { "epoch": 0.788669721724115, "grad_norm": 223.81324768066406, "learning_rate": 1.3809687276072902e-06, "loss": 14.1875, "step": 11875 }, { "epoch": 0.7887361360164707, "grad_norm": 148.3133544921875, "learning_rate": 1.3808692841637886e-06, "loss": 11.8906, "step": 11876 }, { "epoch": 0.7888025503088265, "grad_norm": 511.83331298828125, "learning_rate": 1.3807698363146107e-06, "loss": 27.0, "step": 11877 }, { "epoch": 0.7888689646011822, "grad_norm": 472.23681640625, "learning_rate": 1.3806703840609069e-06, "loss": 19.0938, "step": 11878 }, { "epoch": 0.7889353788935379, "grad_norm": 490.16192626953125, "learning_rate": 1.3805709274038275e-06, "loss": 10.4531, "step": 11879 }, { "epoch": 0.7890017931858936, "grad_norm": 205.986328125, "learning_rate": 1.3804714663445234e-06, "loss": 20.7812, "step": 11880 }, { "epoch": 0.7890682074782494, "grad_norm": 637.317626953125, "learning_rate": 1.3803720008841444e-06, "loss": 21.3594, "step": 11881 }, { "epoch": 0.789134621770605, "grad_norm": 207.71331787109375, "learning_rate": 1.3802725310238414e-06, "loss": 20.1875, "step": 11882 }, { "epoch": 0.7892010360629608, "grad_norm": 114.90843200683594, "learning_rate": 1.3801730567647653e-06, "loss": 15.9375, "step": 11883 }, { "epoch": 0.7892674503553164, "grad_norm": 341.6466064453125, "learning_rate": 1.3800735781080666e-06, "loss": 18.0781, "step": 11884 }, { "epoch": 0.7893338646476722, "grad_norm": 201.52230834960938, "learning_rate": 1.3799740950548956e-06, "loss": 20.0625, "step": 11885 }, { "epoch": 0.7894002789400278, "grad_norm": 274.0556945800781, "learning_rate": 1.3798746076064034e-06, "loss": 18.6562, "step": 11886 }, { "epoch": 0.7894666932323836, "grad_norm": 135.8152313232422, "learning_rate": 1.379775115763741e-06, "loss": 20.0312, "step": 11887 }, { "epoch": 0.7895331075247394, "grad_norm": 564.8428344726562, "learning_rate": 1.3796756195280588e-06, "loss": 18.1562, "step": 11888 }, { "epoch": 0.789599521817095, "grad_norm": 565.3349609375, "learning_rate": 1.3795761189005081e-06, "loss": 12.4688, "step": 11889 }, { "epoch": 0.7896659361094508, "grad_norm": 352.91131591796875, "learning_rate": 1.3794766138822397e-06, "loss": 13.625, "step": 11890 }, { "epoch": 0.7897323504018064, "grad_norm": 241.7978515625, "learning_rate": 1.3793771044744048e-06, "loss": 14.3359, "step": 11891 }, { "epoch": 0.7897987646941622, "grad_norm": 313.4118347167969, "learning_rate": 1.379277590678154e-06, "loss": 19.9062, "step": 11892 }, { "epoch": 0.7898651789865179, "grad_norm": 235.65402221679688, "learning_rate": 1.3791780724946392e-06, "loss": 15.1094, "step": 11893 }, { "epoch": 0.7899315932788736, "grad_norm": 130.86094665527344, "learning_rate": 1.3790785499250109e-06, "loss": 13.8906, "step": 11894 }, { "epoch": 0.7899980075712293, "grad_norm": 172.7331085205078, "learning_rate": 1.3789790229704204e-06, "loss": 22.3906, "step": 11895 }, { "epoch": 0.790064421863585, "grad_norm": 245.49346923828125, "learning_rate": 1.3788794916320192e-06, "loss": 23.1719, "step": 11896 }, { "epoch": 0.7901308361559407, "grad_norm": 184.75205993652344, "learning_rate": 1.3787799559109586e-06, "loss": 16.5, "step": 11897 }, { "epoch": 0.7901972504482965, "grad_norm": 275.7470397949219, "learning_rate": 1.3786804158083896e-06, "loss": 21.2188, "step": 11898 }, { "epoch": 0.7902636647406522, "grad_norm": 390.3144836425781, "learning_rate": 1.3785808713254641e-06, "loss": 20.7656, "step": 11899 }, { "epoch": 0.7903300790330079, "grad_norm": 208.3260498046875, "learning_rate": 1.3784813224633334e-06, "loss": 19.7188, "step": 11900 }, { "epoch": 0.7903964933253637, "grad_norm": 163.69163513183594, "learning_rate": 1.3783817692231488e-06, "loss": 18.3125, "step": 11901 }, { "epoch": 0.7904629076177193, "grad_norm": 409.0364074707031, "learning_rate": 1.3782822116060624e-06, "loss": 22.3125, "step": 11902 }, { "epoch": 0.7905293219100751, "grad_norm": 254.55023193359375, "learning_rate": 1.3781826496132252e-06, "loss": 16.875, "step": 11903 }, { "epoch": 0.7905957362024307, "grad_norm": 177.6536865234375, "learning_rate": 1.3780830832457894e-06, "loss": 13.625, "step": 11904 }, { "epoch": 0.7906621504947865, "grad_norm": 149.89187622070312, "learning_rate": 1.3779835125049065e-06, "loss": 15.875, "step": 11905 }, { "epoch": 0.7907285647871422, "grad_norm": 655.9717407226562, "learning_rate": 1.3778839373917282e-06, "loss": 26.875, "step": 11906 }, { "epoch": 0.7907949790794979, "grad_norm": 583.1111450195312, "learning_rate": 1.3777843579074064e-06, "loss": 23.0625, "step": 11907 }, { "epoch": 0.7908613933718536, "grad_norm": 127.81324768066406, "learning_rate": 1.377684774053093e-06, "loss": 11.5234, "step": 11908 }, { "epoch": 0.7909278076642093, "grad_norm": 304.1695556640625, "learning_rate": 1.3775851858299401e-06, "loss": 17.2969, "step": 11909 }, { "epoch": 0.7909942219565651, "grad_norm": 321.2257385253906, "learning_rate": 1.3774855932390993e-06, "loss": 13.0781, "step": 11910 }, { "epoch": 0.7910606362489208, "grad_norm": 409.398681640625, "learning_rate": 1.377385996281723e-06, "loss": 18.4531, "step": 11911 }, { "epoch": 0.7911270505412765, "grad_norm": 145.3050537109375, "learning_rate": 1.3772863949589628e-06, "loss": 16.0156, "step": 11912 }, { "epoch": 0.7911934648336322, "grad_norm": 181.95492553710938, "learning_rate": 1.3771867892719712e-06, "loss": 17.7812, "step": 11913 }, { "epoch": 0.7912598791259879, "grad_norm": 221.88137817382812, "learning_rate": 1.3770871792219005e-06, "loss": 18.4844, "step": 11914 }, { "epoch": 0.7913262934183436, "grad_norm": 398.0245361328125, "learning_rate": 1.3769875648099027e-06, "loss": 16.8594, "step": 11915 }, { "epoch": 0.7913927077106994, "grad_norm": 368.9538879394531, "learning_rate": 1.3768879460371303e-06, "loss": 18.7656, "step": 11916 }, { "epoch": 0.791459122003055, "grad_norm": 252.0264434814453, "learning_rate": 1.3767883229047353e-06, "loss": 17.4688, "step": 11917 }, { "epoch": 0.7915255362954108, "grad_norm": 319.62030029296875, "learning_rate": 1.37668869541387e-06, "loss": 18.4688, "step": 11918 }, { "epoch": 0.7915919505877664, "grad_norm": 311.743408203125, "learning_rate": 1.3765890635656875e-06, "loss": 18.375, "step": 11919 }, { "epoch": 0.7916583648801222, "grad_norm": 100.55089569091797, "learning_rate": 1.3764894273613398e-06, "loss": 13.8594, "step": 11920 }, { "epoch": 0.791724779172478, "grad_norm": 237.7532196044922, "learning_rate": 1.3763897868019791e-06, "loss": 20.7812, "step": 11921 }, { "epoch": 0.7917911934648336, "grad_norm": 370.19012451171875, "learning_rate": 1.3762901418887588e-06, "loss": 15.875, "step": 11922 }, { "epoch": 0.7918576077571894, "grad_norm": 422.1231384277344, "learning_rate": 1.3761904926228306e-06, "loss": 28.6719, "step": 11923 }, { "epoch": 0.791924022049545, "grad_norm": 118.7989501953125, "learning_rate": 1.3760908390053483e-06, "loss": 15.0625, "step": 11924 }, { "epoch": 0.7919904363419008, "grad_norm": 280.5617980957031, "learning_rate": 1.375991181037464e-06, "loss": 16.0469, "step": 11925 }, { "epoch": 0.7920568506342565, "grad_norm": 138.78907775878906, "learning_rate": 1.3758915187203303e-06, "loss": 17.3281, "step": 11926 }, { "epoch": 0.7921232649266122, "grad_norm": 226.01148986816406, "learning_rate": 1.3757918520551002e-06, "loss": 18.0938, "step": 11927 }, { "epoch": 0.7921896792189679, "grad_norm": 384.9094543457031, "learning_rate": 1.375692181042927e-06, "loss": 20.625, "step": 11928 }, { "epoch": 0.7922560935113236, "grad_norm": 138.05052185058594, "learning_rate": 1.3755925056849632e-06, "loss": 14.6406, "step": 11929 }, { "epoch": 0.7923225078036793, "grad_norm": 227.82879638671875, "learning_rate": 1.3754928259823614e-06, "loss": 17.7656, "step": 11930 }, { "epoch": 0.7923889220960351, "grad_norm": 113.13165283203125, "learning_rate": 1.3753931419362755e-06, "loss": 16.8125, "step": 11931 }, { "epoch": 0.7924553363883908, "grad_norm": 283.260009765625, "learning_rate": 1.3752934535478584e-06, "loss": 13.4688, "step": 11932 }, { "epoch": 0.7925217506807465, "grad_norm": 211.5830841064453, "learning_rate": 1.3751937608182626e-06, "loss": 21.625, "step": 11933 }, { "epoch": 0.7925881649731022, "grad_norm": 329.3984680175781, "learning_rate": 1.375094063748642e-06, "loss": 12.7812, "step": 11934 }, { "epoch": 0.7926545792654579, "grad_norm": 885.050537109375, "learning_rate": 1.3749943623401494e-06, "loss": 21.0, "step": 11935 }, { "epoch": 0.7927209935578137, "grad_norm": 214.17369079589844, "learning_rate": 1.3748946565939385e-06, "loss": 17.7188, "step": 11936 }, { "epoch": 0.7927874078501693, "grad_norm": 370.07879638671875, "learning_rate": 1.3747949465111622e-06, "loss": 15.8906, "step": 11937 }, { "epoch": 0.7928538221425251, "grad_norm": 122.00337219238281, "learning_rate": 1.374695232092974e-06, "loss": 12.8125, "step": 11938 }, { "epoch": 0.7929202364348807, "grad_norm": 410.6449279785156, "learning_rate": 1.3745955133405276e-06, "loss": 21.9844, "step": 11939 }, { "epoch": 0.7929866507272365, "grad_norm": 116.34773254394531, "learning_rate": 1.374495790254976e-06, "loss": 15.1094, "step": 11940 }, { "epoch": 0.7930530650195922, "grad_norm": 168.70455932617188, "learning_rate": 1.3743960628374734e-06, "loss": 14.875, "step": 11941 }, { "epoch": 0.7931194793119479, "grad_norm": 361.9840393066406, "learning_rate": 1.3742963310891727e-06, "loss": 16.9844, "step": 11942 }, { "epoch": 0.7931858936043037, "grad_norm": 355.81524658203125, "learning_rate": 1.3741965950112282e-06, "loss": 21.5625, "step": 11943 }, { "epoch": 0.7932523078966593, "grad_norm": 209.09483337402344, "learning_rate": 1.3740968546047933e-06, "loss": 19.4375, "step": 11944 }, { "epoch": 0.7933187221890151, "grad_norm": 204.50486755371094, "learning_rate": 1.3739971098710212e-06, "loss": 17.8906, "step": 11945 }, { "epoch": 0.7933851364813708, "grad_norm": 210.7483673095703, "learning_rate": 1.3738973608110667e-06, "loss": 19.1875, "step": 11946 }, { "epoch": 0.7934515507737265, "grad_norm": 342.2593688964844, "learning_rate": 1.3737976074260832e-06, "loss": 23.5938, "step": 11947 }, { "epoch": 0.7935179650660822, "grad_norm": 172.2272491455078, "learning_rate": 1.373697849717224e-06, "loss": 16.9688, "step": 11948 }, { "epoch": 0.793584379358438, "grad_norm": 111.7103042602539, "learning_rate": 1.373598087685644e-06, "loss": 14.5781, "step": 11949 }, { "epoch": 0.7936507936507936, "grad_norm": 188.7451629638672, "learning_rate": 1.3734983213324967e-06, "loss": 15.2969, "step": 11950 }, { "epoch": 0.7937172079431494, "grad_norm": 208.73562622070312, "learning_rate": 1.3733985506589358e-06, "loss": 17.4844, "step": 11951 }, { "epoch": 0.793783622235505, "grad_norm": 388.2973937988281, "learning_rate": 1.373298775666116e-06, "loss": 18.5156, "step": 11952 }, { "epoch": 0.7938500365278608, "grad_norm": 155.34115600585938, "learning_rate": 1.3731989963551914e-06, "loss": 13.7812, "step": 11953 }, { "epoch": 0.7939164508202166, "grad_norm": 385.78375244140625, "learning_rate": 1.373099212727316e-06, "loss": 23.2656, "step": 11954 }, { "epoch": 0.7939828651125722, "grad_norm": 209.40884399414062, "learning_rate": 1.3729994247836438e-06, "loss": 18.6875, "step": 11955 }, { "epoch": 0.794049279404928, "grad_norm": 389.11053466796875, "learning_rate": 1.3728996325253296e-06, "loss": 22.9688, "step": 11956 }, { "epoch": 0.7941156936972836, "grad_norm": 193.30503845214844, "learning_rate": 1.3727998359535272e-06, "loss": 19.2656, "step": 11957 }, { "epoch": 0.7941821079896394, "grad_norm": 214.32228088378906, "learning_rate": 1.3727000350693915e-06, "loss": 17.3906, "step": 11958 }, { "epoch": 0.794248522281995, "grad_norm": 182.3995361328125, "learning_rate": 1.3726002298740765e-06, "loss": 14.3438, "step": 11959 }, { "epoch": 0.7943149365743508, "grad_norm": 158.4938201904297, "learning_rate": 1.3725004203687372e-06, "loss": 12.4062, "step": 11960 }, { "epoch": 0.7943813508667065, "grad_norm": 135.0738067626953, "learning_rate": 1.3724006065545275e-06, "loss": 13.8906, "step": 11961 }, { "epoch": 0.7944477651590622, "grad_norm": 260.7838439941406, "learning_rate": 1.3723007884326023e-06, "loss": 14.5156, "step": 11962 }, { "epoch": 0.7945141794514179, "grad_norm": 100.80319213867188, "learning_rate": 1.3722009660041165e-06, "loss": 13.7188, "step": 11963 }, { "epoch": 0.7945805937437737, "grad_norm": 130.40443420410156, "learning_rate": 1.3721011392702246e-06, "loss": 16.2344, "step": 11964 }, { "epoch": 0.7946470080361294, "grad_norm": 232.9629364013672, "learning_rate": 1.3720013082320811e-06, "loss": 15.4531, "step": 11965 }, { "epoch": 0.7947134223284851, "grad_norm": 266.16387939453125, "learning_rate": 1.3719014728908408e-06, "loss": 24.125, "step": 11966 }, { "epoch": 0.7947798366208408, "grad_norm": 208.89842224121094, "learning_rate": 1.3718016332476591e-06, "loss": 14.5, "step": 11967 }, { "epoch": 0.7948462509131965, "grad_norm": 2088.300537109375, "learning_rate": 1.3717017893036907e-06, "loss": 13.4844, "step": 11968 }, { "epoch": 0.7949126652055523, "grad_norm": 187.8250274658203, "learning_rate": 1.3716019410600899e-06, "loss": 17.0, "step": 11969 }, { "epoch": 0.7949790794979079, "grad_norm": 264.8330993652344, "learning_rate": 1.371502088518012e-06, "loss": 24.2812, "step": 11970 }, { "epoch": 0.7950454937902637, "grad_norm": 211.01712036132812, "learning_rate": 1.3714022316786124e-06, "loss": 22.3438, "step": 11971 }, { "epoch": 0.7951119080826193, "grad_norm": 217.73072814941406, "learning_rate": 1.371302370543046e-06, "loss": 14.7109, "step": 11972 }, { "epoch": 0.7951783223749751, "grad_norm": 320.3166809082031, "learning_rate": 1.3712025051124677e-06, "loss": 24.7969, "step": 11973 }, { "epoch": 0.7952447366673309, "grad_norm": 322.0607604980469, "learning_rate": 1.371102635388033e-06, "loss": 33.2188, "step": 11974 }, { "epoch": 0.7953111509596865, "grad_norm": 163.51002502441406, "learning_rate": 1.3710027613708971e-06, "loss": 14.6094, "step": 11975 }, { "epoch": 0.7953775652520423, "grad_norm": 409.720458984375, "learning_rate": 1.3709028830622148e-06, "loss": 14.9688, "step": 11976 }, { "epoch": 0.7954439795443979, "grad_norm": 138.61561584472656, "learning_rate": 1.3708030004631423e-06, "loss": 17.9844, "step": 11977 }, { "epoch": 0.7955103938367537, "grad_norm": 406.2828063964844, "learning_rate": 1.370703113574834e-06, "loss": 22.375, "step": 11978 }, { "epoch": 0.7955768081291094, "grad_norm": 157.93113708496094, "learning_rate": 1.3706032223984465e-06, "loss": 15.8281, "step": 11979 }, { "epoch": 0.7956432224214651, "grad_norm": 206.4170379638672, "learning_rate": 1.3705033269351342e-06, "loss": 19.7188, "step": 11980 }, { "epoch": 0.7957096367138208, "grad_norm": 267.1660461425781, "learning_rate": 1.3704034271860532e-06, "loss": 21.0156, "step": 11981 }, { "epoch": 0.7957760510061765, "grad_norm": 278.5445861816406, "learning_rate": 1.370303523152359e-06, "loss": 21.1719, "step": 11982 }, { "epoch": 0.7958424652985322, "grad_norm": 298.610107421875, "learning_rate": 1.3702036148352069e-06, "loss": 14.4062, "step": 11983 }, { "epoch": 0.795908879590888, "grad_norm": 165.48007202148438, "learning_rate": 1.3701037022357529e-06, "loss": 12.3203, "step": 11984 }, { "epoch": 0.7959752938832437, "grad_norm": 369.2441711425781, "learning_rate": 1.3700037853551527e-06, "loss": 15.9531, "step": 11985 }, { "epoch": 0.7960417081755994, "grad_norm": 187.7113494873047, "learning_rate": 1.3699038641945625e-06, "loss": 19.0625, "step": 11986 }, { "epoch": 0.7961081224679551, "grad_norm": 247.3917694091797, "learning_rate": 1.3698039387551372e-06, "loss": 18.0469, "step": 11987 }, { "epoch": 0.7961745367603108, "grad_norm": 522.4981689453125, "learning_rate": 1.369704009038033e-06, "loss": 25.4688, "step": 11988 }, { "epoch": 0.7962409510526666, "grad_norm": 212.9339141845703, "learning_rate": 1.3696040750444066e-06, "loss": 16.3125, "step": 11989 }, { "epoch": 0.7963073653450222, "grad_norm": 325.90118408203125, "learning_rate": 1.3695041367754133e-06, "loss": 23.2969, "step": 11990 }, { "epoch": 0.796373779637378, "grad_norm": 345.6023254394531, "learning_rate": 1.3694041942322087e-06, "loss": 17.4219, "step": 11991 }, { "epoch": 0.7964401939297336, "grad_norm": 186.9369354248047, "learning_rate": 1.3693042474159497e-06, "loss": 16.4062, "step": 11992 }, { "epoch": 0.7965066082220894, "grad_norm": 273.5599365234375, "learning_rate": 1.3692042963277923e-06, "loss": 15.6562, "step": 11993 }, { "epoch": 0.7965730225144451, "grad_norm": 1580.5765380859375, "learning_rate": 1.3691043409688922e-06, "loss": 18.25, "step": 11994 }, { "epoch": 0.7966394368068008, "grad_norm": 150.8342742919922, "learning_rate": 1.3690043813404063e-06, "loss": 13.8594, "step": 11995 }, { "epoch": 0.7967058510991566, "grad_norm": 306.3665466308594, "learning_rate": 1.3689044174434901e-06, "loss": 22.5, "step": 11996 }, { "epoch": 0.7967722653915122, "grad_norm": 191.13873291015625, "learning_rate": 1.3688044492793005e-06, "loss": 22.4219, "step": 11997 }, { "epoch": 0.796838679683868, "grad_norm": 169.200439453125, "learning_rate": 1.3687044768489935e-06, "loss": 16.3125, "step": 11998 }, { "epoch": 0.7969050939762237, "grad_norm": 287.7324523925781, "learning_rate": 1.3686045001537258e-06, "loss": 19.0156, "step": 11999 }, { "epoch": 0.7969715082685794, "grad_norm": 199.8636016845703, "learning_rate": 1.368504519194654e-06, "loss": 14.6875, "step": 12000 }, { "epoch": 0.7970379225609351, "grad_norm": 151.3450927734375, "learning_rate": 1.3684045339729344e-06, "loss": 16.8125, "step": 12001 }, { "epoch": 0.7971043368532909, "grad_norm": 158.105224609375, "learning_rate": 1.3683045444897234e-06, "loss": 13.5312, "step": 12002 }, { "epoch": 0.7971707511456465, "grad_norm": 118.84021759033203, "learning_rate": 1.3682045507461779e-06, "loss": 15.2656, "step": 12003 }, { "epoch": 0.7972371654380023, "grad_norm": 433.4229736328125, "learning_rate": 1.3681045527434544e-06, "loss": 20.5312, "step": 12004 }, { "epoch": 0.7973035797303579, "grad_norm": 253.84149169921875, "learning_rate": 1.3680045504827097e-06, "loss": 18.125, "step": 12005 }, { "epoch": 0.7973699940227137, "grad_norm": 183.69747924804688, "learning_rate": 1.3679045439651008e-06, "loss": 23.25, "step": 12006 }, { "epoch": 0.7974364083150695, "grad_norm": 146.71824645996094, "learning_rate": 1.3678045331917842e-06, "loss": 14.5469, "step": 12007 }, { "epoch": 0.7975028226074251, "grad_norm": 119.69750213623047, "learning_rate": 1.3677045181639167e-06, "loss": 13.9219, "step": 12008 }, { "epoch": 0.7975692368997809, "grad_norm": 184.6914825439453, "learning_rate": 1.3676044988826554e-06, "loss": 14.5938, "step": 12009 }, { "epoch": 0.7976356511921365, "grad_norm": 208.89895629882812, "learning_rate": 1.3675044753491572e-06, "loss": 15.9531, "step": 12010 }, { "epoch": 0.7977020654844923, "grad_norm": 195.71954345703125, "learning_rate": 1.3674044475645792e-06, "loss": 19.0156, "step": 12011 }, { "epoch": 0.797768479776848, "grad_norm": 297.6788024902344, "learning_rate": 1.3673044155300783e-06, "loss": 21.9688, "step": 12012 }, { "epoch": 0.7978348940692037, "grad_norm": 216.02606201171875, "learning_rate": 1.3672043792468118e-06, "loss": 16.3125, "step": 12013 }, { "epoch": 0.7979013083615594, "grad_norm": 179.21192932128906, "learning_rate": 1.3671043387159369e-06, "loss": 17.0312, "step": 12014 }, { "epoch": 0.7979677226539151, "grad_norm": 238.1484832763672, "learning_rate": 1.3670042939386106e-06, "loss": 16.375, "step": 12015 }, { "epoch": 0.7980341369462708, "grad_norm": 161.62551879882812, "learning_rate": 1.3669042449159903e-06, "loss": 20.0781, "step": 12016 }, { "epoch": 0.7981005512386266, "grad_norm": 426.7978515625, "learning_rate": 1.366804191649233e-06, "loss": 27.4375, "step": 12017 }, { "epoch": 0.7981669655309823, "grad_norm": 335.5798034667969, "learning_rate": 1.3667041341394967e-06, "loss": 17.8594, "step": 12018 }, { "epoch": 0.798233379823338, "grad_norm": 142.0304412841797, "learning_rate": 1.3666040723879385e-06, "loss": 17.1719, "step": 12019 }, { "epoch": 0.7982997941156937, "grad_norm": 200.78787231445312, "learning_rate": 1.3665040063957155e-06, "loss": 15.5938, "step": 12020 }, { "epoch": 0.7983662084080494, "grad_norm": 522.1283569335938, "learning_rate": 1.3664039361639854e-06, "loss": 26.75, "step": 12021 }, { "epoch": 0.7984326227004052, "grad_norm": 117.59667205810547, "learning_rate": 1.366303861693906e-06, "loss": 15.8281, "step": 12022 }, { "epoch": 0.7984990369927608, "grad_norm": 168.6320343017578, "learning_rate": 1.3662037829866347e-06, "loss": 17.875, "step": 12023 }, { "epoch": 0.7985654512851166, "grad_norm": 248.79385375976562, "learning_rate": 1.3661037000433293e-06, "loss": 19.8906, "step": 12024 }, { "epoch": 0.7986318655774722, "grad_norm": 925.8536376953125, "learning_rate": 1.3660036128651475e-06, "loss": 23.9062, "step": 12025 }, { "epoch": 0.798698279869828, "grad_norm": 307.9432678222656, "learning_rate": 1.3659035214532466e-06, "loss": 18.6562, "step": 12026 }, { "epoch": 0.7987646941621837, "grad_norm": 280.2041015625, "learning_rate": 1.3658034258087851e-06, "loss": 20.5625, "step": 12027 }, { "epoch": 0.7988311084545394, "grad_norm": 128.4700164794922, "learning_rate": 1.3657033259329202e-06, "loss": 12.7344, "step": 12028 }, { "epoch": 0.7988975227468952, "grad_norm": 1085.2115478515625, "learning_rate": 1.3656032218268105e-06, "loss": 16.375, "step": 12029 }, { "epoch": 0.7989639370392508, "grad_norm": 227.31283569335938, "learning_rate": 1.3655031134916133e-06, "loss": 14.7969, "step": 12030 }, { "epoch": 0.7990303513316066, "grad_norm": 386.27606201171875, "learning_rate": 1.3654030009284868e-06, "loss": 24.75, "step": 12031 }, { "epoch": 0.7990967656239623, "grad_norm": 226.16998291015625, "learning_rate": 1.3653028841385892e-06, "loss": 19.0781, "step": 12032 }, { "epoch": 0.799163179916318, "grad_norm": 168.0480499267578, "learning_rate": 1.3652027631230782e-06, "loss": 19.5, "step": 12033 }, { "epoch": 0.7992295942086737, "grad_norm": 302.1679382324219, "learning_rate": 1.3651026378831123e-06, "loss": 22.5, "step": 12034 }, { "epoch": 0.7992960085010294, "grad_norm": 143.4818572998047, "learning_rate": 1.3650025084198501e-06, "loss": 16.875, "step": 12035 }, { "epoch": 0.7993624227933851, "grad_norm": 241.50294494628906, "learning_rate": 1.3649023747344491e-06, "loss": 12.3438, "step": 12036 }, { "epoch": 0.7994288370857409, "grad_norm": 145.86280822753906, "learning_rate": 1.3648022368280676e-06, "loss": 16.0, "step": 12037 }, { "epoch": 0.7994952513780965, "grad_norm": 494.9770202636719, "learning_rate": 1.3647020947018644e-06, "loss": 27.125, "step": 12038 }, { "epoch": 0.7995616656704523, "grad_norm": 354.4691162109375, "learning_rate": 1.3646019483569978e-06, "loss": 20.0625, "step": 12039 }, { "epoch": 0.799628079962808, "grad_norm": 161.4558563232422, "learning_rate": 1.3645017977946256e-06, "loss": 17.0781, "step": 12040 }, { "epoch": 0.7996944942551637, "grad_norm": 172.91575622558594, "learning_rate": 1.364401643015907e-06, "loss": 16.25, "step": 12041 }, { "epoch": 0.7997609085475195, "grad_norm": 313.1368103027344, "learning_rate": 1.3643014840220007e-06, "loss": 17.0625, "step": 12042 }, { "epoch": 0.7998273228398751, "grad_norm": 246.6273956298828, "learning_rate": 1.3642013208140646e-06, "loss": 14.75, "step": 12043 }, { "epoch": 0.7998937371322309, "grad_norm": 128.33738708496094, "learning_rate": 1.3641011533932575e-06, "loss": 26.5469, "step": 12044 }, { "epoch": 0.7999601514245865, "grad_norm": 154.2776336669922, "learning_rate": 1.3640009817607386e-06, "loss": 15.8594, "step": 12045 }, { "epoch": 0.8000265657169423, "grad_norm": 120.57052612304688, "learning_rate": 1.363900805917666e-06, "loss": 14.7969, "step": 12046 }, { "epoch": 0.800092980009298, "grad_norm": 199.3022918701172, "learning_rate": 1.3638006258651986e-06, "loss": 15.7031, "step": 12047 }, { "epoch": 0.8001593943016537, "grad_norm": 170.22186279296875, "learning_rate": 1.3637004416044954e-06, "loss": 17.3594, "step": 12048 }, { "epoch": 0.8002258085940094, "grad_norm": 108.30291748046875, "learning_rate": 1.3636002531367155e-06, "loss": 13.0781, "step": 12049 }, { "epoch": 0.8002922228863651, "grad_norm": 153.83731079101562, "learning_rate": 1.3635000604630174e-06, "loss": 14.1562, "step": 12050 }, { "epoch": 0.8003586371787209, "grad_norm": 228.03431701660156, "learning_rate": 1.36339986358456e-06, "loss": 19.1875, "step": 12051 }, { "epoch": 0.8004250514710766, "grad_norm": 261.0689392089844, "learning_rate": 1.3632996625025027e-06, "loss": 18.5, "step": 12052 }, { "epoch": 0.8004914657634323, "grad_norm": 180.56668090820312, "learning_rate": 1.3631994572180044e-06, "loss": 16.6562, "step": 12053 }, { "epoch": 0.800557880055788, "grad_norm": 678.76953125, "learning_rate": 1.363099247732224e-06, "loss": 21.6562, "step": 12054 }, { "epoch": 0.8006242943481438, "grad_norm": 204.97998046875, "learning_rate": 1.362999034046321e-06, "loss": 13.5938, "step": 12055 }, { "epoch": 0.8006907086404994, "grad_norm": 210.68470764160156, "learning_rate": 1.3628988161614545e-06, "loss": 19.0156, "step": 12056 }, { "epoch": 0.8007571229328552, "grad_norm": 231.5872344970703, "learning_rate": 1.362798594078784e-06, "loss": 18.1406, "step": 12057 }, { "epoch": 0.8008235372252108, "grad_norm": 333.4286804199219, "learning_rate": 1.3626983677994684e-06, "loss": 23.8125, "step": 12058 }, { "epoch": 0.8008899515175666, "grad_norm": 295.61767578125, "learning_rate": 1.3625981373246669e-06, "loss": 16.1406, "step": 12059 }, { "epoch": 0.8009563658099222, "grad_norm": 204.08840942382812, "learning_rate": 1.3624979026555394e-06, "loss": 18.7656, "step": 12060 }, { "epoch": 0.801022780102278, "grad_norm": 150.09429931640625, "learning_rate": 1.3623976637932457e-06, "loss": 12.1094, "step": 12061 }, { "epoch": 0.8010891943946338, "grad_norm": 159.84886169433594, "learning_rate": 1.3622974207389442e-06, "loss": 18.9844, "step": 12062 }, { "epoch": 0.8011556086869894, "grad_norm": 191.11965942382812, "learning_rate": 1.362197173493795e-06, "loss": 19.3125, "step": 12063 }, { "epoch": 0.8012220229793452, "grad_norm": 154.31858825683594, "learning_rate": 1.3620969220589585e-06, "loss": 13.3125, "step": 12064 }, { "epoch": 0.8012884372717008, "grad_norm": 210.2828826904297, "learning_rate": 1.3619966664355929e-06, "loss": 17.5938, "step": 12065 }, { "epoch": 0.8013548515640566, "grad_norm": 251.4847412109375, "learning_rate": 1.3618964066248589e-06, "loss": 18.625, "step": 12066 }, { "epoch": 0.8014212658564123, "grad_norm": 107.88311767578125, "learning_rate": 1.3617961426279157e-06, "loss": 14.75, "step": 12067 }, { "epoch": 0.801487680148768, "grad_norm": 141.89691162109375, "learning_rate": 1.3616958744459238e-06, "loss": 18.125, "step": 12068 }, { "epoch": 0.8015540944411237, "grad_norm": 118.50537872314453, "learning_rate": 1.361595602080042e-06, "loss": 17.125, "step": 12069 }, { "epoch": 0.8016205087334795, "grad_norm": 221.09190368652344, "learning_rate": 1.3614953255314307e-06, "loss": 14.8281, "step": 12070 }, { "epoch": 0.8016869230258351, "grad_norm": 206.69094848632812, "learning_rate": 1.3613950448012506e-06, "loss": 17.5469, "step": 12071 }, { "epoch": 0.8017533373181909, "grad_norm": 152.88833618164062, "learning_rate": 1.3612947598906606e-06, "loss": 14.6875, "step": 12072 }, { "epoch": 0.8018197516105466, "grad_norm": 306.07275390625, "learning_rate": 1.3611944708008212e-06, "loss": 15.1719, "step": 12073 }, { "epoch": 0.8018861659029023, "grad_norm": 264.9368591308594, "learning_rate": 1.3610941775328923e-06, "loss": 20.3125, "step": 12074 }, { "epoch": 0.8019525801952581, "grad_norm": 155.1015625, "learning_rate": 1.3609938800880347e-06, "loss": 16.2969, "step": 12075 }, { "epoch": 0.8020189944876137, "grad_norm": 106.14888763427734, "learning_rate": 1.3608935784674076e-06, "loss": 15.2266, "step": 12076 }, { "epoch": 0.8020854087799695, "grad_norm": 171.8688507080078, "learning_rate": 1.3607932726721718e-06, "loss": 13.0, "step": 12077 }, { "epoch": 0.8021518230723251, "grad_norm": 242.36656188964844, "learning_rate": 1.3606929627034876e-06, "loss": 22.3906, "step": 12078 }, { "epoch": 0.8022182373646809, "grad_norm": 111.95326232910156, "learning_rate": 1.360592648562515e-06, "loss": 18.6094, "step": 12079 }, { "epoch": 0.8022846516570366, "grad_norm": 179.78689575195312, "learning_rate": 1.3604923302504146e-06, "loss": 15.375, "step": 12080 }, { "epoch": 0.8023510659493923, "grad_norm": 216.79100036621094, "learning_rate": 1.3603920077683468e-06, "loss": 14.6406, "step": 12081 }, { "epoch": 0.802417480241748, "grad_norm": 312.07623291015625, "learning_rate": 1.3602916811174724e-06, "loss": 19.5625, "step": 12082 }, { "epoch": 0.8024838945341037, "grad_norm": 172.99278259277344, "learning_rate": 1.360191350298951e-06, "loss": 12.9688, "step": 12083 }, { "epoch": 0.8025503088264595, "grad_norm": 705.0110473632812, "learning_rate": 1.360091015313944e-06, "loss": 25.8594, "step": 12084 }, { "epoch": 0.8026167231188152, "grad_norm": 183.4685821533203, "learning_rate": 1.359990676163612e-06, "loss": 19.0469, "step": 12085 }, { "epoch": 0.8026831374111709, "grad_norm": 271.20709228515625, "learning_rate": 1.3598903328491153e-06, "loss": 24.0938, "step": 12086 }, { "epoch": 0.8027495517035266, "grad_norm": 176.22816467285156, "learning_rate": 1.3597899853716146e-06, "loss": 18.75, "step": 12087 }, { "epoch": 0.8028159659958823, "grad_norm": 375.5419921875, "learning_rate": 1.359689633732271e-06, "loss": 19.2031, "step": 12088 }, { "epoch": 0.802882380288238, "grad_norm": 175.69192504882812, "learning_rate": 1.3595892779322451e-06, "loss": 15.5781, "step": 12089 }, { "epoch": 0.8029487945805938, "grad_norm": 566.1693115234375, "learning_rate": 1.359488917972698e-06, "loss": 14.5625, "step": 12090 }, { "epoch": 0.8030152088729494, "grad_norm": 412.1024475097656, "learning_rate": 1.3593885538547898e-06, "loss": 24.8125, "step": 12091 }, { "epoch": 0.8030816231653052, "grad_norm": 211.6450958251953, "learning_rate": 1.3592881855796825e-06, "loss": 16.3906, "step": 12092 }, { "epoch": 0.8031480374576608, "grad_norm": 264.03668212890625, "learning_rate": 1.3591878131485365e-06, "loss": 16.3438, "step": 12093 }, { "epoch": 0.8032144517500166, "grad_norm": 533.1929321289062, "learning_rate": 1.3590874365625129e-06, "loss": 24.8438, "step": 12094 }, { "epoch": 0.8032808660423724, "grad_norm": 128.84808349609375, "learning_rate": 1.3589870558227733e-06, "loss": 14.0781, "step": 12095 }, { "epoch": 0.803347280334728, "grad_norm": 313.880615234375, "learning_rate": 1.3588866709304783e-06, "loss": 25.9375, "step": 12096 }, { "epoch": 0.8034136946270838, "grad_norm": 294.1073913574219, "learning_rate": 1.3587862818867888e-06, "loss": 18.8906, "step": 12097 }, { "epoch": 0.8034801089194394, "grad_norm": 217.46124267578125, "learning_rate": 1.3586858886928667e-06, "loss": 14.8281, "step": 12098 }, { "epoch": 0.8035465232117952, "grad_norm": 381.3890075683594, "learning_rate": 1.3585854913498733e-06, "loss": 28.6875, "step": 12099 }, { "epoch": 0.8036129375041509, "grad_norm": 280.1849365234375, "learning_rate": 1.3584850898589696e-06, "loss": 14.2344, "step": 12100 }, { "epoch": 0.8036793517965066, "grad_norm": 211.4088134765625, "learning_rate": 1.358384684221317e-06, "loss": 21.7344, "step": 12101 }, { "epoch": 0.8037457660888623, "grad_norm": 346.400146484375, "learning_rate": 1.3582842744380769e-06, "loss": 29.75, "step": 12102 }, { "epoch": 0.803812180381218, "grad_norm": 241.31781005859375, "learning_rate": 1.3581838605104111e-06, "loss": 13.6719, "step": 12103 }, { "epoch": 0.8038785946735737, "grad_norm": 163.7884521484375, "learning_rate": 1.3580834424394812e-06, "loss": 19.2188, "step": 12104 }, { "epoch": 0.8039450089659295, "grad_norm": 336.7276611328125, "learning_rate": 1.357983020226448e-06, "loss": 16.7188, "step": 12105 }, { "epoch": 0.8040114232582852, "grad_norm": 186.2869873046875, "learning_rate": 1.357882593872474e-06, "loss": 15.1719, "step": 12106 }, { "epoch": 0.8040778375506409, "grad_norm": 177.75543212890625, "learning_rate": 1.3577821633787202e-06, "loss": 19.4688, "step": 12107 }, { "epoch": 0.8041442518429966, "grad_norm": 268.9951477050781, "learning_rate": 1.3576817287463488e-06, "loss": 20.6406, "step": 12108 }, { "epoch": 0.8042106661353523, "grad_norm": 608.4655151367188, "learning_rate": 1.3575812899765215e-06, "loss": 17.0156, "step": 12109 }, { "epoch": 0.8042770804277081, "grad_norm": 221.48220825195312, "learning_rate": 1.3574808470704003e-06, "loss": 17.4062, "step": 12110 }, { "epoch": 0.8043434947200637, "grad_norm": 179.77423095703125, "learning_rate": 1.3573804000291464e-06, "loss": 16.4375, "step": 12111 }, { "epoch": 0.8044099090124195, "grad_norm": 282.8559265136719, "learning_rate": 1.357279948853922e-06, "loss": 17.5, "step": 12112 }, { "epoch": 0.8044763233047751, "grad_norm": 119.87802124023438, "learning_rate": 1.3571794935458896e-06, "loss": 16.2812, "step": 12113 }, { "epoch": 0.8045427375971309, "grad_norm": 198.6946258544922, "learning_rate": 1.3570790341062106e-06, "loss": 25.5156, "step": 12114 }, { "epoch": 0.8046091518894866, "grad_norm": 187.6531524658203, "learning_rate": 1.3569785705360472e-06, "loss": 20.25, "step": 12115 }, { "epoch": 0.8046755661818423, "grad_norm": 197.31910705566406, "learning_rate": 1.3568781028365614e-06, "loss": 15.0625, "step": 12116 }, { "epoch": 0.8047419804741981, "grad_norm": 252.91053771972656, "learning_rate": 1.3567776310089158e-06, "loss": 23.5781, "step": 12117 }, { "epoch": 0.8048083947665537, "grad_norm": 136.0235595703125, "learning_rate": 1.3566771550542723e-06, "loss": 18.0781, "step": 12118 }, { "epoch": 0.8048748090589095, "grad_norm": 498.4566955566406, "learning_rate": 1.3565766749737929e-06, "loss": 12.2188, "step": 12119 }, { "epoch": 0.8049412233512652, "grad_norm": 240.59048461914062, "learning_rate": 1.3564761907686401e-06, "loss": 16.9688, "step": 12120 }, { "epoch": 0.8050076376436209, "grad_norm": 164.16549682617188, "learning_rate": 1.3563757024399767e-06, "loss": 16.0156, "step": 12121 }, { "epoch": 0.8050740519359766, "grad_norm": 256.45306396484375, "learning_rate": 1.3562752099889641e-06, "loss": 16.3906, "step": 12122 }, { "epoch": 0.8051404662283324, "grad_norm": 229.13482666015625, "learning_rate": 1.3561747134167655e-06, "loss": 20.9688, "step": 12123 }, { "epoch": 0.805206880520688, "grad_norm": 165.7147216796875, "learning_rate": 1.3560742127245432e-06, "loss": 19.3438, "step": 12124 }, { "epoch": 0.8052732948130438, "grad_norm": 773.2019653320312, "learning_rate": 1.35597370791346e-06, "loss": 19.2188, "step": 12125 }, { "epoch": 0.8053397091053995, "grad_norm": 148.0386962890625, "learning_rate": 1.3558731989846781e-06, "loss": 13.8281, "step": 12126 }, { "epoch": 0.8054061233977552, "grad_norm": 96.2398452758789, "learning_rate": 1.35577268593936e-06, "loss": 18.375, "step": 12127 }, { "epoch": 0.805472537690111, "grad_norm": 270.78955078125, "learning_rate": 1.355672168778669e-06, "loss": 20.6719, "step": 12128 }, { "epoch": 0.8055389519824666, "grad_norm": 136.94747924804688, "learning_rate": 1.3555716475037673e-06, "loss": 15.875, "step": 12129 }, { "epoch": 0.8056053662748224, "grad_norm": 155.7054901123047, "learning_rate": 1.3554711221158174e-06, "loss": 11.2188, "step": 12130 }, { "epoch": 0.805671780567178, "grad_norm": 192.16299438476562, "learning_rate": 1.3553705926159831e-06, "loss": 15.8594, "step": 12131 }, { "epoch": 0.8057381948595338, "grad_norm": 173.7176513671875, "learning_rate": 1.3552700590054264e-06, "loss": 25.3125, "step": 12132 }, { "epoch": 0.8058046091518895, "grad_norm": 114.26834106445312, "learning_rate": 1.3551695212853108e-06, "loss": 16.1875, "step": 12133 }, { "epoch": 0.8058710234442452, "grad_norm": 161.48362731933594, "learning_rate": 1.3550689794567987e-06, "loss": 16.4219, "step": 12134 }, { "epoch": 0.8059374377366009, "grad_norm": 223.68324279785156, "learning_rate": 1.3549684335210535e-06, "loss": 18.4688, "step": 12135 }, { "epoch": 0.8060038520289566, "grad_norm": 227.59133911132812, "learning_rate": 1.3548678834792381e-06, "loss": 17.0469, "step": 12136 }, { "epoch": 0.8060702663213124, "grad_norm": 176.11807250976562, "learning_rate": 1.3547673293325156e-06, "loss": 20.5625, "step": 12137 }, { "epoch": 0.806136680613668, "grad_norm": 206.2830047607422, "learning_rate": 1.354666771082049e-06, "loss": 22.3281, "step": 12138 }, { "epoch": 0.8062030949060238, "grad_norm": 115.61791229248047, "learning_rate": 1.3545662087290024e-06, "loss": 15.0, "step": 12139 }, { "epoch": 0.8062695091983795, "grad_norm": 203.219482421875, "learning_rate": 1.3544656422745378e-06, "loss": 16.8125, "step": 12140 }, { "epoch": 0.8063359234907352, "grad_norm": 154.84664916992188, "learning_rate": 1.3543650717198191e-06, "loss": 11.3125, "step": 12141 }, { "epoch": 0.8064023377830909, "grad_norm": 119.5597915649414, "learning_rate": 1.3542644970660097e-06, "loss": 11.8516, "step": 12142 }, { "epoch": 0.8064687520754467, "grad_norm": 124.64903259277344, "learning_rate": 1.3541639183142728e-06, "loss": 15.7969, "step": 12143 }, { "epoch": 0.8065351663678023, "grad_norm": 219.25045776367188, "learning_rate": 1.354063335465772e-06, "loss": 17.8281, "step": 12144 }, { "epoch": 0.8066015806601581, "grad_norm": 250.4407196044922, "learning_rate": 1.3539627485216705e-06, "loss": 17.2031, "step": 12145 }, { "epoch": 0.8066679949525137, "grad_norm": 127.84259033203125, "learning_rate": 1.3538621574831325e-06, "loss": 13.375, "step": 12146 }, { "epoch": 0.8067344092448695, "grad_norm": 195.0650177001953, "learning_rate": 1.3537615623513206e-06, "loss": 13.6562, "step": 12147 }, { "epoch": 0.8068008235372253, "grad_norm": 303.0751953125, "learning_rate": 1.3536609631273993e-06, "loss": 24.2188, "step": 12148 }, { "epoch": 0.8068672378295809, "grad_norm": 165.72535705566406, "learning_rate": 1.3535603598125315e-06, "loss": 19.7344, "step": 12149 }, { "epoch": 0.8069336521219367, "grad_norm": 358.6347961425781, "learning_rate": 1.3534597524078817e-06, "loss": 26.0938, "step": 12150 }, { "epoch": 0.8070000664142923, "grad_norm": 146.61361694335938, "learning_rate": 1.3533591409146133e-06, "loss": 19.3594, "step": 12151 }, { "epoch": 0.8070664807066481, "grad_norm": 135.12208557128906, "learning_rate": 1.35325852533389e-06, "loss": 16.0156, "step": 12152 }, { "epoch": 0.8071328949990038, "grad_norm": 335.5519104003906, "learning_rate": 1.353157905666876e-06, "loss": 16.7656, "step": 12153 }, { "epoch": 0.8071993092913595, "grad_norm": 188.1596221923828, "learning_rate": 1.3530572819147343e-06, "loss": 23.5938, "step": 12154 }, { "epoch": 0.8072657235837152, "grad_norm": 4174.84033203125, "learning_rate": 1.3529566540786302e-06, "loss": 17.0312, "step": 12155 }, { "epoch": 0.8073321378760709, "grad_norm": 366.5618591308594, "learning_rate": 1.3528560221597266e-06, "loss": 36.4375, "step": 12156 }, { "epoch": 0.8073985521684266, "grad_norm": 107.227294921875, "learning_rate": 1.3527553861591888e-06, "loss": 13.1094, "step": 12157 }, { "epoch": 0.8074649664607824, "grad_norm": 170.32354736328125, "learning_rate": 1.3526547460781794e-06, "loss": 15.25, "step": 12158 }, { "epoch": 0.8075313807531381, "grad_norm": 172.9994354248047, "learning_rate": 1.3525541019178631e-06, "loss": 14.0078, "step": 12159 }, { "epoch": 0.8075977950454938, "grad_norm": 138.3711395263672, "learning_rate": 1.352453453679405e-06, "loss": 16.4219, "step": 12160 }, { "epoch": 0.8076642093378495, "grad_norm": 404.1544494628906, "learning_rate": 1.352352801363968e-06, "loss": 28.9062, "step": 12161 }, { "epoch": 0.8077306236302052, "grad_norm": 281.1767578125, "learning_rate": 1.3522521449727172e-06, "loss": 15.5469, "step": 12162 }, { "epoch": 0.807797037922561, "grad_norm": 191.0990753173828, "learning_rate": 1.3521514845068166e-06, "loss": 18.9688, "step": 12163 }, { "epoch": 0.8078634522149166, "grad_norm": 433.37860107421875, "learning_rate": 1.3520508199674308e-06, "loss": 21.0156, "step": 12164 }, { "epoch": 0.8079298665072724, "grad_norm": 258.01446533203125, "learning_rate": 1.3519501513557238e-06, "loss": 17.4531, "step": 12165 }, { "epoch": 0.807996280799628, "grad_norm": 912.8414306640625, "learning_rate": 1.3518494786728608e-06, "loss": 18.6719, "step": 12166 }, { "epoch": 0.8080626950919838, "grad_norm": 273.46710205078125, "learning_rate": 1.3517488019200057e-06, "loss": 14.2969, "step": 12167 }, { "epoch": 0.8081291093843395, "grad_norm": 212.0994415283203, "learning_rate": 1.3516481210983234e-06, "loss": 19.0625, "step": 12168 }, { "epoch": 0.8081955236766952, "grad_norm": 187.48001098632812, "learning_rate": 1.3515474362089783e-06, "loss": 18.1406, "step": 12169 }, { "epoch": 0.808261937969051, "grad_norm": 132.5758819580078, "learning_rate": 1.3514467472531353e-06, "loss": 16.8438, "step": 12170 }, { "epoch": 0.8083283522614066, "grad_norm": 162.37210083007812, "learning_rate": 1.351346054231959e-06, "loss": 21.625, "step": 12171 }, { "epoch": 0.8083947665537624, "grad_norm": 156.512939453125, "learning_rate": 1.351245357146614e-06, "loss": 18.3594, "step": 12172 }, { "epoch": 0.8084611808461181, "grad_norm": 388.1722106933594, "learning_rate": 1.3511446559982651e-06, "loss": 17.4844, "step": 12173 }, { "epoch": 0.8085275951384738, "grad_norm": 300.9892883300781, "learning_rate": 1.3510439507880776e-06, "loss": 10.4844, "step": 12174 }, { "epoch": 0.8085940094308295, "grad_norm": 135.65567016601562, "learning_rate": 1.3509432415172162e-06, "loss": 12.9062, "step": 12175 }, { "epoch": 0.8086604237231853, "grad_norm": 149.30743408203125, "learning_rate": 1.3508425281868455e-06, "loss": 19.4375, "step": 12176 }, { "epoch": 0.8087268380155409, "grad_norm": 155.02159118652344, "learning_rate": 1.3507418107981304e-06, "loss": 15.1094, "step": 12177 }, { "epoch": 0.8087932523078967, "grad_norm": 252.8772735595703, "learning_rate": 1.3506410893522369e-06, "loss": 16.9375, "step": 12178 }, { "epoch": 0.8088596666002523, "grad_norm": 428.1159973144531, "learning_rate": 1.3505403638503294e-06, "loss": 17.4531, "step": 12179 }, { "epoch": 0.8089260808926081, "grad_norm": 93.69007873535156, "learning_rate": 1.3504396342935727e-06, "loss": 23.2344, "step": 12180 }, { "epoch": 0.8089924951849639, "grad_norm": 411.77606201171875, "learning_rate": 1.3503389006831328e-06, "loss": 20.7031, "step": 12181 }, { "epoch": 0.8090589094773195, "grad_norm": 328.451904296875, "learning_rate": 1.3502381630201743e-06, "loss": 13.9688, "step": 12182 }, { "epoch": 0.8091253237696753, "grad_norm": 154.09326171875, "learning_rate": 1.3501374213058627e-06, "loss": 14.125, "step": 12183 }, { "epoch": 0.8091917380620309, "grad_norm": 285.8404235839844, "learning_rate": 1.3500366755413633e-06, "loss": 23.1094, "step": 12184 }, { "epoch": 0.8092581523543867, "grad_norm": 266.2399597167969, "learning_rate": 1.3499359257278416e-06, "loss": 12.2422, "step": 12185 }, { "epoch": 0.8093245666467423, "grad_norm": 340.6810302734375, "learning_rate": 1.3498351718664628e-06, "loss": 16.2188, "step": 12186 }, { "epoch": 0.8093909809390981, "grad_norm": 329.4026794433594, "learning_rate": 1.3497344139583923e-06, "loss": 17.9375, "step": 12187 }, { "epoch": 0.8094573952314538, "grad_norm": 206.06222534179688, "learning_rate": 1.349633652004796e-06, "loss": 13.4375, "step": 12188 }, { "epoch": 0.8095238095238095, "grad_norm": 245.8817901611328, "learning_rate": 1.3495328860068394e-06, "loss": 21.5781, "step": 12189 }, { "epoch": 0.8095902238161652, "grad_norm": 243.86940002441406, "learning_rate": 1.3494321159656875e-06, "loss": 15.7344, "step": 12190 }, { "epoch": 0.809656638108521, "grad_norm": 317.58575439453125, "learning_rate": 1.3493313418825065e-06, "loss": 19.4844, "step": 12191 }, { "epoch": 0.8097230524008767, "grad_norm": 213.46568298339844, "learning_rate": 1.3492305637584623e-06, "loss": 22.8594, "step": 12192 }, { "epoch": 0.8097894666932324, "grad_norm": 866.5120239257812, "learning_rate": 1.3491297815947201e-06, "loss": 28.6875, "step": 12193 }, { "epoch": 0.8098558809855881, "grad_norm": 239.7572784423828, "learning_rate": 1.3490289953924458e-06, "loss": 13.25, "step": 12194 }, { "epoch": 0.8099222952779438, "grad_norm": 135.7097625732422, "learning_rate": 1.3489282051528052e-06, "loss": 12.1875, "step": 12195 }, { "epoch": 0.8099887095702996, "grad_norm": 244.33218383789062, "learning_rate": 1.3488274108769648e-06, "loss": 15.0781, "step": 12196 }, { "epoch": 0.8100551238626552, "grad_norm": 361.3997802734375, "learning_rate": 1.3487266125660896e-06, "loss": 24.375, "step": 12197 }, { "epoch": 0.810121538155011, "grad_norm": 1210.4857177734375, "learning_rate": 1.3486258102213464e-06, "loss": 21.8359, "step": 12198 }, { "epoch": 0.8101879524473666, "grad_norm": 306.97021484375, "learning_rate": 1.3485250038439007e-06, "loss": 18.1719, "step": 12199 }, { "epoch": 0.8102543667397224, "grad_norm": 158.5730743408203, "learning_rate": 1.3484241934349187e-06, "loss": 17.0312, "step": 12200 }, { "epoch": 0.810320781032078, "grad_norm": 197.08253479003906, "learning_rate": 1.3483233789955665e-06, "loss": 14.1719, "step": 12201 }, { "epoch": 0.8103871953244338, "grad_norm": 156.0172576904297, "learning_rate": 1.3482225605270103e-06, "loss": 11.4531, "step": 12202 }, { "epoch": 0.8104536096167896, "grad_norm": 214.64971923828125, "learning_rate": 1.3481217380304167e-06, "loss": 19.1406, "step": 12203 }, { "epoch": 0.8105200239091452, "grad_norm": 248.0511474609375, "learning_rate": 1.3480209115069511e-06, "loss": 14.0, "step": 12204 }, { "epoch": 0.810586438201501, "grad_norm": 223.6226806640625, "learning_rate": 1.3479200809577802e-06, "loss": 17.4375, "step": 12205 }, { "epoch": 0.8106528524938567, "grad_norm": 294.1480407714844, "learning_rate": 1.3478192463840708e-06, "loss": 19.375, "step": 12206 }, { "epoch": 0.8107192667862124, "grad_norm": 147.5653076171875, "learning_rate": 1.3477184077869889e-06, "loss": 13.6328, "step": 12207 }, { "epoch": 0.8107856810785681, "grad_norm": 425.9322814941406, "learning_rate": 1.3476175651677006e-06, "loss": 30.0625, "step": 12208 }, { "epoch": 0.8108520953709238, "grad_norm": 142.73361206054688, "learning_rate": 1.347516718527373e-06, "loss": 13.4531, "step": 12209 }, { "epoch": 0.8109185096632795, "grad_norm": 199.35562133789062, "learning_rate": 1.3474158678671725e-06, "loss": 14.0469, "step": 12210 }, { "epoch": 0.8109849239556353, "grad_norm": 119.72418212890625, "learning_rate": 1.3473150131882655e-06, "loss": 17.6562, "step": 12211 }, { "epoch": 0.8110513382479909, "grad_norm": 128.75318908691406, "learning_rate": 1.3472141544918185e-06, "loss": 17.8906, "step": 12212 }, { "epoch": 0.8111177525403467, "grad_norm": 136.72381591796875, "learning_rate": 1.3471132917789986e-06, "loss": 15.5625, "step": 12213 }, { "epoch": 0.8111841668327024, "grad_norm": 176.0721893310547, "learning_rate": 1.3470124250509724e-06, "loss": 16.9375, "step": 12214 }, { "epoch": 0.8112505811250581, "grad_norm": 660.605712890625, "learning_rate": 1.3469115543089063e-06, "loss": 26.7656, "step": 12215 }, { "epoch": 0.8113169954174139, "grad_norm": 292.69171142578125, "learning_rate": 1.3468106795539674e-06, "loss": 23.9531, "step": 12216 }, { "epoch": 0.8113834097097695, "grad_norm": 331.16729736328125, "learning_rate": 1.3467098007873228e-06, "loss": 20.6562, "step": 12217 }, { "epoch": 0.8114498240021253, "grad_norm": 196.90231323242188, "learning_rate": 1.3466089180101387e-06, "loss": 20.0781, "step": 12218 }, { "epoch": 0.8115162382944809, "grad_norm": 127.62309265136719, "learning_rate": 1.3465080312235828e-06, "loss": 21.1875, "step": 12219 }, { "epoch": 0.8115826525868367, "grad_norm": 139.3384552001953, "learning_rate": 1.3464071404288217e-06, "loss": 13.1406, "step": 12220 }, { "epoch": 0.8116490668791924, "grad_norm": 143.08843994140625, "learning_rate": 1.346306245627023e-06, "loss": 13.3281, "step": 12221 }, { "epoch": 0.8117154811715481, "grad_norm": 248.07122802734375, "learning_rate": 1.3462053468193526e-06, "loss": 14.9453, "step": 12222 }, { "epoch": 0.8117818954639038, "grad_norm": 468.1557922363281, "learning_rate": 1.3461044440069786e-06, "loss": 21.5312, "step": 12223 }, { "epoch": 0.8118483097562595, "grad_norm": 326.166259765625, "learning_rate": 1.3460035371910685e-06, "loss": 18.6406, "step": 12224 }, { "epoch": 0.8119147240486153, "grad_norm": 181.66041564941406, "learning_rate": 1.3459026263727884e-06, "loss": 22.4531, "step": 12225 }, { "epoch": 0.811981138340971, "grad_norm": 386.4975280761719, "learning_rate": 1.3458017115533065e-06, "loss": 22.7656, "step": 12226 }, { "epoch": 0.8120475526333267, "grad_norm": 254.4066162109375, "learning_rate": 1.3457007927337895e-06, "loss": 19.1875, "step": 12227 }, { "epoch": 0.8121139669256824, "grad_norm": 304.46221923828125, "learning_rate": 1.3455998699154054e-06, "loss": 21.3281, "step": 12228 }, { "epoch": 0.8121803812180381, "grad_norm": 348.06451416015625, "learning_rate": 1.3454989430993211e-06, "loss": 16.6094, "step": 12229 }, { "epoch": 0.8122467955103938, "grad_norm": 474.7289733886719, "learning_rate": 1.3453980122867044e-06, "loss": 20.2031, "step": 12230 }, { "epoch": 0.8123132098027496, "grad_norm": 187.55953979492188, "learning_rate": 1.3452970774787224e-06, "loss": 17.2656, "step": 12231 }, { "epoch": 0.8123796240951052, "grad_norm": 146.9546661376953, "learning_rate": 1.3451961386765434e-06, "loss": 13.7812, "step": 12232 }, { "epoch": 0.812446038387461, "grad_norm": 188.12667846679688, "learning_rate": 1.3450951958813338e-06, "loss": 16.3906, "step": 12233 }, { "epoch": 0.8125124526798166, "grad_norm": 255.69142150878906, "learning_rate": 1.3449942490942625e-06, "loss": 14.2031, "step": 12234 }, { "epoch": 0.8125788669721724, "grad_norm": 224.29270935058594, "learning_rate": 1.3448932983164967e-06, "loss": 16.9219, "step": 12235 }, { "epoch": 0.8126452812645282, "grad_norm": 305.1601867675781, "learning_rate": 1.344792343549204e-06, "loss": 23.875, "step": 12236 }, { "epoch": 0.8127116955568838, "grad_norm": 197.93081665039062, "learning_rate": 1.3446913847935522e-06, "loss": 14.2188, "step": 12237 }, { "epoch": 0.8127781098492396, "grad_norm": 260.7981262207031, "learning_rate": 1.344590422050709e-06, "loss": 13.6875, "step": 12238 }, { "epoch": 0.8128445241415952, "grad_norm": 150.98678588867188, "learning_rate": 1.344489455321843e-06, "loss": 17.1094, "step": 12239 }, { "epoch": 0.812910938433951, "grad_norm": 294.29779052734375, "learning_rate": 1.3443884846081213e-06, "loss": 17.2188, "step": 12240 }, { "epoch": 0.8129773527263067, "grad_norm": 274.7022399902344, "learning_rate": 1.3442875099107123e-06, "loss": 11.5, "step": 12241 }, { "epoch": 0.8130437670186624, "grad_norm": 496.4470520019531, "learning_rate": 1.3441865312307838e-06, "loss": 19.7969, "step": 12242 }, { "epoch": 0.8131101813110181, "grad_norm": 140.3450469970703, "learning_rate": 1.3440855485695041e-06, "loss": 14.0625, "step": 12243 }, { "epoch": 0.8131765956033739, "grad_norm": 188.80311584472656, "learning_rate": 1.3439845619280412e-06, "loss": 18.2969, "step": 12244 }, { "epoch": 0.8132430098957295, "grad_norm": 244.43106079101562, "learning_rate": 1.343883571307563e-06, "loss": 16.4219, "step": 12245 }, { "epoch": 0.8133094241880853, "grad_norm": 240.5623779296875, "learning_rate": 1.3437825767092383e-06, "loss": 15.0156, "step": 12246 }, { "epoch": 0.813375838480441, "grad_norm": 131.5184326171875, "learning_rate": 1.3436815781342348e-06, "loss": 14.3438, "step": 12247 }, { "epoch": 0.8134422527727967, "grad_norm": 1987.7625732421875, "learning_rate": 1.3435805755837208e-06, "loss": 14.625, "step": 12248 }, { "epoch": 0.8135086670651525, "grad_norm": 313.17236328125, "learning_rate": 1.3434795690588652e-06, "loss": 25.625, "step": 12249 }, { "epoch": 0.8135750813575081, "grad_norm": 149.57681274414062, "learning_rate": 1.3433785585608358e-06, "loss": 15.7969, "step": 12250 }, { "epoch": 0.8136414956498639, "grad_norm": 121.79068756103516, "learning_rate": 1.3432775440908013e-06, "loss": 10.7266, "step": 12251 }, { "epoch": 0.8137079099422195, "grad_norm": 319.63922119140625, "learning_rate": 1.3431765256499298e-06, "loss": 30.6562, "step": 12252 }, { "epoch": 0.8137743242345753, "grad_norm": 176.61544799804688, "learning_rate": 1.3430755032393909e-06, "loss": 12.2812, "step": 12253 }, { "epoch": 0.813840738526931, "grad_norm": 202.8689422607422, "learning_rate": 1.3429744768603515e-06, "loss": 19.7031, "step": 12254 }, { "epoch": 0.8139071528192867, "grad_norm": 125.7408676147461, "learning_rate": 1.3428734465139814e-06, "loss": 15.375, "step": 12255 }, { "epoch": 0.8139735671116424, "grad_norm": 236.281005859375, "learning_rate": 1.3427724122014494e-06, "loss": 19.1562, "step": 12256 }, { "epoch": 0.8140399814039981, "grad_norm": 164.4764404296875, "learning_rate": 1.3426713739239235e-06, "loss": 14.2656, "step": 12257 }, { "epoch": 0.8141063956963539, "grad_norm": 290.2684631347656, "learning_rate": 1.3425703316825727e-06, "loss": 14.0469, "step": 12258 }, { "epoch": 0.8141728099887096, "grad_norm": 153.21095275878906, "learning_rate": 1.3424692854785657e-06, "loss": 14.4375, "step": 12259 }, { "epoch": 0.8142392242810653, "grad_norm": 244.75311279296875, "learning_rate": 1.3423682353130718e-06, "loss": 16.9375, "step": 12260 }, { "epoch": 0.814305638573421, "grad_norm": 187.62716674804688, "learning_rate": 1.3422671811872594e-06, "loss": 15.3281, "step": 12261 }, { "epoch": 0.8143720528657767, "grad_norm": 175.6789093017578, "learning_rate": 1.3421661231022977e-06, "loss": 17.5469, "step": 12262 }, { "epoch": 0.8144384671581324, "grad_norm": 392.4919128417969, "learning_rate": 1.3420650610593557e-06, "loss": 15.6172, "step": 12263 }, { "epoch": 0.8145048814504882, "grad_norm": 248.66407775878906, "learning_rate": 1.3419639950596023e-06, "loss": 21.0781, "step": 12264 }, { "epoch": 0.8145712957428438, "grad_norm": 173.06982421875, "learning_rate": 1.3418629251042063e-06, "loss": 22.2031, "step": 12265 }, { "epoch": 0.8146377100351996, "grad_norm": 111.67961120605469, "learning_rate": 1.3417618511943371e-06, "loss": 16.0, "step": 12266 }, { "epoch": 0.8147041243275552, "grad_norm": 330.0981140136719, "learning_rate": 1.3416607733311642e-06, "loss": 17.5, "step": 12267 }, { "epoch": 0.814770538619911, "grad_norm": 160.21939086914062, "learning_rate": 1.3415596915158564e-06, "loss": 11.1719, "step": 12268 }, { "epoch": 0.8148369529122668, "grad_norm": 120.58063507080078, "learning_rate": 1.341458605749583e-06, "loss": 19.1094, "step": 12269 }, { "epoch": 0.8149033672046224, "grad_norm": 179.14366149902344, "learning_rate": 1.341357516033513e-06, "loss": 16.9844, "step": 12270 }, { "epoch": 0.8149697814969782, "grad_norm": 282.92291259765625, "learning_rate": 1.3412564223688169e-06, "loss": 20.4688, "step": 12271 }, { "epoch": 0.8150361957893338, "grad_norm": 215.79393005371094, "learning_rate": 1.3411553247566627e-06, "loss": 16.3906, "step": 12272 }, { "epoch": 0.8151026100816896, "grad_norm": 128.74400329589844, "learning_rate": 1.3410542231982206e-06, "loss": 13.4844, "step": 12273 }, { "epoch": 0.8151690243740453, "grad_norm": 198.4833526611328, "learning_rate": 1.3409531176946598e-06, "loss": 13.8594, "step": 12274 }, { "epoch": 0.815235438666401, "grad_norm": 290.56683349609375, "learning_rate": 1.34085200824715e-06, "loss": 15.2031, "step": 12275 }, { "epoch": 0.8153018529587567, "grad_norm": 133.67897033691406, "learning_rate": 1.3407508948568608e-06, "loss": 14.7656, "step": 12276 }, { "epoch": 0.8153682672511124, "grad_norm": 210.1242218017578, "learning_rate": 1.3406497775249615e-06, "loss": 12.7188, "step": 12277 }, { "epoch": 0.8154346815434682, "grad_norm": 317.6610412597656, "learning_rate": 1.3405486562526223e-06, "loss": 16.25, "step": 12278 }, { "epoch": 0.8155010958358239, "grad_norm": 148.4939422607422, "learning_rate": 1.3404475310410123e-06, "loss": 17.3125, "step": 12279 }, { "epoch": 0.8155675101281796, "grad_norm": 112.5234375, "learning_rate": 1.3403464018913018e-06, "loss": 10.8125, "step": 12280 }, { "epoch": 0.8156339244205353, "grad_norm": 170.26644897460938, "learning_rate": 1.3402452688046601e-06, "loss": 16.7344, "step": 12281 }, { "epoch": 0.815700338712891, "grad_norm": 425.571044921875, "learning_rate": 1.3401441317822578e-06, "loss": 16.0, "step": 12282 }, { "epoch": 0.8157667530052467, "grad_norm": 229.87921142578125, "learning_rate": 1.340042990825264e-06, "loss": 22.2344, "step": 12283 }, { "epoch": 0.8158331672976025, "grad_norm": 229.36680603027344, "learning_rate": 1.3399418459348488e-06, "loss": 15.5469, "step": 12284 }, { "epoch": 0.8158995815899581, "grad_norm": 129.96617126464844, "learning_rate": 1.3398406971121827e-06, "loss": 15.5938, "step": 12285 }, { "epoch": 0.8159659958823139, "grad_norm": 220.70018005371094, "learning_rate": 1.3397395443584355e-06, "loss": 17.1094, "step": 12286 }, { "epoch": 0.8160324101746695, "grad_norm": 192.0188446044922, "learning_rate": 1.3396383876747766e-06, "loss": 13.3984, "step": 12287 }, { "epoch": 0.8160988244670253, "grad_norm": 141.92230224609375, "learning_rate": 1.339537227062377e-06, "loss": 14.7031, "step": 12288 }, { "epoch": 0.8161652387593811, "grad_norm": 398.6700439453125, "learning_rate": 1.3394360625224065e-06, "loss": 18.3125, "step": 12289 }, { "epoch": 0.8162316530517367, "grad_norm": 286.51727294921875, "learning_rate": 1.3393348940560354e-06, "loss": 17.6094, "step": 12290 }, { "epoch": 0.8162980673440925, "grad_norm": 353.3476867675781, "learning_rate": 1.3392337216644339e-06, "loss": 26.2188, "step": 12291 }, { "epoch": 0.8163644816364481, "grad_norm": 106.53269958496094, "learning_rate": 1.3391325453487722e-06, "loss": 12.9375, "step": 12292 }, { "epoch": 0.8164308959288039, "grad_norm": 283.2341613769531, "learning_rate": 1.339031365110221e-06, "loss": 19.6094, "step": 12293 }, { "epoch": 0.8164973102211596, "grad_norm": 181.5869598388672, "learning_rate": 1.3389301809499502e-06, "loss": 14.4688, "step": 12294 }, { "epoch": 0.8165637245135153, "grad_norm": 225.16656494140625, "learning_rate": 1.3388289928691307e-06, "loss": 19.375, "step": 12295 }, { "epoch": 0.816630138805871, "grad_norm": 588.17822265625, "learning_rate": 1.3387278008689328e-06, "loss": 17.4062, "step": 12296 }, { "epoch": 0.8166965530982268, "grad_norm": 280.5227966308594, "learning_rate": 1.338626604950527e-06, "loss": 21.0, "step": 12297 }, { "epoch": 0.8167629673905824, "grad_norm": 300.976806640625, "learning_rate": 1.3385254051150837e-06, "loss": 21.3125, "step": 12298 }, { "epoch": 0.8168293816829382, "grad_norm": 732.4041137695312, "learning_rate": 1.3384242013637744e-06, "loss": 11.3984, "step": 12299 }, { "epoch": 0.8168957959752939, "grad_norm": 415.36669921875, "learning_rate": 1.3383229936977682e-06, "loss": 18.8906, "step": 12300 }, { "epoch": 0.8169622102676496, "grad_norm": 470.068359375, "learning_rate": 1.338221782118237e-06, "loss": 20.0625, "step": 12301 }, { "epoch": 0.8170286245600054, "grad_norm": 410.0634460449219, "learning_rate": 1.3381205666263514e-06, "loss": 19.8281, "step": 12302 }, { "epoch": 0.817095038852361, "grad_norm": 179.67982482910156, "learning_rate": 1.338019347223282e-06, "loss": 19.875, "step": 12303 }, { "epoch": 0.8171614531447168, "grad_norm": 447.9197998046875, "learning_rate": 1.3379181239101997e-06, "loss": 25.0781, "step": 12304 }, { "epoch": 0.8172278674370724, "grad_norm": 200.34642028808594, "learning_rate": 1.3378168966882755e-06, "loss": 18.2812, "step": 12305 }, { "epoch": 0.8172942817294282, "grad_norm": 166.0726318359375, "learning_rate": 1.3377156655586804e-06, "loss": 18.0156, "step": 12306 }, { "epoch": 0.8173606960217839, "grad_norm": 249.90594482421875, "learning_rate": 1.3376144305225848e-06, "loss": 15.1406, "step": 12307 }, { "epoch": 0.8174271103141396, "grad_norm": 494.61627197265625, "learning_rate": 1.33751319158116e-06, "loss": 14.8125, "step": 12308 }, { "epoch": 0.8174935246064953, "grad_norm": 147.0213623046875, "learning_rate": 1.3374119487355774e-06, "loss": 20.2188, "step": 12309 }, { "epoch": 0.817559938898851, "grad_norm": 250.69097900390625, "learning_rate": 1.3373107019870084e-06, "loss": 15.6562, "step": 12310 }, { "epoch": 0.8176263531912068, "grad_norm": 214.0755615234375, "learning_rate": 1.3372094513366231e-06, "loss": 22.6094, "step": 12311 }, { "epoch": 0.8176927674835625, "grad_norm": 135.0819854736328, "learning_rate": 1.3371081967855938e-06, "loss": 14.5, "step": 12312 }, { "epoch": 0.8177591817759182, "grad_norm": 337.1545715332031, "learning_rate": 1.3370069383350907e-06, "loss": 15.5625, "step": 12313 }, { "epoch": 0.8178255960682739, "grad_norm": 165.46853637695312, "learning_rate": 1.3369056759862862e-06, "loss": 17.1719, "step": 12314 }, { "epoch": 0.8178920103606296, "grad_norm": 132.2030792236328, "learning_rate": 1.3368044097403511e-06, "loss": 14.2188, "step": 12315 }, { "epoch": 0.8179584246529853, "grad_norm": 195.31768798828125, "learning_rate": 1.3367031395984564e-06, "loss": 14.625, "step": 12316 }, { "epoch": 0.8180248389453411, "grad_norm": 290.209228515625, "learning_rate": 1.3366018655617744e-06, "loss": 18.4844, "step": 12317 }, { "epoch": 0.8180912532376967, "grad_norm": 238.5867919921875, "learning_rate": 1.336500587631476e-06, "loss": 20.2734, "step": 12318 }, { "epoch": 0.8181576675300525, "grad_norm": 286.2577819824219, "learning_rate": 1.3363993058087324e-06, "loss": 16.9922, "step": 12319 }, { "epoch": 0.8182240818224081, "grad_norm": 494.35430908203125, "learning_rate": 1.336298020094716e-06, "loss": 16.4844, "step": 12320 }, { "epoch": 0.8182904961147639, "grad_norm": 622.9877319335938, "learning_rate": 1.3361967304905979e-06, "loss": 27.0312, "step": 12321 }, { "epoch": 0.8183569104071197, "grad_norm": 168.71253967285156, "learning_rate": 1.3360954369975498e-06, "loss": 21.9062, "step": 12322 }, { "epoch": 0.8184233246994753, "grad_norm": 296.94915771484375, "learning_rate": 1.3359941396167438e-06, "loss": 13.9688, "step": 12323 }, { "epoch": 0.8184897389918311, "grad_norm": 932.1262817382812, "learning_rate": 1.3358928383493512e-06, "loss": 18.625, "step": 12324 }, { "epoch": 0.8185561532841867, "grad_norm": 348.1695251464844, "learning_rate": 1.3357915331965437e-06, "loss": 17.0, "step": 12325 }, { "epoch": 0.8186225675765425, "grad_norm": 214.4438934326172, "learning_rate": 1.3356902241594936e-06, "loss": 21.3438, "step": 12326 }, { "epoch": 0.8186889818688982, "grad_norm": 164.44149780273438, "learning_rate": 1.3355889112393724e-06, "loss": 20.0312, "step": 12327 }, { "epoch": 0.8187553961612539, "grad_norm": 605.6113891601562, "learning_rate": 1.3354875944373522e-06, "loss": 15.4688, "step": 12328 }, { "epoch": 0.8188218104536096, "grad_norm": 271.3616027832031, "learning_rate": 1.3353862737546052e-06, "loss": 18.9688, "step": 12329 }, { "epoch": 0.8188882247459653, "grad_norm": 221.72222900390625, "learning_rate": 1.3352849491923028e-06, "loss": 15.0469, "step": 12330 }, { "epoch": 0.818954639038321, "grad_norm": 199.48548889160156, "learning_rate": 1.3351836207516179e-06, "loss": 18.3594, "step": 12331 }, { "epoch": 0.8190210533306768, "grad_norm": 422.88751220703125, "learning_rate": 1.335082288433722e-06, "loss": 19.2656, "step": 12332 }, { "epoch": 0.8190874676230325, "grad_norm": 278.970947265625, "learning_rate": 1.3349809522397873e-06, "loss": 19.75, "step": 12333 }, { "epoch": 0.8191538819153882, "grad_norm": 147.27671813964844, "learning_rate": 1.334879612170986e-06, "loss": 16.0938, "step": 12334 }, { "epoch": 0.819220296207744, "grad_norm": 167.0838165283203, "learning_rate": 1.3347782682284908e-06, "loss": 17.0625, "step": 12335 }, { "epoch": 0.8192867105000996, "grad_norm": 418.778564453125, "learning_rate": 1.3346769204134735e-06, "loss": 13.6094, "step": 12336 }, { "epoch": 0.8193531247924554, "grad_norm": 235.92462158203125, "learning_rate": 1.3345755687271062e-06, "loss": 23.4219, "step": 12337 }, { "epoch": 0.819419539084811, "grad_norm": 203.84231567382812, "learning_rate": 1.3344742131705622e-06, "loss": 21.8438, "step": 12338 }, { "epoch": 0.8194859533771668, "grad_norm": 328.4044189453125, "learning_rate": 1.3343728537450129e-06, "loss": 21.7656, "step": 12339 }, { "epoch": 0.8195523676695224, "grad_norm": 124.92474365234375, "learning_rate": 1.3342714904516314e-06, "loss": 22.1719, "step": 12340 }, { "epoch": 0.8196187819618782, "grad_norm": 114.4851303100586, "learning_rate": 1.3341701232915903e-06, "loss": 11.1094, "step": 12341 }, { "epoch": 0.8196851962542339, "grad_norm": 285.5232238769531, "learning_rate": 1.334068752266062e-06, "loss": 23.0469, "step": 12342 }, { "epoch": 0.8197516105465896, "grad_norm": 240.5295867919922, "learning_rate": 1.3339673773762186e-06, "loss": 16.6406, "step": 12343 }, { "epoch": 0.8198180248389454, "grad_norm": 227.50099182128906, "learning_rate": 1.3338659986232332e-06, "loss": 19.2656, "step": 12344 }, { "epoch": 0.819884439131301, "grad_norm": 288.105224609375, "learning_rate": 1.3337646160082785e-06, "loss": 12.8516, "step": 12345 }, { "epoch": 0.8199508534236568, "grad_norm": 461.266845703125, "learning_rate": 1.3336632295325272e-06, "loss": 20.2031, "step": 12346 }, { "epoch": 0.8200172677160125, "grad_norm": 184.2607421875, "learning_rate": 1.3335618391971522e-06, "loss": 21.1875, "step": 12347 }, { "epoch": 0.8200836820083682, "grad_norm": 2400.22802734375, "learning_rate": 1.3334604450033259e-06, "loss": 21.5625, "step": 12348 }, { "epoch": 0.8201500963007239, "grad_norm": 188.5697021484375, "learning_rate": 1.3333590469522218e-06, "loss": 15.0625, "step": 12349 }, { "epoch": 0.8202165105930797, "grad_norm": 1370.5477294921875, "learning_rate": 1.3332576450450122e-06, "loss": 17.8281, "step": 12350 }, { "epoch": 0.8202829248854353, "grad_norm": 131.22056579589844, "learning_rate": 1.3331562392828704e-06, "loss": 15.5156, "step": 12351 }, { "epoch": 0.8203493391777911, "grad_norm": 120.5605697631836, "learning_rate": 1.3330548296669693e-06, "loss": 16.5938, "step": 12352 }, { "epoch": 0.8204157534701467, "grad_norm": 235.15011596679688, "learning_rate": 1.332953416198482e-06, "loss": 25.0625, "step": 12353 }, { "epoch": 0.8204821677625025, "grad_norm": 258.0333251953125, "learning_rate": 1.3328519988785819e-06, "loss": 21.1562, "step": 12354 }, { "epoch": 0.8205485820548583, "grad_norm": 298.0406494140625, "learning_rate": 1.3327505777084414e-06, "loss": 16.6875, "step": 12355 }, { "epoch": 0.8206149963472139, "grad_norm": 244.88186645507812, "learning_rate": 1.332649152689234e-06, "loss": 26.7812, "step": 12356 }, { "epoch": 0.8206814106395697, "grad_norm": 153.3087921142578, "learning_rate": 1.3325477238221335e-06, "loss": 18.7344, "step": 12357 }, { "epoch": 0.8207478249319253, "grad_norm": 174.71522521972656, "learning_rate": 1.332446291108312e-06, "loss": 16.7031, "step": 12358 }, { "epoch": 0.8208142392242811, "grad_norm": 175.31417846679688, "learning_rate": 1.332344854548944e-06, "loss": 12.4922, "step": 12359 }, { "epoch": 0.8208806535166367, "grad_norm": 225.91346740722656, "learning_rate": 1.3322434141452025e-06, "loss": 16.2188, "step": 12360 }, { "epoch": 0.8209470678089925, "grad_norm": 215.73605346679688, "learning_rate": 1.3321419698982605e-06, "loss": 16.1094, "step": 12361 }, { "epoch": 0.8210134821013482, "grad_norm": 444.552734375, "learning_rate": 1.3320405218092917e-06, "loss": 15.4062, "step": 12362 }, { "epoch": 0.8210798963937039, "grad_norm": 207.32371520996094, "learning_rate": 1.3319390698794697e-06, "loss": 18.2344, "step": 12363 }, { "epoch": 0.8211463106860596, "grad_norm": 385.2734680175781, "learning_rate": 1.331837614109968e-06, "loss": 19.125, "step": 12364 }, { "epoch": 0.8212127249784154, "grad_norm": 411.8468017578125, "learning_rate": 1.33173615450196e-06, "loss": 20.5156, "step": 12365 }, { "epoch": 0.8212791392707711, "grad_norm": 175.97525024414062, "learning_rate": 1.3316346910566191e-06, "loss": 14.1094, "step": 12366 }, { "epoch": 0.8213455535631268, "grad_norm": 268.88824462890625, "learning_rate": 1.33153322377512e-06, "loss": 19.125, "step": 12367 }, { "epoch": 0.8214119678554825, "grad_norm": 186.4407196044922, "learning_rate": 1.3314317526586353e-06, "loss": 18.25, "step": 12368 }, { "epoch": 0.8214783821478382, "grad_norm": 161.2687225341797, "learning_rate": 1.3313302777083395e-06, "loss": 16.9375, "step": 12369 }, { "epoch": 0.821544796440194, "grad_norm": 261.33819580078125, "learning_rate": 1.331228798925406e-06, "loss": 17.8125, "step": 12370 }, { "epoch": 0.8216112107325496, "grad_norm": 352.79180908203125, "learning_rate": 1.3311273163110087e-06, "loss": 18.3984, "step": 12371 }, { "epoch": 0.8216776250249054, "grad_norm": 347.60699462890625, "learning_rate": 1.3310258298663214e-06, "loss": 12.1719, "step": 12372 }, { "epoch": 0.821744039317261, "grad_norm": 387.9404296875, "learning_rate": 1.3309243395925185e-06, "loss": 16.2656, "step": 12373 }, { "epoch": 0.8218104536096168, "grad_norm": 319.0614929199219, "learning_rate": 1.3308228454907736e-06, "loss": 21.4688, "step": 12374 }, { "epoch": 0.8218768679019725, "grad_norm": 591.1944580078125, "learning_rate": 1.3307213475622607e-06, "loss": 31.8125, "step": 12375 }, { "epoch": 0.8219432821943282, "grad_norm": 180.09347534179688, "learning_rate": 1.3306198458081538e-06, "loss": 16.2656, "step": 12376 }, { "epoch": 0.822009696486684, "grad_norm": 514.022216796875, "learning_rate": 1.3305183402296273e-06, "loss": 20.1562, "step": 12377 }, { "epoch": 0.8220761107790396, "grad_norm": 169.46026611328125, "learning_rate": 1.3304168308278553e-06, "loss": 14.8125, "step": 12378 }, { "epoch": 0.8221425250713954, "grad_norm": 253.90135192871094, "learning_rate": 1.330315317604012e-06, "loss": 13.8906, "step": 12379 }, { "epoch": 0.8222089393637511, "grad_norm": 171.90037536621094, "learning_rate": 1.3302138005592714e-06, "loss": 16.0781, "step": 12380 }, { "epoch": 0.8222753536561068, "grad_norm": 140.93228149414062, "learning_rate": 1.3301122796948082e-06, "loss": 16.1875, "step": 12381 }, { "epoch": 0.8223417679484625, "grad_norm": 250.90838623046875, "learning_rate": 1.3300107550117963e-06, "loss": 19.0156, "step": 12382 }, { "epoch": 0.8224081822408182, "grad_norm": 375.099365234375, "learning_rate": 1.3299092265114103e-06, "loss": 16.5312, "step": 12383 }, { "epoch": 0.8224745965331739, "grad_norm": 397.9615173339844, "learning_rate": 1.3298076941948245e-06, "loss": 19.7188, "step": 12384 }, { "epoch": 0.8225410108255297, "grad_norm": 109.05181121826172, "learning_rate": 1.3297061580632137e-06, "loss": 19.8438, "step": 12385 }, { "epoch": 0.8226074251178853, "grad_norm": 226.62405395507812, "learning_rate": 1.329604618117752e-06, "loss": 19.4844, "step": 12386 }, { "epoch": 0.8226738394102411, "grad_norm": 263.81103515625, "learning_rate": 1.3295030743596143e-06, "loss": 18.25, "step": 12387 }, { "epoch": 0.8227402537025968, "grad_norm": 203.05897521972656, "learning_rate": 1.3294015267899746e-06, "loss": 12.4375, "step": 12388 }, { "epoch": 0.8228066679949525, "grad_norm": 147.51727294921875, "learning_rate": 1.3292999754100088e-06, "loss": 14.8906, "step": 12389 }, { "epoch": 0.8228730822873083, "grad_norm": 116.6507339477539, "learning_rate": 1.3291984202208902e-06, "loss": 18.6875, "step": 12390 }, { "epoch": 0.8229394965796639, "grad_norm": 297.3681945800781, "learning_rate": 1.329096861223794e-06, "loss": 23.5, "step": 12391 }, { "epoch": 0.8230059108720197, "grad_norm": 124.34605407714844, "learning_rate": 1.3289952984198954e-06, "loss": 16.5938, "step": 12392 }, { "epoch": 0.8230723251643753, "grad_norm": 208.5237579345703, "learning_rate": 1.3288937318103688e-06, "loss": 17.0781, "step": 12393 }, { "epoch": 0.8231387394567311, "grad_norm": 302.8031005859375, "learning_rate": 1.3287921613963891e-06, "loss": 20.625, "step": 12394 }, { "epoch": 0.8232051537490868, "grad_norm": 348.03875732421875, "learning_rate": 1.328690587179131e-06, "loss": 15.7969, "step": 12395 }, { "epoch": 0.8232715680414425, "grad_norm": 306.60760498046875, "learning_rate": 1.3285890091597704e-06, "loss": 22.9219, "step": 12396 }, { "epoch": 0.8233379823337982, "grad_norm": 177.0059356689453, "learning_rate": 1.328487427339481e-06, "loss": 14.9375, "step": 12397 }, { "epoch": 0.823404396626154, "grad_norm": 214.5164794921875, "learning_rate": 1.328385841719439e-06, "loss": 13.8281, "step": 12398 }, { "epoch": 0.8234708109185097, "grad_norm": 298.28460693359375, "learning_rate": 1.3282842523008184e-06, "loss": 12.9844, "step": 12399 }, { "epoch": 0.8235372252108654, "grad_norm": 276.1708984375, "learning_rate": 1.3281826590847953e-06, "loss": 19.875, "step": 12400 }, { "epoch": 0.8236036395032211, "grad_norm": 164.29945373535156, "learning_rate": 1.328081062072544e-06, "loss": 14.1562, "step": 12401 }, { "epoch": 0.8236700537955768, "grad_norm": 183.9212188720703, "learning_rate": 1.3279794612652407e-06, "loss": 15.0312, "step": 12402 }, { "epoch": 0.8237364680879325, "grad_norm": 172.91116333007812, "learning_rate": 1.3278778566640596e-06, "loss": 15.5625, "step": 12403 }, { "epoch": 0.8238028823802882, "grad_norm": 164.33233642578125, "learning_rate": 1.3277762482701767e-06, "loss": 12.7344, "step": 12404 }, { "epoch": 0.823869296672644, "grad_norm": 212.97097778320312, "learning_rate": 1.327674636084767e-06, "loss": 19.0, "step": 12405 }, { "epoch": 0.8239357109649996, "grad_norm": 273.2392578125, "learning_rate": 1.3275730201090065e-06, "loss": 19.9844, "step": 12406 }, { "epoch": 0.8240021252573554, "grad_norm": 345.4556579589844, "learning_rate": 1.3274714003440699e-06, "loss": 24.0781, "step": 12407 }, { "epoch": 0.824068539549711, "grad_norm": 332.57366943359375, "learning_rate": 1.3273697767911333e-06, "loss": 16.1406, "step": 12408 }, { "epoch": 0.8241349538420668, "grad_norm": 283.3999938964844, "learning_rate": 1.3272681494513713e-06, "loss": 16.2031, "step": 12409 }, { "epoch": 0.8242013681344226, "grad_norm": 251.74624633789062, "learning_rate": 1.3271665183259606e-06, "loss": 13.9531, "step": 12410 }, { "epoch": 0.8242677824267782, "grad_norm": 255.64993286132812, "learning_rate": 1.3270648834160759e-06, "loss": 30.5156, "step": 12411 }, { "epoch": 0.824334196719134, "grad_norm": 187.61073303222656, "learning_rate": 1.3269632447228933e-06, "loss": 15.375, "step": 12412 }, { "epoch": 0.8244006110114896, "grad_norm": 374.3650817871094, "learning_rate": 1.3268616022475888e-06, "loss": 15.7969, "step": 12413 }, { "epoch": 0.8244670253038454, "grad_norm": 145.06729125976562, "learning_rate": 1.3267599559913373e-06, "loss": 15.3594, "step": 12414 }, { "epoch": 0.8245334395962011, "grad_norm": 148.7285919189453, "learning_rate": 1.326658305955315e-06, "loss": 18.2812, "step": 12415 }, { "epoch": 0.8245998538885568, "grad_norm": 140.72222900390625, "learning_rate": 1.3265566521406981e-06, "loss": 14.7656, "step": 12416 }, { "epoch": 0.8246662681809125, "grad_norm": 159.63880920410156, "learning_rate": 1.3264549945486623e-06, "loss": 16.4531, "step": 12417 }, { "epoch": 0.8247326824732683, "grad_norm": 245.9364013671875, "learning_rate": 1.3263533331803828e-06, "loss": 18.3906, "step": 12418 }, { "epoch": 0.8247990967656239, "grad_norm": 3175.8740234375, "learning_rate": 1.3262516680370366e-06, "loss": 20.4219, "step": 12419 }, { "epoch": 0.8248655110579797, "grad_norm": 173.43881225585938, "learning_rate": 1.3261499991197991e-06, "loss": 16.0469, "step": 12420 }, { "epoch": 0.8249319253503354, "grad_norm": 438.1042785644531, "learning_rate": 1.3260483264298466e-06, "loss": 25.2031, "step": 12421 }, { "epoch": 0.8249983396426911, "grad_norm": 169.99009704589844, "learning_rate": 1.3259466499683548e-06, "loss": 13.75, "step": 12422 }, { "epoch": 0.8250647539350469, "grad_norm": 244.6053924560547, "learning_rate": 1.3258449697365003e-06, "loss": 16.8359, "step": 12423 }, { "epoch": 0.8251311682274025, "grad_norm": 233.72708129882812, "learning_rate": 1.325743285735459e-06, "loss": 17.6562, "step": 12424 }, { "epoch": 0.8251975825197583, "grad_norm": 195.9425811767578, "learning_rate": 1.3256415979664073e-06, "loss": 20.7812, "step": 12425 }, { "epoch": 0.8252639968121139, "grad_norm": 197.62135314941406, "learning_rate": 1.3255399064305212e-06, "loss": 16.4062, "step": 12426 }, { "epoch": 0.8253304111044697, "grad_norm": 106.27420043945312, "learning_rate": 1.325438211128977e-06, "loss": 15.25, "step": 12427 }, { "epoch": 0.8253968253968254, "grad_norm": 601.6533203125, "learning_rate": 1.3253365120629518e-06, "loss": 18.4531, "step": 12428 }, { "epoch": 0.8254632396891811, "grad_norm": 319.6971435546875, "learning_rate": 1.325234809233621e-06, "loss": 16.4844, "step": 12429 }, { "epoch": 0.8255296539815369, "grad_norm": 191.63348388671875, "learning_rate": 1.3251331026421618e-06, "loss": 19.1094, "step": 12430 }, { "epoch": 0.8255960682738925, "grad_norm": 146.9741668701172, "learning_rate": 1.32503139228975e-06, "loss": 18.8438, "step": 12431 }, { "epoch": 0.8256624825662483, "grad_norm": 109.48062896728516, "learning_rate": 1.3249296781775628e-06, "loss": 20.2812, "step": 12432 }, { "epoch": 0.825728896858604, "grad_norm": 219.73558044433594, "learning_rate": 1.3248279603067763e-06, "loss": 18.5469, "step": 12433 }, { "epoch": 0.8257953111509597, "grad_norm": 291.8213806152344, "learning_rate": 1.324726238678567e-06, "loss": 18.0938, "step": 12434 }, { "epoch": 0.8258617254433154, "grad_norm": 141.28628540039062, "learning_rate": 1.3246245132941124e-06, "loss": 15.0938, "step": 12435 }, { "epoch": 0.8259281397356711, "grad_norm": 204.4477081298828, "learning_rate": 1.324522784154588e-06, "loss": 19.6406, "step": 12436 }, { "epoch": 0.8259945540280268, "grad_norm": 168.93629455566406, "learning_rate": 1.3244210512611714e-06, "loss": 20.3906, "step": 12437 }, { "epoch": 0.8260609683203826, "grad_norm": 251.18057250976562, "learning_rate": 1.324319314615039e-06, "loss": 15.75, "step": 12438 }, { "epoch": 0.8261273826127382, "grad_norm": 313.8257141113281, "learning_rate": 1.324217574217368e-06, "loss": 24.125, "step": 12439 }, { "epoch": 0.826193796905094, "grad_norm": 205.64198303222656, "learning_rate": 1.324115830069335e-06, "loss": 16.7344, "step": 12440 }, { "epoch": 0.8262602111974497, "grad_norm": 143.1813201904297, "learning_rate": 1.324014082172117e-06, "loss": 14.6406, "step": 12441 }, { "epoch": 0.8263266254898054, "grad_norm": 279.4252624511719, "learning_rate": 1.3239123305268911e-06, "loss": 15.2031, "step": 12442 }, { "epoch": 0.8263930397821612, "grad_norm": 132.16616821289062, "learning_rate": 1.323810575134834e-06, "loss": 13.1562, "step": 12443 }, { "epoch": 0.8264594540745168, "grad_norm": 223.5581817626953, "learning_rate": 1.3237088159971226e-06, "loss": 18.9531, "step": 12444 }, { "epoch": 0.8265258683668726, "grad_norm": 259.01171875, "learning_rate": 1.3236070531149347e-06, "loss": 13.2344, "step": 12445 }, { "epoch": 0.8265922826592282, "grad_norm": 249.16336059570312, "learning_rate": 1.323505286489447e-06, "loss": 25.1875, "step": 12446 }, { "epoch": 0.826658696951584, "grad_norm": 701.5310668945312, "learning_rate": 1.3234035161218365e-06, "loss": 14.4219, "step": 12447 }, { "epoch": 0.8267251112439397, "grad_norm": 140.72341918945312, "learning_rate": 1.3233017420132803e-06, "loss": 15.375, "step": 12448 }, { "epoch": 0.8267915255362954, "grad_norm": 495.3719177246094, "learning_rate": 1.3231999641649567e-06, "loss": 19.3438, "step": 12449 }, { "epoch": 0.8268579398286511, "grad_norm": 365.4999694824219, "learning_rate": 1.3230981825780418e-06, "loss": 22.1562, "step": 12450 }, { "epoch": 0.8269243541210068, "grad_norm": 145.77362060546875, "learning_rate": 1.3229963972537135e-06, "loss": 17.2656, "step": 12451 }, { "epoch": 0.8269907684133626, "grad_norm": 793.880126953125, "learning_rate": 1.3228946081931491e-06, "loss": 23.7188, "step": 12452 }, { "epoch": 0.8270571827057183, "grad_norm": 323.2615661621094, "learning_rate": 1.3227928153975263e-06, "loss": 14.9688, "step": 12453 }, { "epoch": 0.827123596998074, "grad_norm": 351.0602111816406, "learning_rate": 1.322691018868022e-06, "loss": 18.2031, "step": 12454 }, { "epoch": 0.8271900112904297, "grad_norm": 177.40536499023438, "learning_rate": 1.3225892186058144e-06, "loss": 15.4844, "step": 12455 }, { "epoch": 0.8272564255827854, "grad_norm": 545.5108642578125, "learning_rate": 1.3224874146120805e-06, "loss": 11.4297, "step": 12456 }, { "epoch": 0.8273228398751411, "grad_norm": 249.21774291992188, "learning_rate": 1.3223856068879981e-06, "loss": 18.2812, "step": 12457 }, { "epoch": 0.8273892541674969, "grad_norm": 224.88478088378906, "learning_rate": 1.3222837954347452e-06, "loss": 17.5938, "step": 12458 }, { "epoch": 0.8274556684598525, "grad_norm": 123.6905288696289, "learning_rate": 1.3221819802534988e-06, "loss": 17.1562, "step": 12459 }, { "epoch": 0.8275220827522083, "grad_norm": 152.59524536132812, "learning_rate": 1.3220801613454376e-06, "loss": 17.2812, "step": 12460 }, { "epoch": 0.8275884970445639, "grad_norm": 177.8843536376953, "learning_rate": 1.3219783387117383e-06, "loss": 19.8281, "step": 12461 }, { "epoch": 0.8276549113369197, "grad_norm": 438.07855224609375, "learning_rate": 1.3218765123535794e-06, "loss": 23.2812, "step": 12462 }, { "epoch": 0.8277213256292755, "grad_norm": 128.19842529296875, "learning_rate": 1.3217746822721386e-06, "loss": 18.25, "step": 12463 }, { "epoch": 0.8277877399216311, "grad_norm": 142.07107543945312, "learning_rate": 1.321672848468594e-06, "loss": 22.0469, "step": 12464 }, { "epoch": 0.8278541542139869, "grad_norm": 163.47450256347656, "learning_rate": 1.321571010944123e-06, "loss": 16.3906, "step": 12465 }, { "epoch": 0.8279205685063425, "grad_norm": 182.0317840576172, "learning_rate": 1.3214691696999042e-06, "loss": 20.2188, "step": 12466 }, { "epoch": 0.8279869827986983, "grad_norm": 245.2580108642578, "learning_rate": 1.3213673247371156e-06, "loss": 18.625, "step": 12467 }, { "epoch": 0.828053397091054, "grad_norm": 577.9190673828125, "learning_rate": 1.3212654760569349e-06, "loss": 17.3125, "step": 12468 }, { "epoch": 0.8281198113834097, "grad_norm": 749.8203125, "learning_rate": 1.3211636236605405e-06, "loss": 21.8281, "step": 12469 }, { "epoch": 0.8281862256757654, "grad_norm": 136.46177673339844, "learning_rate": 1.3210617675491103e-06, "loss": 15.1094, "step": 12470 }, { "epoch": 0.8282526399681212, "grad_norm": 124.16651153564453, "learning_rate": 1.320959907723823e-06, "loss": 15.4844, "step": 12471 }, { "epoch": 0.8283190542604768, "grad_norm": 332.060302734375, "learning_rate": 1.3208580441858562e-06, "loss": 16.3438, "step": 12472 }, { "epoch": 0.8283854685528326, "grad_norm": 239.16107177734375, "learning_rate": 1.3207561769363885e-06, "loss": 16.6875, "step": 12473 }, { "epoch": 0.8284518828451883, "grad_norm": 271.64306640625, "learning_rate": 1.3206543059765986e-06, "loss": 17.3125, "step": 12474 }, { "epoch": 0.828518297137544, "grad_norm": 372.03570556640625, "learning_rate": 1.3205524313076646e-06, "loss": 13.8594, "step": 12475 }, { "epoch": 0.8285847114298998, "grad_norm": 132.23182678222656, "learning_rate": 1.3204505529307648e-06, "loss": 13.8125, "step": 12476 }, { "epoch": 0.8286511257222554, "grad_norm": 385.0556640625, "learning_rate": 1.3203486708470779e-06, "loss": 18.375, "step": 12477 }, { "epoch": 0.8287175400146112, "grad_norm": 173.70140075683594, "learning_rate": 1.3202467850577823e-06, "loss": 11.3438, "step": 12478 }, { "epoch": 0.8287839543069668, "grad_norm": 313.09698486328125, "learning_rate": 1.3201448955640564e-06, "loss": 14.5469, "step": 12479 }, { "epoch": 0.8288503685993226, "grad_norm": 643.0908203125, "learning_rate": 1.320043002367079e-06, "loss": 25.4375, "step": 12480 }, { "epoch": 0.8289167828916782, "grad_norm": 323.7018737792969, "learning_rate": 1.3199411054680289e-06, "loss": 18.5, "step": 12481 }, { "epoch": 0.828983197184034, "grad_norm": 101.5922622680664, "learning_rate": 1.3198392048680844e-06, "loss": 16.0, "step": 12482 }, { "epoch": 0.8290496114763897, "grad_norm": 270.2245788574219, "learning_rate": 1.3197373005684246e-06, "loss": 18.3594, "step": 12483 }, { "epoch": 0.8291160257687454, "grad_norm": 428.2550964355469, "learning_rate": 1.3196353925702278e-06, "loss": 15.6406, "step": 12484 }, { "epoch": 0.8291824400611012, "grad_norm": 255.1143035888672, "learning_rate": 1.3195334808746733e-06, "loss": 13.0156, "step": 12485 }, { "epoch": 0.8292488543534569, "grad_norm": 209.666748046875, "learning_rate": 1.3194315654829395e-06, "loss": 19.7812, "step": 12486 }, { "epoch": 0.8293152686458126, "grad_norm": 197.6190948486328, "learning_rate": 1.3193296463962056e-06, "loss": 19.7812, "step": 12487 }, { "epoch": 0.8293816829381683, "grad_norm": 261.24359130859375, "learning_rate": 1.3192277236156507e-06, "loss": 18.125, "step": 12488 }, { "epoch": 0.829448097230524, "grad_norm": 159.49842834472656, "learning_rate": 1.3191257971424535e-06, "loss": 15.8594, "step": 12489 }, { "epoch": 0.8295145115228797, "grad_norm": 168.23167419433594, "learning_rate": 1.3190238669777932e-06, "loss": 18.6719, "step": 12490 }, { "epoch": 0.8295809258152355, "grad_norm": 107.23474884033203, "learning_rate": 1.3189219331228482e-06, "loss": 16.5156, "step": 12491 }, { "epoch": 0.8296473401075911, "grad_norm": 257.37860107421875, "learning_rate": 1.3188199955787988e-06, "loss": 19.1719, "step": 12492 }, { "epoch": 0.8297137543999469, "grad_norm": 540.560302734375, "learning_rate": 1.3187180543468234e-06, "loss": 22.0781, "step": 12493 }, { "epoch": 0.8297801686923025, "grad_norm": 396.51446533203125, "learning_rate": 1.318616109428101e-06, "loss": 16.6406, "step": 12494 }, { "epoch": 0.8298465829846583, "grad_norm": 268.4734802246094, "learning_rate": 1.3185141608238114e-06, "loss": 18.6094, "step": 12495 }, { "epoch": 0.8299129972770141, "grad_norm": 299.949462890625, "learning_rate": 1.3184122085351342e-06, "loss": 15.0, "step": 12496 }, { "epoch": 0.8299794115693697, "grad_norm": 268.8620910644531, "learning_rate": 1.3183102525632475e-06, "loss": 19.8125, "step": 12497 }, { "epoch": 0.8300458258617255, "grad_norm": 161.4425811767578, "learning_rate": 1.3182082929093315e-06, "loss": 11.6562, "step": 12498 }, { "epoch": 0.8301122401540811, "grad_norm": 153.58731079101562, "learning_rate": 1.3181063295745656e-06, "loss": 13.5, "step": 12499 }, { "epoch": 0.8301786544464369, "grad_norm": 154.1974334716797, "learning_rate": 1.318004362560129e-06, "loss": 12.7188, "step": 12500 }, { "epoch": 0.8302450687387926, "grad_norm": 299.78076171875, "learning_rate": 1.3179023918672012e-06, "loss": 20.4531, "step": 12501 }, { "epoch": 0.8303114830311483, "grad_norm": 105.2322998046875, "learning_rate": 1.3178004174969619e-06, "loss": 13.5469, "step": 12502 }, { "epoch": 0.830377897323504, "grad_norm": 314.6122131347656, "learning_rate": 1.3176984394505909e-06, "loss": 19.7031, "step": 12503 }, { "epoch": 0.8304443116158597, "grad_norm": 529.106689453125, "learning_rate": 1.317596457729267e-06, "loss": 19.1406, "step": 12504 }, { "epoch": 0.8305107259082154, "grad_norm": 148.14599609375, "learning_rate": 1.3174944723341707e-06, "loss": 15.2031, "step": 12505 }, { "epoch": 0.8305771402005712, "grad_norm": 104.4599380493164, "learning_rate": 1.3173924832664815e-06, "loss": 15.6094, "step": 12506 }, { "epoch": 0.8306435544929269, "grad_norm": 350.4196472167969, "learning_rate": 1.317290490527379e-06, "loss": 14.3906, "step": 12507 }, { "epoch": 0.8307099687852826, "grad_norm": 235.79324340820312, "learning_rate": 1.3171884941180432e-06, "loss": 21.6719, "step": 12508 }, { "epoch": 0.8307763830776383, "grad_norm": 170.08192443847656, "learning_rate": 1.3170864940396536e-06, "loss": 13.25, "step": 12509 }, { "epoch": 0.830842797369994, "grad_norm": 297.0580749511719, "learning_rate": 1.3169844902933905e-06, "loss": 16.75, "step": 12510 }, { "epoch": 0.8309092116623498, "grad_norm": 153.66806030273438, "learning_rate": 1.316882482880433e-06, "loss": 15.5469, "step": 12511 }, { "epoch": 0.8309756259547054, "grad_norm": 232.54708862304688, "learning_rate": 1.3167804718019621e-06, "loss": 14.6406, "step": 12512 }, { "epoch": 0.8310420402470612, "grad_norm": 280.8389587402344, "learning_rate": 1.3166784570591575e-06, "loss": 18.3281, "step": 12513 }, { "epoch": 0.8311084545394168, "grad_norm": 249.5870361328125, "learning_rate": 1.316576438653199e-06, "loss": 20.75, "step": 12514 }, { "epoch": 0.8311748688317726, "grad_norm": 196.13949584960938, "learning_rate": 1.316474416585267e-06, "loss": 19.25, "step": 12515 }, { "epoch": 0.8312412831241283, "grad_norm": 120.28459167480469, "learning_rate": 1.3163723908565411e-06, "loss": 14.2031, "step": 12516 }, { "epoch": 0.831307697416484, "grad_norm": 110.05408477783203, "learning_rate": 1.316270361468202e-06, "loss": 14.1562, "step": 12517 }, { "epoch": 0.8313741117088398, "grad_norm": 624.8565673828125, "learning_rate": 1.3161683284214297e-06, "loss": 23.6875, "step": 12518 }, { "epoch": 0.8314405260011954, "grad_norm": 391.6110534667969, "learning_rate": 1.3160662917174043e-06, "loss": 14.25, "step": 12519 }, { "epoch": 0.8315069402935512, "grad_norm": 171.00352478027344, "learning_rate": 1.3159642513573069e-06, "loss": 12.7344, "step": 12520 }, { "epoch": 0.8315733545859069, "grad_norm": 281.6897888183594, "learning_rate": 1.3158622073423167e-06, "loss": 14.9531, "step": 12521 }, { "epoch": 0.8316397688782626, "grad_norm": 252.3961944580078, "learning_rate": 1.3157601596736146e-06, "loss": 14.7969, "step": 12522 }, { "epoch": 0.8317061831706183, "grad_norm": 349.4245910644531, "learning_rate": 1.3156581083523812e-06, "loss": 16.0, "step": 12523 }, { "epoch": 0.831772597462974, "grad_norm": 614.0189208984375, "learning_rate": 1.315556053379797e-06, "loss": 19.6875, "step": 12524 }, { "epoch": 0.8318390117553297, "grad_norm": 401.8611145019531, "learning_rate": 1.315453994757042e-06, "loss": 17.5312, "step": 12525 }, { "epoch": 0.8319054260476855, "grad_norm": 122.97771453857422, "learning_rate": 1.315351932485297e-06, "loss": 12.0938, "step": 12526 }, { "epoch": 0.8319718403400411, "grad_norm": 159.4851531982422, "learning_rate": 1.3152498665657432e-06, "loss": 17.9375, "step": 12527 }, { "epoch": 0.8320382546323969, "grad_norm": 199.14378356933594, "learning_rate": 1.3151477969995604e-06, "loss": 16.9844, "step": 12528 }, { "epoch": 0.8321046689247527, "grad_norm": 177.67369079589844, "learning_rate": 1.3150457237879296e-06, "loss": 16.6094, "step": 12529 }, { "epoch": 0.8321710832171083, "grad_norm": 995.3067016601562, "learning_rate": 1.3149436469320315e-06, "loss": 12.9062, "step": 12530 }, { "epoch": 0.8322374975094641, "grad_norm": 321.4031066894531, "learning_rate": 1.3148415664330472e-06, "loss": 26.5, "step": 12531 }, { "epoch": 0.8323039118018197, "grad_norm": 250.0628662109375, "learning_rate": 1.3147394822921569e-06, "loss": 12.5, "step": 12532 }, { "epoch": 0.8323703260941755, "grad_norm": 719.08251953125, "learning_rate": 1.3146373945105417e-06, "loss": 17.2344, "step": 12533 }, { "epoch": 0.8324367403865311, "grad_norm": 496.4030456542969, "learning_rate": 1.3145353030893826e-06, "loss": 12.8125, "step": 12534 }, { "epoch": 0.8325031546788869, "grad_norm": 188.09059143066406, "learning_rate": 1.314433208029861e-06, "loss": 12.9219, "step": 12535 }, { "epoch": 0.8325695689712426, "grad_norm": 205.78118896484375, "learning_rate": 1.3143311093331568e-06, "loss": 15.5625, "step": 12536 }, { "epoch": 0.8326359832635983, "grad_norm": 146.70889282226562, "learning_rate": 1.3142290070004518e-06, "loss": 17.3438, "step": 12537 }, { "epoch": 0.832702397555954, "grad_norm": 141.52081298828125, "learning_rate": 1.3141269010329268e-06, "loss": 15.4219, "step": 12538 }, { "epoch": 0.8327688118483098, "grad_norm": 136.38233947753906, "learning_rate": 1.3140247914317628e-06, "loss": 12.7031, "step": 12539 }, { "epoch": 0.8328352261406655, "grad_norm": 378.4273376464844, "learning_rate": 1.3139226781981413e-06, "loss": 30.5938, "step": 12540 }, { "epoch": 0.8329016404330212, "grad_norm": 822.1807861328125, "learning_rate": 1.313820561333243e-06, "loss": 15.9375, "step": 12541 }, { "epoch": 0.8329680547253769, "grad_norm": 207.09820556640625, "learning_rate": 1.3137184408382498e-06, "loss": 17.875, "step": 12542 }, { "epoch": 0.8330344690177326, "grad_norm": 295.3564453125, "learning_rate": 1.3136163167143424e-06, "loss": 17.3984, "step": 12543 }, { "epoch": 0.8331008833100884, "grad_norm": 531.1746826171875, "learning_rate": 1.313514188962702e-06, "loss": 15.4688, "step": 12544 }, { "epoch": 0.833167297602444, "grad_norm": 206.8489227294922, "learning_rate": 1.3134120575845105e-06, "loss": 15.6875, "step": 12545 }, { "epoch": 0.8332337118947998, "grad_norm": 180.14210510253906, "learning_rate": 1.3133099225809492e-06, "loss": 14.8047, "step": 12546 }, { "epoch": 0.8333001261871554, "grad_norm": 294.98419189453125, "learning_rate": 1.3132077839531992e-06, "loss": 19.7188, "step": 12547 }, { "epoch": 0.8333665404795112, "grad_norm": 140.262939453125, "learning_rate": 1.3131056417024419e-06, "loss": 12.1875, "step": 12548 }, { "epoch": 0.8334329547718669, "grad_norm": 183.39349365234375, "learning_rate": 1.3130034958298594e-06, "loss": 15.8906, "step": 12549 }, { "epoch": 0.8334993690642226, "grad_norm": 159.4159698486328, "learning_rate": 1.3129013463366329e-06, "loss": 13.1562, "step": 12550 }, { "epoch": 0.8335657833565784, "grad_norm": 174.31192016601562, "learning_rate": 1.3127991932239442e-06, "loss": 17.1406, "step": 12551 }, { "epoch": 0.833632197648934, "grad_norm": 106.29486083984375, "learning_rate": 1.3126970364929749e-06, "loss": 16.8906, "step": 12552 }, { "epoch": 0.8336986119412898, "grad_norm": 453.65283203125, "learning_rate": 1.3125948761449061e-06, "loss": 18.0469, "step": 12553 }, { "epoch": 0.8337650262336455, "grad_norm": 243.89215087890625, "learning_rate": 1.3124927121809205e-06, "loss": 15.4062, "step": 12554 }, { "epoch": 0.8338314405260012, "grad_norm": 238.18960571289062, "learning_rate": 1.312390544602199e-06, "loss": 16.5547, "step": 12555 }, { "epoch": 0.8338978548183569, "grad_norm": 194.1602325439453, "learning_rate": 1.3122883734099241e-06, "loss": 13.2031, "step": 12556 }, { "epoch": 0.8339642691107126, "grad_norm": 166.6223907470703, "learning_rate": 1.312186198605277e-06, "loss": 13.6562, "step": 12557 }, { "epoch": 0.8340306834030683, "grad_norm": 206.21908569335938, "learning_rate": 1.3120840201894404e-06, "loss": 19.4844, "step": 12558 }, { "epoch": 0.8340970976954241, "grad_norm": 325.5756530761719, "learning_rate": 1.3119818381635958e-06, "loss": 15.3906, "step": 12559 }, { "epoch": 0.8341635119877797, "grad_norm": 205.64959716796875, "learning_rate": 1.311879652528925e-06, "loss": 18.0, "step": 12560 }, { "epoch": 0.8342299262801355, "grad_norm": 136.5126190185547, "learning_rate": 1.3117774632866103e-06, "loss": 21.8438, "step": 12561 }, { "epoch": 0.8342963405724912, "grad_norm": 207.18544006347656, "learning_rate": 1.3116752704378336e-06, "loss": 13.9531, "step": 12562 }, { "epoch": 0.8343627548648469, "grad_norm": 381.0657043457031, "learning_rate": 1.3115730739837774e-06, "loss": 22.8438, "step": 12563 }, { "epoch": 0.8344291691572027, "grad_norm": 219.6517791748047, "learning_rate": 1.3114708739256232e-06, "loss": 14.3359, "step": 12564 }, { "epoch": 0.8344955834495583, "grad_norm": 241.25192260742188, "learning_rate": 1.3113686702645535e-06, "loss": 16.4219, "step": 12565 }, { "epoch": 0.8345619977419141, "grad_norm": 138.61526489257812, "learning_rate": 1.3112664630017507e-06, "loss": 14.4844, "step": 12566 }, { "epoch": 0.8346284120342697, "grad_norm": 185.34982299804688, "learning_rate": 1.3111642521383969e-06, "loss": 19.625, "step": 12567 }, { "epoch": 0.8346948263266255, "grad_norm": 311.4930419921875, "learning_rate": 1.3110620376756747e-06, "loss": 21.7656, "step": 12568 }, { "epoch": 0.8347612406189812, "grad_norm": 291.5190734863281, "learning_rate": 1.3109598196147658e-06, "loss": 26.2344, "step": 12569 }, { "epoch": 0.8348276549113369, "grad_norm": 309.2635192871094, "learning_rate": 1.3108575979568536e-06, "loss": 20.1562, "step": 12570 }, { "epoch": 0.8348940692036926, "grad_norm": 164.82252502441406, "learning_rate": 1.3107553727031194e-06, "loss": 15.1562, "step": 12571 }, { "epoch": 0.8349604834960483, "grad_norm": 313.44671630859375, "learning_rate": 1.3106531438547464e-06, "loss": 22.1562, "step": 12572 }, { "epoch": 0.8350268977884041, "grad_norm": 158.9281463623047, "learning_rate": 1.310550911412917e-06, "loss": 17.7812, "step": 12573 }, { "epoch": 0.8350933120807598, "grad_norm": 303.94073486328125, "learning_rate": 1.3104486753788137e-06, "loss": 17.0312, "step": 12574 }, { "epoch": 0.8351597263731155, "grad_norm": 293.44158935546875, "learning_rate": 1.3103464357536194e-06, "loss": 18.9688, "step": 12575 }, { "epoch": 0.8352261406654712, "grad_norm": 172.402587890625, "learning_rate": 1.3102441925385162e-06, "loss": 18.1953, "step": 12576 }, { "epoch": 0.835292554957827, "grad_norm": 344.8641357421875, "learning_rate": 1.310141945734687e-06, "loss": 20.5625, "step": 12577 }, { "epoch": 0.8353589692501826, "grad_norm": 133.02146911621094, "learning_rate": 1.3100396953433148e-06, "loss": 15.6562, "step": 12578 }, { "epoch": 0.8354253835425384, "grad_norm": 199.83905029296875, "learning_rate": 1.309937441365582e-06, "loss": 14.0625, "step": 12579 }, { "epoch": 0.835491797834894, "grad_norm": 347.7621765136719, "learning_rate": 1.3098351838026717e-06, "loss": 13.7031, "step": 12580 }, { "epoch": 0.8355582121272498, "grad_norm": 308.7303161621094, "learning_rate": 1.309732922655767e-06, "loss": 19.3594, "step": 12581 }, { "epoch": 0.8356246264196056, "grad_norm": 156.32261657714844, "learning_rate": 1.30963065792605e-06, "loss": 15.4375, "step": 12582 }, { "epoch": 0.8356910407119612, "grad_norm": 351.56005859375, "learning_rate": 1.3095283896147042e-06, "loss": 21.7344, "step": 12583 }, { "epoch": 0.835757455004317, "grad_norm": 231.62445068359375, "learning_rate": 1.3094261177229124e-06, "loss": 13.1875, "step": 12584 }, { "epoch": 0.8358238692966726, "grad_norm": 198.0587921142578, "learning_rate": 1.3093238422518578e-06, "loss": 16.4219, "step": 12585 }, { "epoch": 0.8358902835890284, "grad_norm": 226.294677734375, "learning_rate": 1.3092215632027232e-06, "loss": 23.5, "step": 12586 }, { "epoch": 0.835956697881384, "grad_norm": 308.3963623046875, "learning_rate": 1.309119280576692e-06, "loss": 17.75, "step": 12587 }, { "epoch": 0.8360231121737398, "grad_norm": 225.35574340820312, "learning_rate": 1.3090169943749473e-06, "loss": 17.6875, "step": 12588 }, { "epoch": 0.8360895264660955, "grad_norm": 162.98825073242188, "learning_rate": 1.3089147045986721e-06, "loss": 18.2188, "step": 12589 }, { "epoch": 0.8361559407584512, "grad_norm": 121.71546936035156, "learning_rate": 1.3088124112490498e-06, "loss": 15.2344, "step": 12590 }, { "epoch": 0.8362223550508069, "grad_norm": 146.56298828125, "learning_rate": 1.3087101143272636e-06, "loss": 19.0625, "step": 12591 }, { "epoch": 0.8362887693431627, "grad_norm": 162.92044067382812, "learning_rate": 1.308607813834497e-06, "loss": 17.1875, "step": 12592 }, { "epoch": 0.8363551836355184, "grad_norm": 313.31304931640625, "learning_rate": 1.3085055097719327e-06, "loss": 19.2344, "step": 12593 }, { "epoch": 0.8364215979278741, "grad_norm": 90.87071990966797, "learning_rate": 1.3084032021407547e-06, "loss": 11.0, "step": 12594 }, { "epoch": 0.8364880122202298, "grad_norm": 393.6062927246094, "learning_rate": 1.3083008909421464e-06, "loss": 14.5625, "step": 12595 }, { "epoch": 0.8365544265125855, "grad_norm": 168.9282684326172, "learning_rate": 1.3081985761772911e-06, "loss": 14.2344, "step": 12596 }, { "epoch": 0.8366208408049413, "grad_norm": 168.0930633544922, "learning_rate": 1.3080962578473725e-06, "loss": 18.0156, "step": 12597 }, { "epoch": 0.8366872550972969, "grad_norm": 2214.443359375, "learning_rate": 1.307993935953574e-06, "loss": 15.3594, "step": 12598 }, { "epoch": 0.8367536693896527, "grad_norm": 169.49493408203125, "learning_rate": 1.3078916104970792e-06, "loss": 16.5312, "step": 12599 }, { "epoch": 0.8368200836820083, "grad_norm": 256.28228759765625, "learning_rate": 1.3077892814790719e-06, "loss": 18.3594, "step": 12600 }, { "epoch": 0.8368864979743641, "grad_norm": 497.36669921875, "learning_rate": 1.3076869489007355e-06, "loss": 20.1094, "step": 12601 }, { "epoch": 0.8369529122667198, "grad_norm": 232.19825744628906, "learning_rate": 1.3075846127632543e-06, "loss": 16.6406, "step": 12602 }, { "epoch": 0.8370193265590755, "grad_norm": 1101.3739013671875, "learning_rate": 1.3074822730678113e-06, "loss": 15.0781, "step": 12603 }, { "epoch": 0.8370857408514313, "grad_norm": 118.50247192382812, "learning_rate": 1.3073799298155906e-06, "loss": 14.6016, "step": 12604 }, { "epoch": 0.8371521551437869, "grad_norm": 251.0173797607422, "learning_rate": 1.3072775830077762e-06, "loss": 18.25, "step": 12605 }, { "epoch": 0.8372185694361427, "grad_norm": 319.24462890625, "learning_rate": 1.307175232645552e-06, "loss": 21.75, "step": 12606 }, { "epoch": 0.8372849837284984, "grad_norm": 271.3386535644531, "learning_rate": 1.3070728787301018e-06, "loss": 23.3281, "step": 12607 }, { "epoch": 0.8373513980208541, "grad_norm": 196.8643798828125, "learning_rate": 1.3069705212626097e-06, "loss": 17.4531, "step": 12608 }, { "epoch": 0.8374178123132098, "grad_norm": 372.9674072265625, "learning_rate": 1.3068681602442594e-06, "loss": 25.4375, "step": 12609 }, { "epoch": 0.8374842266055655, "grad_norm": 312.5112609863281, "learning_rate": 1.3067657956762351e-06, "loss": 15.3438, "step": 12610 }, { "epoch": 0.8375506408979212, "grad_norm": 177.53497314453125, "learning_rate": 1.3066634275597212e-06, "loss": 14.3125, "step": 12611 }, { "epoch": 0.837617055190277, "grad_norm": 272.23687744140625, "learning_rate": 1.3065610558959014e-06, "loss": 15.6875, "step": 12612 }, { "epoch": 0.8376834694826326, "grad_norm": 154.12384033203125, "learning_rate": 1.3064586806859605e-06, "loss": 16.1875, "step": 12613 }, { "epoch": 0.8377498837749884, "grad_norm": 174.07449340820312, "learning_rate": 1.3063563019310822e-06, "loss": 16.7344, "step": 12614 }, { "epoch": 0.8378162980673441, "grad_norm": 233.10098266601562, "learning_rate": 1.3062539196324504e-06, "loss": 15.1719, "step": 12615 }, { "epoch": 0.8378827123596998, "grad_norm": 137.2353973388672, "learning_rate": 1.30615153379125e-06, "loss": 13.7031, "step": 12616 }, { "epoch": 0.8379491266520556, "grad_norm": 189.98829650878906, "learning_rate": 1.3060491444086657e-06, "loss": 13.0469, "step": 12617 }, { "epoch": 0.8380155409444112, "grad_norm": 511.7220153808594, "learning_rate": 1.305946751485881e-06, "loss": 19.8125, "step": 12618 }, { "epoch": 0.838081955236767, "grad_norm": 154.28909301757812, "learning_rate": 1.3058443550240804e-06, "loss": 16.8438, "step": 12619 }, { "epoch": 0.8381483695291226, "grad_norm": 257.04150390625, "learning_rate": 1.305741955024449e-06, "loss": 13.6719, "step": 12620 }, { "epoch": 0.8382147838214784, "grad_norm": 273.7958679199219, "learning_rate": 1.3056395514881712e-06, "loss": 19.2344, "step": 12621 }, { "epoch": 0.8382811981138341, "grad_norm": 117.69554901123047, "learning_rate": 1.3055371444164307e-06, "loss": 13.7344, "step": 12622 }, { "epoch": 0.8383476124061898, "grad_norm": 305.8514404296875, "learning_rate": 1.3054347338104133e-06, "loss": 22.5781, "step": 12623 }, { "epoch": 0.8384140266985455, "grad_norm": 313.12005615234375, "learning_rate": 1.3053323196713027e-06, "loss": 16.4844, "step": 12624 }, { "epoch": 0.8384804409909012, "grad_norm": 570.9639282226562, "learning_rate": 1.305229902000284e-06, "loss": 27.125, "step": 12625 }, { "epoch": 0.838546855283257, "grad_norm": 124.15129089355469, "learning_rate": 1.3051274807985415e-06, "loss": 15.7812, "step": 12626 }, { "epoch": 0.8386132695756127, "grad_norm": 169.75376892089844, "learning_rate": 1.3050250560672606e-06, "loss": 14.1406, "step": 12627 }, { "epoch": 0.8386796838679684, "grad_norm": 234.70184326171875, "learning_rate": 1.3049226278076258e-06, "loss": 14.9219, "step": 12628 }, { "epoch": 0.8387460981603241, "grad_norm": 229.638427734375, "learning_rate": 1.3048201960208215e-06, "loss": 18.1406, "step": 12629 }, { "epoch": 0.8388125124526798, "grad_norm": 177.72238159179688, "learning_rate": 1.3047177607080332e-06, "loss": 19.0625, "step": 12630 }, { "epoch": 0.8388789267450355, "grad_norm": 294.5747985839844, "learning_rate": 1.3046153218704457e-06, "loss": 20.125, "step": 12631 }, { "epoch": 0.8389453410373913, "grad_norm": 221.888427734375, "learning_rate": 1.3045128795092434e-06, "loss": 10.5938, "step": 12632 }, { "epoch": 0.8390117553297469, "grad_norm": 181.93821716308594, "learning_rate": 1.304410433625612e-06, "loss": 15.9688, "step": 12633 }, { "epoch": 0.8390781696221027, "grad_norm": 232.6693878173828, "learning_rate": 1.304307984220736e-06, "loss": 21.1719, "step": 12634 }, { "epoch": 0.8391445839144583, "grad_norm": 248.39793395996094, "learning_rate": 1.304205531295801e-06, "loss": 21.3281, "step": 12635 }, { "epoch": 0.8392109982068141, "grad_norm": 370.92333984375, "learning_rate": 1.3041030748519918e-06, "loss": 20.4688, "step": 12636 }, { "epoch": 0.8392774124991699, "grad_norm": 433.6223449707031, "learning_rate": 1.3040006148904932e-06, "loss": 18.0781, "step": 12637 }, { "epoch": 0.8393438267915255, "grad_norm": 127.96577453613281, "learning_rate": 1.3038981514124912e-06, "loss": 12.2188, "step": 12638 }, { "epoch": 0.8394102410838813, "grad_norm": 265.16656494140625, "learning_rate": 1.3037956844191705e-06, "loss": 15.2656, "step": 12639 }, { "epoch": 0.839476655376237, "grad_norm": 1625.436279296875, "learning_rate": 1.3036932139117166e-06, "loss": 16.3281, "step": 12640 }, { "epoch": 0.8395430696685927, "grad_norm": 234.9710693359375, "learning_rate": 1.3035907398913144e-06, "loss": 20.0, "step": 12641 }, { "epoch": 0.8396094839609484, "grad_norm": 256.5756530761719, "learning_rate": 1.3034882623591499e-06, "loss": 16.9688, "step": 12642 }, { "epoch": 0.8396758982533041, "grad_norm": 218.13400268554688, "learning_rate": 1.303385781316408e-06, "loss": 15.0625, "step": 12643 }, { "epoch": 0.8397423125456598, "grad_norm": 1158.8841552734375, "learning_rate": 1.3032832967642745e-06, "loss": 19.6719, "step": 12644 }, { "epoch": 0.8398087268380156, "grad_norm": 99.36349487304688, "learning_rate": 1.3031808087039347e-06, "loss": 15.4688, "step": 12645 }, { "epoch": 0.8398751411303712, "grad_norm": 442.7270812988281, "learning_rate": 1.303078317136574e-06, "loss": 24.1094, "step": 12646 }, { "epoch": 0.839941555422727, "grad_norm": 262.8492431640625, "learning_rate": 1.302975822063378e-06, "loss": 25.4375, "step": 12647 }, { "epoch": 0.8400079697150827, "grad_norm": 93.49967956542969, "learning_rate": 1.3028733234855326e-06, "loss": 11.2969, "step": 12648 }, { "epoch": 0.8400743840074384, "grad_norm": 360.34918212890625, "learning_rate": 1.3027708214042232e-06, "loss": 15.125, "step": 12649 }, { "epoch": 0.8401407982997942, "grad_norm": 289.7733459472656, "learning_rate": 1.3026683158206356e-06, "loss": 19.7812, "step": 12650 }, { "epoch": 0.8402072125921498, "grad_norm": 777.894775390625, "learning_rate": 1.302565806735955e-06, "loss": 26.3125, "step": 12651 }, { "epoch": 0.8402736268845056, "grad_norm": 173.0105743408203, "learning_rate": 1.302463294151368e-06, "loss": 14.2812, "step": 12652 }, { "epoch": 0.8403400411768612, "grad_norm": 331.73089599609375, "learning_rate": 1.3023607780680602e-06, "loss": 15.4375, "step": 12653 }, { "epoch": 0.840406455469217, "grad_norm": 555.8416748046875, "learning_rate": 1.3022582584872167e-06, "loss": 23.5469, "step": 12654 }, { "epoch": 0.8404728697615726, "grad_norm": 605.2822265625, "learning_rate": 1.3021557354100243e-06, "loss": 16.0938, "step": 12655 }, { "epoch": 0.8405392840539284, "grad_norm": 182.6859130859375, "learning_rate": 1.3020532088376689e-06, "loss": 12.0312, "step": 12656 }, { "epoch": 0.8406056983462841, "grad_norm": 304.8769226074219, "learning_rate": 1.3019506787713357e-06, "loss": 20.1719, "step": 12657 }, { "epoch": 0.8406721126386398, "grad_norm": 273.0811462402344, "learning_rate": 1.3018481452122112e-06, "loss": 16.9688, "step": 12658 }, { "epoch": 0.8407385269309956, "grad_norm": 137.43707275390625, "learning_rate": 1.3017456081614816e-06, "loss": 18.6875, "step": 12659 }, { "epoch": 0.8408049412233513, "grad_norm": 193.51885986328125, "learning_rate": 1.3016430676203326e-06, "loss": 13.1719, "step": 12660 }, { "epoch": 0.840871355515707, "grad_norm": 160.8354949951172, "learning_rate": 1.3015405235899505e-06, "loss": 14.9844, "step": 12661 }, { "epoch": 0.8409377698080627, "grad_norm": 152.61366271972656, "learning_rate": 1.3014379760715218e-06, "loss": 15.6875, "step": 12662 }, { "epoch": 0.8410041841004184, "grad_norm": 421.6048583984375, "learning_rate": 1.3013354250662323e-06, "loss": 14.8438, "step": 12663 }, { "epoch": 0.8410705983927741, "grad_norm": 189.5106658935547, "learning_rate": 1.3012328705752685e-06, "loss": 13.6562, "step": 12664 }, { "epoch": 0.8411370126851299, "grad_norm": 161.4349822998047, "learning_rate": 1.3011303125998162e-06, "loss": 18.3125, "step": 12665 }, { "epoch": 0.8412034269774855, "grad_norm": 193.13011169433594, "learning_rate": 1.3010277511410624e-06, "loss": 19.625, "step": 12666 }, { "epoch": 0.8412698412698413, "grad_norm": 318.7922058105469, "learning_rate": 1.3009251862001928e-06, "loss": 21.5312, "step": 12667 }, { "epoch": 0.8413362555621969, "grad_norm": 346.8516540527344, "learning_rate": 1.3008226177783946e-06, "loss": 18.0, "step": 12668 }, { "epoch": 0.8414026698545527, "grad_norm": 312.3756103515625, "learning_rate": 1.3007200458768535e-06, "loss": 13.9844, "step": 12669 }, { "epoch": 0.8414690841469085, "grad_norm": 252.86007690429688, "learning_rate": 1.3006174704967566e-06, "loss": 15.25, "step": 12670 }, { "epoch": 0.8415354984392641, "grad_norm": 361.0843505859375, "learning_rate": 1.3005148916392899e-06, "loss": 24.3125, "step": 12671 }, { "epoch": 0.8416019127316199, "grad_norm": 116.79106140136719, "learning_rate": 1.3004123093056403e-06, "loss": 14.2656, "step": 12672 }, { "epoch": 0.8416683270239755, "grad_norm": 193.02027893066406, "learning_rate": 1.3003097234969941e-06, "loss": 13.7031, "step": 12673 }, { "epoch": 0.8417347413163313, "grad_norm": 210.7046356201172, "learning_rate": 1.3002071342145387e-06, "loss": 18.4375, "step": 12674 }, { "epoch": 0.841801155608687, "grad_norm": 139.993408203125, "learning_rate": 1.30010454145946e-06, "loss": 14.5312, "step": 12675 }, { "epoch": 0.8418675699010427, "grad_norm": 469.10626220703125, "learning_rate": 1.3000019452329449e-06, "loss": 21.7031, "step": 12676 }, { "epoch": 0.8419339841933984, "grad_norm": 224.87210083007812, "learning_rate": 1.2998993455361804e-06, "loss": 20.4219, "step": 12677 }, { "epoch": 0.8420003984857541, "grad_norm": 276.5128173828125, "learning_rate": 1.2997967423703534e-06, "loss": 19.5156, "step": 12678 }, { "epoch": 0.8420668127781098, "grad_norm": 149.0807647705078, "learning_rate": 1.2996941357366502e-06, "loss": 16.5938, "step": 12679 }, { "epoch": 0.8421332270704656, "grad_norm": 311.6507263183594, "learning_rate": 1.2995915256362582e-06, "loss": 13.6406, "step": 12680 }, { "epoch": 0.8421996413628213, "grad_norm": 282.2438659667969, "learning_rate": 1.2994889120703643e-06, "loss": 20.6875, "step": 12681 }, { "epoch": 0.842266055655177, "grad_norm": 298.082763671875, "learning_rate": 1.299386295040155e-06, "loss": 18.3594, "step": 12682 }, { "epoch": 0.8423324699475327, "grad_norm": 214.11672973632812, "learning_rate": 1.299283674546818e-06, "loss": 11.9688, "step": 12683 }, { "epoch": 0.8423988842398884, "grad_norm": 298.51446533203125, "learning_rate": 1.29918105059154e-06, "loss": 18.8281, "step": 12684 }, { "epoch": 0.8424652985322442, "grad_norm": 273.9247741699219, "learning_rate": 1.2990784231755084e-06, "loss": 17.4531, "step": 12685 }, { "epoch": 0.8425317128245998, "grad_norm": 127.26083374023438, "learning_rate": 1.2989757922999096e-06, "loss": 16.6406, "step": 12686 }, { "epoch": 0.8425981271169556, "grad_norm": 251.35813903808594, "learning_rate": 1.2988731579659313e-06, "loss": 16.1094, "step": 12687 }, { "epoch": 0.8426645414093112, "grad_norm": 545.8447875976562, "learning_rate": 1.2987705201747606e-06, "loss": 20.7031, "step": 12688 }, { "epoch": 0.842730955701667, "grad_norm": 153.33375549316406, "learning_rate": 1.298667878927585e-06, "loss": 20.125, "step": 12689 }, { "epoch": 0.8427973699940227, "grad_norm": 193.25340270996094, "learning_rate": 1.2985652342255918e-06, "loss": 24.2812, "step": 12690 }, { "epoch": 0.8428637842863784, "grad_norm": 157.36669921875, "learning_rate": 1.2984625860699676e-06, "loss": 19.5625, "step": 12691 }, { "epoch": 0.8429301985787342, "grad_norm": 292.431396484375, "learning_rate": 1.2983599344619007e-06, "loss": 22.3281, "step": 12692 }, { "epoch": 0.8429966128710898, "grad_norm": 571.2057495117188, "learning_rate": 1.2982572794025779e-06, "loss": 17.0, "step": 12693 }, { "epoch": 0.8430630271634456, "grad_norm": 139.96151733398438, "learning_rate": 1.298154620893187e-06, "loss": 11.5625, "step": 12694 }, { "epoch": 0.8431294414558013, "grad_norm": 205.38357543945312, "learning_rate": 1.2980519589349153e-06, "loss": 14.5625, "step": 12695 }, { "epoch": 0.843195855748157, "grad_norm": 309.39801025390625, "learning_rate": 1.2979492935289503e-06, "loss": 14.5781, "step": 12696 }, { "epoch": 0.8432622700405127, "grad_norm": 137.6136474609375, "learning_rate": 1.29784662467648e-06, "loss": 14.6406, "step": 12697 }, { "epoch": 0.8433286843328685, "grad_norm": 203.2205810546875, "learning_rate": 1.2977439523786916e-06, "loss": 20.625, "step": 12698 }, { "epoch": 0.8433950986252241, "grad_norm": 147.43299865722656, "learning_rate": 1.2976412766367726e-06, "loss": 13.9531, "step": 12699 }, { "epoch": 0.8434615129175799, "grad_norm": 291.5956115722656, "learning_rate": 1.2975385974519111e-06, "loss": 14.5469, "step": 12700 }, { "epoch": 0.8435279272099355, "grad_norm": 182.71524047851562, "learning_rate": 1.2974359148252946e-06, "loss": 19.625, "step": 12701 }, { "epoch": 0.8435943415022913, "grad_norm": 186.4183349609375, "learning_rate": 1.297333228758111e-06, "loss": 20.0625, "step": 12702 }, { "epoch": 0.843660755794647, "grad_norm": 223.7973175048828, "learning_rate": 1.2972305392515484e-06, "loss": 15.7969, "step": 12703 }, { "epoch": 0.8437271700870027, "grad_norm": 443.37921142578125, "learning_rate": 1.297127846306794e-06, "loss": 16.2188, "step": 12704 }, { "epoch": 0.8437935843793585, "grad_norm": 1772.1812744140625, "learning_rate": 1.2970251499250357e-06, "loss": 21.0312, "step": 12705 }, { "epoch": 0.8438599986717141, "grad_norm": 374.7422180175781, "learning_rate": 1.2969224501074622e-06, "loss": 18.9219, "step": 12706 }, { "epoch": 0.8439264129640699, "grad_norm": 210.8341064453125, "learning_rate": 1.2968197468552609e-06, "loss": 16.3594, "step": 12707 }, { "epoch": 0.8439928272564255, "grad_norm": 211.0576629638672, "learning_rate": 1.2967170401696198e-06, "loss": 24.7812, "step": 12708 }, { "epoch": 0.8440592415487813, "grad_norm": 234.33438110351562, "learning_rate": 1.2966143300517273e-06, "loss": 20.5938, "step": 12709 }, { "epoch": 0.844125655841137, "grad_norm": 160.11083984375, "learning_rate": 1.2965116165027712e-06, "loss": 11.5781, "step": 12710 }, { "epoch": 0.8441920701334927, "grad_norm": 339.0686950683594, "learning_rate": 1.2964088995239398e-06, "loss": 20.0, "step": 12711 }, { "epoch": 0.8442584844258484, "grad_norm": 266.9835510253906, "learning_rate": 1.2963061791164207e-06, "loss": 19.6406, "step": 12712 }, { "epoch": 0.8443248987182042, "grad_norm": 239.46060180664062, "learning_rate": 1.2962034552814029e-06, "loss": 14.0, "step": 12713 }, { "epoch": 0.8443913130105599, "grad_norm": 116.36650848388672, "learning_rate": 1.2961007280200743e-06, "loss": 15.2344, "step": 12714 }, { "epoch": 0.8444577273029156, "grad_norm": 309.0690612792969, "learning_rate": 1.2959979973336234e-06, "loss": 14.4688, "step": 12715 }, { "epoch": 0.8445241415952713, "grad_norm": 141.05409240722656, "learning_rate": 1.295895263223238e-06, "loss": 15.3906, "step": 12716 }, { "epoch": 0.844590555887627, "grad_norm": 252.674072265625, "learning_rate": 1.2957925256901072e-06, "loss": 17.3281, "step": 12717 }, { "epoch": 0.8446569701799828, "grad_norm": 357.86810302734375, "learning_rate": 1.295689784735419e-06, "loss": 19.6094, "step": 12718 }, { "epoch": 0.8447233844723384, "grad_norm": 136.72357177734375, "learning_rate": 1.2955870403603614e-06, "loss": 13.6406, "step": 12719 }, { "epoch": 0.8447897987646942, "grad_norm": 130.31324768066406, "learning_rate": 1.2954842925661237e-06, "loss": 15.375, "step": 12720 }, { "epoch": 0.8448562130570498, "grad_norm": 253.7264862060547, "learning_rate": 1.2953815413538942e-06, "loss": 17.5469, "step": 12721 }, { "epoch": 0.8449226273494056, "grad_norm": 210.7888946533203, "learning_rate": 1.2952787867248612e-06, "loss": 16.4219, "step": 12722 }, { "epoch": 0.8449890416417613, "grad_norm": 178.02178955078125, "learning_rate": 1.2951760286802134e-06, "loss": 15.2344, "step": 12723 }, { "epoch": 0.845055455934117, "grad_norm": 187.67083740234375, "learning_rate": 1.2950732672211398e-06, "loss": 12.625, "step": 12724 }, { "epoch": 0.8451218702264728, "grad_norm": 253.55355834960938, "learning_rate": 1.2949705023488285e-06, "loss": 17.3594, "step": 12725 }, { "epoch": 0.8451882845188284, "grad_norm": 249.67739868164062, "learning_rate": 1.2948677340644685e-06, "loss": 19.4531, "step": 12726 }, { "epoch": 0.8452546988111842, "grad_norm": 166.35308837890625, "learning_rate": 1.294764962369249e-06, "loss": 16.0, "step": 12727 }, { "epoch": 0.8453211131035399, "grad_norm": 172.40493774414062, "learning_rate": 1.294662187264358e-06, "loss": 20.9375, "step": 12728 }, { "epoch": 0.8453875273958956, "grad_norm": 268.2577819824219, "learning_rate": 1.2945594087509845e-06, "loss": 28.5312, "step": 12729 }, { "epoch": 0.8454539416882513, "grad_norm": 153.65512084960938, "learning_rate": 1.294456626830318e-06, "loss": 14.7656, "step": 12730 }, { "epoch": 0.845520355980607, "grad_norm": 394.4131164550781, "learning_rate": 1.2943538415035471e-06, "loss": 22.9688, "step": 12731 }, { "epoch": 0.8455867702729627, "grad_norm": 166.8795166015625, "learning_rate": 1.2942510527718602e-06, "loss": 15.2344, "step": 12732 }, { "epoch": 0.8456531845653185, "grad_norm": 113.3866958618164, "learning_rate": 1.2941482606364472e-06, "loss": 16.2969, "step": 12733 }, { "epoch": 0.8457195988576742, "grad_norm": 175.37940979003906, "learning_rate": 1.2940454650984967e-06, "loss": 13.4375, "step": 12734 }, { "epoch": 0.8457860131500299, "grad_norm": 164.34521484375, "learning_rate": 1.293942666159198e-06, "loss": 18.1875, "step": 12735 }, { "epoch": 0.8458524274423856, "grad_norm": 463.7251281738281, "learning_rate": 1.2938398638197397e-06, "loss": 14.6406, "step": 12736 }, { "epoch": 0.8459188417347413, "grad_norm": 279.45556640625, "learning_rate": 1.293737058081311e-06, "loss": 17.7031, "step": 12737 }, { "epoch": 0.8459852560270971, "grad_norm": 214.07974243164062, "learning_rate": 1.2936342489451021e-06, "loss": 17.3438, "step": 12738 }, { "epoch": 0.8460516703194527, "grad_norm": 171.9545440673828, "learning_rate": 1.2935314364123014e-06, "loss": 13.5938, "step": 12739 }, { "epoch": 0.8461180846118085, "grad_norm": 236.91017150878906, "learning_rate": 1.2934286204840978e-06, "loss": 12.6719, "step": 12740 }, { "epoch": 0.8461844989041641, "grad_norm": 392.14349365234375, "learning_rate": 1.2933258011616817e-06, "loss": 16.9219, "step": 12741 }, { "epoch": 0.8462509131965199, "grad_norm": 191.14064025878906, "learning_rate": 1.2932229784462416e-06, "loss": 18.1406, "step": 12742 }, { "epoch": 0.8463173274888756, "grad_norm": 227.84439086914062, "learning_rate": 1.2931201523389671e-06, "loss": 18.1562, "step": 12743 }, { "epoch": 0.8463837417812313, "grad_norm": 237.47015380859375, "learning_rate": 1.293017322841048e-06, "loss": 11.0, "step": 12744 }, { "epoch": 0.8464501560735871, "grad_norm": 263.90106201171875, "learning_rate": 1.2929144899536732e-06, "loss": 21.0781, "step": 12745 }, { "epoch": 0.8465165703659427, "grad_norm": 169.09854125976562, "learning_rate": 1.2928116536780325e-06, "loss": 17.1719, "step": 12746 }, { "epoch": 0.8465829846582985, "grad_norm": 182.99195861816406, "learning_rate": 1.2927088140153155e-06, "loss": 22.6719, "step": 12747 }, { "epoch": 0.8466493989506542, "grad_norm": 183.33363342285156, "learning_rate": 1.2926059709667117e-06, "loss": 15.7656, "step": 12748 }, { "epoch": 0.8467158132430099, "grad_norm": 208.93995666503906, "learning_rate": 1.292503124533411e-06, "loss": 19.9531, "step": 12749 }, { "epoch": 0.8467822275353656, "grad_norm": 313.8275146484375, "learning_rate": 1.2924002747166024e-06, "loss": 17.5, "step": 12750 }, { "epoch": 0.8468486418277213, "grad_norm": 221.9882354736328, "learning_rate": 1.2922974215174763e-06, "loss": 15.8125, "step": 12751 }, { "epoch": 0.846915056120077, "grad_norm": 245.2118682861328, "learning_rate": 1.2921945649372221e-06, "loss": 22.7812, "step": 12752 }, { "epoch": 0.8469814704124328, "grad_norm": 122.64601135253906, "learning_rate": 1.2920917049770297e-06, "loss": 12.9531, "step": 12753 }, { "epoch": 0.8470478847047884, "grad_norm": 271.097900390625, "learning_rate": 1.291988841638089e-06, "loss": 24.2344, "step": 12754 }, { "epoch": 0.8471142989971442, "grad_norm": 197.0985870361328, "learning_rate": 1.2918859749215894e-06, "loss": 14.9219, "step": 12755 }, { "epoch": 0.8471807132895, "grad_norm": 169.0715789794922, "learning_rate": 1.2917831048287213e-06, "loss": 11.8438, "step": 12756 }, { "epoch": 0.8472471275818556, "grad_norm": 321.69635009765625, "learning_rate": 1.2916802313606745e-06, "loss": 17.0, "step": 12757 }, { "epoch": 0.8473135418742114, "grad_norm": 265.4857177734375, "learning_rate": 1.291577354518639e-06, "loss": 22.4688, "step": 12758 }, { "epoch": 0.847379956166567, "grad_norm": 268.9989318847656, "learning_rate": 1.291474474303805e-06, "loss": 18.2188, "step": 12759 }, { "epoch": 0.8474463704589228, "grad_norm": 190.4879150390625, "learning_rate": 1.291371590717362e-06, "loss": 25.2188, "step": 12760 }, { "epoch": 0.8475127847512784, "grad_norm": 201.41590881347656, "learning_rate": 1.2912687037605008e-06, "loss": 19.4531, "step": 12761 }, { "epoch": 0.8475791990436342, "grad_norm": 536.8358154296875, "learning_rate": 1.2911658134344108e-06, "loss": 14.3984, "step": 12762 }, { "epoch": 0.8476456133359899, "grad_norm": 118.37727355957031, "learning_rate": 1.2910629197402832e-06, "loss": 15.3281, "step": 12763 }, { "epoch": 0.8477120276283456, "grad_norm": 149.73959350585938, "learning_rate": 1.290960022679307e-06, "loss": 15.5781, "step": 12764 }, { "epoch": 0.8477784419207013, "grad_norm": 231.7434539794922, "learning_rate": 1.2908571222526732e-06, "loss": 21.2812, "step": 12765 }, { "epoch": 0.847844856213057, "grad_norm": 266.07379150390625, "learning_rate": 1.290754218461572e-06, "loss": 21.5, "step": 12766 }, { "epoch": 0.8479112705054128, "grad_norm": 182.2005615234375, "learning_rate": 1.2906513113071935e-06, "loss": 16.2031, "step": 12767 }, { "epoch": 0.8479776847977685, "grad_norm": 293.9453125, "learning_rate": 1.2905484007907283e-06, "loss": 15.8281, "step": 12768 }, { "epoch": 0.8480440990901242, "grad_norm": 144.5561065673828, "learning_rate": 1.2904454869133664e-06, "loss": 15.7969, "step": 12769 }, { "epoch": 0.8481105133824799, "grad_norm": 211.230224609375, "learning_rate": 1.2903425696762994e-06, "loss": 13.2812, "step": 12770 }, { "epoch": 0.8481769276748357, "grad_norm": 242.3510284423828, "learning_rate": 1.2902396490807164e-06, "loss": 12.4219, "step": 12771 }, { "epoch": 0.8482433419671913, "grad_norm": 204.1693115234375, "learning_rate": 1.2901367251278084e-06, "loss": 13.7969, "step": 12772 }, { "epoch": 0.8483097562595471, "grad_norm": 424.77191162109375, "learning_rate": 1.2900337978187662e-06, "loss": 13.2344, "step": 12773 }, { "epoch": 0.8483761705519027, "grad_norm": 348.5525207519531, "learning_rate": 1.2899308671547806e-06, "loss": 14.8594, "step": 12774 }, { "epoch": 0.8484425848442585, "grad_norm": 230.9778594970703, "learning_rate": 1.2898279331370416e-06, "loss": 17.6406, "step": 12775 }, { "epoch": 0.8485089991366142, "grad_norm": 1174.6685791015625, "learning_rate": 1.2897249957667402e-06, "loss": 22.2656, "step": 12776 }, { "epoch": 0.8485754134289699, "grad_norm": 409.49090576171875, "learning_rate": 1.2896220550450674e-06, "loss": 21.75, "step": 12777 }, { "epoch": 0.8486418277213257, "grad_norm": 329.2752685546875, "learning_rate": 1.2895191109732133e-06, "loss": 14.125, "step": 12778 }, { "epoch": 0.8487082420136813, "grad_norm": 368.7546691894531, "learning_rate": 1.289416163552369e-06, "loss": 15.0781, "step": 12779 }, { "epoch": 0.8487746563060371, "grad_norm": 323.7210388183594, "learning_rate": 1.2893132127837257e-06, "loss": 20.7969, "step": 12780 }, { "epoch": 0.8488410705983928, "grad_norm": 155.73216247558594, "learning_rate": 1.2892102586684742e-06, "loss": 20.5938, "step": 12781 }, { "epoch": 0.8489074848907485, "grad_norm": 246.09913635253906, "learning_rate": 1.289107301207805e-06, "loss": 13.5781, "step": 12782 }, { "epoch": 0.8489738991831042, "grad_norm": 320.2209167480469, "learning_rate": 1.2890043404029087e-06, "loss": 15.2344, "step": 12783 }, { "epoch": 0.8490403134754599, "grad_norm": 153.6020050048828, "learning_rate": 1.2889013762549776e-06, "loss": 17.2656, "step": 12784 }, { "epoch": 0.8491067277678156, "grad_norm": 224.753662109375, "learning_rate": 1.2887984087652015e-06, "loss": 23.9688, "step": 12785 }, { "epoch": 0.8491731420601714, "grad_norm": 235.60418701171875, "learning_rate": 1.2886954379347721e-06, "loss": 18.5, "step": 12786 }, { "epoch": 0.849239556352527, "grad_norm": 533.8629150390625, "learning_rate": 1.2885924637648804e-06, "loss": 19.4531, "step": 12787 }, { "epoch": 0.8493059706448828, "grad_norm": 155.0106964111328, "learning_rate": 1.2884894862567175e-06, "loss": 16.3594, "step": 12788 }, { "epoch": 0.8493723849372385, "grad_norm": 162.62631225585938, "learning_rate": 1.2883865054114745e-06, "loss": 13.0938, "step": 12789 }, { "epoch": 0.8494387992295942, "grad_norm": 176.57089233398438, "learning_rate": 1.2882835212303426e-06, "loss": 14.8047, "step": 12790 }, { "epoch": 0.84950521352195, "grad_norm": 259.44903564453125, "learning_rate": 1.2881805337145136e-06, "loss": 16.0, "step": 12791 }, { "epoch": 0.8495716278143056, "grad_norm": 175.44215393066406, "learning_rate": 1.2880775428651777e-06, "loss": 16.0156, "step": 12792 }, { "epoch": 0.8496380421066614, "grad_norm": 197.59616088867188, "learning_rate": 1.2879745486835274e-06, "loss": 15.1406, "step": 12793 }, { "epoch": 0.849704456399017, "grad_norm": 273.7281494140625, "learning_rate": 1.2878715511707534e-06, "loss": 16.2188, "step": 12794 }, { "epoch": 0.8497708706913728, "grad_norm": 330.03076171875, "learning_rate": 1.2877685503280474e-06, "loss": 20.375, "step": 12795 }, { "epoch": 0.8498372849837285, "grad_norm": 158.494873046875, "learning_rate": 1.2876655461566004e-06, "loss": 16.75, "step": 12796 }, { "epoch": 0.8499036992760842, "grad_norm": 126.50872802734375, "learning_rate": 1.2875625386576046e-06, "loss": 16.125, "step": 12797 }, { "epoch": 0.8499701135684399, "grad_norm": 183.708740234375, "learning_rate": 1.287459527832251e-06, "loss": 15.0312, "step": 12798 }, { "epoch": 0.8500365278607956, "grad_norm": 317.7119445800781, "learning_rate": 1.2873565136817316e-06, "loss": 15.3281, "step": 12799 }, { "epoch": 0.8501029421531514, "grad_norm": 761.3805541992188, "learning_rate": 1.2872534962072374e-06, "loss": 15.5312, "step": 12800 }, { "epoch": 0.8501693564455071, "grad_norm": 280.7548828125, "learning_rate": 1.2871504754099605e-06, "loss": 18.0469, "step": 12801 }, { "epoch": 0.8502357707378628, "grad_norm": 224.65328979492188, "learning_rate": 1.2870474512910928e-06, "loss": 21.0469, "step": 12802 }, { "epoch": 0.8503021850302185, "grad_norm": 817.0177612304688, "learning_rate": 1.2869444238518253e-06, "loss": 13.375, "step": 12803 }, { "epoch": 0.8503685993225742, "grad_norm": 365.3186950683594, "learning_rate": 1.2868413930933503e-06, "loss": 25.8438, "step": 12804 }, { "epoch": 0.8504350136149299, "grad_norm": 189.40167236328125, "learning_rate": 1.2867383590168592e-06, "loss": 18.2188, "step": 12805 }, { "epoch": 0.8505014279072857, "grad_norm": 140.63412475585938, "learning_rate": 1.2866353216235444e-06, "loss": 20.8906, "step": 12806 }, { "epoch": 0.8505678421996413, "grad_norm": 546.657470703125, "learning_rate": 1.2865322809145975e-06, "loss": 24.25, "step": 12807 }, { "epoch": 0.8506342564919971, "grad_norm": 290.8182373046875, "learning_rate": 1.2864292368912102e-06, "loss": 19.0938, "step": 12808 }, { "epoch": 0.8507006707843527, "grad_norm": 206.6729278564453, "learning_rate": 1.286326189554575e-06, "loss": 14.9688, "step": 12809 }, { "epoch": 0.8507670850767085, "grad_norm": 303.2252197265625, "learning_rate": 1.2862231389058832e-06, "loss": 20.8594, "step": 12810 }, { "epoch": 0.8508334993690643, "grad_norm": 186.6210174560547, "learning_rate": 1.286120084946327e-06, "loss": 14.5469, "step": 12811 }, { "epoch": 0.8508999136614199, "grad_norm": 367.00811767578125, "learning_rate": 1.2860170276770994e-06, "loss": 19.3125, "step": 12812 }, { "epoch": 0.8509663279537757, "grad_norm": 304.3200378417969, "learning_rate": 1.2859139670993913e-06, "loss": 19.6875, "step": 12813 }, { "epoch": 0.8510327422461313, "grad_norm": 271.8568115234375, "learning_rate": 1.2858109032143954e-06, "loss": 20.4062, "step": 12814 }, { "epoch": 0.8510991565384871, "grad_norm": 163.43882751464844, "learning_rate": 1.2857078360233034e-06, "loss": 13.6875, "step": 12815 }, { "epoch": 0.8511655708308428, "grad_norm": 122.8276138305664, "learning_rate": 1.2856047655273083e-06, "loss": 21.0, "step": 12816 }, { "epoch": 0.8512319851231985, "grad_norm": 362.03912353515625, "learning_rate": 1.2855016917276018e-06, "loss": 15.3203, "step": 12817 }, { "epoch": 0.8512983994155542, "grad_norm": 241.6290283203125, "learning_rate": 1.2853986146253766e-06, "loss": 19.3906, "step": 12818 }, { "epoch": 0.85136481370791, "grad_norm": 225.27810668945312, "learning_rate": 1.2852955342218243e-06, "loss": 14.2031, "step": 12819 }, { "epoch": 0.8514312280002656, "grad_norm": 317.6091613769531, "learning_rate": 1.2851924505181384e-06, "loss": 17.5156, "step": 12820 }, { "epoch": 0.8514976422926214, "grad_norm": 120.51985168457031, "learning_rate": 1.2850893635155102e-06, "loss": 16.2969, "step": 12821 }, { "epoch": 0.8515640565849771, "grad_norm": 584.0440673828125, "learning_rate": 1.2849862732151329e-06, "loss": 15.4844, "step": 12822 }, { "epoch": 0.8516304708773328, "grad_norm": 199.26138305664062, "learning_rate": 1.2848831796181988e-06, "loss": 14.2969, "step": 12823 }, { "epoch": 0.8516968851696886, "grad_norm": 202.6002197265625, "learning_rate": 1.2847800827259e-06, "loss": 14.0312, "step": 12824 }, { "epoch": 0.8517632994620442, "grad_norm": 245.976806640625, "learning_rate": 1.2846769825394294e-06, "loss": 21.8594, "step": 12825 }, { "epoch": 0.8518297137544, "grad_norm": 243.7339324951172, "learning_rate": 1.2845738790599798e-06, "loss": 15.8281, "step": 12826 }, { "epoch": 0.8518961280467556, "grad_norm": 356.9544372558594, "learning_rate": 1.284470772288744e-06, "loss": 15.1719, "step": 12827 }, { "epoch": 0.8519625423391114, "grad_norm": 1780.545654296875, "learning_rate": 1.284367662226914e-06, "loss": 13.8125, "step": 12828 }, { "epoch": 0.852028956631467, "grad_norm": 177.9933319091797, "learning_rate": 1.2842645488756826e-06, "loss": 15.0469, "step": 12829 }, { "epoch": 0.8520953709238228, "grad_norm": 117.82281494140625, "learning_rate": 1.284161432236243e-06, "loss": 17.0, "step": 12830 }, { "epoch": 0.8521617852161785, "grad_norm": 247.1942138671875, "learning_rate": 1.2840583123097882e-06, "loss": 14.875, "step": 12831 }, { "epoch": 0.8522281995085342, "grad_norm": 187.2289276123047, "learning_rate": 1.2839551890975103e-06, "loss": 17.1406, "step": 12832 }, { "epoch": 0.85229461380089, "grad_norm": 549.4033813476562, "learning_rate": 1.2838520626006023e-06, "loss": 21.5156, "step": 12833 }, { "epoch": 0.8523610280932457, "grad_norm": 324.4795837402344, "learning_rate": 1.283748932820258e-06, "loss": 37.7656, "step": 12834 }, { "epoch": 0.8524274423856014, "grad_norm": 186.0381317138672, "learning_rate": 1.2836457997576692e-06, "loss": 16.9375, "step": 12835 }, { "epoch": 0.8524938566779571, "grad_norm": 533.7081298828125, "learning_rate": 1.2835426634140292e-06, "loss": 20.1562, "step": 12836 }, { "epoch": 0.8525602709703128, "grad_norm": 147.33689880371094, "learning_rate": 1.2834395237905314e-06, "loss": 20.4375, "step": 12837 }, { "epoch": 0.8526266852626685, "grad_norm": 175.83370971679688, "learning_rate": 1.2833363808883687e-06, "loss": 16.9062, "step": 12838 }, { "epoch": 0.8526930995550243, "grad_norm": 208.52871704101562, "learning_rate": 1.2832332347087338e-06, "loss": 13.4922, "step": 12839 }, { "epoch": 0.8527595138473799, "grad_norm": 233.53469848632812, "learning_rate": 1.2831300852528205e-06, "loss": 16.0938, "step": 12840 }, { "epoch": 0.8528259281397357, "grad_norm": 456.0227355957031, "learning_rate": 1.2830269325218213e-06, "loss": 14.5938, "step": 12841 }, { "epoch": 0.8528923424320913, "grad_norm": 193.72377014160156, "learning_rate": 1.28292377651693e-06, "loss": 14.8125, "step": 12842 }, { "epoch": 0.8529587567244471, "grad_norm": 336.7586364746094, "learning_rate": 1.2828206172393395e-06, "loss": 15.3594, "step": 12843 }, { "epoch": 0.8530251710168029, "grad_norm": 152.01197814941406, "learning_rate": 1.2827174546902434e-06, "loss": 14.0391, "step": 12844 }, { "epoch": 0.8530915853091585, "grad_norm": 139.98341369628906, "learning_rate": 1.2826142888708345e-06, "loss": 17.5938, "step": 12845 }, { "epoch": 0.8531579996015143, "grad_norm": 157.54522705078125, "learning_rate": 1.2825111197823067e-06, "loss": 14.9844, "step": 12846 }, { "epoch": 0.8532244138938699, "grad_norm": 687.2432861328125, "learning_rate": 1.282407947425853e-06, "loss": 22.9688, "step": 12847 }, { "epoch": 0.8532908281862257, "grad_norm": 305.2090759277344, "learning_rate": 1.282304771802667e-06, "loss": 16.0938, "step": 12848 }, { "epoch": 0.8533572424785814, "grad_norm": 555.3751831054688, "learning_rate": 1.2822015929139422e-06, "loss": 30.0625, "step": 12849 }, { "epoch": 0.8534236567709371, "grad_norm": 176.7960968017578, "learning_rate": 1.2820984107608723e-06, "loss": 18.5469, "step": 12850 }, { "epoch": 0.8534900710632928, "grad_norm": 109.8116683959961, "learning_rate": 1.2819952253446504e-06, "loss": 13.8125, "step": 12851 }, { "epoch": 0.8535564853556485, "grad_norm": 417.1734924316406, "learning_rate": 1.2818920366664708e-06, "loss": 15.9688, "step": 12852 }, { "epoch": 0.8536228996480042, "grad_norm": 174.0549774169922, "learning_rate": 1.2817888447275263e-06, "loss": 22.0, "step": 12853 }, { "epoch": 0.85368931394036, "grad_norm": 159.59117126464844, "learning_rate": 1.2816856495290107e-06, "loss": 15.5469, "step": 12854 }, { "epoch": 0.8537557282327157, "grad_norm": 262.4289855957031, "learning_rate": 1.2815824510721184e-06, "loss": 18.0469, "step": 12855 }, { "epoch": 0.8538221425250714, "grad_norm": 455.01434326171875, "learning_rate": 1.2814792493580425e-06, "loss": 21.0625, "step": 12856 }, { "epoch": 0.8538885568174271, "grad_norm": 256.89556884765625, "learning_rate": 1.281376044387977e-06, "loss": 18.6406, "step": 12857 }, { "epoch": 0.8539549711097828, "grad_norm": 130.23211669921875, "learning_rate": 1.2812728361631153e-06, "loss": 10.25, "step": 12858 }, { "epoch": 0.8540213854021386, "grad_norm": 137.74143981933594, "learning_rate": 1.2811696246846521e-06, "loss": 12.4062, "step": 12859 }, { "epoch": 0.8540877996944942, "grad_norm": 179.280029296875, "learning_rate": 1.2810664099537806e-06, "loss": 15.4531, "step": 12860 }, { "epoch": 0.85415421398685, "grad_norm": 186.9595947265625, "learning_rate": 1.280963191971695e-06, "loss": 17.9219, "step": 12861 }, { "epoch": 0.8542206282792056, "grad_norm": 177.62646484375, "learning_rate": 1.280859970739589e-06, "loss": 16.375, "step": 12862 }, { "epoch": 0.8542870425715614, "grad_norm": 115.08154296875, "learning_rate": 1.2807567462586573e-06, "loss": 15.7188, "step": 12863 }, { "epoch": 0.8543534568639171, "grad_norm": 168.28712463378906, "learning_rate": 1.2806535185300931e-06, "loss": 17.7031, "step": 12864 }, { "epoch": 0.8544198711562728, "grad_norm": 357.24871826171875, "learning_rate": 1.2805502875550906e-06, "loss": 18.7031, "step": 12865 }, { "epoch": 0.8544862854486286, "grad_norm": 131.1737823486328, "learning_rate": 1.2804470533348448e-06, "loss": 14.125, "step": 12866 }, { "epoch": 0.8545526997409842, "grad_norm": 239.74623107910156, "learning_rate": 1.2803438158705486e-06, "loss": 18.4062, "step": 12867 }, { "epoch": 0.85461911403334, "grad_norm": 195.5177764892578, "learning_rate": 1.280240575163397e-06, "loss": 19.6562, "step": 12868 }, { "epoch": 0.8546855283256957, "grad_norm": 190.3602294921875, "learning_rate": 1.280137331214584e-06, "loss": 12.6875, "step": 12869 }, { "epoch": 0.8547519426180514, "grad_norm": 236.36807250976562, "learning_rate": 1.280034084025304e-06, "loss": 15.875, "step": 12870 }, { "epoch": 0.8548183569104071, "grad_norm": 266.5992126464844, "learning_rate": 1.2799308335967514e-06, "loss": 14.8594, "step": 12871 }, { "epoch": 0.8548847712027628, "grad_norm": 124.1694564819336, "learning_rate": 1.2798275799301199e-06, "loss": 14.9062, "step": 12872 }, { "epoch": 0.8549511854951185, "grad_norm": 260.3453369140625, "learning_rate": 1.2797243230266043e-06, "loss": 19.4062, "step": 12873 }, { "epoch": 0.8550175997874743, "grad_norm": 219.52389526367188, "learning_rate": 1.2796210628873995e-06, "loss": 12.9844, "step": 12874 }, { "epoch": 0.85508401407983, "grad_norm": 166.30140686035156, "learning_rate": 1.2795177995136993e-06, "loss": 13.5781, "step": 12875 }, { "epoch": 0.8551504283721857, "grad_norm": 150.89024353027344, "learning_rate": 1.2794145329066981e-06, "loss": 16.0156, "step": 12876 }, { "epoch": 0.8552168426645415, "grad_norm": 178.97113037109375, "learning_rate": 1.2793112630675912e-06, "loss": 18.6875, "step": 12877 }, { "epoch": 0.8552832569568971, "grad_norm": 125.07555389404297, "learning_rate": 1.2792079899975721e-06, "loss": 16.3125, "step": 12878 }, { "epoch": 0.8553496712492529, "grad_norm": 240.11705017089844, "learning_rate": 1.279104713697836e-06, "loss": 17.9062, "step": 12879 }, { "epoch": 0.8554160855416085, "grad_norm": 498.437744140625, "learning_rate": 1.2790014341695778e-06, "loss": 22.4141, "step": 12880 }, { "epoch": 0.8554824998339643, "grad_norm": 278.2514953613281, "learning_rate": 1.278898151413992e-06, "loss": 17.5469, "step": 12881 }, { "epoch": 0.85554891412632, "grad_norm": 156.1721649169922, "learning_rate": 1.2787948654322727e-06, "loss": 14.4844, "step": 12882 }, { "epoch": 0.8556153284186757, "grad_norm": 361.2042541503906, "learning_rate": 1.2786915762256153e-06, "loss": 15.0547, "step": 12883 }, { "epoch": 0.8556817427110314, "grad_norm": 276.55560302734375, "learning_rate": 1.278588283795215e-06, "loss": 18.2969, "step": 12884 }, { "epoch": 0.8557481570033871, "grad_norm": 133.67433166503906, "learning_rate": 1.2784849881422656e-06, "loss": 17.0938, "step": 12885 }, { "epoch": 0.8558145712957429, "grad_norm": 252.7718963623047, "learning_rate": 1.2783816892679626e-06, "loss": 18.5312, "step": 12886 }, { "epoch": 0.8558809855880986, "grad_norm": 274.0630798339844, "learning_rate": 1.2782783871735007e-06, "loss": 20.0312, "step": 12887 }, { "epoch": 0.8559473998804543, "grad_norm": 179.2742462158203, "learning_rate": 1.278175081860075e-06, "loss": 16.875, "step": 12888 }, { "epoch": 0.85601381417281, "grad_norm": 102.04122161865234, "learning_rate": 1.2780717733288801e-06, "loss": 15.7578, "step": 12889 }, { "epoch": 0.8560802284651657, "grad_norm": 227.55825805664062, "learning_rate": 1.2779684615811113e-06, "loss": 19.5625, "step": 12890 }, { "epoch": 0.8561466427575214, "grad_norm": 235.6660919189453, "learning_rate": 1.2778651466179638e-06, "loss": 14.3281, "step": 12891 }, { "epoch": 0.8562130570498772, "grad_norm": 509.9578552246094, "learning_rate": 1.2777618284406324e-06, "loss": 15.3281, "step": 12892 }, { "epoch": 0.8562794713422328, "grad_norm": 206.30667114257812, "learning_rate": 1.2776585070503123e-06, "loss": 14.6406, "step": 12893 }, { "epoch": 0.8563458856345886, "grad_norm": 167.7461395263672, "learning_rate": 1.2775551824481987e-06, "loss": 18.6719, "step": 12894 }, { "epoch": 0.8564122999269442, "grad_norm": 259.1767883300781, "learning_rate": 1.2774518546354872e-06, "loss": 19.375, "step": 12895 }, { "epoch": 0.8564787142193, "grad_norm": 115.71503448486328, "learning_rate": 1.2773485236133721e-06, "loss": 15.4688, "step": 12896 }, { "epoch": 0.8565451285116558, "grad_norm": 215.2075958251953, "learning_rate": 1.2772451893830494e-06, "loss": 17.6875, "step": 12897 }, { "epoch": 0.8566115428040114, "grad_norm": 185.15777587890625, "learning_rate": 1.2771418519457145e-06, "loss": 12.8281, "step": 12898 }, { "epoch": 0.8566779570963672, "grad_norm": 107.22631072998047, "learning_rate": 1.2770385113025621e-06, "loss": 15.0469, "step": 12899 }, { "epoch": 0.8567443713887228, "grad_norm": 180.75802612304688, "learning_rate": 1.2769351674547879e-06, "loss": 17.1719, "step": 12900 }, { "epoch": 0.8568107856810786, "grad_norm": 382.1874694824219, "learning_rate": 1.2768318204035875e-06, "loss": 15.2109, "step": 12901 }, { "epoch": 0.8568771999734343, "grad_norm": 528.62841796875, "learning_rate": 1.2767284701501564e-06, "loss": 19.5, "step": 12902 }, { "epoch": 0.85694361426579, "grad_norm": 165.67713928222656, "learning_rate": 1.2766251166956896e-06, "loss": 16.875, "step": 12903 }, { "epoch": 0.8570100285581457, "grad_norm": 231.4069366455078, "learning_rate": 1.2765217600413832e-06, "loss": 13.9688, "step": 12904 }, { "epoch": 0.8570764428505014, "grad_norm": 175.55287170410156, "learning_rate": 1.2764184001884324e-06, "loss": 15.2344, "step": 12905 }, { "epoch": 0.8571428571428571, "grad_norm": 372.43951416015625, "learning_rate": 1.276315037138033e-06, "loss": 20.6719, "step": 12906 }, { "epoch": 0.8572092714352129, "grad_norm": 186.06314086914062, "learning_rate": 1.2762116708913806e-06, "loss": 18.625, "step": 12907 }, { "epoch": 0.8572756857275686, "grad_norm": 254.5530548095703, "learning_rate": 1.2761083014496708e-06, "loss": 13.5625, "step": 12908 }, { "epoch": 0.8573421000199243, "grad_norm": 124.12812042236328, "learning_rate": 1.2760049288140997e-06, "loss": 15.5625, "step": 12909 }, { "epoch": 0.85740851431228, "grad_norm": 155.62962341308594, "learning_rate": 1.2759015529858623e-06, "loss": 14.0781, "step": 12910 }, { "epoch": 0.8574749286046357, "grad_norm": 140.02293395996094, "learning_rate": 1.275798173966155e-06, "loss": 20.625, "step": 12911 }, { "epoch": 0.8575413428969915, "grad_norm": 492.1758117675781, "learning_rate": 1.2756947917561734e-06, "loss": 25.4531, "step": 12912 }, { "epoch": 0.8576077571893471, "grad_norm": 350.7093505859375, "learning_rate": 1.2755914063571136e-06, "loss": 17.7969, "step": 12913 }, { "epoch": 0.8576741714817029, "grad_norm": 262.3162536621094, "learning_rate": 1.2754880177701712e-06, "loss": 23.1094, "step": 12914 }, { "epoch": 0.8577405857740585, "grad_norm": 117.50239562988281, "learning_rate": 1.2753846259965422e-06, "loss": 15.7656, "step": 12915 }, { "epoch": 0.8578070000664143, "grad_norm": 170.16033935546875, "learning_rate": 1.2752812310374226e-06, "loss": 19.1094, "step": 12916 }, { "epoch": 0.85787341435877, "grad_norm": 126.9100570678711, "learning_rate": 1.2751778328940087e-06, "loss": 14.625, "step": 12917 }, { "epoch": 0.8579398286511257, "grad_norm": 137.6066436767578, "learning_rate": 1.275074431567496e-06, "loss": 11.9766, "step": 12918 }, { "epoch": 0.8580062429434815, "grad_norm": 268.3275451660156, "learning_rate": 1.2749710270590813e-06, "loss": 17.9688, "step": 12919 }, { "epoch": 0.8580726572358371, "grad_norm": 622.9468383789062, "learning_rate": 1.2748676193699599e-06, "loss": 21.2656, "step": 12920 }, { "epoch": 0.8581390715281929, "grad_norm": 216.56443786621094, "learning_rate": 1.2747642085013286e-06, "loss": 19.7188, "step": 12921 }, { "epoch": 0.8582054858205486, "grad_norm": 115.27566528320312, "learning_rate": 1.2746607944543833e-06, "loss": 12.1719, "step": 12922 }, { "epoch": 0.8582719001129043, "grad_norm": 141.2745361328125, "learning_rate": 1.2745573772303204e-06, "loss": 16.8281, "step": 12923 }, { "epoch": 0.85833831440526, "grad_norm": 279.552978515625, "learning_rate": 1.2744539568303361e-06, "loss": 17.0625, "step": 12924 }, { "epoch": 0.8584047286976157, "grad_norm": 491.5164489746094, "learning_rate": 1.2743505332556263e-06, "loss": 25.75, "step": 12925 }, { "epoch": 0.8584711429899714, "grad_norm": 187.86936950683594, "learning_rate": 1.2742471065073882e-06, "loss": 14.3125, "step": 12926 }, { "epoch": 0.8585375572823272, "grad_norm": 218.63597106933594, "learning_rate": 1.2741436765868177e-06, "loss": 20.8281, "step": 12927 }, { "epoch": 0.8586039715746828, "grad_norm": 159.40997314453125, "learning_rate": 1.274040243495111e-06, "loss": 17.5, "step": 12928 }, { "epoch": 0.8586703858670386, "grad_norm": 167.00173950195312, "learning_rate": 1.273936807233465e-06, "loss": 21.5938, "step": 12929 }, { "epoch": 0.8587368001593944, "grad_norm": 127.79084014892578, "learning_rate": 1.2738333678030758e-06, "loss": 14.75, "step": 12930 }, { "epoch": 0.85880321445175, "grad_norm": 141.95033264160156, "learning_rate": 1.2737299252051402e-06, "loss": 11.1172, "step": 12931 }, { "epoch": 0.8588696287441058, "grad_norm": 373.5401306152344, "learning_rate": 1.2736264794408544e-06, "loss": 14.2812, "step": 12932 }, { "epoch": 0.8589360430364614, "grad_norm": 179.37779235839844, "learning_rate": 1.2735230305114155e-06, "loss": 14.4219, "step": 12933 }, { "epoch": 0.8590024573288172, "grad_norm": 183.97042846679688, "learning_rate": 1.27341957841802e-06, "loss": 20.5312, "step": 12934 }, { "epoch": 0.8590688716211728, "grad_norm": 348.0667724609375, "learning_rate": 1.2733161231618643e-06, "loss": 18.6562, "step": 12935 }, { "epoch": 0.8591352859135286, "grad_norm": 152.12660217285156, "learning_rate": 1.2732126647441455e-06, "loss": 15.9375, "step": 12936 }, { "epoch": 0.8592017002058843, "grad_norm": 282.2208557128906, "learning_rate": 1.2731092031660597e-06, "loss": 17.8438, "step": 12937 }, { "epoch": 0.85926811449824, "grad_norm": 243.31857299804688, "learning_rate": 1.2730057384288047e-06, "loss": 17.8281, "step": 12938 }, { "epoch": 0.8593345287905957, "grad_norm": 321.3692932128906, "learning_rate": 1.2729022705335764e-06, "loss": 14.4375, "step": 12939 }, { "epoch": 0.8594009430829515, "grad_norm": 192.69053649902344, "learning_rate": 1.2727987994815718e-06, "loss": 20.2031, "step": 12940 }, { "epoch": 0.8594673573753072, "grad_norm": 125.62329864501953, "learning_rate": 1.2726953252739885e-06, "loss": 18.7188, "step": 12941 }, { "epoch": 0.8595337716676629, "grad_norm": 161.9382781982422, "learning_rate": 1.2725918479120225e-06, "loss": 16.8906, "step": 12942 }, { "epoch": 0.8596001859600186, "grad_norm": 222.41943359375, "learning_rate": 1.2724883673968713e-06, "loss": 17.2656, "step": 12943 }, { "epoch": 0.8596666002523743, "grad_norm": 515.6455688476562, "learning_rate": 1.272384883729732e-06, "loss": 16.8906, "step": 12944 }, { "epoch": 0.8597330145447301, "grad_norm": 235.47750854492188, "learning_rate": 1.2722813969118013e-06, "loss": 14.6406, "step": 12945 }, { "epoch": 0.8597994288370857, "grad_norm": 704.3545532226562, "learning_rate": 1.2721779069442763e-06, "loss": 25.5625, "step": 12946 }, { "epoch": 0.8598658431294415, "grad_norm": 636.6617431640625, "learning_rate": 1.2720744138283543e-06, "loss": 15.7031, "step": 12947 }, { "epoch": 0.8599322574217971, "grad_norm": 983.3320922851562, "learning_rate": 1.2719709175652326e-06, "loss": 19.5156, "step": 12948 }, { "epoch": 0.8599986717141529, "grad_norm": 318.3594055175781, "learning_rate": 1.2718674181561078e-06, "loss": 12.8906, "step": 12949 }, { "epoch": 0.8600650860065086, "grad_norm": 352.27410888671875, "learning_rate": 1.2717639156021777e-06, "loss": 18.9844, "step": 12950 }, { "epoch": 0.8601315002988643, "grad_norm": 276.9034118652344, "learning_rate": 1.2716604099046391e-06, "loss": 21.0, "step": 12951 }, { "epoch": 0.8601979145912201, "grad_norm": 315.0580139160156, "learning_rate": 1.2715569010646899e-06, "loss": 22.3438, "step": 12952 }, { "epoch": 0.8602643288835757, "grad_norm": 312.64068603515625, "learning_rate": 1.2714533890835264e-06, "loss": 14.6406, "step": 12953 }, { "epoch": 0.8603307431759315, "grad_norm": 383.6454772949219, "learning_rate": 1.2713498739623471e-06, "loss": 31.0312, "step": 12954 }, { "epoch": 0.8603971574682872, "grad_norm": 175.49124145507812, "learning_rate": 1.271246355702349e-06, "loss": 15.0234, "step": 12955 }, { "epoch": 0.8604635717606429, "grad_norm": 230.47622680664062, "learning_rate": 1.2711428343047292e-06, "loss": 18.4062, "step": 12956 }, { "epoch": 0.8605299860529986, "grad_norm": 200.90255737304688, "learning_rate": 1.2710393097706856e-06, "loss": 22.6562, "step": 12957 }, { "epoch": 0.8605964003453543, "grad_norm": 329.4734191894531, "learning_rate": 1.2709357821014154e-06, "loss": 24.4062, "step": 12958 }, { "epoch": 0.86066281463771, "grad_norm": 238.88204956054688, "learning_rate": 1.2708322512981166e-06, "loss": 16.9531, "step": 12959 }, { "epoch": 0.8607292289300658, "grad_norm": 293.85443115234375, "learning_rate": 1.2707287173619864e-06, "loss": 21.1875, "step": 12960 }, { "epoch": 0.8607956432224214, "grad_norm": 183.1983642578125, "learning_rate": 1.2706251802942222e-06, "loss": 18.6406, "step": 12961 }, { "epoch": 0.8608620575147772, "grad_norm": 178.15577697753906, "learning_rate": 1.2705216400960223e-06, "loss": 12.5625, "step": 12962 }, { "epoch": 0.860928471807133, "grad_norm": 144.0290985107422, "learning_rate": 1.270418096768584e-06, "loss": 14.6094, "step": 12963 }, { "epoch": 0.8609948860994886, "grad_norm": 222.906005859375, "learning_rate": 1.2703145503131053e-06, "loss": 24.9844, "step": 12964 }, { "epoch": 0.8610613003918444, "grad_norm": 222.7667694091797, "learning_rate": 1.2702110007307832e-06, "loss": 16.1094, "step": 12965 }, { "epoch": 0.8611277146842, "grad_norm": 188.9116668701172, "learning_rate": 1.2701074480228164e-06, "loss": 16.9844, "step": 12966 }, { "epoch": 0.8611941289765558, "grad_norm": 172.70608520507812, "learning_rate": 1.2700038921904025e-06, "loss": 16.8125, "step": 12967 }, { "epoch": 0.8612605432689114, "grad_norm": 89.3890151977539, "learning_rate": 1.269900333234739e-06, "loss": 19.9062, "step": 12968 }, { "epoch": 0.8613269575612672, "grad_norm": 94.3172378540039, "learning_rate": 1.2697967711570242e-06, "loss": 16.0938, "step": 12969 }, { "epoch": 0.8613933718536229, "grad_norm": 590.13232421875, "learning_rate": 1.269693205958456e-06, "loss": 25.4375, "step": 12970 }, { "epoch": 0.8614597861459786, "grad_norm": 451.902099609375, "learning_rate": 1.2695896376402323e-06, "loss": 20.875, "step": 12971 }, { "epoch": 0.8615262004383343, "grad_norm": 194.7528839111328, "learning_rate": 1.2694860662035508e-06, "loss": 13.7969, "step": 12972 }, { "epoch": 0.86159261473069, "grad_norm": 175.2845001220703, "learning_rate": 1.2693824916496105e-06, "loss": 17.4375, "step": 12973 }, { "epoch": 0.8616590290230458, "grad_norm": 172.5370330810547, "learning_rate": 1.2692789139796083e-06, "loss": 15.4531, "step": 12974 }, { "epoch": 0.8617254433154015, "grad_norm": 190.82681274414062, "learning_rate": 1.269175333194743e-06, "loss": 16.5312, "step": 12975 }, { "epoch": 0.8617918576077572, "grad_norm": 164.8719024658203, "learning_rate": 1.2690717492962129e-06, "loss": 22.7969, "step": 12976 }, { "epoch": 0.8618582719001129, "grad_norm": 276.82879638671875, "learning_rate": 1.268968162285216e-06, "loss": 22.8438, "step": 12977 }, { "epoch": 0.8619246861924686, "grad_norm": 223.68954467773438, "learning_rate": 1.2688645721629501e-06, "loss": 18.7031, "step": 12978 }, { "epoch": 0.8619911004848243, "grad_norm": 317.98284912109375, "learning_rate": 1.2687609789306143e-06, "loss": 15.5625, "step": 12979 }, { "epoch": 0.8620575147771801, "grad_norm": 96.58244323730469, "learning_rate": 1.2686573825894062e-06, "loss": 13.6875, "step": 12980 }, { "epoch": 0.8621239290695357, "grad_norm": 244.75396728515625, "learning_rate": 1.2685537831405244e-06, "loss": 20.4219, "step": 12981 }, { "epoch": 0.8621903433618915, "grad_norm": 149.5970458984375, "learning_rate": 1.2684501805851673e-06, "loss": 14.4531, "step": 12982 }, { "epoch": 0.8622567576542471, "grad_norm": 536.4680786132812, "learning_rate": 1.2683465749245335e-06, "loss": 18.6406, "step": 12983 }, { "epoch": 0.8623231719466029, "grad_norm": 186.8587188720703, "learning_rate": 1.2682429661598213e-06, "loss": 16.5156, "step": 12984 }, { "epoch": 0.8623895862389587, "grad_norm": 206.20217895507812, "learning_rate": 1.268139354292229e-06, "loss": 18.6094, "step": 12985 }, { "epoch": 0.8624560005313143, "grad_norm": 161.26785278320312, "learning_rate": 1.268035739322955e-06, "loss": 16.7344, "step": 12986 }, { "epoch": 0.8625224148236701, "grad_norm": 87.00749969482422, "learning_rate": 1.2679321212531984e-06, "loss": 16.6562, "step": 12987 }, { "epoch": 0.8625888291160257, "grad_norm": 129.55113220214844, "learning_rate": 1.2678285000841576e-06, "loss": 12.5312, "step": 12988 }, { "epoch": 0.8626552434083815, "grad_norm": 229.3662872314453, "learning_rate": 1.2677248758170307e-06, "loss": 19.9688, "step": 12989 }, { "epoch": 0.8627216577007372, "grad_norm": 360.1018981933594, "learning_rate": 1.267621248453017e-06, "loss": 15.2656, "step": 12990 }, { "epoch": 0.8627880719930929, "grad_norm": 157.22213745117188, "learning_rate": 1.2675176179933155e-06, "loss": 18.4062, "step": 12991 }, { "epoch": 0.8628544862854486, "grad_norm": 155.35704040527344, "learning_rate": 1.2674139844391242e-06, "loss": 22.2031, "step": 12992 }, { "epoch": 0.8629209005778044, "grad_norm": 437.956787109375, "learning_rate": 1.2673103477916417e-06, "loss": 30.9688, "step": 12993 }, { "epoch": 0.86298731487016, "grad_norm": 193.94717407226562, "learning_rate": 1.2672067080520679e-06, "loss": 19.9531, "step": 12994 }, { "epoch": 0.8630537291625158, "grad_norm": 143.18812561035156, "learning_rate": 1.2671030652216007e-06, "loss": 16.5625, "step": 12995 }, { "epoch": 0.8631201434548715, "grad_norm": 164.0555877685547, "learning_rate": 1.2669994193014391e-06, "loss": 15.7344, "step": 12996 }, { "epoch": 0.8631865577472272, "grad_norm": 206.51544189453125, "learning_rate": 1.2668957702927822e-06, "loss": 13.9688, "step": 12997 }, { "epoch": 0.863252972039583, "grad_norm": 289.9517517089844, "learning_rate": 1.2667921181968294e-06, "loss": 17.6562, "step": 12998 }, { "epoch": 0.8633193863319386, "grad_norm": 430.659423828125, "learning_rate": 1.2666884630147787e-06, "loss": 18.125, "step": 12999 }, { "epoch": 0.8633858006242944, "grad_norm": 905.2145385742188, "learning_rate": 1.2665848047478297e-06, "loss": 16.625, "step": 13000 }, { "epoch": 0.86345221491665, "grad_norm": 440.5875244140625, "learning_rate": 1.2664811433971815e-06, "loss": 18.6094, "step": 13001 }, { "epoch": 0.8635186292090058, "grad_norm": 194.0224151611328, "learning_rate": 1.2663774789640333e-06, "loss": 17.2188, "step": 13002 }, { "epoch": 0.8635850435013614, "grad_norm": 302.7978515625, "learning_rate": 1.266273811449584e-06, "loss": 17.1562, "step": 13003 }, { "epoch": 0.8636514577937172, "grad_norm": 210.53285217285156, "learning_rate": 1.2661701408550323e-06, "loss": 20.5, "step": 13004 }, { "epoch": 0.8637178720860729, "grad_norm": 151.52044677734375, "learning_rate": 1.2660664671815783e-06, "loss": 13.5938, "step": 13005 }, { "epoch": 0.8637842863784286, "grad_norm": 289.9966735839844, "learning_rate": 1.2659627904304208e-06, "loss": 20.3906, "step": 13006 }, { "epoch": 0.8638507006707844, "grad_norm": 186.1354217529297, "learning_rate": 1.265859110602759e-06, "loss": 15.6875, "step": 13007 }, { "epoch": 0.86391711496314, "grad_norm": 215.45248413085938, "learning_rate": 1.2657554276997924e-06, "loss": 15.75, "step": 13008 }, { "epoch": 0.8639835292554958, "grad_norm": 345.2171936035156, "learning_rate": 1.2656517417227202e-06, "loss": 23.875, "step": 13009 }, { "epoch": 0.8640499435478515, "grad_norm": 115.16160583496094, "learning_rate": 1.2655480526727418e-06, "loss": 16.7344, "step": 13010 }, { "epoch": 0.8641163578402072, "grad_norm": 203.9123077392578, "learning_rate": 1.2654443605510567e-06, "loss": 15.8281, "step": 13011 }, { "epoch": 0.8641827721325629, "grad_norm": 340.4414978027344, "learning_rate": 1.2653406653588646e-06, "loss": 20.625, "step": 13012 }, { "epoch": 0.8642491864249187, "grad_norm": 204.9192657470703, "learning_rate": 1.2652369670973642e-06, "loss": 16.4531, "step": 13013 }, { "epoch": 0.8643156007172743, "grad_norm": 260.5910339355469, "learning_rate": 1.2651332657677556e-06, "loss": 33.1406, "step": 13014 }, { "epoch": 0.8643820150096301, "grad_norm": 414.575439453125, "learning_rate": 1.2650295613712384e-06, "loss": 25.4062, "step": 13015 }, { "epoch": 0.8644484293019857, "grad_norm": 107.18244934082031, "learning_rate": 1.2649258539090123e-06, "loss": 12.4531, "step": 13016 }, { "epoch": 0.8645148435943415, "grad_norm": 661.4503173828125, "learning_rate": 1.264822143382276e-06, "loss": 13.0156, "step": 13017 }, { "epoch": 0.8645812578866973, "grad_norm": 167.00296020507812, "learning_rate": 1.2647184297922305e-06, "loss": 14.4062, "step": 13018 }, { "epoch": 0.8646476721790529, "grad_norm": 157.93089294433594, "learning_rate": 1.2646147131400743e-06, "loss": 15.4375, "step": 13019 }, { "epoch": 0.8647140864714087, "grad_norm": 368.5998229980469, "learning_rate": 1.2645109934270082e-06, "loss": 16.7031, "step": 13020 }, { "epoch": 0.8647805007637643, "grad_norm": 117.57608795166016, "learning_rate": 1.2644072706542313e-06, "loss": 11.75, "step": 13021 }, { "epoch": 0.8648469150561201, "grad_norm": 166.78265380859375, "learning_rate": 1.2643035448229436e-06, "loss": 25.8906, "step": 13022 }, { "epoch": 0.8649133293484758, "grad_norm": 159.30165100097656, "learning_rate": 1.2641998159343449e-06, "loss": 10.7031, "step": 13023 }, { "epoch": 0.8649797436408315, "grad_norm": 161.13868713378906, "learning_rate": 1.264096083989635e-06, "loss": 15.375, "step": 13024 }, { "epoch": 0.8650461579331872, "grad_norm": 149.92080688476562, "learning_rate": 1.2639923489900137e-06, "loss": 13.2812, "step": 13025 }, { "epoch": 0.8651125722255429, "grad_norm": 258.0432434082031, "learning_rate": 1.2638886109366813e-06, "loss": 14.2344, "step": 13026 }, { "epoch": 0.8651789865178987, "grad_norm": 154.10580444335938, "learning_rate": 1.2637848698308374e-06, "loss": 16.2969, "step": 13027 }, { "epoch": 0.8652454008102544, "grad_norm": 222.46852111816406, "learning_rate": 1.2636811256736825e-06, "loss": 17.9688, "step": 13028 }, { "epoch": 0.8653118151026101, "grad_norm": 311.064697265625, "learning_rate": 1.263577378466416e-06, "loss": 17.0, "step": 13029 }, { "epoch": 0.8653782293949658, "grad_norm": 362.4859924316406, "learning_rate": 1.2634736282102392e-06, "loss": 26.875, "step": 13030 }, { "epoch": 0.8654446436873215, "grad_norm": 147.87428283691406, "learning_rate": 1.263369874906351e-06, "loss": 15.7969, "step": 13031 }, { "epoch": 0.8655110579796772, "grad_norm": 360.00982666015625, "learning_rate": 1.2632661185559515e-06, "loss": 15.875, "step": 13032 }, { "epoch": 0.865577472272033, "grad_norm": 302.63970947265625, "learning_rate": 1.2631623591602419e-06, "loss": 13.875, "step": 13033 }, { "epoch": 0.8656438865643886, "grad_norm": 163.62303161621094, "learning_rate": 1.2630585967204216e-06, "loss": 14.7578, "step": 13034 }, { "epoch": 0.8657103008567444, "grad_norm": 465.8209533691406, "learning_rate": 1.2629548312376912e-06, "loss": 16.0625, "step": 13035 }, { "epoch": 0.8657767151491, "grad_norm": 280.2965087890625, "learning_rate": 1.262851062713251e-06, "loss": 15.3125, "step": 13036 }, { "epoch": 0.8658431294414558, "grad_norm": 125.6213150024414, "learning_rate": 1.2627472911483012e-06, "loss": 13.9531, "step": 13037 }, { "epoch": 0.8659095437338116, "grad_norm": 231.53045654296875, "learning_rate": 1.2626435165440424e-06, "loss": 15.1406, "step": 13038 }, { "epoch": 0.8659759580261672, "grad_norm": 216.41229248046875, "learning_rate": 1.2625397389016748e-06, "loss": 15.5469, "step": 13039 }, { "epoch": 0.866042372318523, "grad_norm": 471.40875244140625, "learning_rate": 1.2624359582223986e-06, "loss": 17.0781, "step": 13040 }, { "epoch": 0.8661087866108786, "grad_norm": 130.7140655517578, "learning_rate": 1.2623321745074149e-06, "loss": 17.3438, "step": 13041 }, { "epoch": 0.8661752009032344, "grad_norm": 192.46511840820312, "learning_rate": 1.262228387757924e-06, "loss": 21.7031, "step": 13042 }, { "epoch": 0.8662416151955901, "grad_norm": 270.2793273925781, "learning_rate": 1.2621245979751259e-06, "loss": 17.9688, "step": 13043 }, { "epoch": 0.8663080294879458, "grad_norm": 153.131103515625, "learning_rate": 1.262020805160222e-06, "loss": 15.3906, "step": 13044 }, { "epoch": 0.8663744437803015, "grad_norm": 158.85227966308594, "learning_rate": 1.261917009314412e-06, "loss": 17.8828, "step": 13045 }, { "epoch": 0.8664408580726572, "grad_norm": 420.94232177734375, "learning_rate": 1.2618132104388975e-06, "loss": 17.4062, "step": 13046 }, { "epoch": 0.8665072723650129, "grad_norm": 152.89674377441406, "learning_rate": 1.2617094085348786e-06, "loss": 18.8438, "step": 13047 }, { "epoch": 0.8665736866573687, "grad_norm": 183.05006408691406, "learning_rate": 1.2616056036035563e-06, "loss": 14.0, "step": 13048 }, { "epoch": 0.8666401009497244, "grad_norm": 152.37940979003906, "learning_rate": 1.261501795646131e-06, "loss": 17.125, "step": 13049 }, { "epoch": 0.8667065152420801, "grad_norm": 342.3727722167969, "learning_rate": 1.2613979846638038e-06, "loss": 22.125, "step": 13050 }, { "epoch": 0.8667729295344359, "grad_norm": 309.6839599609375, "learning_rate": 1.2612941706577754e-06, "loss": 15.0938, "step": 13051 }, { "epoch": 0.8668393438267915, "grad_norm": 421.6606750488281, "learning_rate": 1.261190353629247e-06, "loss": 21.25, "step": 13052 }, { "epoch": 0.8669057581191473, "grad_norm": 186.7330322265625, "learning_rate": 1.2610865335794189e-06, "loss": 16.375, "step": 13053 }, { "epoch": 0.8669721724115029, "grad_norm": 525.5520629882812, "learning_rate": 1.2609827105094923e-06, "loss": 21.5938, "step": 13054 }, { "epoch": 0.8670385867038587, "grad_norm": 154.69589233398438, "learning_rate": 1.2608788844206687e-06, "loss": 15.8125, "step": 13055 }, { "epoch": 0.8671050009962143, "grad_norm": 264.99102783203125, "learning_rate": 1.2607750553141481e-06, "loss": 21.25, "step": 13056 }, { "epoch": 0.8671714152885701, "grad_norm": 333.138916015625, "learning_rate": 1.260671223191132e-06, "loss": 19.5312, "step": 13057 }, { "epoch": 0.8672378295809258, "grad_norm": 197.69125366210938, "learning_rate": 1.2605673880528215e-06, "loss": 16.5469, "step": 13058 }, { "epoch": 0.8673042438732815, "grad_norm": 194.9182891845703, "learning_rate": 1.2604635499004181e-06, "loss": 14.1562, "step": 13059 }, { "epoch": 0.8673706581656373, "grad_norm": 299.2901611328125, "learning_rate": 1.2603597087351223e-06, "loss": 20.5, "step": 13060 }, { "epoch": 0.867437072457993, "grad_norm": 233.65794372558594, "learning_rate": 1.2602558645581354e-06, "loss": 21.9531, "step": 13061 }, { "epoch": 0.8675034867503487, "grad_norm": 140.2945556640625, "learning_rate": 1.2601520173706588e-06, "loss": 17.5781, "step": 13062 }, { "epoch": 0.8675699010427044, "grad_norm": 167.12301635742188, "learning_rate": 1.2600481671738937e-06, "loss": 15.9688, "step": 13063 }, { "epoch": 0.8676363153350601, "grad_norm": 219.05775451660156, "learning_rate": 1.2599443139690414e-06, "loss": 13.7891, "step": 13064 }, { "epoch": 0.8677027296274158, "grad_norm": 204.88375854492188, "learning_rate": 1.2598404577573033e-06, "loss": 16.6406, "step": 13065 }, { "epoch": 0.8677691439197716, "grad_norm": 242.61007690429688, "learning_rate": 1.2597365985398801e-06, "loss": 16.2656, "step": 13066 }, { "epoch": 0.8678355582121272, "grad_norm": 395.9855651855469, "learning_rate": 1.2596327363179742e-06, "loss": 14.7344, "step": 13067 }, { "epoch": 0.867901972504483, "grad_norm": 116.23129272460938, "learning_rate": 1.2595288710927863e-06, "loss": 12.25, "step": 13068 }, { "epoch": 0.8679683867968386, "grad_norm": 256.88385009765625, "learning_rate": 1.2594250028655182e-06, "loss": 20.9219, "step": 13069 }, { "epoch": 0.8680348010891944, "grad_norm": 187.26107788085938, "learning_rate": 1.2593211316373714e-06, "loss": 18.3594, "step": 13070 }, { "epoch": 0.8681012153815502, "grad_norm": 182.06980895996094, "learning_rate": 1.259217257409547e-06, "loss": 16.0156, "step": 13071 }, { "epoch": 0.8681676296739058, "grad_norm": 161.09194946289062, "learning_rate": 1.2591133801832467e-06, "loss": 16.9375, "step": 13072 }, { "epoch": 0.8682340439662616, "grad_norm": 170.64791870117188, "learning_rate": 1.2590094999596728e-06, "loss": 12.0, "step": 13073 }, { "epoch": 0.8683004582586172, "grad_norm": 215.26922607421875, "learning_rate": 1.258905616740026e-06, "loss": 18.0156, "step": 13074 }, { "epoch": 0.868366872550973, "grad_norm": 137.86468505859375, "learning_rate": 1.2588017305255084e-06, "loss": 17.4375, "step": 13075 }, { "epoch": 0.8684332868433287, "grad_norm": 289.188720703125, "learning_rate": 1.2586978413173216e-06, "loss": 14.9688, "step": 13076 }, { "epoch": 0.8684997011356844, "grad_norm": 285.6912536621094, "learning_rate": 1.2585939491166672e-06, "loss": 14.8906, "step": 13077 }, { "epoch": 0.8685661154280401, "grad_norm": 247.45077514648438, "learning_rate": 1.258490053924747e-06, "loss": 18.2188, "step": 13078 }, { "epoch": 0.8686325297203958, "grad_norm": 454.6378173828125, "learning_rate": 1.2583861557427634e-06, "loss": 11.4062, "step": 13079 }, { "epoch": 0.8686989440127515, "grad_norm": 228.14962768554688, "learning_rate": 1.2582822545719176e-06, "loss": 17.75, "step": 13080 }, { "epoch": 0.8687653583051073, "grad_norm": 280.1168212890625, "learning_rate": 1.2581783504134113e-06, "loss": 25.4688, "step": 13081 }, { "epoch": 0.868831772597463, "grad_norm": 97.117919921875, "learning_rate": 1.258074443268447e-06, "loss": 10.875, "step": 13082 }, { "epoch": 0.8688981868898187, "grad_norm": 183.22776794433594, "learning_rate": 1.2579705331382263e-06, "loss": 21.75, "step": 13083 }, { "epoch": 0.8689646011821744, "grad_norm": 186.86749267578125, "learning_rate": 1.2578666200239512e-06, "loss": 15.4219, "step": 13084 }, { "epoch": 0.8690310154745301, "grad_norm": 137.19680786132812, "learning_rate": 1.2577627039268234e-06, "loss": 15.3906, "step": 13085 }, { "epoch": 0.8690974297668859, "grad_norm": 158.97056579589844, "learning_rate": 1.2576587848480458e-06, "loss": 12.7188, "step": 13086 }, { "epoch": 0.8691638440592415, "grad_norm": 176.6074676513672, "learning_rate": 1.2575548627888198e-06, "loss": 17.0625, "step": 13087 }, { "epoch": 0.8692302583515973, "grad_norm": 157.5819549560547, "learning_rate": 1.2574509377503476e-06, "loss": 10.25, "step": 13088 }, { "epoch": 0.8692966726439529, "grad_norm": 298.23651123046875, "learning_rate": 1.2573470097338312e-06, "loss": 21.5781, "step": 13089 }, { "epoch": 0.8693630869363087, "grad_norm": 229.63992309570312, "learning_rate": 1.2572430787404732e-06, "loss": 15.625, "step": 13090 }, { "epoch": 0.8694295012286644, "grad_norm": 218.6624298095703, "learning_rate": 1.2571391447714758e-06, "loss": 20.7031, "step": 13091 }, { "epoch": 0.8694959155210201, "grad_norm": 199.8799591064453, "learning_rate": 1.257035207828041e-06, "loss": 22.2344, "step": 13092 }, { "epoch": 0.8695623298133759, "grad_norm": 148.0550537109375, "learning_rate": 1.2569312679113708e-06, "loss": 15.7188, "step": 13093 }, { "epoch": 0.8696287441057315, "grad_norm": 210.14566040039062, "learning_rate": 1.256827325022668e-06, "loss": 22.3125, "step": 13094 }, { "epoch": 0.8696951583980873, "grad_norm": 153.9512176513672, "learning_rate": 1.256723379163135e-06, "loss": 16.1719, "step": 13095 }, { "epoch": 0.869761572690443, "grad_norm": 813.6954345703125, "learning_rate": 1.2566194303339736e-06, "loss": 13.9062, "step": 13096 }, { "epoch": 0.8698279869827987, "grad_norm": 563.3770141601562, "learning_rate": 1.2565154785363868e-06, "loss": 18.7812, "step": 13097 }, { "epoch": 0.8698944012751544, "grad_norm": 209.04197692871094, "learning_rate": 1.256411523771577e-06, "loss": 13.4219, "step": 13098 }, { "epoch": 0.8699608155675101, "grad_norm": 199.40701293945312, "learning_rate": 1.2563075660407466e-06, "loss": 15.6562, "step": 13099 }, { "epoch": 0.8700272298598658, "grad_norm": 271.1600646972656, "learning_rate": 1.2562036053450975e-06, "loss": 14.9688, "step": 13100 }, { "epoch": 0.8700936441522216, "grad_norm": 296.84075927734375, "learning_rate": 1.2560996416858335e-06, "loss": 18.5625, "step": 13101 }, { "epoch": 0.8701600584445772, "grad_norm": 369.28033447265625, "learning_rate": 1.255995675064156e-06, "loss": 25.3594, "step": 13102 }, { "epoch": 0.870226472736933, "grad_norm": 188.4984130859375, "learning_rate": 1.2558917054812682e-06, "loss": 12.6094, "step": 13103 }, { "epoch": 0.8702928870292888, "grad_norm": 442.64678955078125, "learning_rate": 1.255787732938373e-06, "loss": 20.1406, "step": 13104 }, { "epoch": 0.8703593013216444, "grad_norm": 347.93707275390625, "learning_rate": 1.2556837574366727e-06, "loss": 21.3594, "step": 13105 }, { "epoch": 0.8704257156140002, "grad_norm": 434.8833312988281, "learning_rate": 1.2555797789773704e-06, "loss": 19.4219, "step": 13106 }, { "epoch": 0.8704921299063558, "grad_norm": 133.28195190429688, "learning_rate": 1.255475797561668e-06, "loss": 16.6562, "step": 13107 }, { "epoch": 0.8705585441987116, "grad_norm": 248.66307067871094, "learning_rate": 1.2553718131907696e-06, "loss": 21.3125, "step": 13108 }, { "epoch": 0.8706249584910672, "grad_norm": 177.7760467529297, "learning_rate": 1.2552678258658768e-06, "loss": 16.1094, "step": 13109 }, { "epoch": 0.870691372783423, "grad_norm": 125.90828704833984, "learning_rate": 1.255163835588193e-06, "loss": 18.0, "step": 13110 }, { "epoch": 0.8707577870757787, "grad_norm": 205.88404846191406, "learning_rate": 1.2550598423589212e-06, "loss": 17.875, "step": 13111 }, { "epoch": 0.8708242013681344, "grad_norm": 211.7703857421875, "learning_rate": 1.2549558461792643e-06, "loss": 17.1406, "step": 13112 }, { "epoch": 0.8708906156604901, "grad_norm": 254.55288696289062, "learning_rate": 1.2548518470504253e-06, "loss": 15.1406, "step": 13113 }, { "epoch": 0.8709570299528459, "grad_norm": 120.39046478271484, "learning_rate": 1.2547478449736067e-06, "loss": 15.2344, "step": 13114 }, { "epoch": 0.8710234442452016, "grad_norm": 489.443359375, "learning_rate": 1.2546438399500122e-06, "loss": 24.9062, "step": 13115 }, { "epoch": 0.8710898585375573, "grad_norm": 204.3543701171875, "learning_rate": 1.2545398319808444e-06, "loss": 16.1719, "step": 13116 }, { "epoch": 0.871156272829913, "grad_norm": 210.78814697265625, "learning_rate": 1.254435821067307e-06, "loss": 16.2656, "step": 13117 }, { "epoch": 0.8712226871222687, "grad_norm": 485.6580505371094, "learning_rate": 1.2543318072106021e-06, "loss": 22.0625, "step": 13118 }, { "epoch": 0.8712891014146245, "grad_norm": 344.0601501464844, "learning_rate": 1.2542277904119344e-06, "loss": 20.9219, "step": 13119 }, { "epoch": 0.8713555157069801, "grad_norm": 148.70989990234375, "learning_rate": 1.2541237706725054e-06, "loss": 15.2344, "step": 13120 }, { "epoch": 0.8714219299993359, "grad_norm": 301.4600524902344, "learning_rate": 1.2540197479935192e-06, "loss": 22.2969, "step": 13121 }, { "epoch": 0.8714883442916915, "grad_norm": 179.96876525878906, "learning_rate": 1.2539157223761794e-06, "loss": 16.75, "step": 13122 }, { "epoch": 0.8715547585840473, "grad_norm": 147.0988311767578, "learning_rate": 1.253811693821689e-06, "loss": 14.7812, "step": 13123 }, { "epoch": 0.871621172876403, "grad_norm": 309.3292541503906, "learning_rate": 1.253707662331251e-06, "loss": 15.1094, "step": 13124 }, { "epoch": 0.8716875871687587, "grad_norm": 276.5030517578125, "learning_rate": 1.2536036279060691e-06, "loss": 19.7344, "step": 13125 }, { "epoch": 0.8717540014611145, "grad_norm": 213.7183837890625, "learning_rate": 1.2534995905473467e-06, "loss": 20.875, "step": 13126 }, { "epoch": 0.8718204157534701, "grad_norm": 513.2782592773438, "learning_rate": 1.2533955502562875e-06, "loss": 17.0781, "step": 13127 }, { "epoch": 0.8718868300458259, "grad_norm": 3322.355712890625, "learning_rate": 1.253291507034094e-06, "loss": 17.6406, "step": 13128 }, { "epoch": 0.8719532443381816, "grad_norm": 197.0642547607422, "learning_rate": 1.2531874608819711e-06, "loss": 15.4375, "step": 13129 }, { "epoch": 0.8720196586305373, "grad_norm": 204.21701049804688, "learning_rate": 1.2530834118011214e-06, "loss": 20.8438, "step": 13130 }, { "epoch": 0.872086072922893, "grad_norm": 145.5317840576172, "learning_rate": 1.2529793597927486e-06, "loss": 17.0469, "step": 13131 }, { "epoch": 0.8721524872152487, "grad_norm": 324.92364501953125, "learning_rate": 1.2528753048580565e-06, "loss": 16.2344, "step": 13132 }, { "epoch": 0.8722189015076044, "grad_norm": 238.06471252441406, "learning_rate": 1.2527712469982488e-06, "loss": 18.9219, "step": 13133 }, { "epoch": 0.8722853157999602, "grad_norm": 251.5056915283203, "learning_rate": 1.2526671862145294e-06, "loss": 18.9219, "step": 13134 }, { "epoch": 0.8723517300923158, "grad_norm": 346.2810363769531, "learning_rate": 1.2525631225081011e-06, "loss": 13.0469, "step": 13135 }, { "epoch": 0.8724181443846716, "grad_norm": 170.2600555419922, "learning_rate": 1.2524590558801681e-06, "loss": 17.2969, "step": 13136 }, { "epoch": 0.8724845586770273, "grad_norm": 257.1200866699219, "learning_rate": 1.2523549863319347e-06, "loss": 13.4219, "step": 13137 }, { "epoch": 0.872550972969383, "grad_norm": 312.9906921386719, "learning_rate": 1.2522509138646042e-06, "loss": 21.6094, "step": 13138 }, { "epoch": 0.8726173872617388, "grad_norm": 182.33139038085938, "learning_rate": 1.2521468384793807e-06, "loss": 11.0156, "step": 13139 }, { "epoch": 0.8726838015540944, "grad_norm": 217.58802795410156, "learning_rate": 1.2520427601774682e-06, "loss": 19.5625, "step": 13140 }, { "epoch": 0.8727502158464502, "grad_norm": 191.1990203857422, "learning_rate": 1.2519386789600699e-06, "loss": 19.7188, "step": 13141 }, { "epoch": 0.8728166301388058, "grad_norm": 191.58226013183594, "learning_rate": 1.2518345948283905e-06, "loss": 13.75, "step": 13142 }, { "epoch": 0.8728830444311616, "grad_norm": 192.0200958251953, "learning_rate": 1.2517305077836336e-06, "loss": 15.9688, "step": 13143 }, { "epoch": 0.8729494587235173, "grad_norm": 242.8632354736328, "learning_rate": 1.2516264178270036e-06, "loss": 20.4531, "step": 13144 }, { "epoch": 0.873015873015873, "grad_norm": 190.68539428710938, "learning_rate": 1.251522324959704e-06, "loss": 14.4375, "step": 13145 }, { "epoch": 0.8730822873082287, "grad_norm": 222.76512145996094, "learning_rate": 1.2514182291829394e-06, "loss": 20.5156, "step": 13146 }, { "epoch": 0.8731487016005844, "grad_norm": 151.89781188964844, "learning_rate": 1.2513141304979135e-06, "loss": 19.75, "step": 13147 }, { "epoch": 0.8732151158929402, "grad_norm": 319.7282409667969, "learning_rate": 1.251210028905831e-06, "loss": 18.9062, "step": 13148 }, { "epoch": 0.8732815301852959, "grad_norm": 193.95083618164062, "learning_rate": 1.2511059244078954e-06, "loss": 12.1094, "step": 13149 }, { "epoch": 0.8733479444776516, "grad_norm": 194.9881591796875, "learning_rate": 1.2510018170053112e-06, "loss": 14.3438, "step": 13150 }, { "epoch": 0.8734143587700073, "grad_norm": 124.78750610351562, "learning_rate": 1.2508977066992832e-06, "loss": 19.3281, "step": 13151 }, { "epoch": 0.873480773062363, "grad_norm": 175.2602996826172, "learning_rate": 1.2507935934910147e-06, "loss": 16.0312, "step": 13152 }, { "epoch": 0.8735471873547187, "grad_norm": 239.41856384277344, "learning_rate": 1.2506894773817107e-06, "loss": 16.4062, "step": 13153 }, { "epoch": 0.8736136016470745, "grad_norm": 172.79974365234375, "learning_rate": 1.2505853583725753e-06, "loss": 15.0938, "step": 13154 }, { "epoch": 0.8736800159394301, "grad_norm": 242.29742431640625, "learning_rate": 1.2504812364648132e-06, "loss": 16.3594, "step": 13155 }, { "epoch": 0.8737464302317859, "grad_norm": 820.8665161132812, "learning_rate": 1.2503771116596284e-06, "loss": 16.0156, "step": 13156 }, { "epoch": 0.8738128445241415, "grad_norm": 218.29576110839844, "learning_rate": 1.2502729839582256e-06, "loss": 21.2031, "step": 13157 }, { "epoch": 0.8738792588164973, "grad_norm": 294.01641845703125, "learning_rate": 1.2501688533618092e-06, "loss": 17.0469, "step": 13158 }, { "epoch": 0.8739456731088531, "grad_norm": 264.90814208984375, "learning_rate": 1.250064719871584e-06, "loss": 16.2812, "step": 13159 }, { "epoch": 0.8740120874012087, "grad_norm": 323.0712890625, "learning_rate": 1.2499605834887538e-06, "loss": 18.8125, "step": 13160 }, { "epoch": 0.8740785016935645, "grad_norm": 217.4347381591797, "learning_rate": 1.249856444214524e-06, "loss": 24.5, "step": 13161 }, { "epoch": 0.8741449159859201, "grad_norm": 331.5655517578125, "learning_rate": 1.2497523020500993e-06, "loss": 15.5156, "step": 13162 }, { "epoch": 0.8742113302782759, "grad_norm": 136.6669158935547, "learning_rate": 1.2496481569966834e-06, "loss": 15.875, "step": 13163 }, { "epoch": 0.8742777445706316, "grad_norm": 130.62326049804688, "learning_rate": 1.2495440090554819e-06, "loss": 13.375, "step": 13164 }, { "epoch": 0.8743441588629873, "grad_norm": 144.8970947265625, "learning_rate": 1.249439858227699e-06, "loss": 12.875, "step": 13165 }, { "epoch": 0.874410573155343, "grad_norm": 215.8776092529297, "learning_rate": 1.2493357045145399e-06, "loss": 23.9375, "step": 13166 }, { "epoch": 0.8744769874476988, "grad_norm": 184.70785522460938, "learning_rate": 1.249231547917209e-06, "loss": 30.1562, "step": 13167 }, { "epoch": 0.8745434017400544, "grad_norm": 172.0, "learning_rate": 1.249127388436911e-06, "loss": 15.1406, "step": 13168 }, { "epoch": 0.8746098160324102, "grad_norm": 612.0950927734375, "learning_rate": 1.2490232260748515e-06, "loss": 18.5156, "step": 13169 }, { "epoch": 0.8746762303247659, "grad_norm": 142.60183715820312, "learning_rate": 1.2489190608322348e-06, "loss": 19.5156, "step": 13170 }, { "epoch": 0.8747426446171216, "grad_norm": 223.25546264648438, "learning_rate": 1.248814892710266e-06, "loss": 14.8125, "step": 13171 }, { "epoch": 0.8748090589094774, "grad_norm": 163.53549194335938, "learning_rate": 1.2487107217101495e-06, "loss": 16.5156, "step": 13172 }, { "epoch": 0.874875473201833, "grad_norm": 202.52064514160156, "learning_rate": 1.2486065478330911e-06, "loss": 19.3125, "step": 13173 }, { "epoch": 0.8749418874941888, "grad_norm": 247.40565490722656, "learning_rate": 1.2485023710802956e-06, "loss": 16.3281, "step": 13174 }, { "epoch": 0.8750083017865444, "grad_norm": 132.20411682128906, "learning_rate": 1.248398191452968e-06, "loss": 18.8281, "step": 13175 }, { "epoch": 0.8750747160789002, "grad_norm": 212.10316467285156, "learning_rate": 1.2482940089523133e-06, "loss": 19.5781, "step": 13176 }, { "epoch": 0.8751411303712558, "grad_norm": 177.1433868408203, "learning_rate": 1.2481898235795366e-06, "loss": 14.4219, "step": 13177 }, { "epoch": 0.8752075446636116, "grad_norm": 102.15677642822266, "learning_rate": 1.2480856353358431e-06, "loss": 13.6719, "step": 13178 }, { "epoch": 0.8752739589559674, "grad_norm": 199.2219696044922, "learning_rate": 1.247981444222438e-06, "loss": 16.3594, "step": 13179 }, { "epoch": 0.875340373248323, "grad_norm": 253.28736877441406, "learning_rate": 1.2478772502405269e-06, "loss": 18.3438, "step": 13180 }, { "epoch": 0.8754067875406788, "grad_norm": 265.74853515625, "learning_rate": 1.2477730533913145e-06, "loss": 12.1719, "step": 13181 }, { "epoch": 0.8754732018330345, "grad_norm": 125.58478546142578, "learning_rate": 1.2476688536760061e-06, "loss": 15.8281, "step": 13182 }, { "epoch": 0.8755396161253902, "grad_norm": 232.75888061523438, "learning_rate": 1.2475646510958076e-06, "loss": 13.9062, "step": 13183 }, { "epoch": 0.8756060304177459, "grad_norm": 152.93731689453125, "learning_rate": 1.2474604456519235e-06, "loss": 16.5312, "step": 13184 }, { "epoch": 0.8756724447101016, "grad_norm": 241.530029296875, "learning_rate": 1.2473562373455599e-06, "loss": 16.9062, "step": 13185 }, { "epoch": 0.8757388590024573, "grad_norm": 250.01025390625, "learning_rate": 1.247252026177922e-06, "loss": 17.1875, "step": 13186 }, { "epoch": 0.8758052732948131, "grad_norm": 270.67596435546875, "learning_rate": 1.2471478121502155e-06, "loss": 20.375, "step": 13187 }, { "epoch": 0.8758716875871687, "grad_norm": 286.0766296386719, "learning_rate": 1.247043595263645e-06, "loss": 15.1562, "step": 13188 }, { "epoch": 0.8759381018795245, "grad_norm": 213.65049743652344, "learning_rate": 1.246939375519417e-06, "loss": 23.4531, "step": 13189 }, { "epoch": 0.8760045161718802, "grad_norm": 310.2807312011719, "learning_rate": 1.2468351529187367e-06, "loss": 14.6406, "step": 13190 }, { "epoch": 0.8760709304642359, "grad_norm": 159.29541015625, "learning_rate": 1.24673092746281e-06, "loss": 13.7891, "step": 13191 }, { "epoch": 0.8761373447565917, "grad_norm": 123.25430297851562, "learning_rate": 1.2466266991528416e-06, "loss": 12.9375, "step": 13192 }, { "epoch": 0.8762037590489473, "grad_norm": 195.06361389160156, "learning_rate": 1.2465224679900379e-06, "loss": 17.1406, "step": 13193 }, { "epoch": 0.8762701733413031, "grad_norm": 270.501953125, "learning_rate": 1.246418233975605e-06, "loss": 20.5625, "step": 13194 }, { "epoch": 0.8763365876336587, "grad_norm": 239.24075317382812, "learning_rate": 1.2463139971107475e-06, "loss": 15.8438, "step": 13195 }, { "epoch": 0.8764030019260145, "grad_norm": 151.10340881347656, "learning_rate": 1.2462097573966716e-06, "loss": 17.4844, "step": 13196 }, { "epoch": 0.8764694162183702, "grad_norm": 372.5130310058594, "learning_rate": 1.2461055148345834e-06, "loss": 18.6406, "step": 13197 }, { "epoch": 0.8765358305107259, "grad_norm": 227.21380615234375, "learning_rate": 1.2460012694256888e-06, "loss": 17.7188, "step": 13198 }, { "epoch": 0.8766022448030816, "grad_norm": 135.4802703857422, "learning_rate": 1.245897021171193e-06, "loss": 15.6562, "step": 13199 }, { "epoch": 0.8766686590954373, "grad_norm": 256.5271301269531, "learning_rate": 1.2457927700723023e-06, "loss": 19.3125, "step": 13200 }, { "epoch": 0.8767350733877931, "grad_norm": 448.5172424316406, "learning_rate": 1.245688516130223e-06, "loss": 17.375, "step": 13201 }, { "epoch": 0.8768014876801488, "grad_norm": 159.5911865234375, "learning_rate": 1.24558425934616e-06, "loss": 19.1406, "step": 13202 }, { "epoch": 0.8768679019725045, "grad_norm": 89.22819519042969, "learning_rate": 1.2454799997213203e-06, "loss": 12.5938, "step": 13203 }, { "epoch": 0.8769343162648602, "grad_norm": 285.84832763671875, "learning_rate": 1.2453757372569093e-06, "loss": 18.9375, "step": 13204 }, { "epoch": 0.877000730557216, "grad_norm": 139.09889221191406, "learning_rate": 1.2452714719541336e-06, "loss": 15.4219, "step": 13205 }, { "epoch": 0.8770671448495716, "grad_norm": 88.3575210571289, "learning_rate": 1.2451672038141984e-06, "loss": 12.4141, "step": 13206 }, { "epoch": 0.8771335591419274, "grad_norm": 305.9689636230469, "learning_rate": 1.2450629328383106e-06, "loss": 14.5469, "step": 13207 }, { "epoch": 0.877199973434283, "grad_norm": 257.78668212890625, "learning_rate": 1.2449586590276762e-06, "loss": 15.2969, "step": 13208 }, { "epoch": 0.8772663877266388, "grad_norm": 514.6707763671875, "learning_rate": 1.2448543823835014e-06, "loss": 18.7188, "step": 13209 }, { "epoch": 0.8773328020189944, "grad_norm": 126.55081176757812, "learning_rate": 1.2447501029069918e-06, "loss": 12.6562, "step": 13210 }, { "epoch": 0.8773992163113502, "grad_norm": 336.39361572265625, "learning_rate": 1.2446458205993545e-06, "loss": 17.1406, "step": 13211 }, { "epoch": 0.877465630603706, "grad_norm": 154.66876220703125, "learning_rate": 1.2445415354617955e-06, "loss": 13.25, "step": 13212 }, { "epoch": 0.8775320448960616, "grad_norm": 195.29815673828125, "learning_rate": 1.2444372474955208e-06, "loss": 14.0312, "step": 13213 }, { "epoch": 0.8775984591884174, "grad_norm": 130.29136657714844, "learning_rate": 1.2443329567017372e-06, "loss": 14.4219, "step": 13214 }, { "epoch": 0.877664873480773, "grad_norm": 341.1448974609375, "learning_rate": 1.2442286630816508e-06, "loss": 27.8438, "step": 13215 }, { "epoch": 0.8777312877731288, "grad_norm": 182.48500061035156, "learning_rate": 1.2441243666364677e-06, "loss": 14.5625, "step": 13216 }, { "epoch": 0.8777977020654845, "grad_norm": 172.38597106933594, "learning_rate": 1.2440200673673952e-06, "loss": 18.0469, "step": 13217 }, { "epoch": 0.8778641163578402, "grad_norm": 409.9146728515625, "learning_rate": 1.243915765275639e-06, "loss": 22.9375, "step": 13218 }, { "epoch": 0.8779305306501959, "grad_norm": 228.06976318359375, "learning_rate": 1.243811460362406e-06, "loss": 15.9844, "step": 13219 }, { "epoch": 0.8779969449425516, "grad_norm": 167.70262145996094, "learning_rate": 1.2437071526289024e-06, "loss": 15.3906, "step": 13220 }, { "epoch": 0.8780633592349073, "grad_norm": 204.4668731689453, "learning_rate": 1.2436028420763352e-06, "loss": 17.125, "step": 13221 }, { "epoch": 0.8781297735272631, "grad_norm": 157.9784698486328, "learning_rate": 1.2434985287059107e-06, "loss": 17.1094, "step": 13222 }, { "epoch": 0.8781961878196188, "grad_norm": 186.3744659423828, "learning_rate": 1.2433942125188356e-06, "loss": 14.5781, "step": 13223 }, { "epoch": 0.8782626021119745, "grad_norm": 236.70498657226562, "learning_rate": 1.2432898935163167e-06, "loss": 21.75, "step": 13224 }, { "epoch": 0.8783290164043303, "grad_norm": 129.07688903808594, "learning_rate": 1.2431855716995606e-06, "loss": 16.4688, "step": 13225 }, { "epoch": 0.8783954306966859, "grad_norm": 166.225341796875, "learning_rate": 1.2430812470697742e-06, "loss": 16.4688, "step": 13226 }, { "epoch": 0.8784618449890417, "grad_norm": 339.3815002441406, "learning_rate": 1.2429769196281639e-06, "loss": 14.5938, "step": 13227 }, { "epoch": 0.8785282592813973, "grad_norm": 204.69229125976562, "learning_rate": 1.2428725893759367e-06, "loss": 18.5312, "step": 13228 }, { "epoch": 0.8785946735737531, "grad_norm": 154.3779296875, "learning_rate": 1.2427682563142992e-06, "loss": 11.9062, "step": 13229 }, { "epoch": 0.8786610878661087, "grad_norm": 278.63775634765625, "learning_rate": 1.242663920444459e-06, "loss": 14.7344, "step": 13230 }, { "epoch": 0.8787275021584645, "grad_norm": 373.65643310546875, "learning_rate": 1.242559581767622e-06, "loss": 18.1875, "step": 13231 }, { "epoch": 0.8787939164508202, "grad_norm": 109.41594696044922, "learning_rate": 1.242455240284996e-06, "loss": 14.5469, "step": 13232 }, { "epoch": 0.8788603307431759, "grad_norm": 182.51075744628906, "learning_rate": 1.2423508959977875e-06, "loss": 20.1875, "step": 13233 }, { "epoch": 0.8789267450355317, "grad_norm": 180.03236389160156, "learning_rate": 1.2422465489072034e-06, "loss": 15.6875, "step": 13234 }, { "epoch": 0.8789931593278874, "grad_norm": 202.24737548828125, "learning_rate": 1.242142199014451e-06, "loss": 13.5625, "step": 13235 }, { "epoch": 0.8790595736202431, "grad_norm": 211.20721435546875, "learning_rate": 1.2420378463207373e-06, "loss": 15.1562, "step": 13236 }, { "epoch": 0.8791259879125988, "grad_norm": 179.68624877929688, "learning_rate": 1.2419334908272694e-06, "loss": 14.75, "step": 13237 }, { "epoch": 0.8791924022049545, "grad_norm": 198.0214080810547, "learning_rate": 1.2418291325352544e-06, "loss": 18.7969, "step": 13238 }, { "epoch": 0.8792588164973102, "grad_norm": 250.93716430664062, "learning_rate": 1.2417247714458994e-06, "loss": 22.8125, "step": 13239 }, { "epoch": 0.879325230789666, "grad_norm": 181.4603729248047, "learning_rate": 1.2416204075604115e-06, "loss": 27.3438, "step": 13240 }, { "epoch": 0.8793916450820216, "grad_norm": 294.84521484375, "learning_rate": 1.2415160408799982e-06, "loss": 15.5938, "step": 13241 }, { "epoch": 0.8794580593743774, "grad_norm": 135.1909942626953, "learning_rate": 1.2414116714058666e-06, "loss": 14.0312, "step": 13242 }, { "epoch": 0.879524473666733, "grad_norm": 283.70794677734375, "learning_rate": 1.241307299139224e-06, "loss": 18.9375, "step": 13243 }, { "epoch": 0.8795908879590888, "grad_norm": 192.73797607421875, "learning_rate": 1.2412029240812778e-06, "loss": 19.0781, "step": 13244 }, { "epoch": 0.8796573022514446, "grad_norm": 250.00762939453125, "learning_rate": 1.241098546233235e-06, "loss": 23.4375, "step": 13245 }, { "epoch": 0.8797237165438002, "grad_norm": 399.212890625, "learning_rate": 1.2409941655963035e-06, "loss": 21.5469, "step": 13246 }, { "epoch": 0.879790130836156, "grad_norm": 192.57894897460938, "learning_rate": 1.2408897821716902e-06, "loss": 13.9688, "step": 13247 }, { "epoch": 0.8798565451285116, "grad_norm": 198.33590698242188, "learning_rate": 1.240785395960603e-06, "loss": 12.6094, "step": 13248 }, { "epoch": 0.8799229594208674, "grad_norm": 368.7816162109375, "learning_rate": 1.2406810069642491e-06, "loss": 22.2812, "step": 13249 }, { "epoch": 0.879989373713223, "grad_norm": 179.6252899169922, "learning_rate": 1.240576615183836e-06, "loss": 17.5781, "step": 13250 }, { "epoch": 0.8800557880055788, "grad_norm": 256.4098205566406, "learning_rate": 1.2404722206205715e-06, "loss": 12.9531, "step": 13251 }, { "epoch": 0.8801222022979345, "grad_norm": 117.68059539794922, "learning_rate": 1.240367823275663e-06, "loss": 13.5625, "step": 13252 }, { "epoch": 0.8801886165902902, "grad_norm": 185.77354431152344, "learning_rate": 1.240263423150318e-06, "loss": 18.3906, "step": 13253 }, { "epoch": 0.8802550308826459, "grad_norm": 182.77638244628906, "learning_rate": 1.2401590202457444e-06, "loss": 19.2656, "step": 13254 }, { "epoch": 0.8803214451750017, "grad_norm": 263.5123291015625, "learning_rate": 1.24005461456315e-06, "loss": 19.2812, "step": 13255 }, { "epoch": 0.8803878594673574, "grad_norm": 130.37095642089844, "learning_rate": 1.2399502061037416e-06, "loss": 14.1562, "step": 13256 }, { "epoch": 0.8804542737597131, "grad_norm": 181.5277862548828, "learning_rate": 1.239845794868728e-06, "loss": 18.3125, "step": 13257 }, { "epoch": 0.8805206880520688, "grad_norm": 221.0256805419922, "learning_rate": 1.2397413808593168e-06, "loss": 13.8906, "step": 13258 }, { "epoch": 0.8805871023444245, "grad_norm": 186.0400848388672, "learning_rate": 1.239636964076715e-06, "loss": 16.0781, "step": 13259 }, { "epoch": 0.8806535166367803, "grad_norm": 141.57852172851562, "learning_rate": 1.2395325445221313e-06, "loss": 17.8125, "step": 13260 }, { "epoch": 0.8807199309291359, "grad_norm": 239.21678161621094, "learning_rate": 1.239428122196773e-06, "loss": 22.6406, "step": 13261 }, { "epoch": 0.8807863452214917, "grad_norm": 222.92308044433594, "learning_rate": 1.2393236971018485e-06, "loss": 21.3438, "step": 13262 }, { "epoch": 0.8808527595138473, "grad_norm": 200.9684600830078, "learning_rate": 1.2392192692385654e-06, "loss": 19.8438, "step": 13263 }, { "epoch": 0.8809191738062031, "grad_norm": 259.9615478515625, "learning_rate": 1.2391148386081315e-06, "loss": 21.2812, "step": 13264 }, { "epoch": 0.8809855880985588, "grad_norm": 190.17335510253906, "learning_rate": 1.2390104052117556e-06, "loss": 14.5156, "step": 13265 }, { "epoch": 0.8810520023909145, "grad_norm": 103.1047592163086, "learning_rate": 1.2389059690506448e-06, "loss": 11.0234, "step": 13266 }, { "epoch": 0.8811184166832703, "grad_norm": 934.9274291992188, "learning_rate": 1.2388015301260073e-06, "loss": 13.8906, "step": 13267 }, { "epoch": 0.8811848309756259, "grad_norm": 309.20721435546875, "learning_rate": 1.2386970884390516e-06, "loss": 25.9219, "step": 13268 }, { "epoch": 0.8812512452679817, "grad_norm": 439.41033935546875, "learning_rate": 1.2385926439909858e-06, "loss": 21.7812, "step": 13269 }, { "epoch": 0.8813176595603374, "grad_norm": 276.73577880859375, "learning_rate": 1.2384881967830176e-06, "loss": 16.8125, "step": 13270 }, { "epoch": 0.8813840738526931, "grad_norm": 379.07086181640625, "learning_rate": 1.2383837468163553e-06, "loss": 21.8594, "step": 13271 }, { "epoch": 0.8814504881450488, "grad_norm": 479.03076171875, "learning_rate": 1.2382792940922077e-06, "loss": 14.2812, "step": 13272 }, { "epoch": 0.8815169024374045, "grad_norm": 263.13671875, "learning_rate": 1.2381748386117825e-06, "loss": 21.7031, "step": 13273 }, { "epoch": 0.8815833167297602, "grad_norm": 234.56912231445312, "learning_rate": 1.2380703803762882e-06, "loss": 15.3281, "step": 13274 }, { "epoch": 0.881649731022116, "grad_norm": 146.54371643066406, "learning_rate": 1.2379659193869327e-06, "loss": 15.2031, "step": 13275 }, { "epoch": 0.8817161453144716, "grad_norm": 229.5271759033203, "learning_rate": 1.237861455644925e-06, "loss": 17.5, "step": 13276 }, { "epoch": 0.8817825596068274, "grad_norm": 564.0669555664062, "learning_rate": 1.237756989151473e-06, "loss": 22.5156, "step": 13277 }, { "epoch": 0.8818489738991832, "grad_norm": 123.17646026611328, "learning_rate": 1.2376525199077852e-06, "loss": 15.0625, "step": 13278 }, { "epoch": 0.8819153881915388, "grad_norm": 188.36483764648438, "learning_rate": 1.23754804791507e-06, "loss": 21.4531, "step": 13279 }, { "epoch": 0.8819818024838946, "grad_norm": 545.96826171875, "learning_rate": 1.2374435731745362e-06, "loss": 17.7969, "step": 13280 }, { "epoch": 0.8820482167762502, "grad_norm": 181.57850646972656, "learning_rate": 1.2373390956873918e-06, "loss": 13.0, "step": 13281 }, { "epoch": 0.882114631068606, "grad_norm": 163.37255859375, "learning_rate": 1.2372346154548458e-06, "loss": 19.9062, "step": 13282 }, { "epoch": 0.8821810453609616, "grad_norm": 167.521728515625, "learning_rate": 1.2371301324781065e-06, "loss": 21.625, "step": 13283 }, { "epoch": 0.8822474596533174, "grad_norm": 92.75092315673828, "learning_rate": 1.2370256467583825e-06, "loss": 10.7891, "step": 13284 }, { "epoch": 0.8823138739456731, "grad_norm": 151.33372497558594, "learning_rate": 1.2369211582968826e-06, "loss": 14.9062, "step": 13285 }, { "epoch": 0.8823802882380288, "grad_norm": 120.98738861083984, "learning_rate": 1.2368166670948153e-06, "loss": 15.4062, "step": 13286 }, { "epoch": 0.8824467025303845, "grad_norm": 349.7657165527344, "learning_rate": 1.2367121731533894e-06, "loss": 23.1562, "step": 13287 }, { "epoch": 0.8825131168227403, "grad_norm": 290.30926513671875, "learning_rate": 1.2366076764738135e-06, "loss": 17.75, "step": 13288 }, { "epoch": 0.882579531115096, "grad_norm": 252.88885498046875, "learning_rate": 1.2365031770572964e-06, "loss": 24.9375, "step": 13289 }, { "epoch": 0.8826459454074517, "grad_norm": 555.3606567382812, "learning_rate": 1.2363986749050473e-06, "loss": 13.8281, "step": 13290 }, { "epoch": 0.8827123596998074, "grad_norm": 209.56484985351562, "learning_rate": 1.2362941700182743e-06, "loss": 23.2344, "step": 13291 }, { "epoch": 0.8827787739921631, "grad_norm": 185.72805786132812, "learning_rate": 1.2361896623981867e-06, "loss": 19.1406, "step": 13292 }, { "epoch": 0.8828451882845189, "grad_norm": 174.45187377929688, "learning_rate": 1.236085152045993e-06, "loss": 19.0781, "step": 13293 }, { "epoch": 0.8829116025768745, "grad_norm": 169.2136688232422, "learning_rate": 1.2359806389629028e-06, "loss": 18.75, "step": 13294 }, { "epoch": 0.8829780168692303, "grad_norm": 129.78854370117188, "learning_rate": 1.2358761231501245e-06, "loss": 11.625, "step": 13295 }, { "epoch": 0.8830444311615859, "grad_norm": 257.2335205078125, "learning_rate": 1.2357716046088672e-06, "loss": 21.75, "step": 13296 }, { "epoch": 0.8831108454539417, "grad_norm": 132.7586669921875, "learning_rate": 1.23566708334034e-06, "loss": 11.8438, "step": 13297 }, { "epoch": 0.8831772597462973, "grad_norm": 316.50469970703125, "learning_rate": 1.2355625593457518e-06, "loss": 17.7656, "step": 13298 }, { "epoch": 0.8832436740386531, "grad_norm": 160.89430236816406, "learning_rate": 1.2354580326263117e-06, "loss": 16.2344, "step": 13299 }, { "epoch": 0.8833100883310089, "grad_norm": 227.94058227539062, "learning_rate": 1.2353535031832287e-06, "loss": 14.2188, "step": 13300 }, { "epoch": 0.8833765026233645, "grad_norm": 198.86688232421875, "learning_rate": 1.2352489710177124e-06, "loss": 16.5, "step": 13301 }, { "epoch": 0.8834429169157203, "grad_norm": 205.1827850341797, "learning_rate": 1.2351444361309717e-06, "loss": 13.0156, "step": 13302 }, { "epoch": 0.883509331208076, "grad_norm": 198.5701141357422, "learning_rate": 1.2350398985242155e-06, "loss": 15.2031, "step": 13303 }, { "epoch": 0.8835757455004317, "grad_norm": 290.5242919921875, "learning_rate": 1.2349353581986533e-06, "loss": 19.9531, "step": 13304 }, { "epoch": 0.8836421597927874, "grad_norm": 198.7079315185547, "learning_rate": 1.2348308151554946e-06, "loss": 15.9375, "step": 13305 }, { "epoch": 0.8837085740851431, "grad_norm": 305.0760498046875, "learning_rate": 1.2347262693959482e-06, "loss": 21.6875, "step": 13306 }, { "epoch": 0.8837749883774988, "grad_norm": 147.91769409179688, "learning_rate": 1.2346217209212237e-06, "loss": 17.3594, "step": 13307 }, { "epoch": 0.8838414026698546, "grad_norm": 164.4738006591797, "learning_rate": 1.2345171697325307e-06, "loss": 17.5156, "step": 13308 }, { "epoch": 0.8839078169622102, "grad_norm": 402.89678955078125, "learning_rate": 1.234412615831078e-06, "loss": 19.9375, "step": 13309 }, { "epoch": 0.883974231254566, "grad_norm": 255.4824981689453, "learning_rate": 1.2343080592180753e-06, "loss": 19.0312, "step": 13310 }, { "epoch": 0.8840406455469217, "grad_norm": 194.63870239257812, "learning_rate": 1.2342034998947322e-06, "loss": 16.0781, "step": 13311 }, { "epoch": 0.8841070598392774, "grad_norm": 175.6243896484375, "learning_rate": 1.2340989378622582e-06, "loss": 15.5625, "step": 13312 }, { "epoch": 0.8841734741316332, "grad_norm": 249.65184020996094, "learning_rate": 1.2339943731218624e-06, "loss": 20.9688, "step": 13313 }, { "epoch": 0.8842398884239888, "grad_norm": 248.0230255126953, "learning_rate": 1.2338898056747546e-06, "loss": 21.0, "step": 13314 }, { "epoch": 0.8843063027163446, "grad_norm": 141.27284240722656, "learning_rate": 1.2337852355221446e-06, "loss": 9.5938, "step": 13315 }, { "epoch": 0.8843727170087002, "grad_norm": 320.6188049316406, "learning_rate": 1.2336806626652418e-06, "loss": 16.25, "step": 13316 }, { "epoch": 0.884439131301056, "grad_norm": 198.29782104492188, "learning_rate": 1.2335760871052558e-06, "loss": 18.3125, "step": 13317 }, { "epoch": 0.8845055455934117, "grad_norm": 577.991943359375, "learning_rate": 1.2334715088433963e-06, "loss": 18.5625, "step": 13318 }, { "epoch": 0.8845719598857674, "grad_norm": 162.18426513671875, "learning_rate": 1.233366927880873e-06, "loss": 15.0156, "step": 13319 }, { "epoch": 0.8846383741781231, "grad_norm": 304.0809020996094, "learning_rate": 1.2332623442188959e-06, "loss": 21.2031, "step": 13320 }, { "epoch": 0.8847047884704788, "grad_norm": 159.7814483642578, "learning_rate": 1.2331577578586738e-06, "loss": 15.7969, "step": 13321 }, { "epoch": 0.8847712027628346, "grad_norm": 314.06402587890625, "learning_rate": 1.233053168801418e-06, "loss": 22.7188, "step": 13322 }, { "epoch": 0.8848376170551903, "grad_norm": 289.7593688964844, "learning_rate": 1.232948577048337e-06, "loss": 17.2031, "step": 13323 }, { "epoch": 0.884904031347546, "grad_norm": 168.475341796875, "learning_rate": 1.2328439826006414e-06, "loss": 13.9219, "step": 13324 }, { "epoch": 0.8849704456399017, "grad_norm": 153.8477020263672, "learning_rate": 1.2327393854595407e-06, "loss": 17.0625, "step": 13325 }, { "epoch": 0.8850368599322574, "grad_norm": 547.4833984375, "learning_rate": 1.2326347856262453e-06, "loss": 16.4219, "step": 13326 }, { "epoch": 0.8851032742246131, "grad_norm": 318.9901123046875, "learning_rate": 1.2325301831019647e-06, "loss": 20.7188, "step": 13327 }, { "epoch": 0.8851696885169689, "grad_norm": 281.9588928222656, "learning_rate": 1.2324255778879088e-06, "loss": 21.5781, "step": 13328 }, { "epoch": 0.8852361028093245, "grad_norm": 259.97943115234375, "learning_rate": 1.2323209699852884e-06, "loss": 20.4375, "step": 13329 }, { "epoch": 0.8853025171016803, "grad_norm": 211.70346069335938, "learning_rate": 1.2322163593953126e-06, "loss": 14.2031, "step": 13330 }, { "epoch": 0.885368931394036, "grad_norm": 269.6558837890625, "learning_rate": 1.2321117461191918e-06, "loss": 11.6562, "step": 13331 }, { "epoch": 0.8854353456863917, "grad_norm": 465.61572265625, "learning_rate": 1.2320071301581362e-06, "loss": 13.3438, "step": 13332 }, { "epoch": 0.8855017599787475, "grad_norm": 179.36285400390625, "learning_rate": 1.231902511513356e-06, "loss": 16.3438, "step": 13333 }, { "epoch": 0.8855681742711031, "grad_norm": 106.20211791992188, "learning_rate": 1.2317978901860616e-06, "loss": 17.3594, "step": 13334 }, { "epoch": 0.8856345885634589, "grad_norm": 344.1048583984375, "learning_rate": 1.2316932661774622e-06, "loss": 19.0625, "step": 13335 }, { "epoch": 0.8857010028558145, "grad_norm": 148.33297729492188, "learning_rate": 1.2315886394887689e-06, "loss": 16.3281, "step": 13336 }, { "epoch": 0.8857674171481703, "grad_norm": 351.68878173828125, "learning_rate": 1.231484010121192e-06, "loss": 20.0156, "step": 13337 }, { "epoch": 0.885833831440526, "grad_norm": 142.1841278076172, "learning_rate": 1.2313793780759415e-06, "loss": 16.1562, "step": 13338 }, { "epoch": 0.8859002457328817, "grad_norm": 268.97332763671875, "learning_rate": 1.2312747433542276e-06, "loss": 21.2031, "step": 13339 }, { "epoch": 0.8859666600252374, "grad_norm": 121.05460357666016, "learning_rate": 1.2311701059572612e-06, "loss": 16.9219, "step": 13340 }, { "epoch": 0.8860330743175932, "grad_norm": 175.59654235839844, "learning_rate": 1.2310654658862518e-06, "loss": 15.7344, "step": 13341 }, { "epoch": 0.8860994886099489, "grad_norm": 133.8149871826172, "learning_rate": 1.2309608231424106e-06, "loss": 22.1562, "step": 13342 }, { "epoch": 0.8861659029023046, "grad_norm": 229.95733642578125, "learning_rate": 1.2308561777269477e-06, "loss": 19.6719, "step": 13343 }, { "epoch": 0.8862323171946603, "grad_norm": 417.4654846191406, "learning_rate": 1.2307515296410737e-06, "loss": 15.1875, "step": 13344 }, { "epoch": 0.886298731487016, "grad_norm": 211.02745056152344, "learning_rate": 1.2306468788859991e-06, "loss": 14.0156, "step": 13345 }, { "epoch": 0.8863651457793718, "grad_norm": 357.3915100097656, "learning_rate": 1.2305422254629343e-06, "loss": 18.0234, "step": 13346 }, { "epoch": 0.8864315600717274, "grad_norm": 345.0841369628906, "learning_rate": 1.2304375693730902e-06, "loss": 18.375, "step": 13347 }, { "epoch": 0.8864979743640832, "grad_norm": 177.54539489746094, "learning_rate": 1.230332910617677e-06, "loss": 13.8203, "step": 13348 }, { "epoch": 0.8865643886564388, "grad_norm": 220.49440002441406, "learning_rate": 1.2302282491979054e-06, "loss": 16.6719, "step": 13349 }, { "epoch": 0.8866308029487946, "grad_norm": 146.46310424804688, "learning_rate": 1.2301235851149864e-06, "loss": 13.5156, "step": 13350 }, { "epoch": 0.8866972172411502, "grad_norm": 386.2883605957031, "learning_rate": 1.2300189183701306e-06, "loss": 21.375, "step": 13351 }, { "epoch": 0.886763631533506, "grad_norm": 331.96856689453125, "learning_rate": 1.2299142489645483e-06, "loss": 18.2969, "step": 13352 }, { "epoch": 0.8868300458258618, "grad_norm": 140.63998413085938, "learning_rate": 1.2298095768994506e-06, "loss": 19.7031, "step": 13353 }, { "epoch": 0.8868964601182174, "grad_norm": 364.30950927734375, "learning_rate": 1.229704902176048e-06, "loss": 13.7188, "step": 13354 }, { "epoch": 0.8869628744105732, "grad_norm": 190.23727416992188, "learning_rate": 1.229600224795552e-06, "loss": 21.0, "step": 13355 }, { "epoch": 0.8870292887029289, "grad_norm": 181.3524627685547, "learning_rate": 1.2294955447591725e-06, "loss": 18.2656, "step": 13356 }, { "epoch": 0.8870957029952846, "grad_norm": 237.16104125976562, "learning_rate": 1.229390862068121e-06, "loss": 20.0625, "step": 13357 }, { "epoch": 0.8871621172876403, "grad_norm": 752.2595825195312, "learning_rate": 1.2292861767236087e-06, "loss": 16.8906, "step": 13358 }, { "epoch": 0.887228531579996, "grad_norm": 182.5940704345703, "learning_rate": 1.2291814887268457e-06, "loss": 20.2812, "step": 13359 }, { "epoch": 0.8872949458723517, "grad_norm": 185.21897888183594, "learning_rate": 1.2290767980790435e-06, "loss": 15.0938, "step": 13360 }, { "epoch": 0.8873613601647075, "grad_norm": 218.703125, "learning_rate": 1.2289721047814131e-06, "loss": 15.6094, "step": 13361 }, { "epoch": 0.8874277744570631, "grad_norm": 424.5955810546875, "learning_rate": 1.2288674088351653e-06, "loss": 18.875, "step": 13362 }, { "epoch": 0.8874941887494189, "grad_norm": 174.05633544921875, "learning_rate": 1.2287627102415114e-06, "loss": 17.8281, "step": 13363 }, { "epoch": 0.8875606030417746, "grad_norm": 157.60255432128906, "learning_rate": 1.2286580090016623e-06, "loss": 15.6406, "step": 13364 }, { "epoch": 0.8876270173341303, "grad_norm": 211.0768280029297, "learning_rate": 1.2285533051168292e-06, "loss": 19.1875, "step": 13365 }, { "epoch": 0.8876934316264861, "grad_norm": 154.10748291015625, "learning_rate": 1.2284485985882234e-06, "loss": 14.9688, "step": 13366 }, { "epoch": 0.8877598459188417, "grad_norm": 339.02569580078125, "learning_rate": 1.2283438894170558e-06, "loss": 18.7188, "step": 13367 }, { "epoch": 0.8878262602111975, "grad_norm": 112.0794906616211, "learning_rate": 1.2282391776045379e-06, "loss": 15.9062, "step": 13368 }, { "epoch": 0.8878926745035531, "grad_norm": 303.3053283691406, "learning_rate": 1.2281344631518805e-06, "loss": 19.8281, "step": 13369 }, { "epoch": 0.8879590887959089, "grad_norm": 112.37641906738281, "learning_rate": 1.2280297460602955e-06, "loss": 12.9688, "step": 13370 }, { "epoch": 0.8880255030882646, "grad_norm": 194.41238403320312, "learning_rate": 1.2279250263309937e-06, "loss": 18.7969, "step": 13371 }, { "epoch": 0.8880919173806203, "grad_norm": 131.68931579589844, "learning_rate": 1.2278203039651866e-06, "loss": 18.375, "step": 13372 }, { "epoch": 0.888158331672976, "grad_norm": 163.9571990966797, "learning_rate": 1.2277155789640858e-06, "loss": 15.3125, "step": 13373 }, { "epoch": 0.8882247459653317, "grad_norm": 228.2958526611328, "learning_rate": 1.2276108513289022e-06, "loss": 17.3594, "step": 13374 }, { "epoch": 0.8882911602576875, "grad_norm": 204.43519592285156, "learning_rate": 1.2275061210608476e-06, "loss": 20.6875, "step": 13375 }, { "epoch": 0.8883575745500432, "grad_norm": 123.6068344116211, "learning_rate": 1.2274013881611339e-06, "loss": 16.2656, "step": 13376 }, { "epoch": 0.8884239888423989, "grad_norm": 250.99786376953125, "learning_rate": 1.2272966526309714e-06, "loss": 18.9844, "step": 13377 }, { "epoch": 0.8884904031347546, "grad_norm": 570.8382568359375, "learning_rate": 1.2271919144715727e-06, "loss": 19.4141, "step": 13378 }, { "epoch": 0.8885568174271103, "grad_norm": 245.4049072265625, "learning_rate": 1.2270871736841483e-06, "loss": 16.0625, "step": 13379 }, { "epoch": 0.888623231719466, "grad_norm": 302.7064208984375, "learning_rate": 1.2269824302699111e-06, "loss": 19.6875, "step": 13380 }, { "epoch": 0.8886896460118218, "grad_norm": 268.02276611328125, "learning_rate": 1.226877684230072e-06, "loss": 18.4375, "step": 13381 }, { "epoch": 0.8887560603041774, "grad_norm": 209.88992309570312, "learning_rate": 1.2267729355658422e-06, "loss": 15.7031, "step": 13382 }, { "epoch": 0.8888224745965332, "grad_norm": 220.91944885253906, "learning_rate": 1.2266681842784344e-06, "loss": 15.2031, "step": 13383 }, { "epoch": 0.8888888888888888, "grad_norm": 252.7926025390625, "learning_rate": 1.2265634303690596e-06, "loss": 17.5781, "step": 13384 }, { "epoch": 0.8889553031812446, "grad_norm": 244.089111328125, "learning_rate": 1.2264586738389293e-06, "loss": 15.7188, "step": 13385 }, { "epoch": 0.8890217174736004, "grad_norm": 92.96882629394531, "learning_rate": 1.2263539146892558e-06, "loss": 14.2891, "step": 13386 }, { "epoch": 0.889088131765956, "grad_norm": 427.4748229980469, "learning_rate": 1.226249152921251e-06, "loss": 22.4375, "step": 13387 }, { "epoch": 0.8891545460583118, "grad_norm": 167.59494018554688, "learning_rate": 1.2261443885361264e-06, "loss": 14.4531, "step": 13388 }, { "epoch": 0.8892209603506674, "grad_norm": 178.49032592773438, "learning_rate": 1.2260396215350936e-06, "loss": 15.1406, "step": 13389 }, { "epoch": 0.8892873746430232, "grad_norm": 133.736572265625, "learning_rate": 1.225934851919365e-06, "loss": 18.1875, "step": 13390 }, { "epoch": 0.8893537889353789, "grad_norm": 108.30872344970703, "learning_rate": 1.2258300796901526e-06, "loss": 13.5938, "step": 13391 }, { "epoch": 0.8894202032277346, "grad_norm": 183.0877685546875, "learning_rate": 1.2257253048486678e-06, "loss": 20.1406, "step": 13392 }, { "epoch": 0.8894866175200903, "grad_norm": 98.32560729980469, "learning_rate": 1.2256205273961226e-06, "loss": 16.3594, "step": 13393 }, { "epoch": 0.889553031812446, "grad_norm": 165.55357360839844, "learning_rate": 1.2255157473337297e-06, "loss": 15.3125, "step": 13394 }, { "epoch": 0.8896194461048017, "grad_norm": 182.4204559326172, "learning_rate": 1.2254109646627003e-06, "loss": 17.6875, "step": 13395 }, { "epoch": 0.8896858603971575, "grad_norm": 409.5312194824219, "learning_rate": 1.2253061793842468e-06, "loss": 16.1406, "step": 13396 }, { "epoch": 0.8897522746895132, "grad_norm": 168.25440979003906, "learning_rate": 1.2252013914995817e-06, "loss": 17.6719, "step": 13397 }, { "epoch": 0.8898186889818689, "grad_norm": 237.81106567382812, "learning_rate": 1.2250966010099166e-06, "loss": 18.4688, "step": 13398 }, { "epoch": 0.8898851032742247, "grad_norm": 200.44871520996094, "learning_rate": 1.2249918079164636e-06, "loss": 18.7031, "step": 13399 }, { "epoch": 0.8899515175665803, "grad_norm": 378.0921325683594, "learning_rate": 1.2248870122204353e-06, "loss": 21.6875, "step": 13400 }, { "epoch": 0.8900179318589361, "grad_norm": 1167.893798828125, "learning_rate": 1.2247822139230439e-06, "loss": 17.4844, "step": 13401 }, { "epoch": 0.8900843461512917, "grad_norm": 225.26690673828125, "learning_rate": 1.2246774130255012e-06, "loss": 20.4375, "step": 13402 }, { "epoch": 0.8901507604436475, "grad_norm": 189.23330688476562, "learning_rate": 1.2245726095290199e-06, "loss": 19.8125, "step": 13403 }, { "epoch": 0.8902171747360031, "grad_norm": 106.10762786865234, "learning_rate": 1.2244678034348122e-06, "loss": 14.7031, "step": 13404 }, { "epoch": 0.8902835890283589, "grad_norm": 406.4256591796875, "learning_rate": 1.2243629947440904e-06, "loss": 13.3438, "step": 13405 }, { "epoch": 0.8903500033207146, "grad_norm": 236.44894409179688, "learning_rate": 1.2242581834580664e-06, "loss": 13.5859, "step": 13406 }, { "epoch": 0.8904164176130703, "grad_norm": 174.1024627685547, "learning_rate": 1.2241533695779532e-06, "loss": 18.0312, "step": 13407 }, { "epoch": 0.8904828319054261, "grad_norm": 187.51754760742188, "learning_rate": 1.2240485531049636e-06, "loss": 18.8281, "step": 13408 }, { "epoch": 0.8905492461977818, "grad_norm": 161.8714599609375, "learning_rate": 1.2239437340403091e-06, "loss": 13.2344, "step": 13409 }, { "epoch": 0.8906156604901375, "grad_norm": 142.01345825195312, "learning_rate": 1.2238389123852026e-06, "loss": 10.2422, "step": 13410 }, { "epoch": 0.8906820747824932, "grad_norm": 361.4974060058594, "learning_rate": 1.2237340881408567e-06, "loss": 18.4062, "step": 13411 }, { "epoch": 0.8907484890748489, "grad_norm": 791.7327270507812, "learning_rate": 1.223629261308484e-06, "loss": 19.125, "step": 13412 }, { "epoch": 0.8908149033672046, "grad_norm": 142.49427795410156, "learning_rate": 1.223524431889297e-06, "loss": 13.2188, "step": 13413 }, { "epoch": 0.8908813176595604, "grad_norm": 496.9122314453125, "learning_rate": 1.223419599884508e-06, "loss": 15.3281, "step": 13414 }, { "epoch": 0.890947731951916, "grad_norm": 209.95181274414062, "learning_rate": 1.2233147652953302e-06, "loss": 15.125, "step": 13415 }, { "epoch": 0.8910141462442718, "grad_norm": 217.08851623535156, "learning_rate": 1.2232099281229756e-06, "loss": 18.1562, "step": 13416 }, { "epoch": 0.8910805605366274, "grad_norm": 151.6448211669922, "learning_rate": 1.2231050883686576e-06, "loss": 11.6406, "step": 13417 }, { "epoch": 0.8911469748289832, "grad_norm": 234.389404296875, "learning_rate": 1.2230002460335883e-06, "loss": 16.9453, "step": 13418 }, { "epoch": 0.891213389121339, "grad_norm": 137.45030212402344, "learning_rate": 1.222895401118981e-06, "loss": 15.2031, "step": 13419 }, { "epoch": 0.8912798034136946, "grad_norm": 155.67523193359375, "learning_rate": 1.222790553626048e-06, "loss": 16.0938, "step": 13420 }, { "epoch": 0.8913462177060504, "grad_norm": 266.6634521484375, "learning_rate": 1.2226857035560025e-06, "loss": 17.1406, "step": 13421 }, { "epoch": 0.891412631998406, "grad_norm": 109.69886779785156, "learning_rate": 1.222580850910057e-06, "loss": 13.4219, "step": 13422 }, { "epoch": 0.8914790462907618, "grad_norm": 197.57545471191406, "learning_rate": 1.2224759956894249e-06, "loss": 23.1406, "step": 13423 }, { "epoch": 0.8915454605831175, "grad_norm": 972.5286865234375, "learning_rate": 1.2223711378953184e-06, "loss": 22.625, "step": 13424 }, { "epoch": 0.8916118748754732, "grad_norm": 196.30055236816406, "learning_rate": 1.2222662775289512e-06, "loss": 18.375, "step": 13425 }, { "epoch": 0.8916782891678289, "grad_norm": 239.6473388671875, "learning_rate": 1.2221614145915358e-06, "loss": 12.9219, "step": 13426 }, { "epoch": 0.8917447034601846, "grad_norm": 266.5517883300781, "learning_rate": 1.2220565490842848e-06, "loss": 15.9375, "step": 13427 }, { "epoch": 0.8918111177525403, "grad_norm": 120.36127471923828, "learning_rate": 1.2219516810084122e-06, "loss": 13.0781, "step": 13428 }, { "epoch": 0.8918775320448961, "grad_norm": 447.857666015625, "learning_rate": 1.2218468103651304e-06, "loss": 16.8281, "step": 13429 }, { "epoch": 0.8919439463372518, "grad_norm": 172.5892791748047, "learning_rate": 1.2217419371556525e-06, "loss": 16.9844, "step": 13430 }, { "epoch": 0.8920103606296075, "grad_norm": 99.00611877441406, "learning_rate": 1.2216370613811915e-06, "loss": 14.7266, "step": 13431 }, { "epoch": 0.8920767749219632, "grad_norm": 172.94593811035156, "learning_rate": 1.221532183042961e-06, "loss": 20.5625, "step": 13432 }, { "epoch": 0.8921431892143189, "grad_norm": 333.0482482910156, "learning_rate": 1.221427302142174e-06, "loss": 24.1406, "step": 13433 }, { "epoch": 0.8922096035066747, "grad_norm": 455.2139892578125, "learning_rate": 1.2213224186800439e-06, "loss": 18.5938, "step": 13434 }, { "epoch": 0.8922760177990303, "grad_norm": 138.60447692871094, "learning_rate": 1.2212175326577835e-06, "loss": 18.4062, "step": 13435 }, { "epoch": 0.8923424320913861, "grad_norm": 175.73599243164062, "learning_rate": 1.2211126440766063e-06, "loss": 17.4844, "step": 13436 }, { "epoch": 0.8924088463837417, "grad_norm": 152.85061645507812, "learning_rate": 1.2210077529377252e-06, "loss": 17.2031, "step": 13437 }, { "epoch": 0.8924752606760975, "grad_norm": 303.51470947265625, "learning_rate": 1.2209028592423543e-06, "loss": 16.4688, "step": 13438 }, { "epoch": 0.8925416749684532, "grad_norm": 231.08677673339844, "learning_rate": 1.220797962991706e-06, "loss": 16.625, "step": 13439 }, { "epoch": 0.8926080892608089, "grad_norm": 234.83328247070312, "learning_rate": 1.2206930641869947e-06, "loss": 15.2031, "step": 13440 }, { "epoch": 0.8926745035531647, "grad_norm": 182.4415740966797, "learning_rate": 1.220588162829433e-06, "loss": 20.0938, "step": 13441 }, { "epoch": 0.8927409178455203, "grad_norm": 273.6379699707031, "learning_rate": 1.2204832589202346e-06, "loss": 24.0312, "step": 13442 }, { "epoch": 0.8928073321378761, "grad_norm": 86.03436279296875, "learning_rate": 1.220378352460613e-06, "loss": 14.4062, "step": 13443 }, { "epoch": 0.8928737464302318, "grad_norm": 238.9188690185547, "learning_rate": 1.220273443451782e-06, "loss": 25.9688, "step": 13444 }, { "epoch": 0.8929401607225875, "grad_norm": 241.48593139648438, "learning_rate": 1.2201685318949545e-06, "loss": 18.6875, "step": 13445 }, { "epoch": 0.8930065750149432, "grad_norm": 416.7222595214844, "learning_rate": 1.2200636177913446e-06, "loss": 12.4844, "step": 13446 }, { "epoch": 0.893072989307299, "grad_norm": 129.4619598388672, "learning_rate": 1.2199587011421658e-06, "loss": 20.2344, "step": 13447 }, { "epoch": 0.8931394035996546, "grad_norm": 275.9451599121094, "learning_rate": 1.2198537819486313e-06, "loss": 20.9688, "step": 13448 }, { "epoch": 0.8932058178920104, "grad_norm": 136.7293243408203, "learning_rate": 1.2197488602119547e-06, "loss": 15.3594, "step": 13449 }, { "epoch": 0.893272232184366, "grad_norm": 268.0977783203125, "learning_rate": 1.2196439359333504e-06, "loss": 17.4219, "step": 13450 }, { "epoch": 0.8933386464767218, "grad_norm": 141.83863830566406, "learning_rate": 1.219539009114032e-06, "loss": 16.5469, "step": 13451 }, { "epoch": 0.8934050607690776, "grad_norm": 170.34909057617188, "learning_rate": 1.2194340797552124e-06, "loss": 13.1875, "step": 13452 }, { "epoch": 0.8934714750614332, "grad_norm": 107.29186248779297, "learning_rate": 1.219329147858106e-06, "loss": 14.5, "step": 13453 }, { "epoch": 0.893537889353789, "grad_norm": 330.486572265625, "learning_rate": 1.2192242134239268e-06, "loss": 18.0469, "step": 13454 }, { "epoch": 0.8936043036461446, "grad_norm": 215.60678100585938, "learning_rate": 1.219119276453888e-06, "loss": 14.6094, "step": 13455 }, { "epoch": 0.8936707179385004, "grad_norm": 418.1226501464844, "learning_rate": 1.2190143369492038e-06, "loss": 18.2656, "step": 13456 }, { "epoch": 0.893737132230856, "grad_norm": 171.37571716308594, "learning_rate": 1.2189093949110882e-06, "loss": 18.5781, "step": 13457 }, { "epoch": 0.8938035465232118, "grad_norm": 190.84251403808594, "learning_rate": 1.2188044503407551e-06, "loss": 18.6875, "step": 13458 }, { "epoch": 0.8938699608155675, "grad_norm": 295.69561767578125, "learning_rate": 1.2186995032394178e-06, "loss": 20.8281, "step": 13459 }, { "epoch": 0.8939363751079232, "grad_norm": 221.50608825683594, "learning_rate": 1.2185945536082911e-06, "loss": 15.6875, "step": 13460 }, { "epoch": 0.8940027894002789, "grad_norm": 339.2118225097656, "learning_rate": 1.2184896014485884e-06, "loss": 15.7969, "step": 13461 }, { "epoch": 0.8940692036926347, "grad_norm": 169.30691528320312, "learning_rate": 1.2183846467615242e-06, "loss": 15.1875, "step": 13462 }, { "epoch": 0.8941356179849904, "grad_norm": 157.8142852783203, "learning_rate": 1.2182796895483123e-06, "loss": 15.0781, "step": 13463 }, { "epoch": 0.8942020322773461, "grad_norm": 185.2560577392578, "learning_rate": 1.2181747298101669e-06, "loss": 15.875, "step": 13464 }, { "epoch": 0.8942684465697018, "grad_norm": 223.73875427246094, "learning_rate": 1.218069767548302e-06, "loss": 18.5781, "step": 13465 }, { "epoch": 0.8943348608620575, "grad_norm": 365.0802917480469, "learning_rate": 1.2179648027639316e-06, "loss": 19.7969, "step": 13466 }, { "epoch": 0.8944012751544133, "grad_norm": 138.48544311523438, "learning_rate": 1.2178598354582701e-06, "loss": 17.0, "step": 13467 }, { "epoch": 0.8944676894467689, "grad_norm": 263.8503112792969, "learning_rate": 1.2177548656325317e-06, "loss": 28.2031, "step": 13468 }, { "epoch": 0.8945341037391247, "grad_norm": 147.3903350830078, "learning_rate": 1.2176498932879305e-06, "loss": 15.7344, "step": 13469 }, { "epoch": 0.8946005180314803, "grad_norm": 158.9468536376953, "learning_rate": 1.2175449184256807e-06, "loss": 16.9219, "step": 13470 }, { "epoch": 0.8946669323238361, "grad_norm": 192.5030059814453, "learning_rate": 1.217439941046997e-06, "loss": 17.4062, "step": 13471 }, { "epoch": 0.8947333466161917, "grad_norm": 250.15069580078125, "learning_rate": 1.2173349611530934e-06, "loss": 23.0312, "step": 13472 }, { "epoch": 0.8947997609085475, "grad_norm": 213.32273864746094, "learning_rate": 1.2172299787451843e-06, "loss": 19.7812, "step": 13473 }, { "epoch": 0.8948661752009033, "grad_norm": 150.01531982421875, "learning_rate": 1.2171249938244842e-06, "loss": 14.7656, "step": 13474 }, { "epoch": 0.8949325894932589, "grad_norm": 204.3935089111328, "learning_rate": 1.2170200063922069e-06, "loss": 18.2812, "step": 13475 }, { "epoch": 0.8949990037856147, "grad_norm": 341.7721252441406, "learning_rate": 1.2169150164495678e-06, "loss": 17.5, "step": 13476 }, { "epoch": 0.8950654180779704, "grad_norm": 693.0833129882812, "learning_rate": 1.2168100239977809e-06, "loss": 21.7812, "step": 13477 }, { "epoch": 0.8951318323703261, "grad_norm": 169.3785400390625, "learning_rate": 1.21670502903806e-06, "loss": 14.1562, "step": 13478 }, { "epoch": 0.8951982466626818, "grad_norm": 260.78173828125, "learning_rate": 1.216600031571621e-06, "loss": 16.0469, "step": 13479 }, { "epoch": 0.8952646609550375, "grad_norm": 316.2574157714844, "learning_rate": 1.2164950315996773e-06, "loss": 19.5156, "step": 13480 }, { "epoch": 0.8953310752473932, "grad_norm": 327.6543884277344, "learning_rate": 1.216390029123444e-06, "loss": 17.9531, "step": 13481 }, { "epoch": 0.895397489539749, "grad_norm": 154.73162841796875, "learning_rate": 1.2162850241441358e-06, "loss": 15.5, "step": 13482 }, { "epoch": 0.8954639038321047, "grad_norm": 247.50460815429688, "learning_rate": 1.2161800166629672e-06, "loss": 18.25, "step": 13483 }, { "epoch": 0.8955303181244604, "grad_norm": 278.76739501953125, "learning_rate": 1.2160750066811526e-06, "loss": 19.375, "step": 13484 }, { "epoch": 0.8955967324168161, "grad_norm": 175.59771728515625, "learning_rate": 1.215969994199907e-06, "loss": 19.0625, "step": 13485 }, { "epoch": 0.8956631467091718, "grad_norm": 178.25437927246094, "learning_rate": 1.2158649792204452e-06, "loss": 18.0469, "step": 13486 }, { "epoch": 0.8957295610015276, "grad_norm": 160.88058471679688, "learning_rate": 1.2157599617439816e-06, "loss": 15.2344, "step": 13487 }, { "epoch": 0.8957959752938832, "grad_norm": 134.1755828857422, "learning_rate": 1.2156549417717313e-06, "loss": 14.1562, "step": 13488 }, { "epoch": 0.895862389586239, "grad_norm": 372.47998046875, "learning_rate": 1.2155499193049088e-06, "loss": 20.7344, "step": 13489 }, { "epoch": 0.8959288038785946, "grad_norm": 116.5598373413086, "learning_rate": 1.2154448943447293e-06, "loss": 16.0312, "step": 13490 }, { "epoch": 0.8959952181709504, "grad_norm": 277.47808837890625, "learning_rate": 1.2153398668924073e-06, "loss": 22.2344, "step": 13491 }, { "epoch": 0.8960616324633061, "grad_norm": 210.86636352539062, "learning_rate": 1.215234836949158e-06, "loss": 11.2578, "step": 13492 }, { "epoch": 0.8961280467556618, "grad_norm": 363.50164794921875, "learning_rate": 1.2151298045161962e-06, "loss": 19.4062, "step": 13493 }, { "epoch": 0.8961944610480176, "grad_norm": 136.12403869628906, "learning_rate": 1.2150247695947372e-06, "loss": 14.0156, "step": 13494 }, { "epoch": 0.8962608753403732, "grad_norm": 445.7792663574219, "learning_rate": 1.214919732185995e-06, "loss": 20.1094, "step": 13495 }, { "epoch": 0.896327289632729, "grad_norm": 147.0890655517578, "learning_rate": 1.2148146922911857e-06, "loss": 14.9219, "step": 13496 }, { "epoch": 0.8963937039250847, "grad_norm": 2622.63037109375, "learning_rate": 1.214709649911524e-06, "loss": 15.75, "step": 13497 }, { "epoch": 0.8964601182174404, "grad_norm": 311.40399169921875, "learning_rate": 1.2146046050482248e-06, "loss": 20.9219, "step": 13498 }, { "epoch": 0.8965265325097961, "grad_norm": 130.95863342285156, "learning_rate": 1.214499557702503e-06, "loss": 17.4219, "step": 13499 }, { "epoch": 0.8965929468021518, "grad_norm": 507.0401306152344, "learning_rate": 1.2143945078755742e-06, "loss": 20.5781, "step": 13500 }, { "epoch": 0.8966593610945075, "grad_norm": 262.281982421875, "learning_rate": 1.2142894555686536e-06, "loss": 15.875, "step": 13501 }, { "epoch": 0.8967257753868633, "grad_norm": 199.17022705078125, "learning_rate": 1.2141844007829559e-06, "loss": 21.2656, "step": 13502 }, { "epoch": 0.8967921896792189, "grad_norm": 189.5915985107422, "learning_rate": 1.2140793435196963e-06, "loss": 14.5156, "step": 13503 }, { "epoch": 0.8968586039715747, "grad_norm": 571.7383422851562, "learning_rate": 1.2139742837800906e-06, "loss": 23.9844, "step": 13504 }, { "epoch": 0.8969250182639305, "grad_norm": 222.19076538085938, "learning_rate": 1.2138692215653536e-06, "loss": 16.125, "step": 13505 }, { "epoch": 0.8969914325562861, "grad_norm": 186.7478485107422, "learning_rate": 1.213764156876701e-06, "loss": 14.3438, "step": 13506 }, { "epoch": 0.8970578468486419, "grad_norm": 216.72451782226562, "learning_rate": 1.2136590897153475e-06, "loss": 15.7344, "step": 13507 }, { "epoch": 0.8971242611409975, "grad_norm": 209.5721435546875, "learning_rate": 1.2135540200825095e-06, "loss": 19.9062, "step": 13508 }, { "epoch": 0.8971906754333533, "grad_norm": 317.9299011230469, "learning_rate": 1.2134489479794012e-06, "loss": 30.4375, "step": 13509 }, { "epoch": 0.897257089725709, "grad_norm": 186.86126708984375, "learning_rate": 1.2133438734072387e-06, "loss": 18.8281, "step": 13510 }, { "epoch": 0.8973235040180647, "grad_norm": 306.42510986328125, "learning_rate": 1.2132387963672375e-06, "loss": 15.9531, "step": 13511 }, { "epoch": 0.8973899183104204, "grad_norm": 352.98529052734375, "learning_rate": 1.2131337168606126e-06, "loss": 18.5156, "step": 13512 }, { "epoch": 0.8974563326027761, "grad_norm": 319.6622619628906, "learning_rate": 1.2130286348885799e-06, "loss": 18.5312, "step": 13513 }, { "epoch": 0.8975227468951318, "grad_norm": 141.39645385742188, "learning_rate": 1.2129235504523547e-06, "loss": 17.1406, "step": 13514 }, { "epoch": 0.8975891611874875, "grad_norm": 183.20455932617188, "learning_rate": 1.212818463553153e-06, "loss": 21.7031, "step": 13515 }, { "epoch": 0.8976555754798433, "grad_norm": 119.38953399658203, "learning_rate": 1.2127133741921895e-06, "loss": 16.7344, "step": 13516 }, { "epoch": 0.897721989772199, "grad_norm": 246.36219787597656, "learning_rate": 1.2126082823706805e-06, "loss": 23.25, "step": 13517 }, { "epoch": 0.8977884040645547, "grad_norm": 178.79937744140625, "learning_rate": 1.2125031880898418e-06, "loss": 19.4062, "step": 13518 }, { "epoch": 0.8978548183569104, "grad_norm": 169.9919891357422, "learning_rate": 1.2123980913508885e-06, "loss": 15.3125, "step": 13519 }, { "epoch": 0.8979212326492662, "grad_norm": 265.9780578613281, "learning_rate": 1.2122929921550365e-06, "loss": 20.1562, "step": 13520 }, { "epoch": 0.8979876469416218, "grad_norm": 172.67132568359375, "learning_rate": 1.2121878905035015e-06, "loss": 16.125, "step": 13521 }, { "epoch": 0.8980540612339776, "grad_norm": 445.81097412109375, "learning_rate": 1.2120827863974997e-06, "loss": 26.5, "step": 13522 }, { "epoch": 0.8981204755263332, "grad_norm": 334.28448486328125, "learning_rate": 1.211977679838246e-06, "loss": 16.9219, "step": 13523 }, { "epoch": 0.898186889818689, "grad_norm": 295.6713562011719, "learning_rate": 1.2118725708269572e-06, "loss": 12.0781, "step": 13524 }, { "epoch": 0.8982533041110446, "grad_norm": 421.7121276855469, "learning_rate": 1.2117674593648486e-06, "loss": 18.5312, "step": 13525 }, { "epoch": 0.8983197184034004, "grad_norm": 188.5327911376953, "learning_rate": 1.211662345453136e-06, "loss": 16.125, "step": 13526 }, { "epoch": 0.8983861326957562, "grad_norm": 283.2092590332031, "learning_rate": 1.2115572290930355e-06, "loss": 13.2266, "step": 13527 }, { "epoch": 0.8984525469881118, "grad_norm": 5320.6015625, "learning_rate": 1.211452110285763e-06, "loss": 20.0, "step": 13528 }, { "epoch": 0.8985189612804676, "grad_norm": 307.17578125, "learning_rate": 1.2113469890325346e-06, "loss": 22.9688, "step": 13529 }, { "epoch": 0.8985853755728233, "grad_norm": 222.53369140625, "learning_rate": 1.2112418653345658e-06, "loss": 17.0625, "step": 13530 }, { "epoch": 0.898651789865179, "grad_norm": 182.5307159423828, "learning_rate": 1.211136739193073e-06, "loss": 14.8281, "step": 13531 }, { "epoch": 0.8987182041575347, "grad_norm": 243.3323211669922, "learning_rate": 1.2110316106092719e-06, "loss": 20.6094, "step": 13532 }, { "epoch": 0.8987846184498904, "grad_norm": 160.18455505371094, "learning_rate": 1.2109264795843792e-06, "loss": 13.0938, "step": 13533 }, { "epoch": 0.8988510327422461, "grad_norm": 248.7847442626953, "learning_rate": 1.2108213461196104e-06, "loss": 14.9688, "step": 13534 }, { "epoch": 0.8989174470346019, "grad_norm": 405.933837890625, "learning_rate": 1.210716210216182e-06, "loss": 12.0469, "step": 13535 }, { "epoch": 0.8989838613269575, "grad_norm": 221.67575073242188, "learning_rate": 1.2106110718753098e-06, "loss": 14.6406, "step": 13536 }, { "epoch": 0.8990502756193133, "grad_norm": 230.9103240966797, "learning_rate": 1.2105059310982104e-06, "loss": 13.8438, "step": 13537 }, { "epoch": 0.899116689911669, "grad_norm": 197.41110229492188, "learning_rate": 1.2104007878860995e-06, "loss": 20.7969, "step": 13538 }, { "epoch": 0.8991831042040247, "grad_norm": 1038.6419677734375, "learning_rate": 1.2102956422401937e-06, "loss": 23.125, "step": 13539 }, { "epoch": 0.8992495184963805, "grad_norm": 707.3071899414062, "learning_rate": 1.2101904941617093e-06, "loss": 16.9531, "step": 13540 }, { "epoch": 0.8993159327887361, "grad_norm": 143.72650146484375, "learning_rate": 1.2100853436518623e-06, "loss": 21.125, "step": 13541 }, { "epoch": 0.8993823470810919, "grad_norm": 125.9725570678711, "learning_rate": 1.2099801907118691e-06, "loss": 16.1719, "step": 13542 }, { "epoch": 0.8994487613734475, "grad_norm": 268.0186462402344, "learning_rate": 1.2098750353429465e-06, "loss": 15.6562, "step": 13543 }, { "epoch": 0.8995151756658033, "grad_norm": 312.2488708496094, "learning_rate": 1.2097698775463104e-06, "loss": 18.3125, "step": 13544 }, { "epoch": 0.899581589958159, "grad_norm": 236.7783966064453, "learning_rate": 1.209664717323177e-06, "loss": 20.1875, "step": 13545 }, { "epoch": 0.8996480042505147, "grad_norm": 329.351318359375, "learning_rate": 1.2095595546747632e-06, "loss": 20.2969, "step": 13546 }, { "epoch": 0.8997144185428704, "grad_norm": 276.5709228515625, "learning_rate": 1.2094543896022858e-06, "loss": 18.8438, "step": 13547 }, { "epoch": 0.8997808328352261, "grad_norm": 366.13336181640625, "learning_rate": 1.2093492221069602e-06, "loss": 16.7188, "step": 13548 }, { "epoch": 0.8998472471275819, "grad_norm": 351.21075439453125, "learning_rate": 1.2092440521900036e-06, "loss": 20.25, "step": 13549 }, { "epoch": 0.8999136614199376, "grad_norm": 271.759033203125, "learning_rate": 1.2091388798526328e-06, "loss": 16.3906, "step": 13550 }, { "epoch": 0.8999800757122933, "grad_norm": 296.48291015625, "learning_rate": 1.2090337050960637e-06, "loss": 18.0312, "step": 13551 }, { "epoch": 0.900046490004649, "grad_norm": 253.45309448242188, "learning_rate": 1.2089285279215134e-06, "loss": 16.7344, "step": 13552 }, { "epoch": 0.9001129042970047, "grad_norm": 120.44574737548828, "learning_rate": 1.2088233483301982e-06, "loss": 14.8438, "step": 13553 }, { "epoch": 0.9001793185893604, "grad_norm": 288.0663757324219, "learning_rate": 1.2087181663233351e-06, "loss": 17.0625, "step": 13554 }, { "epoch": 0.9002457328817162, "grad_norm": 208.99520874023438, "learning_rate": 1.2086129819021404e-06, "loss": 16.0156, "step": 13555 }, { "epoch": 0.9003121471740718, "grad_norm": 217.73056030273438, "learning_rate": 1.2085077950678312e-06, "loss": 16.6406, "step": 13556 }, { "epoch": 0.9003785614664276, "grad_norm": 210.57083129882812, "learning_rate": 1.2084026058216238e-06, "loss": 20.875, "step": 13557 }, { "epoch": 0.9004449757587832, "grad_norm": 160.12147521972656, "learning_rate": 1.2082974141647357e-06, "loss": 16.0625, "step": 13558 }, { "epoch": 0.900511390051139, "grad_norm": 204.0362091064453, "learning_rate": 1.208192220098383e-06, "loss": 19.2812, "step": 13559 }, { "epoch": 0.9005778043434948, "grad_norm": 647.7532348632812, "learning_rate": 1.2080870236237826e-06, "loss": 15.9062, "step": 13560 }, { "epoch": 0.9006442186358504, "grad_norm": 238.71604919433594, "learning_rate": 1.2079818247421519e-06, "loss": 22.8125, "step": 13561 }, { "epoch": 0.9007106329282062, "grad_norm": 191.9447784423828, "learning_rate": 1.2078766234547069e-06, "loss": 14.1406, "step": 13562 }, { "epoch": 0.9007770472205618, "grad_norm": 167.9212188720703, "learning_rate": 1.2077714197626652e-06, "loss": 19.2969, "step": 13563 }, { "epoch": 0.9008434615129176, "grad_norm": 206.229736328125, "learning_rate": 1.2076662136672433e-06, "loss": 17.9688, "step": 13564 }, { "epoch": 0.9009098758052733, "grad_norm": 261.4123229980469, "learning_rate": 1.2075610051696588e-06, "loss": 24.8125, "step": 13565 }, { "epoch": 0.900976290097629, "grad_norm": 189.85989379882812, "learning_rate": 1.207455794271128e-06, "loss": 17.7344, "step": 13566 }, { "epoch": 0.9010427043899847, "grad_norm": 184.27906799316406, "learning_rate": 1.2073505809728683e-06, "loss": 12.9844, "step": 13567 }, { "epoch": 0.9011091186823404, "grad_norm": 225.18161010742188, "learning_rate": 1.2072453652760965e-06, "loss": 14.375, "step": 13568 }, { "epoch": 0.9011755329746961, "grad_norm": 225.39718627929688, "learning_rate": 1.2071401471820302e-06, "loss": 22.4062, "step": 13569 }, { "epoch": 0.9012419472670519, "grad_norm": 114.9576416015625, "learning_rate": 1.2070349266918858e-06, "loss": 14.4375, "step": 13570 }, { "epoch": 0.9013083615594076, "grad_norm": 104.61027526855469, "learning_rate": 1.2069297038068807e-06, "loss": 12.7031, "step": 13571 }, { "epoch": 0.9013747758517633, "grad_norm": 263.31268310546875, "learning_rate": 1.2068244785282323e-06, "loss": 28.125, "step": 13572 }, { "epoch": 0.901441190144119, "grad_norm": 296.949462890625, "learning_rate": 1.2067192508571575e-06, "loss": 17.9844, "step": 13573 }, { "epoch": 0.9015076044364747, "grad_norm": 238.64157104492188, "learning_rate": 1.2066140207948736e-06, "loss": 16.1406, "step": 13574 }, { "epoch": 0.9015740187288305, "grad_norm": 589.0908813476562, "learning_rate": 1.2065087883425976e-06, "loss": 15.2812, "step": 13575 }, { "epoch": 0.9016404330211861, "grad_norm": 171.4126434326172, "learning_rate": 1.2064035535015476e-06, "loss": 12.2812, "step": 13576 }, { "epoch": 0.9017068473135419, "grad_norm": 198.833984375, "learning_rate": 1.2062983162729399e-06, "loss": 14.5469, "step": 13577 }, { "epoch": 0.9017732616058975, "grad_norm": 245.25201416015625, "learning_rate": 1.206193076657992e-06, "loss": 16.7031, "step": 13578 }, { "epoch": 0.9018396758982533, "grad_norm": 214.74989318847656, "learning_rate": 1.206087834657922e-06, "loss": 22.4219, "step": 13579 }, { "epoch": 0.901906090190609, "grad_norm": 122.76207733154297, "learning_rate": 1.2059825902739467e-06, "loss": 19.9688, "step": 13580 }, { "epoch": 0.9019725044829647, "grad_norm": 283.2929992675781, "learning_rate": 1.2058773435072834e-06, "loss": 24.1562, "step": 13581 }, { "epoch": 0.9020389187753205, "grad_norm": 149.83705139160156, "learning_rate": 1.2057720943591498e-06, "loss": 16.9219, "step": 13582 }, { "epoch": 0.9021053330676762, "grad_norm": 349.2048034667969, "learning_rate": 1.205666842830763e-06, "loss": 22.3594, "step": 13583 }, { "epoch": 0.9021717473600319, "grad_norm": 134.30795288085938, "learning_rate": 1.205561588923341e-06, "loss": 15.3281, "step": 13584 }, { "epoch": 0.9022381616523876, "grad_norm": 119.90874481201172, "learning_rate": 1.2054563326381012e-06, "loss": 13.4688, "step": 13585 }, { "epoch": 0.9023045759447433, "grad_norm": 242.22366333007812, "learning_rate": 1.205351073976261e-06, "loss": 16.6875, "step": 13586 }, { "epoch": 0.902370990237099, "grad_norm": 205.9420166015625, "learning_rate": 1.2052458129390377e-06, "loss": 14.25, "step": 13587 }, { "epoch": 0.9024374045294548, "grad_norm": 142.4228057861328, "learning_rate": 1.2051405495276492e-06, "loss": 16.5469, "step": 13588 }, { "epoch": 0.9025038188218104, "grad_norm": 219.4607696533203, "learning_rate": 1.2050352837433132e-06, "loss": 15.1094, "step": 13589 }, { "epoch": 0.9025702331141662, "grad_norm": 196.92909240722656, "learning_rate": 1.2049300155872476e-06, "loss": 17.2344, "step": 13590 }, { "epoch": 0.9026366474065218, "grad_norm": 202.50570678710938, "learning_rate": 1.2048247450606695e-06, "loss": 21.4375, "step": 13591 }, { "epoch": 0.9027030616988776, "grad_norm": 223.8618621826172, "learning_rate": 1.2047194721647966e-06, "loss": 16.6406, "step": 13592 }, { "epoch": 0.9027694759912334, "grad_norm": 150.84141540527344, "learning_rate": 1.2046141969008474e-06, "loss": 15.25, "step": 13593 }, { "epoch": 0.902835890283589, "grad_norm": 232.361328125, "learning_rate": 1.204508919270039e-06, "loss": 22.5312, "step": 13594 }, { "epoch": 0.9029023045759448, "grad_norm": 287.20697021484375, "learning_rate": 1.204403639273589e-06, "loss": 15.0781, "step": 13595 }, { "epoch": 0.9029687188683004, "grad_norm": 150.59071350097656, "learning_rate": 1.2042983569127157e-06, "loss": 18.1719, "step": 13596 }, { "epoch": 0.9030351331606562, "grad_norm": 367.0889587402344, "learning_rate": 1.2041930721886371e-06, "loss": 14.4531, "step": 13597 }, { "epoch": 0.9031015474530119, "grad_norm": 361.3119201660156, "learning_rate": 1.2040877851025705e-06, "loss": 19.4375, "step": 13598 }, { "epoch": 0.9031679617453676, "grad_norm": 269.05145263671875, "learning_rate": 1.2039824956557341e-06, "loss": 23.5, "step": 13599 }, { "epoch": 0.9032343760377233, "grad_norm": 370.6520080566406, "learning_rate": 1.2038772038493458e-06, "loss": 25.7344, "step": 13600 }, { "epoch": 0.903300790330079, "grad_norm": 138.46041870117188, "learning_rate": 1.203771909684624e-06, "loss": 14.3906, "step": 13601 }, { "epoch": 0.9033672046224347, "grad_norm": 473.9445495605469, "learning_rate": 1.2036666131627857e-06, "loss": 15.7656, "step": 13602 }, { "epoch": 0.9034336189147905, "grad_norm": 274.20050048828125, "learning_rate": 1.2035613142850495e-06, "loss": 17.0625, "step": 13603 }, { "epoch": 0.9035000332071462, "grad_norm": 185.56292724609375, "learning_rate": 1.203456013052634e-06, "loss": 20.7656, "step": 13604 }, { "epoch": 0.9035664474995019, "grad_norm": 188.69444274902344, "learning_rate": 1.203350709466756e-06, "loss": 18.1562, "step": 13605 }, { "epoch": 0.9036328617918576, "grad_norm": 128.62075805664062, "learning_rate": 1.2032454035286346e-06, "loss": 16.2188, "step": 13606 }, { "epoch": 0.9036992760842133, "grad_norm": 131.84124755859375, "learning_rate": 1.2031400952394872e-06, "loss": 19.5781, "step": 13607 }, { "epoch": 0.9037656903765691, "grad_norm": 247.791015625, "learning_rate": 1.203034784600533e-06, "loss": 17.2031, "step": 13608 }, { "epoch": 0.9038321046689247, "grad_norm": 161.5446014404297, "learning_rate": 1.2029294716129887e-06, "loss": 13.4062, "step": 13609 }, { "epoch": 0.9038985189612805, "grad_norm": 177.35202026367188, "learning_rate": 1.2028241562780736e-06, "loss": 17.25, "step": 13610 }, { "epoch": 0.9039649332536361, "grad_norm": 301.7534484863281, "learning_rate": 1.2027188385970056e-06, "loss": 12.5781, "step": 13611 }, { "epoch": 0.9040313475459919, "grad_norm": 152.72389221191406, "learning_rate": 1.2026135185710034e-06, "loss": 11.4688, "step": 13612 }, { "epoch": 0.9040977618383476, "grad_norm": 235.66175842285156, "learning_rate": 1.2025081962012845e-06, "loss": 15.3281, "step": 13613 }, { "epoch": 0.9041641761307033, "grad_norm": 153.60479736328125, "learning_rate": 1.2024028714890674e-06, "loss": 16.5312, "step": 13614 }, { "epoch": 0.9042305904230591, "grad_norm": 231.59791564941406, "learning_rate": 1.2022975444355705e-06, "loss": 12.2969, "step": 13615 }, { "epoch": 0.9042970047154147, "grad_norm": 251.4401092529297, "learning_rate": 1.2021922150420126e-06, "loss": 13.3906, "step": 13616 }, { "epoch": 0.9043634190077705, "grad_norm": 227.1002655029297, "learning_rate": 1.2020868833096116e-06, "loss": 19.8125, "step": 13617 }, { "epoch": 0.9044298333001262, "grad_norm": 317.879150390625, "learning_rate": 1.2019815492395862e-06, "loss": 19.75, "step": 13618 }, { "epoch": 0.9044962475924819, "grad_norm": 189.6003875732422, "learning_rate": 1.2018762128331545e-06, "loss": 18.3438, "step": 13619 }, { "epoch": 0.9045626618848376, "grad_norm": 239.33497619628906, "learning_rate": 1.2017708740915353e-06, "loss": 23.2031, "step": 13620 }, { "epoch": 0.9046290761771933, "grad_norm": 126.10403442382812, "learning_rate": 1.201665533015947e-06, "loss": 18.4688, "step": 13621 }, { "epoch": 0.904695490469549, "grad_norm": 319.91864013671875, "learning_rate": 1.2015601896076083e-06, "loss": 18.8281, "step": 13622 }, { "epoch": 0.9047619047619048, "grad_norm": 152.7448272705078, "learning_rate": 1.2014548438677372e-06, "loss": 14.5156, "step": 13623 }, { "epoch": 0.9048283190542604, "grad_norm": 105.83047485351562, "learning_rate": 1.2013494957975528e-06, "loss": 15.2656, "step": 13624 }, { "epoch": 0.9048947333466162, "grad_norm": 150.81756591796875, "learning_rate": 1.2012441453982734e-06, "loss": 18.7812, "step": 13625 }, { "epoch": 0.904961147638972, "grad_norm": 217.3016357421875, "learning_rate": 1.201138792671118e-06, "loss": 16.5156, "step": 13626 }, { "epoch": 0.9050275619313276, "grad_norm": 221.0774383544922, "learning_rate": 1.2010334376173048e-06, "loss": 20.0625, "step": 13627 }, { "epoch": 0.9050939762236834, "grad_norm": 348.0822448730469, "learning_rate": 1.2009280802380525e-06, "loss": 17.6875, "step": 13628 }, { "epoch": 0.905160390516039, "grad_norm": 227.21395874023438, "learning_rate": 1.2008227205345808e-06, "loss": 21.5156, "step": 13629 }, { "epoch": 0.9052268048083948, "grad_norm": 221.45787048339844, "learning_rate": 1.200717358508107e-06, "loss": 17.8594, "step": 13630 }, { "epoch": 0.9052932191007504, "grad_norm": 114.30862426757812, "learning_rate": 1.2006119941598507e-06, "loss": 15.6719, "step": 13631 }, { "epoch": 0.9053596333931062, "grad_norm": 136.0520477294922, "learning_rate": 1.2005066274910305e-06, "loss": 13.625, "step": 13632 }, { "epoch": 0.9054260476854619, "grad_norm": 171.83555603027344, "learning_rate": 1.2004012585028656e-06, "loss": 21.3594, "step": 13633 }, { "epoch": 0.9054924619778176, "grad_norm": 199.66104125976562, "learning_rate": 1.200295887196574e-06, "loss": 16.2969, "step": 13634 }, { "epoch": 0.9055588762701734, "grad_norm": 188.91873168945312, "learning_rate": 1.2001905135733754e-06, "loss": 15.5312, "step": 13635 }, { "epoch": 0.905625290562529, "grad_norm": 645.37060546875, "learning_rate": 1.2000851376344882e-06, "loss": 28.1094, "step": 13636 }, { "epoch": 0.9056917048548848, "grad_norm": 208.1016387939453, "learning_rate": 1.1999797593811317e-06, "loss": 20.8438, "step": 13637 }, { "epoch": 0.9057581191472405, "grad_norm": 302.8061218261719, "learning_rate": 1.1998743788145244e-06, "loss": 22.9062, "step": 13638 }, { "epoch": 0.9058245334395962, "grad_norm": 371.4350280761719, "learning_rate": 1.1997689959358857e-06, "loss": 19.0312, "step": 13639 }, { "epoch": 0.9058909477319519, "grad_norm": 434.68060302734375, "learning_rate": 1.1996636107464347e-06, "loss": 14.7969, "step": 13640 }, { "epoch": 0.9059573620243077, "grad_norm": 493.8428955078125, "learning_rate": 1.1995582232473898e-06, "loss": 15.7344, "step": 13641 }, { "epoch": 0.9060237763166633, "grad_norm": 109.4850082397461, "learning_rate": 1.1994528334399703e-06, "loss": 15.4062, "step": 13642 }, { "epoch": 0.9060901906090191, "grad_norm": 121.37577056884766, "learning_rate": 1.1993474413253957e-06, "loss": 15.7188, "step": 13643 }, { "epoch": 0.9061566049013747, "grad_norm": 691.1854858398438, "learning_rate": 1.199242046904885e-06, "loss": 16.9219, "step": 13644 }, { "epoch": 0.9062230191937305, "grad_norm": 149.97286987304688, "learning_rate": 1.199136650179657e-06, "loss": 18.4219, "step": 13645 }, { "epoch": 0.9062894334860863, "grad_norm": 176.24923706054688, "learning_rate": 1.1990312511509312e-06, "loss": 17.3906, "step": 13646 }, { "epoch": 0.9063558477784419, "grad_norm": 123.15177917480469, "learning_rate": 1.1989258498199265e-06, "loss": 13.3438, "step": 13647 }, { "epoch": 0.9064222620707977, "grad_norm": 445.3028869628906, "learning_rate": 1.1988204461878623e-06, "loss": 18.0469, "step": 13648 }, { "epoch": 0.9064886763631533, "grad_norm": 217.25205993652344, "learning_rate": 1.1987150402559579e-06, "loss": 15.4844, "step": 13649 }, { "epoch": 0.9065550906555091, "grad_norm": 271.2752685546875, "learning_rate": 1.1986096320254322e-06, "loss": 19.1094, "step": 13650 }, { "epoch": 0.9066215049478648, "grad_norm": 380.6661071777344, "learning_rate": 1.1985042214975052e-06, "loss": 20.3594, "step": 13651 }, { "epoch": 0.9066879192402205, "grad_norm": 628.185302734375, "learning_rate": 1.1983988086733954e-06, "loss": 21.0156, "step": 13652 }, { "epoch": 0.9067543335325762, "grad_norm": 198.42906188964844, "learning_rate": 1.1982933935543228e-06, "loss": 20.9531, "step": 13653 }, { "epoch": 0.9068207478249319, "grad_norm": 174.17018127441406, "learning_rate": 1.1981879761415068e-06, "loss": 12.5703, "step": 13654 }, { "epoch": 0.9068871621172876, "grad_norm": 337.1221923828125, "learning_rate": 1.1980825564361663e-06, "loss": 14.6875, "step": 13655 }, { "epoch": 0.9069535764096434, "grad_norm": 367.7975158691406, "learning_rate": 1.197977134439521e-06, "loss": 18.9375, "step": 13656 }, { "epoch": 0.9070199907019991, "grad_norm": 176.93614196777344, "learning_rate": 1.1978717101527908e-06, "loss": 19.5938, "step": 13657 }, { "epoch": 0.9070864049943548, "grad_norm": 357.8055114746094, "learning_rate": 1.1977662835771945e-06, "loss": 23.875, "step": 13658 }, { "epoch": 0.9071528192867105, "grad_norm": 190.17466735839844, "learning_rate": 1.1976608547139514e-06, "loss": 17.7656, "step": 13659 }, { "epoch": 0.9072192335790662, "grad_norm": 224.80862426757812, "learning_rate": 1.1975554235642823e-06, "loss": 18.5781, "step": 13660 }, { "epoch": 0.907285647871422, "grad_norm": 215.4459991455078, "learning_rate": 1.1974499901294055e-06, "loss": 16.2031, "step": 13661 }, { "epoch": 0.9073520621637776, "grad_norm": 177.54818725585938, "learning_rate": 1.1973445544105412e-06, "loss": 13.375, "step": 13662 }, { "epoch": 0.9074184764561334, "grad_norm": 324.962646484375, "learning_rate": 1.197239116408909e-06, "loss": 24.0156, "step": 13663 }, { "epoch": 0.907484890748489, "grad_norm": 223.19007873535156, "learning_rate": 1.197133676125728e-06, "loss": 15.3438, "step": 13664 }, { "epoch": 0.9075513050408448, "grad_norm": 285.5452575683594, "learning_rate": 1.197028233562219e-06, "loss": 16.4531, "step": 13665 }, { "epoch": 0.9076177193332005, "grad_norm": 200.5494384765625, "learning_rate": 1.1969227887196002e-06, "loss": 13.9219, "step": 13666 }, { "epoch": 0.9076841336255562, "grad_norm": 158.22531127929688, "learning_rate": 1.1968173415990925e-06, "loss": 16.2344, "step": 13667 }, { "epoch": 0.907750547917912, "grad_norm": 124.56182861328125, "learning_rate": 1.1967118922019156e-06, "loss": 13.7188, "step": 13668 }, { "epoch": 0.9078169622102676, "grad_norm": 195.6716766357422, "learning_rate": 1.1966064405292886e-06, "loss": 19.8438, "step": 13669 }, { "epoch": 0.9078833765026234, "grad_norm": 298.80596923828125, "learning_rate": 1.1965009865824316e-06, "loss": 15.1094, "step": 13670 }, { "epoch": 0.9079497907949791, "grad_norm": 242.9239501953125, "learning_rate": 1.1963955303625646e-06, "loss": 25.9375, "step": 13671 }, { "epoch": 0.9080162050873348, "grad_norm": 264.9438171386719, "learning_rate": 1.1962900718709076e-06, "loss": 13.2969, "step": 13672 }, { "epoch": 0.9080826193796905, "grad_norm": 152.84225463867188, "learning_rate": 1.1961846111086799e-06, "loss": 16.9688, "step": 13673 }, { "epoch": 0.9081490336720462, "grad_norm": 1382.8828125, "learning_rate": 1.1960791480771018e-06, "loss": 28.4844, "step": 13674 }, { "epoch": 0.9082154479644019, "grad_norm": 307.5063781738281, "learning_rate": 1.1959736827773932e-06, "loss": 20.1719, "step": 13675 }, { "epoch": 0.9082818622567577, "grad_norm": 159.2467498779297, "learning_rate": 1.1958682152107745e-06, "loss": 15.2344, "step": 13676 }, { "epoch": 0.9083482765491133, "grad_norm": 416.50286865234375, "learning_rate": 1.1957627453784646e-06, "loss": 15.3906, "step": 13677 }, { "epoch": 0.9084146908414691, "grad_norm": 507.0788879394531, "learning_rate": 1.1956572732816844e-06, "loss": 26.6875, "step": 13678 }, { "epoch": 0.9084811051338249, "grad_norm": 415.2737121582031, "learning_rate": 1.1955517989216537e-06, "loss": 28.0625, "step": 13679 }, { "epoch": 0.9085475194261805, "grad_norm": 183.92459106445312, "learning_rate": 1.1954463222995925e-06, "loss": 17.5312, "step": 13680 }, { "epoch": 0.9086139337185363, "grad_norm": 357.1944580078125, "learning_rate": 1.195340843416721e-06, "loss": 19.9688, "step": 13681 }, { "epoch": 0.9086803480108919, "grad_norm": 120.4352035522461, "learning_rate": 1.195235362274259e-06, "loss": 13.0, "step": 13682 }, { "epoch": 0.9087467623032477, "grad_norm": 316.84759521484375, "learning_rate": 1.1951298788734275e-06, "loss": 17.1094, "step": 13683 }, { "epoch": 0.9088131765956033, "grad_norm": 153.08934020996094, "learning_rate": 1.1950243932154457e-06, "loss": 18.5625, "step": 13684 }, { "epoch": 0.9088795908879591, "grad_norm": 231.33349609375, "learning_rate": 1.194918905301534e-06, "loss": 15.7031, "step": 13685 }, { "epoch": 0.9089460051803148, "grad_norm": 206.8487548828125, "learning_rate": 1.194813415132913e-06, "loss": 15.2812, "step": 13686 }, { "epoch": 0.9090124194726705, "grad_norm": 255.7725372314453, "learning_rate": 1.1947079227108029e-06, "loss": 22.4531, "step": 13687 }, { "epoch": 0.9090788337650262, "grad_norm": 292.0482482910156, "learning_rate": 1.1946024280364234e-06, "loss": 16.9219, "step": 13688 }, { "epoch": 0.909145248057382, "grad_norm": 166.4922332763672, "learning_rate": 1.1944969311109953e-06, "loss": 19.4844, "step": 13689 }, { "epoch": 0.9092116623497377, "grad_norm": 290.06591796875, "learning_rate": 1.1943914319357392e-06, "loss": 26.9062, "step": 13690 }, { "epoch": 0.9092780766420934, "grad_norm": 226.19630432128906, "learning_rate": 1.1942859305118749e-06, "loss": 14.6875, "step": 13691 }, { "epoch": 0.9093444909344491, "grad_norm": 221.87234497070312, "learning_rate": 1.194180426840623e-06, "loss": 15.1406, "step": 13692 }, { "epoch": 0.9094109052268048, "grad_norm": 592.9027099609375, "learning_rate": 1.1940749209232039e-06, "loss": 12.2578, "step": 13693 }, { "epoch": 0.9094773195191606, "grad_norm": 122.24322509765625, "learning_rate": 1.1939694127608382e-06, "loss": 15.0781, "step": 13694 }, { "epoch": 0.9095437338115162, "grad_norm": 222.28346252441406, "learning_rate": 1.193863902354746e-06, "loss": 20.4219, "step": 13695 }, { "epoch": 0.909610148103872, "grad_norm": 449.7301025390625, "learning_rate": 1.193758389706148e-06, "loss": 25.0625, "step": 13696 }, { "epoch": 0.9096765623962276, "grad_norm": 146.2874298095703, "learning_rate": 1.1936528748162647e-06, "loss": 14.8984, "step": 13697 }, { "epoch": 0.9097429766885834, "grad_norm": 148.6626739501953, "learning_rate": 1.1935473576863165e-06, "loss": 11.4688, "step": 13698 }, { "epoch": 0.909809390980939, "grad_norm": 110.17737579345703, "learning_rate": 1.1934418383175243e-06, "loss": 14.8438, "step": 13699 }, { "epoch": 0.9098758052732948, "grad_norm": 264.32623291015625, "learning_rate": 1.1933363167111084e-06, "loss": 16.0625, "step": 13700 }, { "epoch": 0.9099422195656506, "grad_norm": 227.64370727539062, "learning_rate": 1.1932307928682892e-06, "loss": 18.2422, "step": 13701 }, { "epoch": 0.9100086338580062, "grad_norm": 291.05487060546875, "learning_rate": 1.193125266790288e-06, "loss": 14.7656, "step": 13702 }, { "epoch": 0.910075048150362, "grad_norm": 586.1334228515625, "learning_rate": 1.1930197384783247e-06, "loss": 18.7969, "step": 13703 }, { "epoch": 0.9101414624427177, "grad_norm": 203.73382568359375, "learning_rate": 1.1929142079336205e-06, "loss": 16.9375, "step": 13704 }, { "epoch": 0.9102078767350734, "grad_norm": 548.4331665039062, "learning_rate": 1.192808675157396e-06, "loss": 20.1875, "step": 13705 }, { "epoch": 0.9102742910274291, "grad_norm": 135.12667846679688, "learning_rate": 1.192703140150872e-06, "loss": 15.2656, "step": 13706 }, { "epoch": 0.9103407053197848, "grad_norm": 199.19444274902344, "learning_rate": 1.192597602915269e-06, "loss": 18.4375, "step": 13707 }, { "epoch": 0.9104071196121405, "grad_norm": 192.3689727783203, "learning_rate": 1.1924920634518084e-06, "loss": 16.625, "step": 13708 }, { "epoch": 0.9104735339044963, "grad_norm": 230.8422393798828, "learning_rate": 1.1923865217617102e-06, "loss": 14.5625, "step": 13709 }, { "epoch": 0.9105399481968519, "grad_norm": 146.71517944335938, "learning_rate": 1.1922809778461958e-06, "loss": 17.7812, "step": 13710 }, { "epoch": 0.9106063624892077, "grad_norm": 138.1364288330078, "learning_rate": 1.192175431706486e-06, "loss": 17.75, "step": 13711 }, { "epoch": 0.9106727767815634, "grad_norm": 176.13873291015625, "learning_rate": 1.1920698833438017e-06, "loss": 15.7656, "step": 13712 }, { "epoch": 0.9107391910739191, "grad_norm": 317.63372802734375, "learning_rate": 1.1919643327593633e-06, "loss": 20.5625, "step": 13713 }, { "epoch": 0.9108056053662749, "grad_norm": 265.0188293457031, "learning_rate": 1.1918587799543925e-06, "loss": 22.25, "step": 13714 }, { "epoch": 0.9108720196586305, "grad_norm": 272.2994689941406, "learning_rate": 1.19175322493011e-06, "loss": 20.25, "step": 13715 }, { "epoch": 0.9109384339509863, "grad_norm": 138.5894775390625, "learning_rate": 1.1916476676877367e-06, "loss": 15.7656, "step": 13716 }, { "epoch": 0.9110048482433419, "grad_norm": 230.5036163330078, "learning_rate": 1.1915421082284937e-06, "loss": 14.0312, "step": 13717 }, { "epoch": 0.9110712625356977, "grad_norm": 388.5280456542969, "learning_rate": 1.1914365465536023e-06, "loss": 31.3906, "step": 13718 }, { "epoch": 0.9111376768280534, "grad_norm": 187.9488983154297, "learning_rate": 1.191330982664283e-06, "loss": 14.0312, "step": 13719 }, { "epoch": 0.9112040911204091, "grad_norm": 498.21820068359375, "learning_rate": 1.1912254165617573e-06, "loss": 21.5625, "step": 13720 }, { "epoch": 0.9112705054127648, "grad_norm": 257.3121643066406, "learning_rate": 1.191119848247246e-06, "loss": 21.4688, "step": 13721 }, { "epoch": 0.9113369197051205, "grad_norm": 345.122314453125, "learning_rate": 1.191014277721971e-06, "loss": 16.9375, "step": 13722 }, { "epoch": 0.9114033339974763, "grad_norm": 202.51466369628906, "learning_rate": 1.1909087049871524e-06, "loss": 19.3125, "step": 13723 }, { "epoch": 0.911469748289832, "grad_norm": 151.25047302246094, "learning_rate": 1.1908031300440125e-06, "loss": 14.5, "step": 13724 }, { "epoch": 0.9115361625821877, "grad_norm": 280.5210266113281, "learning_rate": 1.1906975528937717e-06, "loss": 16.1875, "step": 13725 }, { "epoch": 0.9116025768745434, "grad_norm": 195.0980682373047, "learning_rate": 1.1905919735376515e-06, "loss": 15.0781, "step": 13726 }, { "epoch": 0.9116689911668991, "grad_norm": 254.5400390625, "learning_rate": 1.1904863919768735e-06, "loss": 17.4688, "step": 13727 }, { "epoch": 0.9117354054592548, "grad_norm": 132.83642578125, "learning_rate": 1.1903808082126585e-06, "loss": 13.0938, "step": 13728 }, { "epoch": 0.9118018197516106, "grad_norm": 160.9817352294922, "learning_rate": 1.1902752222462284e-06, "loss": 22.9062, "step": 13729 }, { "epoch": 0.9118682340439662, "grad_norm": 283.9925537109375, "learning_rate": 1.1901696340788042e-06, "loss": 21.1094, "step": 13730 }, { "epoch": 0.911934648336322, "grad_norm": 588.2325439453125, "learning_rate": 1.1900640437116072e-06, "loss": 26.0, "step": 13731 }, { "epoch": 0.9120010626286776, "grad_norm": 160.45993041992188, "learning_rate": 1.189958451145859e-06, "loss": 19.3438, "step": 13732 }, { "epoch": 0.9120674769210334, "grad_norm": 178.25387573242188, "learning_rate": 1.189852856382781e-06, "loss": 12.6719, "step": 13733 }, { "epoch": 0.9121338912133892, "grad_norm": 140.87721252441406, "learning_rate": 1.1897472594235946e-06, "loss": 19.4219, "step": 13734 }, { "epoch": 0.9122003055057448, "grad_norm": 139.41725158691406, "learning_rate": 1.1896416602695212e-06, "loss": 14.2656, "step": 13735 }, { "epoch": 0.9122667197981006, "grad_norm": 523.8801879882812, "learning_rate": 1.1895360589217827e-06, "loss": 25.875, "step": 13736 }, { "epoch": 0.9123331340904562, "grad_norm": 334.9560241699219, "learning_rate": 1.1894304553816002e-06, "loss": 20.3906, "step": 13737 }, { "epoch": 0.912399548382812, "grad_norm": 279.20806884765625, "learning_rate": 1.1893248496501952e-06, "loss": 19.6562, "step": 13738 }, { "epoch": 0.9124659626751677, "grad_norm": 237.99420166015625, "learning_rate": 1.1892192417287901e-06, "loss": 19.3906, "step": 13739 }, { "epoch": 0.9125323769675234, "grad_norm": 325.9173583984375, "learning_rate": 1.1891136316186055e-06, "loss": 17.3281, "step": 13740 }, { "epoch": 0.9125987912598791, "grad_norm": 182.51194763183594, "learning_rate": 1.1890080193208634e-06, "loss": 19.2969, "step": 13741 }, { "epoch": 0.9126652055522348, "grad_norm": 328.3002014160156, "learning_rate": 1.1889024048367858e-06, "loss": 17.6094, "step": 13742 }, { "epoch": 0.9127316198445905, "grad_norm": 426.8739318847656, "learning_rate": 1.1887967881675939e-06, "loss": 14.9844, "step": 13743 }, { "epoch": 0.9127980341369463, "grad_norm": 397.3824768066406, "learning_rate": 1.1886911693145097e-06, "loss": 22.0, "step": 13744 }, { "epoch": 0.912864448429302, "grad_norm": 181.13490295410156, "learning_rate": 1.1885855482787548e-06, "loss": 17.125, "step": 13745 }, { "epoch": 0.9129308627216577, "grad_norm": 323.30670166015625, "learning_rate": 1.188479925061551e-06, "loss": 17.8438, "step": 13746 }, { "epoch": 0.9129972770140135, "grad_norm": 296.5975646972656, "learning_rate": 1.1883742996641202e-06, "loss": 19.7656, "step": 13747 }, { "epoch": 0.9130636913063691, "grad_norm": 211.93417358398438, "learning_rate": 1.1882686720876842e-06, "loss": 14.8438, "step": 13748 }, { "epoch": 0.9131301055987249, "grad_norm": 203.71560668945312, "learning_rate": 1.1881630423334645e-06, "loss": 17.4844, "step": 13749 }, { "epoch": 0.9131965198910805, "grad_norm": 123.49626922607422, "learning_rate": 1.1880574104026835e-06, "loss": 15.9062, "step": 13750 }, { "epoch": 0.9132629341834363, "grad_norm": 168.50425720214844, "learning_rate": 1.1879517762965628e-06, "loss": 14.9219, "step": 13751 }, { "epoch": 0.913329348475792, "grad_norm": 191.6545867919922, "learning_rate": 1.187846140016324e-06, "loss": 15.5781, "step": 13752 }, { "epoch": 0.9133957627681477, "grad_norm": 167.0216064453125, "learning_rate": 1.1877405015631896e-06, "loss": 17.2656, "step": 13753 }, { "epoch": 0.9134621770605034, "grad_norm": 232.85205078125, "learning_rate": 1.1876348609383815e-06, "loss": 15.375, "step": 13754 }, { "epoch": 0.9135285913528591, "grad_norm": 169.23898315429688, "learning_rate": 1.1875292181431214e-06, "loss": 27.4844, "step": 13755 }, { "epoch": 0.9135950056452149, "grad_norm": 216.927490234375, "learning_rate": 1.1874235731786313e-06, "loss": 18.0156, "step": 13756 }, { "epoch": 0.9136614199375706, "grad_norm": 132.5144500732422, "learning_rate": 1.1873179260461335e-06, "loss": 19.1094, "step": 13757 }, { "epoch": 0.9137278342299263, "grad_norm": 101.91397857666016, "learning_rate": 1.18721227674685e-06, "loss": 12.5, "step": 13758 }, { "epoch": 0.913794248522282, "grad_norm": 187.78964233398438, "learning_rate": 1.1871066252820027e-06, "loss": 14.2969, "step": 13759 }, { "epoch": 0.9138606628146377, "grad_norm": 150.7019500732422, "learning_rate": 1.1870009716528139e-06, "loss": 20.3438, "step": 13760 }, { "epoch": 0.9139270771069934, "grad_norm": 417.2435302734375, "learning_rate": 1.1868953158605057e-06, "loss": 18.4062, "step": 13761 }, { "epoch": 0.9139934913993492, "grad_norm": 216.12428283691406, "learning_rate": 1.1867896579063001e-06, "loss": 18.4219, "step": 13762 }, { "epoch": 0.9140599056917048, "grad_norm": 176.52220153808594, "learning_rate": 1.1866839977914196e-06, "loss": 16.8438, "step": 13763 }, { "epoch": 0.9141263199840606, "grad_norm": 129.15989685058594, "learning_rate": 1.1865783355170864e-06, "loss": 13.2188, "step": 13764 }, { "epoch": 0.9141927342764162, "grad_norm": 258.0090026855469, "learning_rate": 1.1864726710845223e-06, "loss": 13.4219, "step": 13765 }, { "epoch": 0.914259148568772, "grad_norm": 254.41571044921875, "learning_rate": 1.18636700449495e-06, "loss": 21.5, "step": 13766 }, { "epoch": 0.9143255628611278, "grad_norm": 301.749755859375, "learning_rate": 1.1862613357495916e-06, "loss": 14.6562, "step": 13767 }, { "epoch": 0.9143919771534834, "grad_norm": 371.12872314453125, "learning_rate": 1.1861556648496698e-06, "loss": 15.8281, "step": 13768 }, { "epoch": 0.9144583914458392, "grad_norm": 278.9457702636719, "learning_rate": 1.1860499917964063e-06, "loss": 16.3281, "step": 13769 }, { "epoch": 0.9145248057381948, "grad_norm": 187.60621643066406, "learning_rate": 1.185944316591024e-06, "loss": 14.3281, "step": 13770 }, { "epoch": 0.9145912200305506, "grad_norm": 180.4157257080078, "learning_rate": 1.185838639234745e-06, "loss": 16.7969, "step": 13771 }, { "epoch": 0.9146576343229063, "grad_norm": 241.690185546875, "learning_rate": 1.1857329597287915e-06, "loss": 17.25, "step": 13772 }, { "epoch": 0.914724048615262, "grad_norm": 152.0023956298828, "learning_rate": 1.1856272780743866e-06, "loss": 21.1562, "step": 13773 }, { "epoch": 0.9147904629076177, "grad_norm": 553.435791015625, "learning_rate": 1.1855215942727521e-06, "loss": 17.1875, "step": 13774 }, { "epoch": 0.9148568771999734, "grad_norm": 121.02288818359375, "learning_rate": 1.185415908325111e-06, "loss": 14.5, "step": 13775 }, { "epoch": 0.9149232914923291, "grad_norm": 169.27410888671875, "learning_rate": 1.1853102202326854e-06, "loss": 17.3438, "step": 13776 }, { "epoch": 0.9149897057846849, "grad_norm": 181.88697814941406, "learning_rate": 1.1852045299966979e-06, "loss": 14.6562, "step": 13777 }, { "epoch": 0.9150561200770406, "grad_norm": 137.64996337890625, "learning_rate": 1.1850988376183715e-06, "loss": 18.0156, "step": 13778 }, { "epoch": 0.9151225343693963, "grad_norm": 247.87362670898438, "learning_rate": 1.1849931430989287e-06, "loss": 14.8281, "step": 13779 }, { "epoch": 0.915188948661752, "grad_norm": 136.25570678710938, "learning_rate": 1.1848874464395913e-06, "loss": 12.8438, "step": 13780 }, { "epoch": 0.9152553629541077, "grad_norm": 572.870849609375, "learning_rate": 1.1847817476415828e-06, "loss": 20.5938, "step": 13781 }, { "epoch": 0.9153217772464635, "grad_norm": 270.69268798828125, "learning_rate": 1.1846760467061256e-06, "loss": 17.5938, "step": 13782 }, { "epoch": 0.9153881915388191, "grad_norm": 285.7759094238281, "learning_rate": 1.1845703436344424e-06, "loss": 14.6562, "step": 13783 }, { "epoch": 0.9154546058311749, "grad_norm": 152.2661895751953, "learning_rate": 1.184464638427756e-06, "loss": 18.125, "step": 13784 }, { "epoch": 0.9155210201235305, "grad_norm": 205.20335388183594, "learning_rate": 1.1843589310872887e-06, "loss": 21.8906, "step": 13785 }, { "epoch": 0.9155874344158863, "grad_norm": 154.25587463378906, "learning_rate": 1.184253221614264e-06, "loss": 15.7812, "step": 13786 }, { "epoch": 0.9156538487082421, "grad_norm": 151.0376739501953, "learning_rate": 1.1841475100099041e-06, "loss": 15.7031, "step": 13787 }, { "epoch": 0.9157202630005977, "grad_norm": 379.4220275878906, "learning_rate": 1.1840417962754318e-06, "loss": 15.2812, "step": 13788 }, { "epoch": 0.9157866772929535, "grad_norm": 354.7362365722656, "learning_rate": 1.1839360804120703e-06, "loss": 18.5625, "step": 13789 }, { "epoch": 0.9158530915853091, "grad_norm": 167.34341430664062, "learning_rate": 1.1838303624210423e-06, "loss": 17.9062, "step": 13790 }, { "epoch": 0.9159195058776649, "grad_norm": 473.3821716308594, "learning_rate": 1.1837246423035707e-06, "loss": 17.0625, "step": 13791 }, { "epoch": 0.9159859201700206, "grad_norm": 188.96409606933594, "learning_rate": 1.1836189200608784e-06, "loss": 17.8594, "step": 13792 }, { "epoch": 0.9160523344623763, "grad_norm": 193.38389587402344, "learning_rate": 1.1835131956941885e-06, "loss": 14.3594, "step": 13793 }, { "epoch": 0.916118748754732, "grad_norm": 115.96293640136719, "learning_rate": 1.1834074692047233e-06, "loss": 12.4688, "step": 13794 }, { "epoch": 0.9161851630470877, "grad_norm": 386.53631591796875, "learning_rate": 1.1833017405937065e-06, "loss": 20.1406, "step": 13795 }, { "epoch": 0.9162515773394434, "grad_norm": 239.2084503173828, "learning_rate": 1.1831960098623606e-06, "loss": 16.4844, "step": 13796 }, { "epoch": 0.9163179916317992, "grad_norm": 182.93186950683594, "learning_rate": 1.1830902770119094e-06, "loss": 15.0781, "step": 13797 }, { "epoch": 0.9163844059241549, "grad_norm": 183.3994598388672, "learning_rate": 1.1829845420435752e-06, "loss": 16.2188, "step": 13798 }, { "epoch": 0.9164508202165106, "grad_norm": 129.63742065429688, "learning_rate": 1.182878804958581e-06, "loss": 18.9844, "step": 13799 }, { "epoch": 0.9165172345088664, "grad_norm": 148.72901916503906, "learning_rate": 1.1827730657581505e-06, "loss": 17.8438, "step": 13800 }, { "epoch": 0.916583648801222, "grad_norm": 396.879638671875, "learning_rate": 1.1826673244435066e-06, "loss": 18.8281, "step": 13801 }, { "epoch": 0.9166500630935778, "grad_norm": 142.5271453857422, "learning_rate": 1.1825615810158725e-06, "loss": 13.6719, "step": 13802 }, { "epoch": 0.9167164773859334, "grad_norm": 283.5183410644531, "learning_rate": 1.1824558354764714e-06, "loss": 16.8594, "step": 13803 }, { "epoch": 0.9167828916782892, "grad_norm": 548.1717529296875, "learning_rate": 1.182350087826526e-06, "loss": 17.5625, "step": 13804 }, { "epoch": 0.9168493059706448, "grad_norm": 307.5982971191406, "learning_rate": 1.1822443380672601e-06, "loss": 19.5, "step": 13805 }, { "epoch": 0.9169157202630006, "grad_norm": 155.1156463623047, "learning_rate": 1.1821385861998969e-06, "loss": 19.7188, "step": 13806 }, { "epoch": 0.9169821345553563, "grad_norm": 186.45599365234375, "learning_rate": 1.18203283222566e-06, "loss": 12.4531, "step": 13807 }, { "epoch": 0.917048548847712, "grad_norm": 587.5480346679688, "learning_rate": 1.1819270761457714e-06, "loss": 13.0469, "step": 13808 }, { "epoch": 0.9171149631400678, "grad_norm": 325.90704345703125, "learning_rate": 1.1818213179614559e-06, "loss": 11.7812, "step": 13809 }, { "epoch": 0.9171813774324235, "grad_norm": 406.10162353515625, "learning_rate": 1.1817155576739357e-06, "loss": 22.5938, "step": 13810 }, { "epoch": 0.9172477917247792, "grad_norm": 227.09054565429688, "learning_rate": 1.1816097952844353e-06, "loss": 21.9844, "step": 13811 }, { "epoch": 0.9173142060171349, "grad_norm": 187.18203735351562, "learning_rate": 1.1815040307941773e-06, "loss": 16.125, "step": 13812 }, { "epoch": 0.9173806203094906, "grad_norm": 256.25543212890625, "learning_rate": 1.1813982642043851e-06, "loss": 23.9062, "step": 13813 }, { "epoch": 0.9174470346018463, "grad_norm": 405.9716796875, "learning_rate": 1.1812924955162827e-06, "loss": 28.7812, "step": 13814 }, { "epoch": 0.917513448894202, "grad_norm": 647.9664916992188, "learning_rate": 1.181186724731093e-06, "loss": 16.9844, "step": 13815 }, { "epoch": 0.9175798631865577, "grad_norm": 239.7101593017578, "learning_rate": 1.1810809518500397e-06, "loss": 21.8438, "step": 13816 }, { "epoch": 0.9176462774789135, "grad_norm": 256.64642333984375, "learning_rate": 1.1809751768743465e-06, "loss": 19.8594, "step": 13817 }, { "epoch": 0.9177126917712691, "grad_norm": 159.68690490722656, "learning_rate": 1.180869399805237e-06, "loss": 16.2031, "step": 13818 }, { "epoch": 0.9177791060636249, "grad_norm": 692.413330078125, "learning_rate": 1.1807636206439345e-06, "loss": 27.8438, "step": 13819 }, { "epoch": 0.9178455203559807, "grad_norm": 181.8727264404297, "learning_rate": 1.1806578393916624e-06, "loss": 19.5625, "step": 13820 }, { "epoch": 0.9179119346483363, "grad_norm": 181.7205047607422, "learning_rate": 1.1805520560496447e-06, "loss": 14.7188, "step": 13821 }, { "epoch": 0.9179783489406921, "grad_norm": 206.86614990234375, "learning_rate": 1.1804462706191052e-06, "loss": 17.8281, "step": 13822 }, { "epoch": 0.9180447632330477, "grad_norm": 199.73880004882812, "learning_rate": 1.1803404831012672e-06, "loss": 21.1094, "step": 13823 }, { "epoch": 0.9181111775254035, "grad_norm": 334.61102294921875, "learning_rate": 1.1802346934973541e-06, "loss": 15.8125, "step": 13824 }, { "epoch": 0.9181775918177592, "grad_norm": 153.82705688476562, "learning_rate": 1.1801289018085904e-06, "loss": 18.125, "step": 13825 }, { "epoch": 0.9182440061101149, "grad_norm": 426.71063232421875, "learning_rate": 1.1800231080361993e-06, "loss": 13.9375, "step": 13826 }, { "epoch": 0.9183104204024706, "grad_norm": 162.63352966308594, "learning_rate": 1.1799173121814046e-06, "loss": 14.625, "step": 13827 }, { "epoch": 0.9183768346948263, "grad_norm": 178.54098510742188, "learning_rate": 1.17981151424543e-06, "loss": 16.6875, "step": 13828 }, { "epoch": 0.918443248987182, "grad_norm": 192.7706756591797, "learning_rate": 1.1797057142295e-06, "loss": 14.3125, "step": 13829 }, { "epoch": 0.9185096632795378, "grad_norm": 183.75135803222656, "learning_rate": 1.1795999121348377e-06, "loss": 20.0312, "step": 13830 }, { "epoch": 0.9185760775718935, "grad_norm": 275.83599853515625, "learning_rate": 1.1794941079626671e-06, "loss": 16.9531, "step": 13831 }, { "epoch": 0.9186424918642492, "grad_norm": 159.4490203857422, "learning_rate": 1.1793883017142122e-06, "loss": 28.9531, "step": 13832 }, { "epoch": 0.9187089061566049, "grad_norm": 167.71075439453125, "learning_rate": 1.1792824933906971e-06, "loss": 15.7188, "step": 13833 }, { "epoch": 0.9187753204489606, "grad_norm": 190.82501220703125, "learning_rate": 1.1791766829933452e-06, "loss": 18.0625, "step": 13834 }, { "epoch": 0.9188417347413164, "grad_norm": 214.76553344726562, "learning_rate": 1.1790708705233807e-06, "loss": 14.4531, "step": 13835 }, { "epoch": 0.918908149033672, "grad_norm": 128.58302307128906, "learning_rate": 1.1789650559820276e-06, "loss": 15.9375, "step": 13836 }, { "epoch": 0.9189745633260278, "grad_norm": 255.3847198486328, "learning_rate": 1.1788592393705101e-06, "loss": 19.5781, "step": 13837 }, { "epoch": 0.9190409776183834, "grad_norm": 200.83363342285156, "learning_rate": 1.178753420690052e-06, "loss": 9.9531, "step": 13838 }, { "epoch": 0.9191073919107392, "grad_norm": 424.8367919921875, "learning_rate": 1.1786475999418778e-06, "loss": 23.2656, "step": 13839 }, { "epoch": 0.9191738062030949, "grad_norm": 157.9293212890625, "learning_rate": 1.1785417771272106e-06, "loss": 23.8281, "step": 13840 }, { "epoch": 0.9192402204954506, "grad_norm": 148.0748748779297, "learning_rate": 1.1784359522472754e-06, "loss": 15.2031, "step": 13841 }, { "epoch": 0.9193066347878064, "grad_norm": 185.1136016845703, "learning_rate": 1.1783301253032957e-06, "loss": 17.1094, "step": 13842 }, { "epoch": 0.919373049080162, "grad_norm": 176.39503479003906, "learning_rate": 1.1782242962964963e-06, "loss": 18.5938, "step": 13843 }, { "epoch": 0.9194394633725178, "grad_norm": 208.261962890625, "learning_rate": 1.1781184652281008e-06, "loss": 19.6094, "step": 13844 }, { "epoch": 0.9195058776648735, "grad_norm": 219.49066162109375, "learning_rate": 1.1780126320993335e-06, "loss": 14.8906, "step": 13845 }, { "epoch": 0.9195722919572292, "grad_norm": 244.43055725097656, "learning_rate": 1.1779067969114192e-06, "loss": 17.9375, "step": 13846 }, { "epoch": 0.9196387062495849, "grad_norm": 176.51113891601562, "learning_rate": 1.177800959665581e-06, "loss": 17.2031, "step": 13847 }, { "epoch": 0.9197051205419406, "grad_norm": 174.59393310546875, "learning_rate": 1.1776951203630443e-06, "loss": 13.7344, "step": 13848 }, { "epoch": 0.9197715348342963, "grad_norm": 651.85107421875, "learning_rate": 1.1775892790050328e-06, "loss": 16.3906, "step": 13849 }, { "epoch": 0.9198379491266521, "grad_norm": 187.57505798339844, "learning_rate": 1.177483435592771e-06, "loss": 14.9844, "step": 13850 }, { "epoch": 0.9199043634190077, "grad_norm": 229.85118103027344, "learning_rate": 1.1773775901274829e-06, "loss": 16.1719, "step": 13851 }, { "epoch": 0.9199707777113635, "grad_norm": 264.5897521972656, "learning_rate": 1.1772717426103932e-06, "loss": 17.2188, "step": 13852 }, { "epoch": 0.9200371920037193, "grad_norm": 230.0032958984375, "learning_rate": 1.1771658930427264e-06, "loss": 19.2188, "step": 13853 }, { "epoch": 0.9201036062960749, "grad_norm": 256.5158996582031, "learning_rate": 1.1770600414257066e-06, "loss": 22.0, "step": 13854 }, { "epoch": 0.9201700205884307, "grad_norm": 433.7156677246094, "learning_rate": 1.1769541877605584e-06, "loss": 15.7969, "step": 13855 }, { "epoch": 0.9202364348807863, "grad_norm": 304.0159606933594, "learning_rate": 1.176848332048506e-06, "loss": 18.9062, "step": 13856 }, { "epoch": 0.9203028491731421, "grad_norm": 435.1617126464844, "learning_rate": 1.1767424742907744e-06, "loss": 16.2812, "step": 13857 }, { "epoch": 0.9203692634654977, "grad_norm": 327.0810546875, "learning_rate": 1.1766366144885876e-06, "loss": 21.6875, "step": 13858 }, { "epoch": 0.9204356777578535, "grad_norm": 167.97694396972656, "learning_rate": 1.17653075264317e-06, "loss": 18.5, "step": 13859 }, { "epoch": 0.9205020920502092, "grad_norm": 248.70614624023438, "learning_rate": 1.1764248887557467e-06, "loss": 12.0469, "step": 13860 }, { "epoch": 0.9205685063425649, "grad_norm": 285.2068176269531, "learning_rate": 1.1763190228275421e-06, "loss": 18.3438, "step": 13861 }, { "epoch": 0.9206349206349206, "grad_norm": 473.9425964355469, "learning_rate": 1.1762131548597807e-06, "loss": 20.3438, "step": 13862 }, { "epoch": 0.9207013349272763, "grad_norm": 175.86505126953125, "learning_rate": 1.176107284853687e-06, "loss": 18.9844, "step": 13863 }, { "epoch": 0.9207677492196321, "grad_norm": 172.76536560058594, "learning_rate": 1.1760014128104859e-06, "loss": 17.1719, "step": 13864 }, { "epoch": 0.9208341635119878, "grad_norm": 190.505859375, "learning_rate": 1.175895538731402e-06, "loss": 16.5781, "step": 13865 }, { "epoch": 0.9209005778043435, "grad_norm": 320.775634765625, "learning_rate": 1.1757896626176598e-06, "loss": 16.2812, "step": 13866 }, { "epoch": 0.9209669920966992, "grad_norm": 404.8790588378906, "learning_rate": 1.175683784470484e-06, "loss": 12.8125, "step": 13867 }, { "epoch": 0.921033406389055, "grad_norm": 186.26876831054688, "learning_rate": 1.1755779042910997e-06, "loss": 13.875, "step": 13868 }, { "epoch": 0.9210998206814106, "grad_norm": 135.71546936035156, "learning_rate": 1.1754720220807315e-06, "loss": 17.5469, "step": 13869 }, { "epoch": 0.9211662349737664, "grad_norm": 537.9979858398438, "learning_rate": 1.175366137840604e-06, "loss": 22.9219, "step": 13870 }, { "epoch": 0.921232649266122, "grad_norm": 344.1333923339844, "learning_rate": 1.1752602515719422e-06, "loss": 12.7969, "step": 13871 }, { "epoch": 0.9212990635584778, "grad_norm": 298.3353271484375, "learning_rate": 1.1751543632759709e-06, "loss": 22.3594, "step": 13872 }, { "epoch": 0.9213654778508334, "grad_norm": 227.1334991455078, "learning_rate": 1.1750484729539148e-06, "loss": 18.4219, "step": 13873 }, { "epoch": 0.9214318921431892, "grad_norm": 168.1014404296875, "learning_rate": 1.174942580606999e-06, "loss": 18.9688, "step": 13874 }, { "epoch": 0.921498306435545, "grad_norm": 290.1566467285156, "learning_rate": 1.1748366862364482e-06, "loss": 16.9844, "step": 13875 }, { "epoch": 0.9215647207279006, "grad_norm": 522.0466918945312, "learning_rate": 1.1747307898434874e-06, "loss": 19.7969, "step": 13876 }, { "epoch": 0.9216311350202564, "grad_norm": 286.14825439453125, "learning_rate": 1.1746248914293417e-06, "loss": 30.5938, "step": 13877 }, { "epoch": 0.921697549312612, "grad_norm": 289.8384704589844, "learning_rate": 1.1745189909952362e-06, "loss": 21.1875, "step": 13878 }, { "epoch": 0.9217639636049678, "grad_norm": 238.3465118408203, "learning_rate": 1.1744130885423952e-06, "loss": 26.2812, "step": 13879 }, { "epoch": 0.9218303778973235, "grad_norm": 142.4233856201172, "learning_rate": 1.1743071840720445e-06, "loss": 16.7656, "step": 13880 }, { "epoch": 0.9218967921896792, "grad_norm": 632.5816650390625, "learning_rate": 1.1742012775854084e-06, "loss": 16.1562, "step": 13881 }, { "epoch": 0.9219632064820349, "grad_norm": 184.16000366210938, "learning_rate": 1.174095369083713e-06, "loss": 17.4844, "step": 13882 }, { "epoch": 0.9220296207743907, "grad_norm": 864.7320556640625, "learning_rate": 1.1739894585681823e-06, "loss": 18.7812, "step": 13883 }, { "epoch": 0.9220960350667463, "grad_norm": 120.52044677734375, "learning_rate": 1.1738835460400421e-06, "loss": 17.2812, "step": 13884 }, { "epoch": 0.9221624493591021, "grad_norm": 651.3514404296875, "learning_rate": 1.173777631500517e-06, "loss": 26.875, "step": 13885 }, { "epoch": 0.9222288636514578, "grad_norm": 145.2726287841797, "learning_rate": 1.1736717149508328e-06, "loss": 13.7656, "step": 13886 }, { "epoch": 0.9222952779438135, "grad_norm": 206.0854949951172, "learning_rate": 1.173565796392214e-06, "loss": 16.9531, "step": 13887 }, { "epoch": 0.9223616922361693, "grad_norm": 120.2880630493164, "learning_rate": 1.1734598758258862e-06, "loss": 14.125, "step": 13888 }, { "epoch": 0.9224281065285249, "grad_norm": 140.9849395751953, "learning_rate": 1.173353953253075e-06, "loss": 13.1562, "step": 13889 }, { "epoch": 0.9224945208208807, "grad_norm": 187.3050079345703, "learning_rate": 1.1732480286750048e-06, "loss": 11.2344, "step": 13890 }, { "epoch": 0.9225609351132363, "grad_norm": 142.87353515625, "learning_rate": 1.1731421020929016e-06, "loss": 14.8438, "step": 13891 }, { "epoch": 0.9226273494055921, "grad_norm": 438.0882263183594, "learning_rate": 1.17303617350799e-06, "loss": 20.6094, "step": 13892 }, { "epoch": 0.9226937636979478, "grad_norm": 586.7261352539062, "learning_rate": 1.1729302429214963e-06, "loss": 18.7188, "step": 13893 }, { "epoch": 0.9227601779903035, "grad_norm": 191.5500030517578, "learning_rate": 1.1728243103346448e-06, "loss": 18.6484, "step": 13894 }, { "epoch": 0.9228265922826592, "grad_norm": 491.5491027832031, "learning_rate": 1.1727183757486614e-06, "loss": 30.5, "step": 13895 }, { "epoch": 0.9228930065750149, "grad_norm": 287.9892578125, "learning_rate": 1.1726124391647715e-06, "loss": 20.3906, "step": 13896 }, { "epoch": 0.9229594208673707, "grad_norm": 109.01211547851562, "learning_rate": 1.1725065005842006e-06, "loss": 15.7969, "step": 13897 }, { "epoch": 0.9230258351597264, "grad_norm": 126.11534118652344, "learning_rate": 1.1724005600081738e-06, "loss": 20.3125, "step": 13898 }, { "epoch": 0.9230922494520821, "grad_norm": 218.43043518066406, "learning_rate": 1.1722946174379167e-06, "loss": 14.7188, "step": 13899 }, { "epoch": 0.9231586637444378, "grad_norm": 131.60984802246094, "learning_rate": 1.172188672874655e-06, "loss": 13.8906, "step": 13900 }, { "epoch": 0.9232250780367935, "grad_norm": 129.77357482910156, "learning_rate": 1.1720827263196139e-06, "loss": 13.1875, "step": 13901 }, { "epoch": 0.9232914923291492, "grad_norm": 120.09488677978516, "learning_rate": 1.1719767777740188e-06, "loss": 14.5469, "step": 13902 }, { "epoch": 0.923357906621505, "grad_norm": 140.25442504882812, "learning_rate": 1.1718708272390957e-06, "loss": 18.2344, "step": 13903 }, { "epoch": 0.9234243209138606, "grad_norm": 613.6776733398438, "learning_rate": 1.1717648747160702e-06, "loss": 25.0938, "step": 13904 }, { "epoch": 0.9234907352062164, "grad_norm": 150.499755859375, "learning_rate": 1.1716589202061676e-06, "loss": 17.8438, "step": 13905 }, { "epoch": 0.923557149498572, "grad_norm": 213.47021484375, "learning_rate": 1.1715529637106135e-06, "loss": 15.1562, "step": 13906 }, { "epoch": 0.9236235637909278, "grad_norm": 200.71566772460938, "learning_rate": 1.1714470052306336e-06, "loss": 20.9375, "step": 13907 }, { "epoch": 0.9236899780832836, "grad_norm": 159.1558074951172, "learning_rate": 1.1713410447674537e-06, "loss": 18.0625, "step": 13908 }, { "epoch": 0.9237563923756392, "grad_norm": 146.3910675048828, "learning_rate": 1.1712350823222989e-06, "loss": 17.0703, "step": 13909 }, { "epoch": 0.923822806667995, "grad_norm": 193.3775634765625, "learning_rate": 1.171129117896396e-06, "loss": 16.6562, "step": 13910 }, { "epoch": 0.9238892209603506, "grad_norm": 184.2185516357422, "learning_rate": 1.1710231514909698e-06, "loss": 13.8203, "step": 13911 }, { "epoch": 0.9239556352527064, "grad_norm": 205.46273803710938, "learning_rate": 1.1709171831072467e-06, "loss": 13.5781, "step": 13912 }, { "epoch": 0.9240220495450621, "grad_norm": 260.595947265625, "learning_rate": 1.170811212746452e-06, "loss": 21.0, "step": 13913 }, { "epoch": 0.9240884638374178, "grad_norm": 274.0948791503906, "learning_rate": 1.170705240409812e-06, "loss": 22.3594, "step": 13914 }, { "epoch": 0.9241548781297735, "grad_norm": 480.8735656738281, "learning_rate": 1.1705992660985518e-06, "loss": 22.8125, "step": 13915 }, { "epoch": 0.9242212924221292, "grad_norm": 150.34622192382812, "learning_rate": 1.1704932898138978e-06, "loss": 16.5625, "step": 13916 }, { "epoch": 0.9242877067144849, "grad_norm": 180.2115020751953, "learning_rate": 1.1703873115570756e-06, "loss": 12.4844, "step": 13917 }, { "epoch": 0.9243541210068407, "grad_norm": 328.69171142578125, "learning_rate": 1.1702813313293116e-06, "loss": 16.5625, "step": 13918 }, { "epoch": 0.9244205352991964, "grad_norm": 303.4171447753906, "learning_rate": 1.1701753491318312e-06, "loss": 15.5938, "step": 13919 }, { "epoch": 0.9244869495915521, "grad_norm": 167.0362091064453, "learning_rate": 1.17006936496586e-06, "loss": 15.4062, "step": 13920 }, { "epoch": 0.9245533638839079, "grad_norm": 188.2670135498047, "learning_rate": 1.1699633788326253e-06, "loss": 13.9219, "step": 13921 }, { "epoch": 0.9246197781762635, "grad_norm": 170.74156188964844, "learning_rate": 1.1698573907333518e-06, "loss": 15.9688, "step": 13922 }, { "epoch": 0.9246861924686193, "grad_norm": 158.6240997314453, "learning_rate": 1.1697514006692658e-06, "loss": 13.0, "step": 13923 }, { "epoch": 0.9247526067609749, "grad_norm": 752.3200073242188, "learning_rate": 1.1696454086415937e-06, "loss": 15.0312, "step": 13924 }, { "epoch": 0.9248190210533307, "grad_norm": 406.7743835449219, "learning_rate": 1.1695394146515613e-06, "loss": 16.5625, "step": 13925 }, { "epoch": 0.9248854353456863, "grad_norm": 126.35488891601562, "learning_rate": 1.1694334187003947e-06, "loss": 15.1719, "step": 13926 }, { "epoch": 0.9249518496380421, "grad_norm": 142.76365661621094, "learning_rate": 1.16932742078932e-06, "loss": 14.9375, "step": 13927 }, { "epoch": 0.9250182639303979, "grad_norm": 77.7164306640625, "learning_rate": 1.1692214209195635e-06, "loss": 11.2656, "step": 13928 }, { "epoch": 0.9250846782227535, "grad_norm": 276.4362487792969, "learning_rate": 1.169115419092351e-06, "loss": 18.3438, "step": 13929 }, { "epoch": 0.9251510925151093, "grad_norm": 173.38455200195312, "learning_rate": 1.169009415308909e-06, "loss": 16.9844, "step": 13930 }, { "epoch": 0.925217506807465, "grad_norm": 140.201904296875, "learning_rate": 1.1689034095704633e-06, "loss": 14.6016, "step": 13931 }, { "epoch": 0.9252839210998207, "grad_norm": 416.1241149902344, "learning_rate": 1.1687974018782405e-06, "loss": 17.2812, "step": 13932 }, { "epoch": 0.9253503353921764, "grad_norm": 255.96389770507812, "learning_rate": 1.1686913922334667e-06, "loss": 16.0156, "step": 13933 }, { "epoch": 0.9254167496845321, "grad_norm": 232.12608337402344, "learning_rate": 1.168585380637368e-06, "loss": 14.4688, "step": 13934 }, { "epoch": 0.9254831639768878, "grad_norm": 283.38055419921875, "learning_rate": 1.1684793670911707e-06, "loss": 16.1406, "step": 13935 }, { "epoch": 0.9255495782692436, "grad_norm": 280.15631103515625, "learning_rate": 1.1683733515961019e-06, "loss": 18.4219, "step": 13936 }, { "epoch": 0.9256159925615992, "grad_norm": 271.4341735839844, "learning_rate": 1.1682673341533866e-06, "loss": 10.9688, "step": 13937 }, { "epoch": 0.925682406853955, "grad_norm": 264.1971130371094, "learning_rate": 1.1681613147642522e-06, "loss": 12.8438, "step": 13938 }, { "epoch": 0.9257488211463107, "grad_norm": 289.19091796875, "learning_rate": 1.1680552934299245e-06, "loss": 19.8281, "step": 13939 }, { "epoch": 0.9258152354386664, "grad_norm": 223.49539184570312, "learning_rate": 1.16794927015163e-06, "loss": 18.6719, "step": 13940 }, { "epoch": 0.9258816497310222, "grad_norm": 394.6381530761719, "learning_rate": 1.1678432449305953e-06, "loss": 21.7812, "step": 13941 }, { "epoch": 0.9259480640233778, "grad_norm": 284.56964111328125, "learning_rate": 1.1677372177680468e-06, "loss": 23.6562, "step": 13942 }, { "epoch": 0.9260144783157336, "grad_norm": 436.0841369628906, "learning_rate": 1.1676311886652109e-06, "loss": 25.9688, "step": 13943 }, { "epoch": 0.9260808926080892, "grad_norm": 609.9356079101562, "learning_rate": 1.167525157623314e-06, "loss": 19.3594, "step": 13944 }, { "epoch": 0.926147306900445, "grad_norm": 220.3490447998047, "learning_rate": 1.1674191246435826e-06, "loss": 15.7031, "step": 13945 }, { "epoch": 0.9262137211928007, "grad_norm": 257.7503967285156, "learning_rate": 1.1673130897272432e-06, "loss": 15.2969, "step": 13946 }, { "epoch": 0.9262801354851564, "grad_norm": 243.43109130859375, "learning_rate": 1.1672070528755225e-06, "loss": 19.0469, "step": 13947 }, { "epoch": 0.9263465497775121, "grad_norm": 259.7588195800781, "learning_rate": 1.167101014089647e-06, "loss": 18.3906, "step": 13948 }, { "epoch": 0.9264129640698678, "grad_norm": 563.1836547851562, "learning_rate": 1.1669949733708436e-06, "loss": 24.5469, "step": 13949 }, { "epoch": 0.9264793783622236, "grad_norm": 422.89337158203125, "learning_rate": 1.1668889307203385e-06, "loss": 11.25, "step": 13950 }, { "epoch": 0.9265457926545793, "grad_norm": 1115.54345703125, "learning_rate": 1.1667828861393585e-06, "loss": 17.25, "step": 13951 }, { "epoch": 0.926612206946935, "grad_norm": 171.22262573242188, "learning_rate": 1.16667683962913e-06, "loss": 19.6562, "step": 13952 }, { "epoch": 0.9266786212392907, "grad_norm": 518.1735229492188, "learning_rate": 1.1665707911908804e-06, "loss": 15.2109, "step": 13953 }, { "epoch": 0.9267450355316464, "grad_norm": 175.84738159179688, "learning_rate": 1.1664647408258357e-06, "loss": 14.5547, "step": 13954 }, { "epoch": 0.9268114498240021, "grad_norm": 197.17306518554688, "learning_rate": 1.1663586885352228e-06, "loss": 14.8906, "step": 13955 }, { "epoch": 0.9268778641163579, "grad_norm": 201.5785675048828, "learning_rate": 1.1662526343202685e-06, "loss": 10.9531, "step": 13956 }, { "epoch": 0.9269442784087135, "grad_norm": 279.4155578613281, "learning_rate": 1.1661465781821997e-06, "loss": 12.1172, "step": 13957 }, { "epoch": 0.9270106927010693, "grad_norm": 113.38127136230469, "learning_rate": 1.1660405201222431e-06, "loss": 15.9688, "step": 13958 }, { "epoch": 0.9270771069934249, "grad_norm": 198.0388641357422, "learning_rate": 1.1659344601416254e-06, "loss": 22.6094, "step": 13959 }, { "epoch": 0.9271435212857807, "grad_norm": 181.60752868652344, "learning_rate": 1.1658283982415736e-06, "loss": 15.2188, "step": 13960 }, { "epoch": 0.9272099355781365, "grad_norm": 285.5871887207031, "learning_rate": 1.1657223344233147e-06, "loss": 18.1406, "step": 13961 }, { "epoch": 0.9272763498704921, "grad_norm": 266.90789794921875, "learning_rate": 1.1656162686880752e-06, "loss": 22.25, "step": 13962 }, { "epoch": 0.9273427641628479, "grad_norm": 526.165283203125, "learning_rate": 1.165510201037082e-06, "loss": 18.6719, "step": 13963 }, { "epoch": 0.9274091784552035, "grad_norm": 414.1994934082031, "learning_rate": 1.1654041314715629e-06, "loss": 19.1875, "step": 13964 }, { "epoch": 0.9274755927475593, "grad_norm": 232.79429626464844, "learning_rate": 1.1652980599927435e-06, "loss": 14.1016, "step": 13965 }, { "epoch": 0.927542007039915, "grad_norm": 283.8873291015625, "learning_rate": 1.1651919866018519e-06, "loss": 22.7812, "step": 13966 }, { "epoch": 0.9276084213322707, "grad_norm": 278.6662292480469, "learning_rate": 1.165085911300114e-06, "loss": 20.0, "step": 13967 }, { "epoch": 0.9276748356246264, "grad_norm": 146.19451904296875, "learning_rate": 1.1649798340887582e-06, "loss": 15.8906, "step": 13968 }, { "epoch": 0.9277412499169821, "grad_norm": 340.3782653808594, "learning_rate": 1.1648737549690107e-06, "loss": 17.5625, "step": 13969 }, { "epoch": 0.9278076642093378, "grad_norm": 139.53736877441406, "learning_rate": 1.1647676739420984e-06, "loss": 17.0, "step": 13970 }, { "epoch": 0.9278740785016936, "grad_norm": 213.75685119628906, "learning_rate": 1.164661591009249e-06, "loss": 18.7344, "step": 13971 }, { "epoch": 0.9279404927940493, "grad_norm": 638.3567504882812, "learning_rate": 1.164555506171689e-06, "loss": 17.2812, "step": 13972 }, { "epoch": 0.928006907086405, "grad_norm": 243.7135772705078, "learning_rate": 1.1644494194306456e-06, "loss": 17.0469, "step": 13973 }, { "epoch": 0.9280733213787608, "grad_norm": 410.11334228515625, "learning_rate": 1.1643433307873464e-06, "loss": 15.1641, "step": 13974 }, { "epoch": 0.9281397356711164, "grad_norm": 250.2569122314453, "learning_rate": 1.1642372402430184e-06, "loss": 21.2031, "step": 13975 }, { "epoch": 0.9282061499634722, "grad_norm": 238.8245391845703, "learning_rate": 1.1641311477988887e-06, "loss": 14.9688, "step": 13976 }, { "epoch": 0.9282725642558278, "grad_norm": 215.35743713378906, "learning_rate": 1.1640250534561841e-06, "loss": 16.4844, "step": 13977 }, { "epoch": 0.9283389785481836, "grad_norm": 161.49765014648438, "learning_rate": 1.1639189572161328e-06, "loss": 11.0312, "step": 13978 }, { "epoch": 0.9284053928405392, "grad_norm": 141.55068969726562, "learning_rate": 1.1638128590799613e-06, "loss": 15.0781, "step": 13979 }, { "epoch": 0.928471807132895, "grad_norm": 300.33575439453125, "learning_rate": 1.1637067590488971e-06, "loss": 12.0156, "step": 13980 }, { "epoch": 0.9285382214252507, "grad_norm": 278.0499267578125, "learning_rate": 1.1636006571241675e-06, "loss": 16.0938, "step": 13981 }, { "epoch": 0.9286046357176064, "grad_norm": 131.16162109375, "learning_rate": 1.1634945533069999e-06, "loss": 13.0938, "step": 13982 }, { "epoch": 0.9286710500099622, "grad_norm": 144.32440185546875, "learning_rate": 1.1633884475986216e-06, "loss": 14.5625, "step": 13983 }, { "epoch": 0.9287374643023178, "grad_norm": 270.49053955078125, "learning_rate": 1.1632823400002597e-06, "loss": 19.0156, "step": 13984 }, { "epoch": 0.9288038785946736, "grad_norm": 209.1817169189453, "learning_rate": 1.1631762305131422e-06, "loss": 17.9062, "step": 13985 }, { "epoch": 0.9288702928870293, "grad_norm": 400.11419677734375, "learning_rate": 1.1630701191384959e-06, "loss": 26.0, "step": 13986 }, { "epoch": 0.928936707179385, "grad_norm": 110.32889556884766, "learning_rate": 1.1629640058775485e-06, "loss": 16.5312, "step": 13987 }, { "epoch": 0.9290031214717407, "grad_norm": 400.6357116699219, "learning_rate": 1.1628578907315276e-06, "loss": 22.875, "step": 13988 }, { "epoch": 0.9290695357640965, "grad_norm": 315.18939208984375, "learning_rate": 1.1627517737016605e-06, "loss": 18.1562, "step": 13989 }, { "epoch": 0.9291359500564521, "grad_norm": 217.1800079345703, "learning_rate": 1.1626456547891748e-06, "loss": 24.3438, "step": 13990 }, { "epoch": 0.9292023643488079, "grad_norm": 138.3140411376953, "learning_rate": 1.1625395339952978e-06, "loss": 16.0, "step": 13991 }, { "epoch": 0.9292687786411635, "grad_norm": 316.0625305175781, "learning_rate": 1.1624334113212574e-06, "loss": 21.125, "step": 13992 }, { "epoch": 0.9293351929335193, "grad_norm": 209.77403259277344, "learning_rate": 1.1623272867682806e-06, "loss": 22.8906, "step": 13993 }, { "epoch": 0.9294016072258751, "grad_norm": 253.07667541503906, "learning_rate": 1.1622211603375957e-06, "loss": 20.0312, "step": 13994 }, { "epoch": 0.9294680215182307, "grad_norm": 251.96763610839844, "learning_rate": 1.1621150320304298e-06, "loss": 13.9297, "step": 13995 }, { "epoch": 0.9295344358105865, "grad_norm": 217.87754821777344, "learning_rate": 1.162008901848011e-06, "loss": 20.5781, "step": 13996 }, { "epoch": 0.9296008501029421, "grad_norm": 152.73756408691406, "learning_rate": 1.1619027697915663e-06, "loss": 15.1719, "step": 13997 }, { "epoch": 0.9296672643952979, "grad_norm": 236.15975952148438, "learning_rate": 1.1617966358623236e-06, "loss": 15.0, "step": 13998 }, { "epoch": 0.9297336786876536, "grad_norm": 116.88550567626953, "learning_rate": 1.161690500061511e-06, "loss": 10.4531, "step": 13999 }, { "epoch": 0.9298000929800093, "grad_norm": 248.3883056640625, "learning_rate": 1.1615843623903558e-06, "loss": 16.7031, "step": 14000 }, { "epoch": 0.929866507272365, "grad_norm": 316.5467529296875, "learning_rate": 1.1614782228500859e-06, "loss": 18.8125, "step": 14001 }, { "epoch": 0.9299329215647207, "grad_norm": 202.22799682617188, "learning_rate": 1.161372081441929e-06, "loss": 14.7656, "step": 14002 }, { "epoch": 0.9299993358570764, "grad_norm": 149.57240295410156, "learning_rate": 1.1612659381671131e-06, "loss": 12.0156, "step": 14003 }, { "epoch": 0.9300657501494322, "grad_norm": 202.9608612060547, "learning_rate": 1.1611597930268655e-06, "loss": 19.9688, "step": 14004 }, { "epoch": 0.9301321644417879, "grad_norm": 134.3763427734375, "learning_rate": 1.1610536460224146e-06, "loss": 14.8594, "step": 14005 }, { "epoch": 0.9301985787341436, "grad_norm": 160.9499053955078, "learning_rate": 1.1609474971549878e-06, "loss": 14.0938, "step": 14006 }, { "epoch": 0.9302649930264993, "grad_norm": 338.1408996582031, "learning_rate": 1.1608413464258132e-06, "loss": 19.1875, "step": 14007 }, { "epoch": 0.930331407318855, "grad_norm": 165.3679656982422, "learning_rate": 1.160735193836119e-06, "loss": 17.7578, "step": 14008 }, { "epoch": 0.9303978216112108, "grad_norm": 454.7276611328125, "learning_rate": 1.1606290393871323e-06, "loss": 13.1719, "step": 14009 }, { "epoch": 0.9304642359035664, "grad_norm": 206.1351318359375, "learning_rate": 1.1605228830800815e-06, "loss": 20.4844, "step": 14010 }, { "epoch": 0.9305306501959222, "grad_norm": 256.0267028808594, "learning_rate": 1.160416724916195e-06, "loss": 14.5312, "step": 14011 }, { "epoch": 0.9305970644882778, "grad_norm": 301.83575439453125, "learning_rate": 1.1603105648967e-06, "loss": 22.2969, "step": 14012 }, { "epoch": 0.9306634787806336, "grad_norm": 345.44354248046875, "learning_rate": 1.160204403022825e-06, "loss": 18.6875, "step": 14013 }, { "epoch": 0.9307298930729893, "grad_norm": 219.4303741455078, "learning_rate": 1.1600982392957977e-06, "loss": 18.4844, "step": 14014 }, { "epoch": 0.930796307365345, "grad_norm": 223.27212524414062, "learning_rate": 1.1599920737168464e-06, "loss": 15.8125, "step": 14015 }, { "epoch": 0.9308627216577008, "grad_norm": 301.59246826171875, "learning_rate": 1.1598859062871986e-06, "loss": 16.6484, "step": 14016 }, { "epoch": 0.9309291359500564, "grad_norm": 151.3486328125, "learning_rate": 1.1597797370080832e-06, "loss": 14.7969, "step": 14017 }, { "epoch": 0.9309955502424122, "grad_norm": 222.42709350585938, "learning_rate": 1.1596735658807282e-06, "loss": 16.7031, "step": 14018 }, { "epoch": 0.9310619645347679, "grad_norm": 223.28164672851562, "learning_rate": 1.159567392906361e-06, "loss": 15.1406, "step": 14019 }, { "epoch": 0.9311283788271236, "grad_norm": 234.12863159179688, "learning_rate": 1.1594612180862102e-06, "loss": 26.6719, "step": 14020 }, { "epoch": 0.9311947931194793, "grad_norm": 135.10781860351562, "learning_rate": 1.1593550414215043e-06, "loss": 13.4688, "step": 14021 }, { "epoch": 0.931261207411835, "grad_norm": 446.4962463378906, "learning_rate": 1.1592488629134709e-06, "loss": 21.9844, "step": 14022 }, { "epoch": 0.9313276217041907, "grad_norm": 173.39312744140625, "learning_rate": 1.1591426825633386e-06, "loss": 17.2656, "step": 14023 }, { "epoch": 0.9313940359965465, "grad_norm": 439.5224609375, "learning_rate": 1.1590365003723357e-06, "loss": 15.6562, "step": 14024 }, { "epoch": 0.9314604502889021, "grad_norm": 132.21556091308594, "learning_rate": 1.1589303163416902e-06, "loss": 11.7578, "step": 14025 }, { "epoch": 0.9315268645812579, "grad_norm": 212.56884765625, "learning_rate": 1.1588241304726303e-06, "loss": 24.1719, "step": 14026 }, { "epoch": 0.9315932788736137, "grad_norm": 195.51803588867188, "learning_rate": 1.1587179427663843e-06, "loss": 17.4062, "step": 14027 }, { "epoch": 0.9316596931659693, "grad_norm": 361.02392578125, "learning_rate": 1.158611753224181e-06, "loss": 18.8281, "step": 14028 }, { "epoch": 0.9317261074583251, "grad_norm": 197.377685546875, "learning_rate": 1.1585055618472483e-06, "loss": 21.8438, "step": 14029 }, { "epoch": 0.9317925217506807, "grad_norm": 167.465576171875, "learning_rate": 1.1583993686368144e-06, "loss": 14.0156, "step": 14030 }, { "epoch": 0.9318589360430365, "grad_norm": 468.4892272949219, "learning_rate": 1.1582931735941082e-06, "loss": 19.1406, "step": 14031 }, { "epoch": 0.9319253503353921, "grad_norm": 383.19891357421875, "learning_rate": 1.158186976720358e-06, "loss": 19.8906, "step": 14032 }, { "epoch": 0.9319917646277479, "grad_norm": 252.3173828125, "learning_rate": 1.158080778016792e-06, "loss": 16.5781, "step": 14033 }, { "epoch": 0.9320581789201036, "grad_norm": 156.72003173828125, "learning_rate": 1.1579745774846383e-06, "loss": 18.1875, "step": 14034 }, { "epoch": 0.9321245932124593, "grad_norm": 207.5613555908203, "learning_rate": 1.1578683751251264e-06, "loss": 27.4531, "step": 14035 }, { "epoch": 0.932191007504815, "grad_norm": 131.17027282714844, "learning_rate": 1.1577621709394838e-06, "loss": 23.9219, "step": 14036 }, { "epoch": 0.9322574217971707, "grad_norm": 343.3312072753906, "learning_rate": 1.1576559649289392e-06, "loss": 19.1094, "step": 14037 }, { "epoch": 0.9323238360895265, "grad_norm": 199.27133178710938, "learning_rate": 1.1575497570947215e-06, "loss": 14.6875, "step": 14038 }, { "epoch": 0.9323902503818822, "grad_norm": 220.17465209960938, "learning_rate": 1.1574435474380592e-06, "loss": 16.1719, "step": 14039 }, { "epoch": 0.9324566646742379, "grad_norm": 353.83990478515625, "learning_rate": 1.1573373359601806e-06, "loss": 20.375, "step": 14040 }, { "epoch": 0.9325230789665936, "grad_norm": 348.6593017578125, "learning_rate": 1.1572311226623145e-06, "loss": 12.7656, "step": 14041 }, { "epoch": 0.9325894932589494, "grad_norm": 390.3749084472656, "learning_rate": 1.1571249075456893e-06, "loss": 23.2812, "step": 14042 }, { "epoch": 0.932655907551305, "grad_norm": 162.2781982421875, "learning_rate": 1.157018690611534e-06, "loss": 18.3125, "step": 14043 }, { "epoch": 0.9327223218436608, "grad_norm": 554.2074584960938, "learning_rate": 1.1569124718610767e-06, "loss": 20.8125, "step": 14044 }, { "epoch": 0.9327887361360164, "grad_norm": 529.2793579101562, "learning_rate": 1.1568062512955465e-06, "loss": 24.1875, "step": 14045 }, { "epoch": 0.9328551504283722, "grad_norm": 273.7620544433594, "learning_rate": 1.1567000289161724e-06, "loss": 13.375, "step": 14046 }, { "epoch": 0.9329215647207278, "grad_norm": 401.51019287109375, "learning_rate": 1.1565938047241823e-06, "loss": 13.0781, "step": 14047 }, { "epoch": 0.9329879790130836, "grad_norm": 365.3753967285156, "learning_rate": 1.1564875787208052e-06, "loss": 19.8125, "step": 14048 }, { "epoch": 0.9330543933054394, "grad_norm": 301.5508117675781, "learning_rate": 1.1563813509072705e-06, "loss": 14.1406, "step": 14049 }, { "epoch": 0.933120807597795, "grad_norm": 473.13616943359375, "learning_rate": 1.1562751212848063e-06, "loss": 27.4531, "step": 14050 }, { "epoch": 0.9331872218901508, "grad_norm": 185.359130859375, "learning_rate": 1.1561688898546416e-06, "loss": 11.8125, "step": 14051 }, { "epoch": 0.9332536361825065, "grad_norm": 146.6707305908203, "learning_rate": 1.156062656618005e-06, "loss": 11.0781, "step": 14052 }, { "epoch": 0.9333200504748622, "grad_norm": 182.52610778808594, "learning_rate": 1.155956421576126e-06, "loss": 15.3125, "step": 14053 }, { "epoch": 0.9333864647672179, "grad_norm": 149.91624450683594, "learning_rate": 1.1558501847302332e-06, "loss": 19.0469, "step": 14054 }, { "epoch": 0.9334528790595736, "grad_norm": 491.9882507324219, "learning_rate": 1.1557439460815548e-06, "loss": 18.0938, "step": 14055 }, { "epoch": 0.9335192933519293, "grad_norm": 100.34410858154297, "learning_rate": 1.1556377056313204e-06, "loss": 11.8828, "step": 14056 }, { "epoch": 0.9335857076442851, "grad_norm": 156.44717407226562, "learning_rate": 1.1555314633807588e-06, "loss": 18.4844, "step": 14057 }, { "epoch": 0.9336521219366407, "grad_norm": 196.8466796875, "learning_rate": 1.155425219331099e-06, "loss": 11.0156, "step": 14058 }, { "epoch": 0.9337185362289965, "grad_norm": 291.80096435546875, "learning_rate": 1.1553189734835695e-06, "loss": 15.4531, "step": 14059 }, { "epoch": 0.9337849505213522, "grad_norm": 419.47784423828125, "learning_rate": 1.1552127258394002e-06, "loss": 16.75, "step": 14060 }, { "epoch": 0.9338513648137079, "grad_norm": 131.61390686035156, "learning_rate": 1.1551064763998194e-06, "loss": 12.3438, "step": 14061 }, { "epoch": 0.9339177791060637, "grad_norm": 98.4249496459961, "learning_rate": 1.1550002251660562e-06, "loss": 17.3281, "step": 14062 }, { "epoch": 0.9339841933984193, "grad_norm": 140.0194091796875, "learning_rate": 1.1548939721393398e-06, "loss": 17.5938, "step": 14063 }, { "epoch": 0.9340506076907751, "grad_norm": 387.3232727050781, "learning_rate": 1.1547877173208994e-06, "loss": 22.9531, "step": 14064 }, { "epoch": 0.9341170219831307, "grad_norm": 198.79710388183594, "learning_rate": 1.154681460711964e-06, "loss": 15.4688, "step": 14065 }, { "epoch": 0.9341834362754865, "grad_norm": 321.6970520019531, "learning_rate": 1.1545752023137626e-06, "loss": 22.8125, "step": 14066 }, { "epoch": 0.9342498505678422, "grad_norm": 236.04798889160156, "learning_rate": 1.1544689421275243e-06, "loss": 20.75, "step": 14067 }, { "epoch": 0.9343162648601979, "grad_norm": 209.56439208984375, "learning_rate": 1.1543626801544783e-06, "loss": 12.4219, "step": 14068 }, { "epoch": 0.9343826791525536, "grad_norm": 215.45587158203125, "learning_rate": 1.1542564163958538e-06, "loss": 16.5938, "step": 14069 }, { "epoch": 0.9344490934449093, "grad_norm": 163.25856018066406, "learning_rate": 1.1541501508528801e-06, "loss": 14.1562, "step": 14070 }, { "epoch": 0.9345155077372651, "grad_norm": 170.95367431640625, "learning_rate": 1.1540438835267864e-06, "loss": 17.4375, "step": 14071 }, { "epoch": 0.9345819220296208, "grad_norm": 222.457763671875, "learning_rate": 1.153937614418802e-06, "loss": 16.7344, "step": 14072 }, { "epoch": 0.9346483363219765, "grad_norm": 302.4137268066406, "learning_rate": 1.1538313435301558e-06, "loss": 15.4844, "step": 14073 }, { "epoch": 0.9347147506143322, "grad_norm": 270.39349365234375, "learning_rate": 1.153725070862077e-06, "loss": 17.0312, "step": 14074 }, { "epoch": 0.934781164906688, "grad_norm": 272.5193786621094, "learning_rate": 1.153618796415796e-06, "loss": 17.25, "step": 14075 }, { "epoch": 0.9348475791990436, "grad_norm": 215.47628784179688, "learning_rate": 1.1535125201925411e-06, "loss": 18.1875, "step": 14076 }, { "epoch": 0.9349139934913994, "grad_norm": 116.54003143310547, "learning_rate": 1.1534062421935416e-06, "loss": 14.6562, "step": 14077 }, { "epoch": 0.934980407783755, "grad_norm": 201.09300231933594, "learning_rate": 1.1532999624200274e-06, "loss": 14.1094, "step": 14078 }, { "epoch": 0.9350468220761108, "grad_norm": 521.7757568359375, "learning_rate": 1.1531936808732274e-06, "loss": 21.8125, "step": 14079 }, { "epoch": 0.9351132363684665, "grad_norm": 99.44380950927734, "learning_rate": 1.1530873975543714e-06, "loss": 13.2109, "step": 14080 }, { "epoch": 0.9351796506608222, "grad_norm": 229.35716247558594, "learning_rate": 1.1529811124646887e-06, "loss": 14.4531, "step": 14081 }, { "epoch": 0.935246064953178, "grad_norm": 452.3534851074219, "learning_rate": 1.1528748256054086e-06, "loss": 16.2656, "step": 14082 }, { "epoch": 0.9353124792455336, "grad_norm": 320.1664123535156, "learning_rate": 1.1527685369777608e-06, "loss": 21.1875, "step": 14083 }, { "epoch": 0.9353788935378894, "grad_norm": 235.42076110839844, "learning_rate": 1.1526622465829745e-06, "loss": 20.8281, "step": 14084 }, { "epoch": 0.935445307830245, "grad_norm": 398.385009765625, "learning_rate": 1.1525559544222795e-06, "loss": 15.4062, "step": 14085 }, { "epoch": 0.9355117221226008, "grad_norm": 575.7125244140625, "learning_rate": 1.1524496604969052e-06, "loss": 19.9219, "step": 14086 }, { "epoch": 0.9355781364149565, "grad_norm": 179.85479736328125, "learning_rate": 1.152343364808081e-06, "loss": 20.7812, "step": 14087 }, { "epoch": 0.9356445507073122, "grad_norm": 102.93086242675781, "learning_rate": 1.1522370673570365e-06, "loss": 15.0312, "step": 14088 }, { "epoch": 0.9357109649996679, "grad_norm": 213.85902404785156, "learning_rate": 1.1521307681450016e-06, "loss": 17.7031, "step": 14089 }, { "epoch": 0.9357773792920236, "grad_norm": 359.0214538574219, "learning_rate": 1.1520244671732058e-06, "loss": 18.8438, "step": 14090 }, { "epoch": 0.9358437935843794, "grad_norm": 872.3049926757812, "learning_rate": 1.1519181644428783e-06, "loss": 22.0, "step": 14091 }, { "epoch": 0.9359102078767351, "grad_norm": 255.41156005859375, "learning_rate": 1.1518118599552493e-06, "loss": 21.2656, "step": 14092 }, { "epoch": 0.9359766221690908, "grad_norm": 198.20167541503906, "learning_rate": 1.1517055537115482e-06, "loss": 18.3125, "step": 14093 }, { "epoch": 0.9360430364614465, "grad_norm": 222.38905334472656, "learning_rate": 1.1515992457130046e-06, "loss": 14.7344, "step": 14094 }, { "epoch": 0.9361094507538023, "grad_norm": 201.6365966796875, "learning_rate": 1.1514929359608483e-06, "loss": 15.0156, "step": 14095 }, { "epoch": 0.9361758650461579, "grad_norm": 202.09742736816406, "learning_rate": 1.1513866244563092e-06, "loss": 17.2656, "step": 14096 }, { "epoch": 0.9362422793385137, "grad_norm": 285.10504150390625, "learning_rate": 1.1512803112006171e-06, "loss": 17.4062, "step": 14097 }, { "epoch": 0.9363086936308693, "grad_norm": 395.65289306640625, "learning_rate": 1.1511739961950014e-06, "loss": 17.9844, "step": 14098 }, { "epoch": 0.9363751079232251, "grad_norm": 271.6120300292969, "learning_rate": 1.1510676794406924e-06, "loss": 14.9688, "step": 14099 }, { "epoch": 0.9364415222155807, "grad_norm": 227.43783569335938, "learning_rate": 1.1509613609389192e-06, "loss": 21.2969, "step": 14100 }, { "epoch": 0.9365079365079365, "grad_norm": 142.06936645507812, "learning_rate": 1.150855040690912e-06, "loss": 13.0312, "step": 14101 }, { "epoch": 0.9365743508002923, "grad_norm": 159.78260803222656, "learning_rate": 1.150748718697901e-06, "loss": 14.0938, "step": 14102 }, { "epoch": 0.9366407650926479, "grad_norm": 267.8777160644531, "learning_rate": 1.1506423949611157e-06, "loss": 14.5625, "step": 14103 }, { "epoch": 0.9367071793850037, "grad_norm": 285.9607238769531, "learning_rate": 1.1505360694817859e-06, "loss": 23.1094, "step": 14104 }, { "epoch": 0.9367735936773594, "grad_norm": 194.72581481933594, "learning_rate": 1.1504297422611415e-06, "loss": 16.6719, "step": 14105 }, { "epoch": 0.9368400079697151, "grad_norm": 151.6840057373047, "learning_rate": 1.1503234133004127e-06, "loss": 15.5859, "step": 14106 }, { "epoch": 0.9369064222620708, "grad_norm": 155.38340759277344, "learning_rate": 1.1502170826008294e-06, "loss": 17.6406, "step": 14107 }, { "epoch": 0.9369728365544265, "grad_norm": 156.69480895996094, "learning_rate": 1.1501107501636216e-06, "loss": 18.0312, "step": 14108 }, { "epoch": 0.9370392508467822, "grad_norm": 248.45411682128906, "learning_rate": 1.1500044159900191e-06, "loss": 14.8438, "step": 14109 }, { "epoch": 0.937105665139138, "grad_norm": 142.499755859375, "learning_rate": 1.149898080081252e-06, "loss": 14.625, "step": 14110 }, { "epoch": 0.9371720794314936, "grad_norm": 230.82411193847656, "learning_rate": 1.1497917424385503e-06, "loss": 18.8594, "step": 14111 }, { "epoch": 0.9372384937238494, "grad_norm": 125.22887420654297, "learning_rate": 1.1496854030631443e-06, "loss": 19.0312, "step": 14112 }, { "epoch": 0.9373049080162051, "grad_norm": 190.9276123046875, "learning_rate": 1.1495790619562635e-06, "loss": 13.4375, "step": 14113 }, { "epoch": 0.9373713223085608, "grad_norm": 842.2828369140625, "learning_rate": 1.149472719119139e-06, "loss": 19.5625, "step": 14114 }, { "epoch": 0.9374377366009166, "grad_norm": 222.2373809814453, "learning_rate": 1.1493663745529998e-06, "loss": 16.5938, "step": 14115 }, { "epoch": 0.9375041508932722, "grad_norm": 131.26100158691406, "learning_rate": 1.1492600282590763e-06, "loss": 14.9375, "step": 14116 }, { "epoch": 0.937570565185628, "grad_norm": 234.6229705810547, "learning_rate": 1.1491536802385993e-06, "loss": 21.8281, "step": 14117 }, { "epoch": 0.9376369794779836, "grad_norm": 157.6251678466797, "learning_rate": 1.1490473304927984e-06, "loss": 15.2188, "step": 14118 }, { "epoch": 0.9377033937703394, "grad_norm": 206.72579956054688, "learning_rate": 1.1489409790229038e-06, "loss": 18.5156, "step": 14119 }, { "epoch": 0.937769808062695, "grad_norm": 381.4842224121094, "learning_rate": 1.148834625830146e-06, "loss": 15.0469, "step": 14120 }, { "epoch": 0.9378362223550508, "grad_norm": 225.23263549804688, "learning_rate": 1.148728270915755e-06, "loss": 21.7812, "step": 14121 }, { "epoch": 0.9379026366474065, "grad_norm": 169.06509399414062, "learning_rate": 1.1486219142809612e-06, "loss": 16.8594, "step": 14122 }, { "epoch": 0.9379690509397622, "grad_norm": 147.62979125976562, "learning_rate": 1.1485155559269949e-06, "loss": 16.0312, "step": 14123 }, { "epoch": 0.938035465232118, "grad_norm": 242.80258178710938, "learning_rate": 1.1484091958550858e-06, "loss": 16.6094, "step": 14124 }, { "epoch": 0.9381018795244737, "grad_norm": 388.5726623535156, "learning_rate": 1.1483028340664654e-06, "loss": 25.2188, "step": 14125 }, { "epoch": 0.9381682938168294, "grad_norm": 177.75135803222656, "learning_rate": 1.1481964705623627e-06, "loss": 16.9062, "step": 14126 }, { "epoch": 0.9382347081091851, "grad_norm": 175.36866760253906, "learning_rate": 1.148090105344009e-06, "loss": 18.3281, "step": 14127 }, { "epoch": 0.9383011224015408, "grad_norm": 447.9810791015625, "learning_rate": 1.1479837384126346e-06, "loss": 17.2031, "step": 14128 }, { "epoch": 0.9383675366938965, "grad_norm": 348.7829284667969, "learning_rate": 1.147877369769469e-06, "loss": 19.5938, "step": 14129 }, { "epoch": 0.9384339509862523, "grad_norm": 236.44920349121094, "learning_rate": 1.1477709994157434e-06, "loss": 19.0156, "step": 14130 }, { "epoch": 0.9385003652786079, "grad_norm": 183.5041961669922, "learning_rate": 1.1476646273526884e-06, "loss": 18.9062, "step": 14131 }, { "epoch": 0.9385667795709637, "grad_norm": 129.2920684814453, "learning_rate": 1.147558253581534e-06, "loss": 15.4688, "step": 14132 }, { "epoch": 0.9386331938633193, "grad_norm": 372.7818603515625, "learning_rate": 1.1474518781035108e-06, "loss": 17.2969, "step": 14133 }, { "epoch": 0.9386996081556751, "grad_norm": 269.2969970703125, "learning_rate": 1.147345500919849e-06, "loss": 22.2344, "step": 14134 }, { "epoch": 0.9387660224480309, "grad_norm": 131.0232391357422, "learning_rate": 1.14723912203178e-06, "loss": 15.2344, "step": 14135 }, { "epoch": 0.9388324367403865, "grad_norm": 153.2774658203125, "learning_rate": 1.1471327414405336e-06, "loss": 17.625, "step": 14136 }, { "epoch": 0.9388988510327423, "grad_norm": 241.6728057861328, "learning_rate": 1.1470263591473403e-06, "loss": 20.8906, "step": 14137 }, { "epoch": 0.9389652653250979, "grad_norm": 316.5830993652344, "learning_rate": 1.1469199751534308e-06, "loss": 14.8281, "step": 14138 }, { "epoch": 0.9390316796174537, "grad_norm": 506.5397644042969, "learning_rate": 1.1468135894600361e-06, "loss": 16.4062, "step": 14139 }, { "epoch": 0.9390980939098094, "grad_norm": 563.8857421875, "learning_rate": 1.146707202068386e-06, "loss": 19.8125, "step": 14140 }, { "epoch": 0.9391645082021651, "grad_norm": 228.98175048828125, "learning_rate": 1.146600812979712e-06, "loss": 13.5312, "step": 14141 }, { "epoch": 0.9392309224945208, "grad_norm": 163.4589080810547, "learning_rate": 1.146494422195244e-06, "loss": 15.2031, "step": 14142 }, { "epoch": 0.9392973367868765, "grad_norm": 128.36874389648438, "learning_rate": 1.1463880297162134e-06, "loss": 11.5, "step": 14143 }, { "epoch": 0.9393637510792322, "grad_norm": 127.35810852050781, "learning_rate": 1.1462816355438501e-06, "loss": 12.6562, "step": 14144 }, { "epoch": 0.939430165371588, "grad_norm": 245.38397216796875, "learning_rate": 1.1461752396793854e-06, "loss": 22.3047, "step": 14145 }, { "epoch": 0.9394965796639437, "grad_norm": 583.9588012695312, "learning_rate": 1.1460688421240496e-06, "loss": 20.0312, "step": 14146 }, { "epoch": 0.9395629939562994, "grad_norm": 185.77273559570312, "learning_rate": 1.145962442879074e-06, "loss": 15.9219, "step": 14147 }, { "epoch": 0.9396294082486552, "grad_norm": 159.72982788085938, "learning_rate": 1.1458560419456886e-06, "loss": 12.5391, "step": 14148 }, { "epoch": 0.9396958225410108, "grad_norm": 247.82080078125, "learning_rate": 1.145749639325125e-06, "loss": 17.0312, "step": 14149 }, { "epoch": 0.9397622368333666, "grad_norm": 126.75061798095703, "learning_rate": 1.1456432350186135e-06, "loss": 12.2188, "step": 14150 }, { "epoch": 0.9398286511257222, "grad_norm": 113.6982650756836, "learning_rate": 1.1455368290273847e-06, "loss": 11.7188, "step": 14151 }, { "epoch": 0.939895065418078, "grad_norm": 387.708740234375, "learning_rate": 1.1454304213526702e-06, "loss": 15.0625, "step": 14152 }, { "epoch": 0.9399614797104336, "grad_norm": 346.24127197265625, "learning_rate": 1.1453240119957004e-06, "loss": 21.8594, "step": 14153 }, { "epoch": 0.9400278940027894, "grad_norm": 444.0738830566406, "learning_rate": 1.1452176009577062e-06, "loss": 28.2812, "step": 14154 }, { "epoch": 0.9400943082951451, "grad_norm": 175.964111328125, "learning_rate": 1.1451111882399183e-06, "loss": 19.1094, "step": 14155 }, { "epoch": 0.9401607225875008, "grad_norm": 156.10475158691406, "learning_rate": 1.145004773843568e-06, "loss": 19.2812, "step": 14156 }, { "epoch": 0.9402271368798566, "grad_norm": 211.614013671875, "learning_rate": 1.1448983577698864e-06, "loss": 12.5, "step": 14157 }, { "epoch": 0.9402935511722122, "grad_norm": 199.21282958984375, "learning_rate": 1.144791940020104e-06, "loss": 17.2344, "step": 14158 }, { "epoch": 0.940359965464568, "grad_norm": 189.8286590576172, "learning_rate": 1.1446855205954515e-06, "loss": 18.1719, "step": 14159 }, { "epoch": 0.9404263797569237, "grad_norm": 424.1538391113281, "learning_rate": 1.1445790994971606e-06, "loss": 12.5938, "step": 14160 }, { "epoch": 0.9404927940492794, "grad_norm": 143.7761993408203, "learning_rate": 1.1444726767264623e-06, "loss": 16.9375, "step": 14161 }, { "epoch": 0.9405592083416351, "grad_norm": 168.0339813232422, "learning_rate": 1.144366252284587e-06, "loss": 20.5312, "step": 14162 }, { "epoch": 0.9406256226339909, "grad_norm": 178.03431701660156, "learning_rate": 1.1442598261727663e-06, "loss": 13.4922, "step": 14163 }, { "epoch": 0.9406920369263465, "grad_norm": 198.94989013671875, "learning_rate": 1.144153398392231e-06, "loss": 17.0938, "step": 14164 }, { "epoch": 0.9407584512187023, "grad_norm": 169.5279083251953, "learning_rate": 1.1440469689442125e-06, "loss": 17.7969, "step": 14165 }, { "epoch": 0.9408248655110579, "grad_norm": 188.32786560058594, "learning_rate": 1.1439405378299417e-06, "loss": 17.8906, "step": 14166 }, { "epoch": 0.9408912798034137, "grad_norm": 278.2591247558594, "learning_rate": 1.1438341050506495e-06, "loss": 16.6562, "step": 14167 }, { "epoch": 0.9409576940957695, "grad_norm": 134.8390350341797, "learning_rate": 1.1437276706075676e-06, "loss": 19.7344, "step": 14168 }, { "epoch": 0.9410241083881251, "grad_norm": 245.6970672607422, "learning_rate": 1.1436212345019266e-06, "loss": 13.5938, "step": 14169 }, { "epoch": 0.9410905226804809, "grad_norm": 266.00579833984375, "learning_rate": 1.1435147967349582e-06, "loss": 14.2656, "step": 14170 }, { "epoch": 0.9411569369728365, "grad_norm": 258.2058410644531, "learning_rate": 1.1434083573078936e-06, "loss": 15.6875, "step": 14171 }, { "epoch": 0.9412233512651923, "grad_norm": 221.7283935546875, "learning_rate": 1.1433019162219634e-06, "loss": 18.1719, "step": 14172 }, { "epoch": 0.941289765557548, "grad_norm": 296.70880126953125, "learning_rate": 1.1431954734783995e-06, "loss": 21.0, "step": 14173 }, { "epoch": 0.9413561798499037, "grad_norm": 179.16856384277344, "learning_rate": 1.143089029078433e-06, "loss": 21.7031, "step": 14174 }, { "epoch": 0.9414225941422594, "grad_norm": 242.73199462890625, "learning_rate": 1.1429825830232948e-06, "loss": 13.8594, "step": 14175 }, { "epoch": 0.9414890084346151, "grad_norm": 154.8798828125, "learning_rate": 1.1428761353142169e-06, "loss": 16.0859, "step": 14176 }, { "epoch": 0.9415554227269708, "grad_norm": 188.2205810546875, "learning_rate": 1.14276968595243e-06, "loss": 20.8125, "step": 14177 }, { "epoch": 0.9416218370193266, "grad_norm": 299.6739501953125, "learning_rate": 1.142663234939166e-06, "loss": 19.9375, "step": 14178 }, { "epoch": 0.9416882513116823, "grad_norm": 166.6594696044922, "learning_rate": 1.1425567822756557e-06, "loss": 15.125, "step": 14179 }, { "epoch": 0.941754665604038, "grad_norm": 172.9248809814453, "learning_rate": 1.1424503279631307e-06, "loss": 13.2656, "step": 14180 }, { "epoch": 0.9418210798963937, "grad_norm": 103.96605682373047, "learning_rate": 1.1423438720028225e-06, "loss": 13.5938, "step": 14181 }, { "epoch": 0.9418874941887494, "grad_norm": 333.6945495605469, "learning_rate": 1.1422374143959625e-06, "loss": 14.3125, "step": 14182 }, { "epoch": 0.9419539084811052, "grad_norm": 133.1971435546875, "learning_rate": 1.1421309551437821e-06, "loss": 18.5625, "step": 14183 }, { "epoch": 0.9420203227734608, "grad_norm": 142.37054443359375, "learning_rate": 1.1420244942475127e-06, "loss": 12.7344, "step": 14184 }, { "epoch": 0.9420867370658166, "grad_norm": 189.5361785888672, "learning_rate": 1.141918031708386e-06, "loss": 23.1562, "step": 14185 }, { "epoch": 0.9421531513581722, "grad_norm": 225.86305236816406, "learning_rate": 1.1418115675276335e-06, "loss": 19.5156, "step": 14186 }, { "epoch": 0.942219565650528, "grad_norm": 269.9233703613281, "learning_rate": 1.1417051017064862e-06, "loss": 16.5312, "step": 14187 }, { "epoch": 0.9422859799428837, "grad_norm": 319.04925537109375, "learning_rate": 1.141598634246176e-06, "loss": 24.3125, "step": 14188 }, { "epoch": 0.9423523942352394, "grad_norm": 232.85826110839844, "learning_rate": 1.1414921651479348e-06, "loss": 18.0156, "step": 14189 }, { "epoch": 0.9424188085275952, "grad_norm": 238.3290557861328, "learning_rate": 1.1413856944129935e-06, "loss": 22.0, "step": 14190 }, { "epoch": 0.9424852228199508, "grad_norm": 182.78012084960938, "learning_rate": 1.1412792220425842e-06, "loss": 18.8438, "step": 14191 }, { "epoch": 0.9425516371123066, "grad_norm": 294.09185791015625, "learning_rate": 1.1411727480379382e-06, "loss": 13.0312, "step": 14192 }, { "epoch": 0.9426180514046623, "grad_norm": 152.3224334716797, "learning_rate": 1.1410662724002872e-06, "loss": 17.2344, "step": 14193 }, { "epoch": 0.942684465697018, "grad_norm": 115.99618530273438, "learning_rate": 1.1409597951308631e-06, "loss": 15.5469, "step": 14194 }, { "epoch": 0.9427508799893737, "grad_norm": 222.81187438964844, "learning_rate": 1.1408533162308974e-06, "loss": 17.6719, "step": 14195 }, { "epoch": 0.9428172942817294, "grad_norm": 347.29095458984375, "learning_rate": 1.1407468357016216e-06, "loss": 23.0547, "step": 14196 }, { "epoch": 0.9428837085740851, "grad_norm": 262.23028564453125, "learning_rate": 1.1406403535442675e-06, "loss": 13.0781, "step": 14197 }, { "epoch": 0.9429501228664409, "grad_norm": 222.26136779785156, "learning_rate": 1.140533869760067e-06, "loss": 20.4062, "step": 14198 }, { "epoch": 0.9430165371587965, "grad_norm": 229.5783233642578, "learning_rate": 1.1404273843502517e-06, "loss": 17.6875, "step": 14199 }, { "epoch": 0.9430829514511523, "grad_norm": 286.8330993652344, "learning_rate": 1.1403208973160536e-06, "loss": 15.7031, "step": 14200 }, { "epoch": 0.943149365743508, "grad_norm": 484.3792724609375, "learning_rate": 1.140214408658704e-06, "loss": 15.9531, "step": 14201 }, { "epoch": 0.9432157800358637, "grad_norm": 124.76309204101562, "learning_rate": 1.140107918379435e-06, "loss": 14.4062, "step": 14202 }, { "epoch": 0.9432821943282195, "grad_norm": 145.64190673828125, "learning_rate": 1.1400014264794786e-06, "loss": 17.7812, "step": 14203 }, { "epoch": 0.9433486086205751, "grad_norm": 227.99229431152344, "learning_rate": 1.139894932960066e-06, "loss": 19.6094, "step": 14204 }, { "epoch": 0.9434150229129309, "grad_norm": 256.0626525878906, "learning_rate": 1.1397884378224296e-06, "loss": 16.4375, "step": 14205 }, { "epoch": 0.9434814372052865, "grad_norm": 401.8974304199219, "learning_rate": 1.1396819410678015e-06, "loss": 15.6875, "step": 14206 }, { "epoch": 0.9435478514976423, "grad_norm": 216.23458862304688, "learning_rate": 1.1395754426974132e-06, "loss": 16.4375, "step": 14207 }, { "epoch": 0.943614265789998, "grad_norm": 243.47337341308594, "learning_rate": 1.1394689427124964e-06, "loss": 17.8281, "step": 14208 }, { "epoch": 0.9436806800823537, "grad_norm": 108.87731170654297, "learning_rate": 1.1393624411142832e-06, "loss": 10.1094, "step": 14209 }, { "epoch": 0.9437470943747094, "grad_norm": 170.53558349609375, "learning_rate": 1.139255937904006e-06, "loss": 14.7656, "step": 14210 }, { "epoch": 0.9438135086670651, "grad_norm": 381.49554443359375, "learning_rate": 1.1391494330828965e-06, "loss": 22.4531, "step": 14211 }, { "epoch": 0.9438799229594209, "grad_norm": 200.71475219726562, "learning_rate": 1.1390429266521862e-06, "loss": 19.0469, "step": 14212 }, { "epoch": 0.9439463372517766, "grad_norm": 270.0024108886719, "learning_rate": 1.1389364186131076e-06, "loss": 17.4375, "step": 14213 }, { "epoch": 0.9440127515441323, "grad_norm": 227.09521484375, "learning_rate": 1.1388299089668928e-06, "loss": 18.375, "step": 14214 }, { "epoch": 0.944079165836488, "grad_norm": 155.17727661132812, "learning_rate": 1.1387233977147737e-06, "loss": 16.6094, "step": 14215 }, { "epoch": 0.9441455801288438, "grad_norm": 156.0037384033203, "learning_rate": 1.138616884857982e-06, "loss": 11.9531, "step": 14216 }, { "epoch": 0.9442119944211994, "grad_norm": 292.6156005859375, "learning_rate": 1.1385103703977505e-06, "loss": 17.6562, "step": 14217 }, { "epoch": 0.9442784087135552, "grad_norm": 321.14593505859375, "learning_rate": 1.1384038543353107e-06, "loss": 13.4688, "step": 14218 }, { "epoch": 0.9443448230059108, "grad_norm": 269.60614013671875, "learning_rate": 1.138297336671895e-06, "loss": 22.6406, "step": 14219 }, { "epoch": 0.9444112372982666, "grad_norm": 146.09054565429688, "learning_rate": 1.1381908174087351e-06, "loss": 17.9219, "step": 14220 }, { "epoch": 0.9444776515906222, "grad_norm": 269.7774353027344, "learning_rate": 1.138084296547064e-06, "loss": 14.5469, "step": 14221 }, { "epoch": 0.944544065882978, "grad_norm": 198.03282165527344, "learning_rate": 1.137977774088113e-06, "loss": 20.2812, "step": 14222 }, { "epoch": 0.9446104801753338, "grad_norm": 261.4814758300781, "learning_rate": 1.137871250033115e-06, "loss": 17.5312, "step": 14223 }, { "epoch": 0.9446768944676894, "grad_norm": 158.2006378173828, "learning_rate": 1.1377647243833018e-06, "loss": 13.25, "step": 14224 }, { "epoch": 0.9447433087600452, "grad_norm": 397.2091369628906, "learning_rate": 1.1376581971399055e-06, "loss": 16.9219, "step": 14225 }, { "epoch": 0.9448097230524009, "grad_norm": 899.2146606445312, "learning_rate": 1.1375516683041587e-06, "loss": 13.6719, "step": 14226 }, { "epoch": 0.9448761373447566, "grad_norm": 179.33432006835938, "learning_rate": 1.1374451378772937e-06, "loss": 15.3125, "step": 14227 }, { "epoch": 0.9449425516371123, "grad_norm": 441.4417724609375, "learning_rate": 1.1373386058605423e-06, "loss": 23.9062, "step": 14228 }, { "epoch": 0.945008965929468, "grad_norm": 245.07308959960938, "learning_rate": 1.137232072255137e-06, "loss": 19.5781, "step": 14229 }, { "epoch": 0.9450753802218237, "grad_norm": 276.45953369140625, "learning_rate": 1.1371255370623106e-06, "loss": 22.1562, "step": 14230 }, { "epoch": 0.9451417945141795, "grad_norm": 168.90699768066406, "learning_rate": 1.1370190002832946e-06, "loss": 14.4688, "step": 14231 }, { "epoch": 0.9452082088065352, "grad_norm": 167.66615295410156, "learning_rate": 1.1369124619193224e-06, "loss": 21.9062, "step": 14232 }, { "epoch": 0.9452746230988909, "grad_norm": 120.79503631591797, "learning_rate": 1.1368059219716254e-06, "loss": 14.1719, "step": 14233 }, { "epoch": 0.9453410373912466, "grad_norm": 282.6758728027344, "learning_rate": 1.1366993804414364e-06, "loss": 22.0938, "step": 14234 }, { "epoch": 0.9454074516836023, "grad_norm": 133.1244354248047, "learning_rate": 1.1365928373299882e-06, "loss": 19.1875, "step": 14235 }, { "epoch": 0.9454738659759581, "grad_norm": 124.32431030273438, "learning_rate": 1.1364862926385124e-06, "loss": 13.5156, "step": 14236 }, { "epoch": 0.9455402802683137, "grad_norm": 255.2411651611328, "learning_rate": 1.136379746368242e-06, "loss": 16.6875, "step": 14237 }, { "epoch": 0.9456066945606695, "grad_norm": 199.64820861816406, "learning_rate": 1.136273198520409e-06, "loss": 16.2969, "step": 14238 }, { "epoch": 0.9456731088530251, "grad_norm": 293.2633361816406, "learning_rate": 1.1361666490962467e-06, "loss": 15.5938, "step": 14239 }, { "epoch": 0.9457395231453809, "grad_norm": 151.40072631835938, "learning_rate": 1.1360600980969869e-06, "loss": 15.2188, "step": 14240 }, { "epoch": 0.9458059374377366, "grad_norm": 220.5587158203125, "learning_rate": 1.1359535455238623e-06, "loss": 16.1797, "step": 14241 }, { "epoch": 0.9458723517300923, "grad_norm": 241.66995239257812, "learning_rate": 1.1358469913781054e-06, "loss": 21.9375, "step": 14242 }, { "epoch": 0.9459387660224481, "grad_norm": 369.0885009765625, "learning_rate": 1.135740435660949e-06, "loss": 16.0469, "step": 14243 }, { "epoch": 0.9460051803148037, "grad_norm": 121.56546783447266, "learning_rate": 1.1356338783736254e-06, "loss": 15.4062, "step": 14244 }, { "epoch": 0.9460715946071595, "grad_norm": 179.8040313720703, "learning_rate": 1.1355273195173676e-06, "loss": 15.3906, "step": 14245 }, { "epoch": 0.9461380088995152, "grad_norm": 172.0005645751953, "learning_rate": 1.1354207590934078e-06, "loss": 15.3203, "step": 14246 }, { "epoch": 0.9462044231918709, "grad_norm": 197.2164764404297, "learning_rate": 1.1353141971029786e-06, "loss": 18.2656, "step": 14247 }, { "epoch": 0.9462708374842266, "grad_norm": 245.95620727539062, "learning_rate": 1.1352076335473129e-06, "loss": 15.9219, "step": 14248 }, { "epoch": 0.9463372517765823, "grad_norm": 930.0925903320312, "learning_rate": 1.1351010684276432e-06, "loss": 16.0781, "step": 14249 }, { "epoch": 0.946403666068938, "grad_norm": 389.22552490234375, "learning_rate": 1.1349945017452022e-06, "loss": 15.4375, "step": 14250 }, { "epoch": 0.9464700803612938, "grad_norm": 171.7136688232422, "learning_rate": 1.1348879335012227e-06, "loss": 14.5156, "step": 14251 }, { "epoch": 0.9465364946536494, "grad_norm": 212.9664764404297, "learning_rate": 1.1347813636969374e-06, "loss": 19.9844, "step": 14252 }, { "epoch": 0.9466029089460052, "grad_norm": 240.2056427001953, "learning_rate": 1.134674792333579e-06, "loss": 14.4375, "step": 14253 }, { "epoch": 0.946669323238361, "grad_norm": 97.2503433227539, "learning_rate": 1.1345682194123805e-06, "loss": 14.2188, "step": 14254 }, { "epoch": 0.9467357375307166, "grad_norm": 281.94970703125, "learning_rate": 1.1344616449345741e-06, "loss": 20.25, "step": 14255 }, { "epoch": 0.9468021518230724, "grad_norm": 201.83433532714844, "learning_rate": 1.134355068901393e-06, "loss": 16.125, "step": 14256 }, { "epoch": 0.946868566115428, "grad_norm": 191.5196533203125, "learning_rate": 1.13424849131407e-06, "loss": 17.6406, "step": 14257 }, { "epoch": 0.9469349804077838, "grad_norm": 161.463623046875, "learning_rate": 1.134141912173838e-06, "loss": 17.7031, "step": 14258 }, { "epoch": 0.9470013947001394, "grad_norm": 210.11880493164062, "learning_rate": 1.1340353314819291e-06, "loss": 22.6875, "step": 14259 }, { "epoch": 0.9470678089924952, "grad_norm": 149.11529541015625, "learning_rate": 1.1339287492395774e-06, "loss": 19.5469, "step": 14260 }, { "epoch": 0.9471342232848509, "grad_norm": 143.45777893066406, "learning_rate": 1.1338221654480149e-06, "loss": 13.9531, "step": 14261 }, { "epoch": 0.9472006375772066, "grad_norm": 123.5479736328125, "learning_rate": 1.1337155801084749e-06, "loss": 13.8281, "step": 14262 }, { "epoch": 0.9472670518695623, "grad_norm": 106.81552124023438, "learning_rate": 1.13360899322219e-06, "loss": 16.0469, "step": 14263 }, { "epoch": 0.947333466161918, "grad_norm": 192.22964477539062, "learning_rate": 1.1335024047903934e-06, "loss": 17.8438, "step": 14264 }, { "epoch": 0.9473998804542738, "grad_norm": 174.53126525878906, "learning_rate": 1.1333958148143179e-06, "loss": 14.5312, "step": 14265 }, { "epoch": 0.9474662947466295, "grad_norm": 544.4833984375, "learning_rate": 1.1332892232951965e-06, "loss": 16.6719, "step": 14266 }, { "epoch": 0.9475327090389852, "grad_norm": 283.65313720703125, "learning_rate": 1.1331826302342627e-06, "loss": 20.625, "step": 14267 }, { "epoch": 0.9475991233313409, "grad_norm": 220.37982177734375, "learning_rate": 1.1330760356327484e-06, "loss": 20.0312, "step": 14268 }, { "epoch": 0.9476655376236967, "grad_norm": 226.0614776611328, "learning_rate": 1.1329694394918875e-06, "loss": 17.7031, "step": 14269 }, { "epoch": 0.9477319519160523, "grad_norm": 322.075927734375, "learning_rate": 1.1328628418129127e-06, "loss": 22.7188, "step": 14270 }, { "epoch": 0.9477983662084081, "grad_norm": 177.599609375, "learning_rate": 1.1327562425970575e-06, "loss": 12.3984, "step": 14271 }, { "epoch": 0.9478647805007637, "grad_norm": 677.0182495117188, "learning_rate": 1.1326496418455542e-06, "loss": 24.2344, "step": 14272 }, { "epoch": 0.9479311947931195, "grad_norm": 203.55958557128906, "learning_rate": 1.1325430395596365e-06, "loss": 18.2031, "step": 14273 }, { "epoch": 0.9479976090854751, "grad_norm": 226.75225830078125, "learning_rate": 1.1324364357405373e-06, "loss": 14.4062, "step": 14274 }, { "epoch": 0.9480640233778309, "grad_norm": 369.0704345703125, "learning_rate": 1.13232983038949e-06, "loss": 24.625, "step": 14275 }, { "epoch": 0.9481304376701867, "grad_norm": 294.0438232421875, "learning_rate": 1.1322232235077276e-06, "loss": 17.2031, "step": 14276 }, { "epoch": 0.9481968519625423, "grad_norm": 170.17381286621094, "learning_rate": 1.1321166150964827e-06, "loss": 21.5938, "step": 14277 }, { "epoch": 0.9482632662548981, "grad_norm": 267.9087219238281, "learning_rate": 1.1320100051569892e-06, "loss": 23.9062, "step": 14278 }, { "epoch": 0.9483296805472538, "grad_norm": 233.4241180419922, "learning_rate": 1.1319033936904803e-06, "loss": 14.4219, "step": 14279 }, { "epoch": 0.9483960948396095, "grad_norm": 633.11767578125, "learning_rate": 1.1317967806981888e-06, "loss": 28.5469, "step": 14280 }, { "epoch": 0.9484625091319652, "grad_norm": 301.4453430175781, "learning_rate": 1.1316901661813483e-06, "loss": 13.875, "step": 14281 }, { "epoch": 0.9485289234243209, "grad_norm": 241.01181030273438, "learning_rate": 1.1315835501411916e-06, "loss": 16.6875, "step": 14282 }, { "epoch": 0.9485953377166766, "grad_norm": 255.74989318847656, "learning_rate": 1.1314769325789524e-06, "loss": 17.0312, "step": 14283 }, { "epoch": 0.9486617520090324, "grad_norm": 212.16981506347656, "learning_rate": 1.131370313495864e-06, "loss": 19.4062, "step": 14284 }, { "epoch": 0.948728166301388, "grad_norm": 393.415283203125, "learning_rate": 1.1312636928931593e-06, "loss": 22.2188, "step": 14285 }, { "epoch": 0.9487945805937438, "grad_norm": 462.5027160644531, "learning_rate": 1.1311570707720722e-06, "loss": 17.9531, "step": 14286 }, { "epoch": 0.9488609948860995, "grad_norm": 480.5608825683594, "learning_rate": 1.1310504471338356e-06, "loss": 19.3906, "step": 14287 }, { "epoch": 0.9489274091784552, "grad_norm": 201.53378295898438, "learning_rate": 1.1309438219796833e-06, "loss": 18.3281, "step": 14288 }, { "epoch": 0.948993823470811, "grad_norm": 160.7758331298828, "learning_rate": 1.1308371953108477e-06, "loss": 18.7969, "step": 14289 }, { "epoch": 0.9490602377631666, "grad_norm": 329.2186279296875, "learning_rate": 1.1307305671285631e-06, "loss": 18.7188, "step": 14290 }, { "epoch": 0.9491266520555224, "grad_norm": 228.73727416992188, "learning_rate": 1.130623937434063e-06, "loss": 18.9688, "step": 14291 }, { "epoch": 0.949193066347878, "grad_norm": 139.76637268066406, "learning_rate": 1.1305173062285804e-06, "loss": 13.7969, "step": 14292 }, { "epoch": 0.9492594806402338, "grad_norm": 671.1566162109375, "learning_rate": 1.1304106735133486e-06, "loss": 14.1875, "step": 14293 }, { "epoch": 0.9493258949325895, "grad_norm": 203.92648315429688, "learning_rate": 1.1303040392896016e-06, "loss": 17.0156, "step": 14294 }, { "epoch": 0.9493923092249452, "grad_norm": 232.5235137939453, "learning_rate": 1.1301974035585725e-06, "loss": 18.0625, "step": 14295 }, { "epoch": 0.9494587235173009, "grad_norm": 314.39471435546875, "learning_rate": 1.1300907663214951e-06, "loss": 18.0156, "step": 14296 }, { "epoch": 0.9495251378096566, "grad_norm": 240.2358856201172, "learning_rate": 1.1299841275796025e-06, "loss": 16.9844, "step": 14297 }, { "epoch": 0.9495915521020124, "grad_norm": 141.18984985351562, "learning_rate": 1.1298774873341286e-06, "loss": 11.2812, "step": 14298 }, { "epoch": 0.9496579663943681, "grad_norm": 208.39361572265625, "learning_rate": 1.1297708455863069e-06, "loss": 16.8438, "step": 14299 }, { "epoch": 0.9497243806867238, "grad_norm": 328.87249755859375, "learning_rate": 1.1296642023373706e-06, "loss": 21.5156, "step": 14300 }, { "epoch": 0.9497907949790795, "grad_norm": 488.5047607421875, "learning_rate": 1.1295575575885537e-06, "loss": 17.6406, "step": 14301 }, { "epoch": 0.9498572092714352, "grad_norm": 597.4931030273438, "learning_rate": 1.1294509113410894e-06, "loss": 17.1719, "step": 14302 }, { "epoch": 0.9499236235637909, "grad_norm": 239.40565490722656, "learning_rate": 1.1293442635962122e-06, "loss": 13.5312, "step": 14303 }, { "epoch": 0.9499900378561467, "grad_norm": 379.60723876953125, "learning_rate": 1.129237614355155e-06, "loss": 19.7188, "step": 14304 }, { "epoch": 0.9500564521485023, "grad_norm": 297.3790588378906, "learning_rate": 1.1291309636191513e-06, "loss": 20.0469, "step": 14305 }, { "epoch": 0.9501228664408581, "grad_norm": 137.80284118652344, "learning_rate": 1.1290243113894352e-06, "loss": 16.9062, "step": 14306 }, { "epoch": 0.9501892807332137, "grad_norm": 118.62454986572266, "learning_rate": 1.1289176576672404e-06, "loss": 16.0, "step": 14307 }, { "epoch": 0.9502556950255695, "grad_norm": 193.44667053222656, "learning_rate": 1.1288110024538002e-06, "loss": 20.1406, "step": 14308 }, { "epoch": 0.9503221093179253, "grad_norm": 183.2318115234375, "learning_rate": 1.1287043457503487e-06, "loss": 15.9844, "step": 14309 }, { "epoch": 0.9503885236102809, "grad_norm": 194.73353576660156, "learning_rate": 1.1285976875581198e-06, "loss": 20.4844, "step": 14310 }, { "epoch": 0.9504549379026367, "grad_norm": 371.08544921875, "learning_rate": 1.1284910278783467e-06, "loss": 15.2656, "step": 14311 }, { "epoch": 0.9505213521949923, "grad_norm": 151.78851318359375, "learning_rate": 1.1283843667122638e-06, "loss": 17.4688, "step": 14312 }, { "epoch": 0.9505877664873481, "grad_norm": 210.04544067382812, "learning_rate": 1.1282777040611042e-06, "loss": 16.5625, "step": 14313 }, { "epoch": 0.9506541807797038, "grad_norm": 142.51950073242188, "learning_rate": 1.1281710399261021e-06, "loss": 15.8594, "step": 14314 }, { "epoch": 0.9507205950720595, "grad_norm": 131.86370849609375, "learning_rate": 1.1280643743084915e-06, "loss": 12.1406, "step": 14315 }, { "epoch": 0.9507870093644152, "grad_norm": 195.21871948242188, "learning_rate": 1.1279577072095058e-06, "loss": 16.7422, "step": 14316 }, { "epoch": 0.950853423656771, "grad_norm": 133.58224487304688, "learning_rate": 1.1278510386303796e-06, "loss": 18.4219, "step": 14317 }, { "epoch": 0.9509198379491266, "grad_norm": 240.29180908203125, "learning_rate": 1.127744368572346e-06, "loss": 18.5781, "step": 14318 }, { "epoch": 0.9509862522414824, "grad_norm": 502.42547607421875, "learning_rate": 1.1276376970366393e-06, "loss": 16.0938, "step": 14319 }, { "epoch": 0.9510526665338381, "grad_norm": 299.403076171875, "learning_rate": 1.1275310240244935e-06, "loss": 18.3438, "step": 14320 }, { "epoch": 0.9511190808261938, "grad_norm": 229.15084838867188, "learning_rate": 1.127424349537142e-06, "loss": 18.4844, "step": 14321 }, { "epoch": 0.9511854951185496, "grad_norm": 190.27694702148438, "learning_rate": 1.1273176735758192e-06, "loss": 17.3594, "step": 14322 }, { "epoch": 0.9512519094109052, "grad_norm": 248.21897888183594, "learning_rate": 1.127210996141759e-06, "loss": 20.2344, "step": 14323 }, { "epoch": 0.951318323703261, "grad_norm": 279.42596435546875, "learning_rate": 1.1271043172361956e-06, "loss": 15.7969, "step": 14324 }, { "epoch": 0.9513847379956166, "grad_norm": 117.60537719726562, "learning_rate": 1.1269976368603624e-06, "loss": 12.4297, "step": 14325 }, { "epoch": 0.9514511522879724, "grad_norm": 217.7826690673828, "learning_rate": 1.126890955015494e-06, "loss": 15.9531, "step": 14326 }, { "epoch": 0.951517566580328, "grad_norm": 212.05233764648438, "learning_rate": 1.126784271702824e-06, "loss": 19.3906, "step": 14327 }, { "epoch": 0.9515839808726838, "grad_norm": 389.9716491699219, "learning_rate": 1.1266775869235867e-06, "loss": 16.2656, "step": 14328 }, { "epoch": 0.9516503951650395, "grad_norm": 201.62049865722656, "learning_rate": 1.1265709006790164e-06, "loss": 16.8906, "step": 14329 }, { "epoch": 0.9517168094573952, "grad_norm": 193.47512817382812, "learning_rate": 1.1264642129703462e-06, "loss": 17.1875, "step": 14330 }, { "epoch": 0.951783223749751, "grad_norm": 207.57406616210938, "learning_rate": 1.1263575237988119e-06, "loss": 13.9531, "step": 14331 }, { "epoch": 0.9518496380421066, "grad_norm": 328.551513671875, "learning_rate": 1.126250833165646e-06, "loss": 13.4062, "step": 14332 }, { "epoch": 0.9519160523344624, "grad_norm": 360.8406982421875, "learning_rate": 1.1261441410720834e-06, "loss": 20.0, "step": 14333 }, { "epoch": 0.9519824666268181, "grad_norm": 159.28176879882812, "learning_rate": 1.126037447519358e-06, "loss": 24.4375, "step": 14334 }, { "epoch": 0.9520488809191738, "grad_norm": 174.93276977539062, "learning_rate": 1.1259307525087043e-06, "loss": 27.5625, "step": 14335 }, { "epoch": 0.9521152952115295, "grad_norm": 563.9594116210938, "learning_rate": 1.1258240560413559e-06, "loss": 22.8203, "step": 14336 }, { "epoch": 0.9521817095038853, "grad_norm": 310.6063232421875, "learning_rate": 1.1257173581185475e-06, "loss": 12.8438, "step": 14337 }, { "epoch": 0.9522481237962409, "grad_norm": 203.391845703125, "learning_rate": 1.1256106587415134e-06, "loss": 15.7969, "step": 14338 }, { "epoch": 0.9523145380885967, "grad_norm": 254.57679748535156, "learning_rate": 1.1255039579114878e-06, "loss": 16.3438, "step": 14339 }, { "epoch": 0.9523809523809523, "grad_norm": 184.48829650878906, "learning_rate": 1.1253972556297044e-06, "loss": 19.1562, "step": 14340 }, { "epoch": 0.9524473666733081, "grad_norm": 156.06358337402344, "learning_rate": 1.125290551897398e-06, "loss": 14.7031, "step": 14341 }, { "epoch": 0.9525137809656639, "grad_norm": 244.2390899658203, "learning_rate": 1.1251838467158029e-06, "loss": 15.7344, "step": 14342 }, { "epoch": 0.9525801952580195, "grad_norm": 147.09446716308594, "learning_rate": 1.125077140086153e-06, "loss": 18.8125, "step": 14343 }, { "epoch": 0.9526466095503753, "grad_norm": 391.9620056152344, "learning_rate": 1.1249704320096826e-06, "loss": 24.4688, "step": 14344 }, { "epoch": 0.9527130238427309, "grad_norm": 176.88780212402344, "learning_rate": 1.1248637224876266e-06, "loss": 25.5156, "step": 14345 }, { "epoch": 0.9527794381350867, "grad_norm": 509.8473205566406, "learning_rate": 1.1247570115212194e-06, "loss": 16.4062, "step": 14346 }, { "epoch": 0.9528458524274424, "grad_norm": 524.783203125, "learning_rate": 1.1246502991116947e-06, "loss": 22.4219, "step": 14347 }, { "epoch": 0.9529122667197981, "grad_norm": 338.98480224609375, "learning_rate": 1.124543585260287e-06, "loss": 15.6875, "step": 14348 }, { "epoch": 0.9529786810121538, "grad_norm": 208.49891662597656, "learning_rate": 1.124436869968231e-06, "loss": 15.1406, "step": 14349 }, { "epoch": 0.9530450953045095, "grad_norm": 235.98512268066406, "learning_rate": 1.1243301532367618e-06, "loss": 17.625, "step": 14350 }, { "epoch": 0.9531115095968652, "grad_norm": 193.46498107910156, "learning_rate": 1.124223435067112e-06, "loss": 9.8672, "step": 14351 }, { "epoch": 0.953177923889221, "grad_norm": 285.25604248046875, "learning_rate": 1.1241167154605178e-06, "loss": 17.7969, "step": 14352 }, { "epoch": 0.9532443381815767, "grad_norm": 560.3789672851562, "learning_rate": 1.1240099944182128e-06, "loss": 25.5156, "step": 14353 }, { "epoch": 0.9533107524739324, "grad_norm": 208.39012145996094, "learning_rate": 1.1239032719414317e-06, "loss": 16.7969, "step": 14354 }, { "epoch": 0.9533771667662881, "grad_norm": 233.98927307128906, "learning_rate": 1.123796548031409e-06, "loss": 20.7969, "step": 14355 }, { "epoch": 0.9534435810586438, "grad_norm": 187.68666076660156, "learning_rate": 1.1236898226893794e-06, "loss": 17.625, "step": 14356 }, { "epoch": 0.9535099953509996, "grad_norm": 216.9363555908203, "learning_rate": 1.123583095916577e-06, "loss": 18.5625, "step": 14357 }, { "epoch": 0.9535764096433552, "grad_norm": 210.31427001953125, "learning_rate": 1.1234763677142367e-06, "loss": 16.7812, "step": 14358 }, { "epoch": 0.953642823935711, "grad_norm": 323.1219177246094, "learning_rate": 1.123369638083593e-06, "loss": 18.0938, "step": 14359 }, { "epoch": 0.9537092382280666, "grad_norm": 131.6094207763672, "learning_rate": 1.1232629070258806e-06, "loss": 16.4531, "step": 14360 }, { "epoch": 0.9537756525204224, "grad_norm": 171.18450927734375, "learning_rate": 1.1231561745423338e-06, "loss": 18.1562, "step": 14361 }, { "epoch": 0.953842066812778, "grad_norm": 179.3366241455078, "learning_rate": 1.123049440634187e-06, "loss": 16.0625, "step": 14362 }, { "epoch": 0.9539084811051338, "grad_norm": 160.78968811035156, "learning_rate": 1.1229427053026759e-06, "loss": 19.4062, "step": 14363 }, { "epoch": 0.9539748953974896, "grad_norm": 331.6112060546875, "learning_rate": 1.1228359685490344e-06, "loss": 17.0, "step": 14364 }, { "epoch": 0.9540413096898452, "grad_norm": 178.14613342285156, "learning_rate": 1.1227292303744968e-06, "loss": 15.0156, "step": 14365 }, { "epoch": 0.954107723982201, "grad_norm": 209.24603271484375, "learning_rate": 1.1226224907802983e-06, "loss": 16.9062, "step": 14366 }, { "epoch": 0.9541741382745567, "grad_norm": 301.8386535644531, "learning_rate": 1.1225157497676741e-06, "loss": 20.3125, "step": 14367 }, { "epoch": 0.9542405525669124, "grad_norm": 196.210205078125, "learning_rate": 1.1224090073378577e-06, "loss": 20.1094, "step": 14368 }, { "epoch": 0.9543069668592681, "grad_norm": 242.15484619140625, "learning_rate": 1.122302263492085e-06, "loss": 17.5625, "step": 14369 }, { "epoch": 0.9543733811516238, "grad_norm": 142.36497497558594, "learning_rate": 1.1221955182315897e-06, "loss": 12.1953, "step": 14370 }, { "epoch": 0.9544397954439795, "grad_norm": 343.289306640625, "learning_rate": 1.1220887715576078e-06, "loss": 23.4062, "step": 14371 }, { "epoch": 0.9545062097363353, "grad_norm": 153.37608337402344, "learning_rate": 1.121982023471373e-06, "loss": 16.4062, "step": 14372 }, { "epoch": 0.9545726240286909, "grad_norm": 262.213623046875, "learning_rate": 1.1218752739741203e-06, "loss": 22.3906, "step": 14373 }, { "epoch": 0.9546390383210467, "grad_norm": 213.29330444335938, "learning_rate": 1.121768523067085e-06, "loss": 17.8125, "step": 14374 }, { "epoch": 0.9547054526134024, "grad_norm": 232.7063751220703, "learning_rate": 1.1216617707515014e-06, "loss": 17.9531, "step": 14375 }, { "epoch": 0.9547718669057581, "grad_norm": 167.50466918945312, "learning_rate": 1.1215550170286047e-06, "loss": 20.9688, "step": 14376 }, { "epoch": 0.9548382811981139, "grad_norm": 304.90887451171875, "learning_rate": 1.1214482618996297e-06, "loss": 16.75, "step": 14377 }, { "epoch": 0.9549046954904695, "grad_norm": 251.2882537841797, "learning_rate": 1.1213415053658114e-06, "loss": 18.1875, "step": 14378 }, { "epoch": 0.9549711097828253, "grad_norm": 216.56344604492188, "learning_rate": 1.1212347474283843e-06, "loss": 14.2031, "step": 14379 }, { "epoch": 0.9550375240751809, "grad_norm": 193.27694702148438, "learning_rate": 1.1211279880885836e-06, "loss": 16.0781, "step": 14380 }, { "epoch": 0.9551039383675367, "grad_norm": 130.1464385986328, "learning_rate": 1.1210212273476443e-06, "loss": 16.6094, "step": 14381 }, { "epoch": 0.9551703526598924, "grad_norm": 151.57452392578125, "learning_rate": 1.120914465206801e-06, "loss": 18.2188, "step": 14382 }, { "epoch": 0.9552367669522481, "grad_norm": 153.8306427001953, "learning_rate": 1.120807701667289e-06, "loss": 16.8438, "step": 14383 }, { "epoch": 0.9553031812446039, "grad_norm": 244.4873809814453, "learning_rate": 1.120700936730343e-06, "loss": 21.8125, "step": 14384 }, { "epoch": 0.9553695955369595, "grad_norm": 227.8756561279297, "learning_rate": 1.1205941703971987e-06, "loss": 18.8438, "step": 14385 }, { "epoch": 0.9554360098293153, "grad_norm": 220.88119506835938, "learning_rate": 1.1204874026690901e-06, "loss": 16.7656, "step": 14386 }, { "epoch": 0.955502424121671, "grad_norm": 203.4777069091797, "learning_rate": 1.1203806335472526e-06, "loss": 19.7031, "step": 14387 }, { "epoch": 0.9555688384140267, "grad_norm": 92.63526916503906, "learning_rate": 1.1202738630329217e-06, "loss": 10.9375, "step": 14388 }, { "epoch": 0.9556352527063824, "grad_norm": 282.924072265625, "learning_rate": 1.1201670911273318e-06, "loss": 15.9688, "step": 14389 }, { "epoch": 0.9557016669987382, "grad_norm": 280.82366943359375, "learning_rate": 1.1200603178317182e-06, "loss": 19.75, "step": 14390 }, { "epoch": 0.9557680812910938, "grad_norm": 240.92210388183594, "learning_rate": 1.1199535431473164e-06, "loss": 20.4219, "step": 14391 }, { "epoch": 0.9558344955834496, "grad_norm": 239.9224090576172, "learning_rate": 1.1198467670753614e-06, "loss": 17.3906, "step": 14392 }, { "epoch": 0.9559009098758052, "grad_norm": 211.81101989746094, "learning_rate": 1.1197399896170876e-06, "loss": 14.6875, "step": 14393 }, { "epoch": 0.955967324168161, "grad_norm": 160.7742919921875, "learning_rate": 1.119633210773731e-06, "loss": 12.3594, "step": 14394 }, { "epoch": 0.9560337384605168, "grad_norm": 398.69573974609375, "learning_rate": 1.1195264305465264e-06, "loss": 19.8125, "step": 14395 }, { "epoch": 0.9561001527528724, "grad_norm": 456.6498107910156, "learning_rate": 1.1194196489367087e-06, "loss": 17.75, "step": 14396 }, { "epoch": 0.9561665670452282, "grad_norm": 222.46621704101562, "learning_rate": 1.1193128659455132e-06, "loss": 13.6562, "step": 14397 }, { "epoch": 0.9562329813375838, "grad_norm": 483.9731750488281, "learning_rate": 1.1192060815741756e-06, "loss": 15.5312, "step": 14398 }, { "epoch": 0.9562993956299396, "grad_norm": 378.1305236816406, "learning_rate": 1.119099295823931e-06, "loss": 25.0469, "step": 14399 }, { "epoch": 0.9563658099222953, "grad_norm": 470.6154479980469, "learning_rate": 1.118992508696014e-06, "loss": 19.1562, "step": 14400 }, { "epoch": 0.956432224214651, "grad_norm": 164.12672424316406, "learning_rate": 1.1188857201916605e-06, "loss": 16.4219, "step": 14401 }, { "epoch": 0.9564986385070067, "grad_norm": 137.64669799804688, "learning_rate": 1.1187789303121056e-06, "loss": 17.4844, "step": 14402 }, { "epoch": 0.9565650527993624, "grad_norm": 95.62623596191406, "learning_rate": 1.1186721390585844e-06, "loss": 16.3906, "step": 14403 }, { "epoch": 0.9566314670917181, "grad_norm": 318.9255065917969, "learning_rate": 1.1185653464323324e-06, "loss": 16.125, "step": 14404 }, { "epoch": 0.9566978813840739, "grad_norm": 252.72508239746094, "learning_rate": 1.1184585524345846e-06, "loss": 16.7031, "step": 14405 }, { "epoch": 0.9567642956764296, "grad_norm": 418.1126403808594, "learning_rate": 1.118351757066577e-06, "loss": 21.0, "step": 14406 }, { "epoch": 0.9568307099687853, "grad_norm": 299.3466491699219, "learning_rate": 1.1182449603295443e-06, "loss": 17.4062, "step": 14407 }, { "epoch": 0.956897124261141, "grad_norm": 175.31021118164062, "learning_rate": 1.118138162224722e-06, "loss": 17.4688, "step": 14408 }, { "epoch": 0.9569635385534967, "grad_norm": 226.1111297607422, "learning_rate": 1.1180313627533456e-06, "loss": 22.9688, "step": 14409 }, { "epoch": 0.9570299528458525, "grad_norm": 485.56005859375, "learning_rate": 1.1179245619166506e-06, "loss": 14.0, "step": 14410 }, { "epoch": 0.9570963671382081, "grad_norm": 266.1864318847656, "learning_rate": 1.117817759715872e-06, "loss": 21.6875, "step": 14411 }, { "epoch": 0.9571627814305639, "grad_norm": 280.4039001464844, "learning_rate": 1.1177109561522455e-06, "loss": 25.1875, "step": 14412 }, { "epoch": 0.9572291957229195, "grad_norm": 177.60813903808594, "learning_rate": 1.117604151227007e-06, "loss": 12.9609, "step": 14413 }, { "epoch": 0.9572956100152753, "grad_norm": 102.20764923095703, "learning_rate": 1.1174973449413911e-06, "loss": 10.5625, "step": 14414 }, { "epoch": 0.957362024307631, "grad_norm": 619.89208984375, "learning_rate": 1.1173905372966337e-06, "loss": 16.8906, "step": 14415 }, { "epoch": 0.9574284385999867, "grad_norm": 105.77445983886719, "learning_rate": 1.1172837282939701e-06, "loss": 20.7812, "step": 14416 }, { "epoch": 0.9574948528923425, "grad_norm": 122.76644897460938, "learning_rate": 1.1171769179346364e-06, "loss": 15.75, "step": 14417 }, { "epoch": 0.9575612671846981, "grad_norm": 365.9790954589844, "learning_rate": 1.1170701062198674e-06, "loss": 20.0625, "step": 14418 }, { "epoch": 0.9576276814770539, "grad_norm": 262.94757080078125, "learning_rate": 1.1169632931508987e-06, "loss": 14.9062, "step": 14419 }, { "epoch": 0.9576940957694096, "grad_norm": 194.2696075439453, "learning_rate": 1.1168564787289664e-06, "loss": 15.5156, "step": 14420 }, { "epoch": 0.9577605100617653, "grad_norm": 196.62489318847656, "learning_rate": 1.1167496629553058e-06, "loss": 13.2344, "step": 14421 }, { "epoch": 0.957826924354121, "grad_norm": 208.8585205078125, "learning_rate": 1.1166428458311523e-06, "loss": 17.0625, "step": 14422 }, { "epoch": 0.9578933386464767, "grad_norm": 158.70394897460938, "learning_rate": 1.1165360273577416e-06, "loss": 14.0156, "step": 14423 }, { "epoch": 0.9579597529388324, "grad_norm": 391.0941467285156, "learning_rate": 1.116429207536309e-06, "loss": 16.8281, "step": 14424 }, { "epoch": 0.9580261672311882, "grad_norm": 313.27374267578125, "learning_rate": 1.1163223863680908e-06, "loss": 13.3125, "step": 14425 }, { "epoch": 0.9580925815235438, "grad_norm": 421.6903991699219, "learning_rate": 1.1162155638543223e-06, "loss": 17.4375, "step": 14426 }, { "epoch": 0.9581589958158996, "grad_norm": 505.62298583984375, "learning_rate": 1.1161087399962395e-06, "loss": 28.2812, "step": 14427 }, { "epoch": 0.9582254101082553, "grad_norm": 156.10618591308594, "learning_rate": 1.1160019147950772e-06, "loss": 16.2656, "step": 14428 }, { "epoch": 0.958291824400611, "grad_norm": 695.5598754882812, "learning_rate": 1.1158950882520719e-06, "loss": 16.875, "step": 14429 }, { "epoch": 0.9583582386929668, "grad_norm": 173.46832275390625, "learning_rate": 1.115788260368459e-06, "loss": 13.0625, "step": 14430 }, { "epoch": 0.9584246529853224, "grad_norm": 187.24708557128906, "learning_rate": 1.1156814311454744e-06, "loss": 15.3672, "step": 14431 }, { "epoch": 0.9584910672776782, "grad_norm": 259.6197509765625, "learning_rate": 1.1155746005843534e-06, "loss": 16.9531, "step": 14432 }, { "epoch": 0.9585574815700338, "grad_norm": 361.4656677246094, "learning_rate": 1.1154677686863323e-06, "loss": 13.8438, "step": 14433 }, { "epoch": 0.9586238958623896, "grad_norm": 170.27703857421875, "learning_rate": 1.1153609354526466e-06, "loss": 22.5156, "step": 14434 }, { "epoch": 0.9586903101547453, "grad_norm": 3835.8857421875, "learning_rate": 1.1152541008845322e-06, "loss": 18.3594, "step": 14435 }, { "epoch": 0.958756724447101, "grad_norm": 215.9892120361328, "learning_rate": 1.1151472649832245e-06, "loss": 18.1719, "step": 14436 }, { "epoch": 0.9588231387394567, "grad_norm": 161.57119750976562, "learning_rate": 1.11504042774996e-06, "loss": 15.9844, "step": 14437 }, { "epoch": 0.9588895530318124, "grad_norm": 240.66929626464844, "learning_rate": 1.1149335891859742e-06, "loss": 15.3281, "step": 14438 }, { "epoch": 0.9589559673241682, "grad_norm": 269.5403137207031, "learning_rate": 1.1148267492925025e-06, "loss": 17.0469, "step": 14439 }, { "epoch": 0.9590223816165239, "grad_norm": 153.0252227783203, "learning_rate": 1.1147199080707814e-06, "loss": 18.8906, "step": 14440 }, { "epoch": 0.9590887959088796, "grad_norm": 241.14927673339844, "learning_rate": 1.1146130655220467e-06, "loss": 19.3125, "step": 14441 }, { "epoch": 0.9591552102012353, "grad_norm": 288.2093811035156, "learning_rate": 1.1145062216475341e-06, "loss": 15.6406, "step": 14442 }, { "epoch": 0.959221624493591, "grad_norm": 279.6717224121094, "learning_rate": 1.1143993764484793e-06, "loss": 19.1875, "step": 14443 }, { "epoch": 0.9592880387859467, "grad_norm": 277.4583435058594, "learning_rate": 1.1142925299261187e-06, "loss": 18.2344, "step": 14444 }, { "epoch": 0.9593544530783025, "grad_norm": 445.15045166015625, "learning_rate": 1.114185682081688e-06, "loss": 27.7188, "step": 14445 }, { "epoch": 0.9594208673706581, "grad_norm": 280.2618408203125, "learning_rate": 1.1140788329164233e-06, "loss": 14.5625, "step": 14446 }, { "epoch": 0.9594872816630139, "grad_norm": 208.23092651367188, "learning_rate": 1.1139719824315604e-06, "loss": 15.8125, "step": 14447 }, { "epoch": 0.9595536959553695, "grad_norm": 331.4979553222656, "learning_rate": 1.1138651306283352e-06, "loss": 18.5156, "step": 14448 }, { "epoch": 0.9596201102477253, "grad_norm": 188.57862854003906, "learning_rate": 1.1137582775079839e-06, "loss": 12.2969, "step": 14449 }, { "epoch": 0.9596865245400811, "grad_norm": 181.20314025878906, "learning_rate": 1.113651423071742e-06, "loss": 16.5938, "step": 14450 }, { "epoch": 0.9597529388324367, "grad_norm": 149.1480712890625, "learning_rate": 1.1135445673208466e-06, "loss": 16.7188, "step": 14451 }, { "epoch": 0.9598193531247925, "grad_norm": 134.99615478515625, "learning_rate": 1.1134377102565328e-06, "loss": 16.0469, "step": 14452 }, { "epoch": 0.9598857674171482, "grad_norm": 217.17564392089844, "learning_rate": 1.1133308518800374e-06, "loss": 13.5312, "step": 14453 }, { "epoch": 0.9599521817095039, "grad_norm": 255.40687561035156, "learning_rate": 1.1132239921925954e-06, "loss": 15.125, "step": 14454 }, { "epoch": 0.9600185960018596, "grad_norm": 833.0218505859375, "learning_rate": 1.113117131195444e-06, "loss": 19.4531, "step": 14455 }, { "epoch": 0.9600850102942153, "grad_norm": 213.67210388183594, "learning_rate": 1.1130102688898189e-06, "loss": 16.7344, "step": 14456 }, { "epoch": 0.960151424586571, "grad_norm": 179.38804626464844, "learning_rate": 1.112903405276956e-06, "loss": 22.1406, "step": 14457 }, { "epoch": 0.9602178388789268, "grad_norm": 289.803466796875, "learning_rate": 1.1127965403580916e-06, "loss": 21.7969, "step": 14458 }, { "epoch": 0.9602842531712824, "grad_norm": 332.21051025390625, "learning_rate": 1.1126896741344616e-06, "loss": 25.7344, "step": 14459 }, { "epoch": 0.9603506674636382, "grad_norm": 299.36083984375, "learning_rate": 1.1125828066073028e-06, "loss": 18.1094, "step": 14460 }, { "epoch": 0.9604170817559939, "grad_norm": 206.60760498046875, "learning_rate": 1.1124759377778507e-06, "loss": 16.8594, "step": 14461 }, { "epoch": 0.9604834960483496, "grad_norm": 255.7564239501953, "learning_rate": 1.112369067647342e-06, "loss": 20.1875, "step": 14462 }, { "epoch": 0.9605499103407054, "grad_norm": 107.19515991210938, "learning_rate": 1.1122621962170128e-06, "loss": 14.0625, "step": 14463 }, { "epoch": 0.960616324633061, "grad_norm": 298.0208435058594, "learning_rate": 1.112155323488099e-06, "loss": 17.1562, "step": 14464 }, { "epoch": 0.9606827389254168, "grad_norm": 193.1800079345703, "learning_rate": 1.1120484494618373e-06, "loss": 16.3438, "step": 14465 }, { "epoch": 0.9607491532177724, "grad_norm": 254.3221893310547, "learning_rate": 1.1119415741394637e-06, "loss": 16.875, "step": 14466 }, { "epoch": 0.9608155675101282, "grad_norm": 135.42892456054688, "learning_rate": 1.1118346975222147e-06, "loss": 18.9375, "step": 14467 }, { "epoch": 0.9608819818024839, "grad_norm": 244.56784057617188, "learning_rate": 1.1117278196113264e-06, "loss": 26.3125, "step": 14468 }, { "epoch": 0.9609483960948396, "grad_norm": 213.14381408691406, "learning_rate": 1.1116209404080348e-06, "loss": 15.6094, "step": 14469 }, { "epoch": 0.9610148103871953, "grad_norm": 162.2574920654297, "learning_rate": 1.1115140599135768e-06, "loss": 18.5156, "step": 14470 }, { "epoch": 0.961081224679551, "grad_norm": 176.58499145507812, "learning_rate": 1.1114071781291885e-06, "loss": 14.3281, "step": 14471 }, { "epoch": 0.9611476389719068, "grad_norm": 202.60403442382812, "learning_rate": 1.111300295056106e-06, "loss": 17.8281, "step": 14472 }, { "epoch": 0.9612140532642625, "grad_norm": 180.11355590820312, "learning_rate": 1.1111934106955657e-06, "loss": 18.9219, "step": 14473 }, { "epoch": 0.9612804675566182, "grad_norm": 157.70687866210938, "learning_rate": 1.1110865250488045e-06, "loss": 14.5938, "step": 14474 }, { "epoch": 0.9613468818489739, "grad_norm": 153.1212921142578, "learning_rate": 1.1109796381170583e-06, "loss": 19.75, "step": 14475 }, { "epoch": 0.9614132961413296, "grad_norm": 293.219970703125, "learning_rate": 1.1108727499015636e-06, "loss": 15.4375, "step": 14476 }, { "epoch": 0.9614797104336853, "grad_norm": 270.2137145996094, "learning_rate": 1.110765860403557e-06, "loss": 19.6562, "step": 14477 }, { "epoch": 0.9615461247260411, "grad_norm": 309.9179992675781, "learning_rate": 1.1106589696242747e-06, "loss": 17.1094, "step": 14478 }, { "epoch": 0.9616125390183967, "grad_norm": 451.46099853515625, "learning_rate": 1.1105520775649533e-06, "loss": 17.7656, "step": 14479 }, { "epoch": 0.9616789533107525, "grad_norm": 315.59454345703125, "learning_rate": 1.110445184226829e-06, "loss": 15.3125, "step": 14480 }, { "epoch": 0.9617453676031081, "grad_norm": 214.02972412109375, "learning_rate": 1.1103382896111388e-06, "loss": 25.0781, "step": 14481 }, { "epoch": 0.9618117818954639, "grad_norm": 252.62022399902344, "learning_rate": 1.1102313937191187e-06, "loss": 13.0625, "step": 14482 }, { "epoch": 0.9618781961878197, "grad_norm": 178.1011199951172, "learning_rate": 1.1101244965520053e-06, "loss": 18.25, "step": 14483 }, { "epoch": 0.9619446104801753, "grad_norm": 158.9470672607422, "learning_rate": 1.1100175981110355e-06, "loss": 19.7656, "step": 14484 }, { "epoch": 0.9620110247725311, "grad_norm": 337.3687744140625, "learning_rate": 1.1099106983974457e-06, "loss": 17.6719, "step": 14485 }, { "epoch": 0.9620774390648867, "grad_norm": 203.5511016845703, "learning_rate": 1.1098037974124719e-06, "loss": 18.9375, "step": 14486 }, { "epoch": 0.9621438533572425, "grad_norm": 190.8775634765625, "learning_rate": 1.1096968951573512e-06, "loss": 17.7344, "step": 14487 }, { "epoch": 0.9622102676495982, "grad_norm": 142.97633361816406, "learning_rate": 1.1095899916333203e-06, "loss": 14.2188, "step": 14488 }, { "epoch": 0.9622766819419539, "grad_norm": 499.1668701171875, "learning_rate": 1.109483086841615e-06, "loss": 11.9219, "step": 14489 }, { "epoch": 0.9623430962343096, "grad_norm": 197.75721740722656, "learning_rate": 1.1093761807834732e-06, "loss": 14.0, "step": 14490 }, { "epoch": 0.9624095105266653, "grad_norm": 125.67083740234375, "learning_rate": 1.1092692734601303e-06, "loss": 18.0938, "step": 14491 }, { "epoch": 0.962475924819021, "grad_norm": 354.6040954589844, "learning_rate": 1.1091623648728238e-06, "loss": 15.1875, "step": 14492 }, { "epoch": 0.9625423391113768, "grad_norm": 256.0198974609375, "learning_rate": 1.1090554550227898e-06, "loss": 15.0781, "step": 14493 }, { "epoch": 0.9626087534037325, "grad_norm": 180.70201110839844, "learning_rate": 1.108948543911265e-06, "loss": 17.75, "step": 14494 }, { "epoch": 0.9626751676960882, "grad_norm": 250.33143615722656, "learning_rate": 1.1088416315394865e-06, "loss": 16.4688, "step": 14495 }, { "epoch": 0.962741581988444, "grad_norm": 554.8217163085938, "learning_rate": 1.108734717908691e-06, "loss": 18.6719, "step": 14496 }, { "epoch": 0.9628079962807996, "grad_norm": 809.70263671875, "learning_rate": 1.1086278030201147e-06, "loss": 22.9375, "step": 14497 }, { "epoch": 0.9628744105731554, "grad_norm": 126.75472259521484, "learning_rate": 1.1085208868749944e-06, "loss": 16.7031, "step": 14498 }, { "epoch": 0.962940824865511, "grad_norm": 190.1603546142578, "learning_rate": 1.1084139694745676e-06, "loss": 17.2188, "step": 14499 }, { "epoch": 0.9630072391578668, "grad_norm": 240.7950439453125, "learning_rate": 1.10830705082007e-06, "loss": 24.25, "step": 14500 }, { "epoch": 0.9630736534502224, "grad_norm": 321.34954833984375, "learning_rate": 1.108200130912739e-06, "loss": 19.6094, "step": 14501 }, { "epoch": 0.9631400677425782, "grad_norm": 283.83050537109375, "learning_rate": 1.1080932097538115e-06, "loss": 19.7969, "step": 14502 }, { "epoch": 0.9632064820349339, "grad_norm": 237.36891174316406, "learning_rate": 1.107986287344524e-06, "loss": 19.0469, "step": 14503 }, { "epoch": 0.9632728963272896, "grad_norm": 273.410888671875, "learning_rate": 1.1078793636861134e-06, "loss": 15.7031, "step": 14504 }, { "epoch": 0.9633393106196454, "grad_norm": 179.8965606689453, "learning_rate": 1.1077724387798164e-06, "loss": 14.0547, "step": 14505 }, { "epoch": 0.963405724912001, "grad_norm": 224.3064727783203, "learning_rate": 1.1076655126268703e-06, "loss": 18.0469, "step": 14506 }, { "epoch": 0.9634721392043568, "grad_norm": 117.67342376708984, "learning_rate": 1.1075585852285115e-06, "loss": 16.2969, "step": 14507 }, { "epoch": 0.9635385534967125, "grad_norm": 150.21217346191406, "learning_rate": 1.1074516565859767e-06, "loss": 14.0625, "step": 14508 }, { "epoch": 0.9636049677890682, "grad_norm": 356.54437255859375, "learning_rate": 1.1073447267005037e-06, "loss": 14.8438, "step": 14509 }, { "epoch": 0.9636713820814239, "grad_norm": 577.66748046875, "learning_rate": 1.1072377955733282e-06, "loss": 16.4531, "step": 14510 }, { "epoch": 0.9637377963737797, "grad_norm": 204.5485076904297, "learning_rate": 1.107130863205688e-06, "loss": 15.6562, "step": 14511 }, { "epoch": 0.9638042106661353, "grad_norm": 150.24905395507812, "learning_rate": 1.1070239295988195e-06, "loss": 20.5, "step": 14512 }, { "epoch": 0.9638706249584911, "grad_norm": 710.6935424804688, "learning_rate": 1.1069169947539603e-06, "loss": 20.4844, "step": 14513 }, { "epoch": 0.9639370392508467, "grad_norm": 297.68646240234375, "learning_rate": 1.1068100586723466e-06, "loss": 18.1094, "step": 14514 }, { "epoch": 0.9640034535432025, "grad_norm": 172.7992706298828, "learning_rate": 1.1067031213552159e-06, "loss": 14.4531, "step": 14515 }, { "epoch": 0.9640698678355583, "grad_norm": 188.37680053710938, "learning_rate": 1.1065961828038049e-06, "loss": 13.9062, "step": 14516 }, { "epoch": 0.9641362821279139, "grad_norm": 178.6353759765625, "learning_rate": 1.1064892430193508e-06, "loss": 15.7656, "step": 14517 }, { "epoch": 0.9642026964202697, "grad_norm": 195.47508239746094, "learning_rate": 1.1063823020030905e-06, "loss": 15.75, "step": 14518 }, { "epoch": 0.9642691107126253, "grad_norm": 195.68255615234375, "learning_rate": 1.106275359756261e-06, "loss": 20.9531, "step": 14519 }, { "epoch": 0.9643355250049811, "grad_norm": 280.2884216308594, "learning_rate": 1.1061684162801e-06, "loss": 19.2812, "step": 14520 }, { "epoch": 0.9644019392973368, "grad_norm": 380.2621765136719, "learning_rate": 1.1060614715758435e-06, "loss": 16.6953, "step": 14521 }, { "epoch": 0.9644683535896925, "grad_norm": 205.4551544189453, "learning_rate": 1.1059545256447288e-06, "loss": 12.9375, "step": 14522 }, { "epoch": 0.9645347678820482, "grad_norm": 249.2432403564453, "learning_rate": 1.1058475784879935e-06, "loss": 22.625, "step": 14523 }, { "epoch": 0.9646011821744039, "grad_norm": 578.4573974609375, "learning_rate": 1.1057406301068747e-06, "loss": 16.2656, "step": 14524 }, { "epoch": 0.9646675964667596, "grad_norm": 528.64013671875, "learning_rate": 1.105633680502609e-06, "loss": 18.8125, "step": 14525 }, { "epoch": 0.9647340107591154, "grad_norm": 174.31552124023438, "learning_rate": 1.1055267296764336e-06, "loss": 20.0156, "step": 14526 }, { "epoch": 0.9648004250514711, "grad_norm": 163.26541137695312, "learning_rate": 1.1054197776295861e-06, "loss": 16.6562, "step": 14527 }, { "epoch": 0.9648668393438268, "grad_norm": 452.35791015625, "learning_rate": 1.1053128243633036e-06, "loss": 16.6562, "step": 14528 }, { "epoch": 0.9649332536361825, "grad_norm": 115.4693374633789, "learning_rate": 1.1052058698788228e-06, "loss": 16.2656, "step": 14529 }, { "epoch": 0.9649996679285382, "grad_norm": 111.47505950927734, "learning_rate": 1.1050989141773813e-06, "loss": 13.0156, "step": 14530 }, { "epoch": 0.965066082220894, "grad_norm": 253.68768310546875, "learning_rate": 1.104991957260216e-06, "loss": 16.9062, "step": 14531 }, { "epoch": 0.9651324965132496, "grad_norm": 369.265625, "learning_rate": 1.1048849991285643e-06, "loss": 23.6562, "step": 14532 }, { "epoch": 0.9651989108056054, "grad_norm": 121.30144500732422, "learning_rate": 1.1047780397836635e-06, "loss": 13.8438, "step": 14533 }, { "epoch": 0.965265325097961, "grad_norm": 461.0329895019531, "learning_rate": 1.1046710792267507e-06, "loss": 17.2969, "step": 14534 }, { "epoch": 0.9653317393903168, "grad_norm": 126.82730102539062, "learning_rate": 1.1045641174590632e-06, "loss": 15.1562, "step": 14535 }, { "epoch": 0.9653981536826726, "grad_norm": 228.95960998535156, "learning_rate": 1.1044571544818383e-06, "loss": 19.3438, "step": 14536 }, { "epoch": 0.9654645679750282, "grad_norm": 242.19859313964844, "learning_rate": 1.104350190296313e-06, "loss": 17.6719, "step": 14537 }, { "epoch": 0.965530982267384, "grad_norm": 115.55115509033203, "learning_rate": 1.1042432249037255e-06, "loss": 13.3594, "step": 14538 }, { "epoch": 0.9655973965597396, "grad_norm": 274.124755859375, "learning_rate": 1.104136258305312e-06, "loss": 19.9062, "step": 14539 }, { "epoch": 0.9656638108520954, "grad_norm": 93.97062683105469, "learning_rate": 1.1040292905023103e-06, "loss": 14.9844, "step": 14540 }, { "epoch": 0.9657302251444511, "grad_norm": 236.8448486328125, "learning_rate": 1.103922321495958e-06, "loss": 17.75, "step": 14541 }, { "epoch": 0.9657966394368068, "grad_norm": 268.2572021484375, "learning_rate": 1.103815351287492e-06, "loss": 17.2969, "step": 14542 }, { "epoch": 0.9658630537291625, "grad_norm": 2200.154052734375, "learning_rate": 1.10370837987815e-06, "loss": 13.5469, "step": 14543 }, { "epoch": 0.9659294680215182, "grad_norm": 181.6652374267578, "learning_rate": 1.1036014072691693e-06, "loss": 13.9531, "step": 14544 }, { "epoch": 0.9659958823138739, "grad_norm": 491.4317932128906, "learning_rate": 1.1034944334617875e-06, "loss": 16.9219, "step": 14545 }, { "epoch": 0.9660622966062297, "grad_norm": 340.95452880859375, "learning_rate": 1.1033874584572414e-06, "loss": 23.875, "step": 14546 }, { "epoch": 0.9661287108985854, "grad_norm": 258.3323974609375, "learning_rate": 1.103280482256769e-06, "loss": 19.8438, "step": 14547 }, { "epoch": 0.9661951251909411, "grad_norm": 137.15020751953125, "learning_rate": 1.1031735048616073e-06, "loss": 15.0625, "step": 14548 }, { "epoch": 0.9662615394832968, "grad_norm": 235.84703063964844, "learning_rate": 1.103066526272994e-06, "loss": 19.7969, "step": 14549 }, { "epoch": 0.9663279537756525, "grad_norm": 218.8421630859375, "learning_rate": 1.1029595464921669e-06, "loss": 14.9062, "step": 14550 }, { "epoch": 0.9663943680680083, "grad_norm": 151.41525268554688, "learning_rate": 1.1028525655203629e-06, "loss": 18.4844, "step": 14551 }, { "epoch": 0.9664607823603639, "grad_norm": 170.54051208496094, "learning_rate": 1.1027455833588198e-06, "loss": 16.2031, "step": 14552 }, { "epoch": 0.9665271966527197, "grad_norm": 348.33160400390625, "learning_rate": 1.102638600008775e-06, "loss": 20.1406, "step": 14553 }, { "epoch": 0.9665936109450753, "grad_norm": 1072.9464111328125, "learning_rate": 1.1025316154714658e-06, "loss": 14.375, "step": 14554 }, { "epoch": 0.9666600252374311, "grad_norm": 210.62704467773438, "learning_rate": 1.1024246297481303e-06, "loss": 15.9219, "step": 14555 }, { "epoch": 0.9667264395297868, "grad_norm": 149.79226684570312, "learning_rate": 1.1023176428400058e-06, "loss": 14.1719, "step": 14556 }, { "epoch": 0.9667928538221425, "grad_norm": 853.6788940429688, "learning_rate": 1.1022106547483296e-06, "loss": 22.5469, "step": 14557 }, { "epoch": 0.9668592681144983, "grad_norm": 268.8697509765625, "learning_rate": 1.1021036654743396e-06, "loss": 18.5469, "step": 14558 }, { "epoch": 0.966925682406854, "grad_norm": 189.85218811035156, "learning_rate": 1.101996675019273e-06, "loss": 12.5, "step": 14559 }, { "epoch": 0.9669920966992097, "grad_norm": 160.6874237060547, "learning_rate": 1.1018896833843682e-06, "loss": 15.5469, "step": 14560 }, { "epoch": 0.9670585109915654, "grad_norm": 263.1260681152344, "learning_rate": 1.101782690570862e-06, "loss": 17.7188, "step": 14561 }, { "epoch": 0.9671249252839211, "grad_norm": 240.40084838867188, "learning_rate": 1.1016756965799922e-06, "loss": 17.2031, "step": 14562 }, { "epoch": 0.9671913395762768, "grad_norm": 542.89208984375, "learning_rate": 1.1015687014129968e-06, "loss": 17.4688, "step": 14563 }, { "epoch": 0.9672577538686326, "grad_norm": 206.76168823242188, "learning_rate": 1.1014617050711133e-06, "loss": 20.4531, "step": 14564 }, { "epoch": 0.9673241681609882, "grad_norm": 269.5882873535156, "learning_rate": 1.1013547075555792e-06, "loss": 17.7969, "step": 14565 }, { "epoch": 0.967390582453344, "grad_norm": 744.2354125976562, "learning_rate": 1.101247708867632e-06, "loss": 19.1562, "step": 14566 }, { "epoch": 0.9674569967456996, "grad_norm": 198.369873046875, "learning_rate": 1.10114070900851e-06, "loss": 15.3594, "step": 14567 }, { "epoch": 0.9675234110380554, "grad_norm": 201.14035034179688, "learning_rate": 1.1010337079794506e-06, "loss": 16.1094, "step": 14568 }, { "epoch": 0.9675898253304112, "grad_norm": 1747.194091796875, "learning_rate": 1.1009267057816912e-06, "loss": 21.0781, "step": 14569 }, { "epoch": 0.9676562396227668, "grad_norm": 187.4701385498047, "learning_rate": 1.1008197024164702e-06, "loss": 16.25, "step": 14570 }, { "epoch": 0.9677226539151226, "grad_norm": 150.31149291992188, "learning_rate": 1.100712697885025e-06, "loss": 16.9219, "step": 14571 }, { "epoch": 0.9677890682074782, "grad_norm": 592.145263671875, "learning_rate": 1.1006056921885932e-06, "loss": 21.2031, "step": 14572 }, { "epoch": 0.967855482499834, "grad_norm": 192.23074340820312, "learning_rate": 1.100498685328413e-06, "loss": 16.0469, "step": 14573 }, { "epoch": 0.9679218967921897, "grad_norm": 622.697509765625, "learning_rate": 1.1003916773057218e-06, "loss": 18.5781, "step": 14574 }, { "epoch": 0.9679883110845454, "grad_norm": 111.26602172851562, "learning_rate": 1.1002846681217575e-06, "loss": 14.2656, "step": 14575 }, { "epoch": 0.9680547253769011, "grad_norm": 276.39453125, "learning_rate": 1.100177657777758e-06, "loss": 18.4844, "step": 14576 }, { "epoch": 0.9681211396692568, "grad_norm": 311.7979736328125, "learning_rate": 1.1000706462749614e-06, "loss": 16.7969, "step": 14577 }, { "epoch": 0.9681875539616125, "grad_norm": 578.8971557617188, "learning_rate": 1.099963633614605e-06, "loss": 28.0, "step": 14578 }, { "epoch": 0.9682539682539683, "grad_norm": 369.8011169433594, "learning_rate": 1.0998566197979272e-06, "loss": 17.2344, "step": 14579 }, { "epoch": 0.968320382546324, "grad_norm": 260.5761413574219, "learning_rate": 1.0997496048261652e-06, "loss": 13.2656, "step": 14580 }, { "epoch": 0.9683867968386797, "grad_norm": 92.16474151611328, "learning_rate": 1.0996425887005576e-06, "loss": 12.2578, "step": 14581 }, { "epoch": 0.9684532111310354, "grad_norm": 216.738037109375, "learning_rate": 1.0995355714223418e-06, "loss": 15.3906, "step": 14582 }, { "epoch": 0.9685196254233911, "grad_norm": 653.7507934570312, "learning_rate": 1.099428552992756e-06, "loss": 22.9844, "step": 14583 }, { "epoch": 0.9685860397157469, "grad_norm": 194.89376831054688, "learning_rate": 1.0993215334130381e-06, "loss": 20.4375, "step": 14584 }, { "epoch": 0.9686524540081025, "grad_norm": 293.2484130859375, "learning_rate": 1.0992145126844258e-06, "loss": 11.9297, "step": 14585 }, { "epoch": 0.9687188683004583, "grad_norm": 510.0141906738281, "learning_rate": 1.0991074908081574e-06, "loss": 15.3438, "step": 14586 }, { "epoch": 0.9687852825928139, "grad_norm": 302.7095031738281, "learning_rate": 1.0990004677854705e-06, "loss": 18.1875, "step": 14587 }, { "epoch": 0.9688516968851697, "grad_norm": 413.2590026855469, "learning_rate": 1.0988934436176037e-06, "loss": 21.875, "step": 14588 }, { "epoch": 0.9689181111775254, "grad_norm": 459.9503479003906, "learning_rate": 1.0987864183057942e-06, "loss": 20.2031, "step": 14589 }, { "epoch": 0.9689845254698811, "grad_norm": 147.38059997558594, "learning_rate": 1.0986793918512804e-06, "loss": 24.25, "step": 14590 }, { "epoch": 0.9690509397622369, "grad_norm": 137.75067138671875, "learning_rate": 1.0985723642553003e-06, "loss": 14.5781, "step": 14591 }, { "epoch": 0.9691173540545925, "grad_norm": 1594.7015380859375, "learning_rate": 1.098465335519092e-06, "loss": 18.25, "step": 14592 }, { "epoch": 0.9691837683469483, "grad_norm": 378.5194091796875, "learning_rate": 1.0983583056438935e-06, "loss": 19.7656, "step": 14593 }, { "epoch": 0.969250182639304, "grad_norm": 196.090087890625, "learning_rate": 1.0982512746309428e-06, "loss": 14.0781, "step": 14594 }, { "epoch": 0.9693165969316597, "grad_norm": 315.71246337890625, "learning_rate": 1.098144242481478e-06, "loss": 21.3594, "step": 14595 }, { "epoch": 0.9693830112240154, "grad_norm": 154.6583709716797, "learning_rate": 1.098037209196737e-06, "loss": 20.5625, "step": 14596 }, { "epoch": 0.9694494255163711, "grad_norm": 137.7742156982422, "learning_rate": 1.0979301747779586e-06, "loss": 18.375, "step": 14597 }, { "epoch": 0.9695158398087268, "grad_norm": 153.4966278076172, "learning_rate": 1.09782313922638e-06, "loss": 14.1406, "step": 14598 }, { "epoch": 0.9695822541010826, "grad_norm": 168.9053955078125, "learning_rate": 1.09771610254324e-06, "loss": 14.9297, "step": 14599 }, { "epoch": 0.9696486683934382, "grad_norm": 275.6763916015625, "learning_rate": 1.0976090647297762e-06, "loss": 20.375, "step": 14600 }, { "epoch": 0.969715082685794, "grad_norm": 214.7770538330078, "learning_rate": 1.097502025787227e-06, "loss": 17.3281, "step": 14601 }, { "epoch": 0.9697814969781497, "grad_norm": 425.12646484375, "learning_rate": 1.0973949857168308e-06, "loss": 23.0625, "step": 14602 }, { "epoch": 0.9698479112705054, "grad_norm": 276.9201354980469, "learning_rate": 1.0972879445198256e-06, "loss": 18.1875, "step": 14603 }, { "epoch": 0.9699143255628612, "grad_norm": 184.65785217285156, "learning_rate": 1.0971809021974493e-06, "loss": 16.1406, "step": 14604 }, { "epoch": 0.9699807398552168, "grad_norm": 229.29611206054688, "learning_rate": 1.0970738587509405e-06, "loss": 20.0, "step": 14605 }, { "epoch": 0.9700471541475726, "grad_norm": 241.7386932373047, "learning_rate": 1.0969668141815373e-06, "loss": 17.6406, "step": 14606 }, { "epoch": 0.9701135684399282, "grad_norm": 158.50445556640625, "learning_rate": 1.0968597684904778e-06, "loss": 15.0469, "step": 14607 }, { "epoch": 0.970179982732284, "grad_norm": 291.92547607421875, "learning_rate": 1.0967527216790002e-06, "loss": 29.2812, "step": 14608 }, { "epoch": 0.9702463970246397, "grad_norm": 365.2156677246094, "learning_rate": 1.0966456737483434e-06, "loss": 17.5781, "step": 14609 }, { "epoch": 0.9703128113169954, "grad_norm": 201.0789031982422, "learning_rate": 1.0965386246997449e-06, "loss": 15.7188, "step": 14610 }, { "epoch": 0.9703792256093511, "grad_norm": 161.38856506347656, "learning_rate": 1.0964315745344432e-06, "loss": 12.0, "step": 14611 }, { "epoch": 0.9704456399017068, "grad_norm": 172.25115966796875, "learning_rate": 1.0963245232536767e-06, "loss": 16.75, "step": 14612 }, { "epoch": 0.9705120541940626, "grad_norm": 263.9412536621094, "learning_rate": 1.0962174708586837e-06, "loss": 14.3594, "step": 14613 }, { "epoch": 0.9705784684864183, "grad_norm": 203.33969116210938, "learning_rate": 1.0961104173507025e-06, "loss": 20.3438, "step": 14614 }, { "epoch": 0.970644882778774, "grad_norm": 409.90716552734375, "learning_rate": 1.0960033627309713e-06, "loss": 19.3281, "step": 14615 }, { "epoch": 0.9707112970711297, "grad_norm": 327.17138671875, "learning_rate": 1.0958963070007287e-06, "loss": 16.4688, "step": 14616 }, { "epoch": 0.9707777113634855, "grad_norm": 464.22503662109375, "learning_rate": 1.095789250161213e-06, "loss": 13.9297, "step": 14617 }, { "epoch": 0.9708441256558411, "grad_norm": 143.6153564453125, "learning_rate": 1.0956821922136623e-06, "loss": 17.4375, "step": 14618 }, { "epoch": 0.9709105399481969, "grad_norm": 266.4660949707031, "learning_rate": 1.095575133159315e-06, "loss": 20.75, "step": 14619 }, { "epoch": 0.9709769542405525, "grad_norm": 120.12632751464844, "learning_rate": 1.09546807299941e-06, "loss": 14.5625, "step": 14620 }, { "epoch": 0.9710433685329083, "grad_norm": 459.2789611816406, "learning_rate": 1.0953610117351855e-06, "loss": 17.2812, "step": 14621 }, { "epoch": 0.971109782825264, "grad_norm": 326.9244384765625, "learning_rate": 1.0952539493678794e-06, "loss": 15.1797, "step": 14622 }, { "epoch": 0.9711761971176197, "grad_norm": 240.77293395996094, "learning_rate": 1.0951468858987308e-06, "loss": 19.7969, "step": 14623 }, { "epoch": 0.9712426114099755, "grad_norm": 176.989990234375, "learning_rate": 1.0950398213289777e-06, "loss": 13.4219, "step": 14624 }, { "epoch": 0.9713090257023311, "grad_norm": 191.38450622558594, "learning_rate": 1.0949327556598588e-06, "loss": 15.9219, "step": 14625 }, { "epoch": 0.9713754399946869, "grad_norm": 621.662109375, "learning_rate": 1.0948256888926125e-06, "loss": 21.6562, "step": 14626 }, { "epoch": 0.9714418542870425, "grad_norm": 136.1822509765625, "learning_rate": 1.0947186210284779e-06, "loss": 14.2812, "step": 14627 }, { "epoch": 0.9715082685793983, "grad_norm": 163.68353271484375, "learning_rate": 1.094611552068692e-06, "loss": 15.9375, "step": 14628 }, { "epoch": 0.971574682871754, "grad_norm": 319.6947326660156, "learning_rate": 1.0945044820144947e-06, "loss": 15.4844, "step": 14629 }, { "epoch": 0.9716410971641097, "grad_norm": 267.3009338378906, "learning_rate": 1.094397410867124e-06, "loss": 18.2812, "step": 14630 }, { "epoch": 0.9717075114564654, "grad_norm": 216.57601928710938, "learning_rate": 1.0942903386278186e-06, "loss": 14.6562, "step": 14631 }, { "epoch": 0.9717739257488212, "grad_norm": 202.5482940673828, "learning_rate": 1.0941832652978166e-06, "loss": 17.5781, "step": 14632 }, { "epoch": 0.9718403400411768, "grad_norm": 146.78977966308594, "learning_rate": 1.0940761908783571e-06, "loss": 21.0625, "step": 14633 }, { "epoch": 0.9719067543335326, "grad_norm": 185.66482543945312, "learning_rate": 1.0939691153706785e-06, "loss": 17.375, "step": 14634 }, { "epoch": 0.9719731686258883, "grad_norm": 390.5264892578125, "learning_rate": 1.0938620387760194e-06, "loss": 23.7188, "step": 14635 }, { "epoch": 0.972039582918244, "grad_norm": 219.61219787597656, "learning_rate": 1.0937549610956182e-06, "loss": 21.0312, "step": 14636 }, { "epoch": 0.9721059972105998, "grad_norm": 256.7579650878906, "learning_rate": 1.0936478823307137e-06, "loss": 14.6719, "step": 14637 }, { "epoch": 0.9721724115029554, "grad_norm": 182.6090545654297, "learning_rate": 1.0935408024825447e-06, "loss": 17.4531, "step": 14638 }, { "epoch": 0.9722388257953112, "grad_norm": 154.2148895263672, "learning_rate": 1.0934337215523496e-06, "loss": 12.7969, "step": 14639 }, { "epoch": 0.9723052400876668, "grad_norm": 227.58546447753906, "learning_rate": 1.0933266395413667e-06, "loss": 18.0, "step": 14640 }, { "epoch": 0.9723716543800226, "grad_norm": 149.29364013671875, "learning_rate": 1.0932195564508354e-06, "loss": 19.9062, "step": 14641 }, { "epoch": 0.9724380686723783, "grad_norm": 119.24303436279297, "learning_rate": 1.093112472281994e-06, "loss": 17.7344, "step": 14642 }, { "epoch": 0.972504482964734, "grad_norm": 276.0596923828125, "learning_rate": 1.0930053870360812e-06, "loss": 21.7812, "step": 14643 }, { "epoch": 0.9725708972570897, "grad_norm": 609.7587280273438, "learning_rate": 1.0928983007143356e-06, "loss": 19.8906, "step": 14644 }, { "epoch": 0.9726373115494454, "grad_norm": 342.4252624511719, "learning_rate": 1.092791213317996e-06, "loss": 24.8281, "step": 14645 }, { "epoch": 0.9727037258418012, "grad_norm": 215.42626953125, "learning_rate": 1.092684124848301e-06, "loss": 13.9375, "step": 14646 }, { "epoch": 0.9727701401341569, "grad_norm": 179.9274139404297, "learning_rate": 1.0925770353064897e-06, "loss": 19.7812, "step": 14647 }, { "epoch": 0.9728365544265126, "grad_norm": 146.42929077148438, "learning_rate": 1.0924699446938008e-06, "loss": 18.0781, "step": 14648 }, { "epoch": 0.9729029687188683, "grad_norm": 122.6411361694336, "learning_rate": 1.0923628530114728e-06, "loss": 15.75, "step": 14649 }, { "epoch": 0.972969383011224, "grad_norm": 202.67196655273438, "learning_rate": 1.0922557602607444e-06, "loss": 12.6562, "step": 14650 }, { "epoch": 0.9730357973035797, "grad_norm": 199.9638214111328, "learning_rate": 1.0921486664428547e-06, "loss": 16.2812, "step": 14651 }, { "epoch": 0.9731022115959355, "grad_norm": 122.71174621582031, "learning_rate": 1.0920415715590426e-06, "loss": 13.2188, "step": 14652 }, { "epoch": 0.9731686258882911, "grad_norm": 547.8458251953125, "learning_rate": 1.0919344756105463e-06, "loss": 24.9844, "step": 14653 }, { "epoch": 0.9732350401806469, "grad_norm": 220.50880432128906, "learning_rate": 1.0918273785986052e-06, "loss": 18.9531, "step": 14654 }, { "epoch": 0.9733014544730025, "grad_norm": 114.48511505126953, "learning_rate": 1.0917202805244575e-06, "loss": 11.0, "step": 14655 }, { "epoch": 0.9733678687653583, "grad_norm": 164.75050354003906, "learning_rate": 1.0916131813893433e-06, "loss": 12.8906, "step": 14656 }, { "epoch": 0.9734342830577141, "grad_norm": 208.4654998779297, "learning_rate": 1.0915060811945001e-06, "loss": 15.1094, "step": 14657 }, { "epoch": 0.9735006973500697, "grad_norm": 504.5552978515625, "learning_rate": 1.0913989799411674e-06, "loss": 26.2812, "step": 14658 }, { "epoch": 0.9735671116424255, "grad_norm": 160.82913208007812, "learning_rate": 1.0912918776305842e-06, "loss": 12.7969, "step": 14659 }, { "epoch": 0.9736335259347811, "grad_norm": 224.28421020507812, "learning_rate": 1.0911847742639891e-06, "loss": 23.3438, "step": 14660 }, { "epoch": 0.9736999402271369, "grad_norm": 121.6328125, "learning_rate": 1.0910776698426208e-06, "loss": 12.5, "step": 14661 }, { "epoch": 0.9737663545194926, "grad_norm": 381.9375305175781, "learning_rate": 1.0909705643677188e-06, "loss": 16.7656, "step": 14662 }, { "epoch": 0.9738327688118483, "grad_norm": 258.6241455078125, "learning_rate": 1.0908634578405217e-06, "loss": 18.7969, "step": 14663 }, { "epoch": 0.973899183104204, "grad_norm": 274.87353515625, "learning_rate": 1.0907563502622686e-06, "loss": 16.8438, "step": 14664 }, { "epoch": 0.9739655973965597, "grad_norm": 344.27752685546875, "learning_rate": 1.0906492416341983e-06, "loss": 23.1719, "step": 14665 }, { "epoch": 0.9740320116889154, "grad_norm": 241.49057006835938, "learning_rate": 1.09054213195755e-06, "loss": 20.625, "step": 14666 }, { "epoch": 0.9740984259812712, "grad_norm": 232.21873474121094, "learning_rate": 1.0904350212335624e-06, "loss": 16.625, "step": 14667 }, { "epoch": 0.9741648402736269, "grad_norm": 400.1820068359375, "learning_rate": 1.0903279094634745e-06, "loss": 14.1406, "step": 14668 }, { "epoch": 0.9742312545659826, "grad_norm": 182.2392120361328, "learning_rate": 1.0902207966485255e-06, "loss": 19.7812, "step": 14669 }, { "epoch": 0.9742976688583384, "grad_norm": 286.5332946777344, "learning_rate": 1.0901136827899546e-06, "loss": 25.625, "step": 14670 }, { "epoch": 0.974364083150694, "grad_norm": 189.02847290039062, "learning_rate": 1.0900065678890002e-06, "loss": 16.0938, "step": 14671 }, { "epoch": 0.9744304974430498, "grad_norm": 207.0476531982422, "learning_rate": 1.0898994519469021e-06, "loss": 13.7031, "step": 14672 }, { "epoch": 0.9744969117354054, "grad_norm": 175.14532470703125, "learning_rate": 1.0897923349648985e-06, "loss": 13.7969, "step": 14673 }, { "epoch": 0.9745633260277612, "grad_norm": 144.45034790039062, "learning_rate": 1.0896852169442294e-06, "loss": 10.25, "step": 14674 }, { "epoch": 0.9746297403201168, "grad_norm": 175.4053497314453, "learning_rate": 1.0895780978861332e-06, "loss": 16.1094, "step": 14675 }, { "epoch": 0.9746961546124726, "grad_norm": 198.98623657226562, "learning_rate": 1.0894709777918494e-06, "loss": 14.3594, "step": 14676 }, { "epoch": 0.9747625689048283, "grad_norm": 189.3223876953125, "learning_rate": 1.0893638566626168e-06, "loss": 20.7656, "step": 14677 }, { "epoch": 0.974828983197184, "grad_norm": 236.3565673828125, "learning_rate": 1.0892567344996745e-06, "loss": 17.75, "step": 14678 }, { "epoch": 0.9748953974895398, "grad_norm": 215.58848571777344, "learning_rate": 1.089149611304262e-06, "loss": 15.0781, "step": 14679 }, { "epoch": 0.9749618117818954, "grad_norm": 164.38465881347656, "learning_rate": 1.089042487077618e-06, "loss": 18.9531, "step": 14680 }, { "epoch": 0.9750282260742512, "grad_norm": 293.7582092285156, "learning_rate": 1.0889353618209821e-06, "loss": 19.0781, "step": 14681 }, { "epoch": 0.9750946403666069, "grad_norm": 353.4132080078125, "learning_rate": 1.0888282355355928e-06, "loss": 19.7656, "step": 14682 }, { "epoch": 0.9751610546589626, "grad_norm": 851.22998046875, "learning_rate": 1.08872110822269e-06, "loss": 15.0156, "step": 14683 }, { "epoch": 0.9752274689513183, "grad_norm": 448.66143798828125, "learning_rate": 1.0886139798835126e-06, "loss": 21.8438, "step": 14684 }, { "epoch": 0.975293883243674, "grad_norm": 192.3760223388672, "learning_rate": 1.0885068505192997e-06, "loss": 28.375, "step": 14685 }, { "epoch": 0.9753602975360297, "grad_norm": 135.73695373535156, "learning_rate": 1.0883997201312904e-06, "loss": 15.3906, "step": 14686 }, { "epoch": 0.9754267118283855, "grad_norm": 770.604736328125, "learning_rate": 1.0882925887207245e-06, "loss": 13.9531, "step": 14687 }, { "epoch": 0.9754931261207412, "grad_norm": 467.6078186035156, "learning_rate": 1.0881854562888408e-06, "loss": 21.6406, "step": 14688 }, { "epoch": 0.9755595404130969, "grad_norm": 257.4414978027344, "learning_rate": 1.0880783228368784e-06, "loss": 14.2812, "step": 14689 }, { "epoch": 0.9756259547054527, "grad_norm": 232.03016662597656, "learning_rate": 1.0879711883660767e-06, "loss": 14.5625, "step": 14690 }, { "epoch": 0.9756923689978083, "grad_norm": 250.07273864746094, "learning_rate": 1.0878640528776752e-06, "loss": 18.1094, "step": 14691 }, { "epoch": 0.9757587832901641, "grad_norm": 116.54293823242188, "learning_rate": 1.087756916372913e-06, "loss": 16.875, "step": 14692 }, { "epoch": 0.9758251975825197, "grad_norm": 474.79705810546875, "learning_rate": 1.0876497788530292e-06, "loss": 19.2344, "step": 14693 }, { "epoch": 0.9758916118748755, "grad_norm": 308.98046875, "learning_rate": 1.0875426403192633e-06, "loss": 11.5234, "step": 14694 }, { "epoch": 0.9759580261672312, "grad_norm": 109.07699584960938, "learning_rate": 1.087435500772855e-06, "loss": 17.1875, "step": 14695 }, { "epoch": 0.9760244404595869, "grad_norm": 149.83909606933594, "learning_rate": 1.087328360215043e-06, "loss": 18.9219, "step": 14696 }, { "epoch": 0.9760908547519426, "grad_norm": 467.424072265625, "learning_rate": 1.087221218647067e-06, "loss": 27.25, "step": 14697 }, { "epoch": 0.9761572690442983, "grad_norm": 124.70510864257812, "learning_rate": 1.087114076070166e-06, "loss": 16.2188, "step": 14698 }, { "epoch": 0.9762236833366541, "grad_norm": 213.77371215820312, "learning_rate": 1.0870069324855797e-06, "loss": 16.4531, "step": 14699 }, { "epoch": 0.9762900976290098, "grad_norm": 215.39662170410156, "learning_rate": 1.0868997878945474e-06, "loss": 15.2344, "step": 14700 }, { "epoch": 0.9763565119213655, "grad_norm": 663.24560546875, "learning_rate": 1.0867926422983082e-06, "loss": 17.9688, "step": 14701 }, { "epoch": 0.9764229262137212, "grad_norm": 119.41663360595703, "learning_rate": 1.0866854956981022e-06, "loss": 18.2812, "step": 14702 }, { "epoch": 0.9764893405060769, "grad_norm": 316.43060302734375, "learning_rate": 1.0865783480951683e-06, "loss": 18.9062, "step": 14703 }, { "epoch": 0.9765557547984326, "grad_norm": 348.6966247558594, "learning_rate": 1.0864711994907456e-06, "loss": 15.4375, "step": 14704 }, { "epoch": 0.9766221690907884, "grad_norm": 188.23468017578125, "learning_rate": 1.0863640498860742e-06, "loss": 23.7188, "step": 14705 }, { "epoch": 0.976688583383144, "grad_norm": 90.0692138671875, "learning_rate": 1.0862568992823935e-06, "loss": 11.9375, "step": 14706 }, { "epoch": 0.9767549976754998, "grad_norm": 99.03854370117188, "learning_rate": 1.0861497476809425e-06, "loss": 18.25, "step": 14707 }, { "epoch": 0.9768214119678554, "grad_norm": 202.64376831054688, "learning_rate": 1.0860425950829608e-06, "loss": 19.1094, "step": 14708 }, { "epoch": 0.9768878262602112, "grad_norm": 99.40367889404297, "learning_rate": 1.0859354414896881e-06, "loss": 11.6562, "step": 14709 }, { "epoch": 0.976954240552567, "grad_norm": 297.3289489746094, "learning_rate": 1.0858282869023638e-06, "loss": 20.0312, "step": 14710 }, { "epoch": 0.9770206548449226, "grad_norm": 138.98890686035156, "learning_rate": 1.0857211313222271e-06, "loss": 13.7656, "step": 14711 }, { "epoch": 0.9770870691372784, "grad_norm": 383.40924072265625, "learning_rate": 1.085613974750518e-06, "loss": 12.8438, "step": 14712 }, { "epoch": 0.977153483429634, "grad_norm": 118.984130859375, "learning_rate": 1.085506817188476e-06, "loss": 15.3906, "step": 14713 }, { "epoch": 0.9772198977219898, "grad_norm": 187.568115234375, "learning_rate": 1.0853996586373402e-06, "loss": 19.7031, "step": 14714 }, { "epoch": 0.9772863120143455, "grad_norm": 169.39390563964844, "learning_rate": 1.0852924990983505e-06, "loss": 17.0938, "step": 14715 }, { "epoch": 0.9773527263067012, "grad_norm": 172.0887908935547, "learning_rate": 1.085185338572746e-06, "loss": 13.4219, "step": 14716 }, { "epoch": 0.9774191405990569, "grad_norm": 149.37298583984375, "learning_rate": 1.0850781770617673e-06, "loss": 18.6406, "step": 14717 }, { "epoch": 0.9774855548914126, "grad_norm": 149.66207885742188, "learning_rate": 1.0849710145666528e-06, "loss": 15.8281, "step": 14718 }, { "epoch": 0.9775519691837683, "grad_norm": 340.0410461425781, "learning_rate": 1.0848638510886425e-06, "loss": 17.1562, "step": 14719 }, { "epoch": 0.9776183834761241, "grad_norm": 217.1071014404297, "learning_rate": 1.0847566866289764e-06, "loss": 23.1406, "step": 14720 }, { "epoch": 0.9776847977684798, "grad_norm": 229.82838439941406, "learning_rate": 1.0846495211888938e-06, "loss": 16.2812, "step": 14721 }, { "epoch": 0.9777512120608355, "grad_norm": 544.67529296875, "learning_rate": 1.084542354769634e-06, "loss": 14.4844, "step": 14722 }, { "epoch": 0.9778176263531912, "grad_norm": 117.8341293334961, "learning_rate": 1.0844351873724376e-06, "loss": 14.4219, "step": 14723 }, { "epoch": 0.9778840406455469, "grad_norm": 180.19378662109375, "learning_rate": 1.0843280189985434e-06, "loss": 17.8672, "step": 14724 }, { "epoch": 0.9779504549379027, "grad_norm": 252.8843536376953, "learning_rate": 1.084220849649191e-06, "loss": 17.7344, "step": 14725 }, { "epoch": 0.9780168692302583, "grad_norm": 325.05877685546875, "learning_rate": 1.0841136793256207e-06, "loss": 16.0156, "step": 14726 }, { "epoch": 0.9780832835226141, "grad_norm": 293.3414306640625, "learning_rate": 1.0840065080290719e-06, "loss": 10.9141, "step": 14727 }, { "epoch": 0.9781496978149697, "grad_norm": 877.714111328125, "learning_rate": 1.083899335760784e-06, "loss": 28.0625, "step": 14728 }, { "epoch": 0.9782161121073255, "grad_norm": 305.8095397949219, "learning_rate": 1.083792162521997e-06, "loss": 19.8906, "step": 14729 }, { "epoch": 0.9782825263996812, "grad_norm": 188.578857421875, "learning_rate": 1.083684988313951e-06, "loss": 19.5625, "step": 14730 }, { "epoch": 0.9783489406920369, "grad_norm": 158.15658569335938, "learning_rate": 1.0835778131378849e-06, "loss": 15.1562, "step": 14731 }, { "epoch": 0.9784153549843927, "grad_norm": 284.4168701171875, "learning_rate": 1.083470636995039e-06, "loss": 14.8438, "step": 14732 }, { "epoch": 0.9784817692767483, "grad_norm": 106.64698028564453, "learning_rate": 1.0833634598866525e-06, "loss": 16.2656, "step": 14733 }, { "epoch": 0.9785481835691041, "grad_norm": 260.3485412597656, "learning_rate": 1.083256281813966e-06, "loss": 18.9375, "step": 14734 }, { "epoch": 0.9786145978614598, "grad_norm": 133.2247314453125, "learning_rate": 1.0831491027782186e-06, "loss": 17.9219, "step": 14735 }, { "epoch": 0.9786810121538155, "grad_norm": 88.73954772949219, "learning_rate": 1.0830419227806506e-06, "loss": 13.2344, "step": 14736 }, { "epoch": 0.9787474264461712, "grad_norm": 372.3802795410156, "learning_rate": 1.0829347418225011e-06, "loss": 16.7969, "step": 14737 }, { "epoch": 0.978813840738527, "grad_norm": 229.2887725830078, "learning_rate": 1.082827559905011e-06, "loss": 11.7812, "step": 14738 }, { "epoch": 0.9788802550308826, "grad_norm": 143.96730041503906, "learning_rate": 1.082720377029419e-06, "loss": 13.1406, "step": 14739 }, { "epoch": 0.9789466693232384, "grad_norm": 873.2544555664062, "learning_rate": 1.0826131931969654e-06, "loss": 22.4219, "step": 14740 }, { "epoch": 0.979013083615594, "grad_norm": 253.717041015625, "learning_rate": 1.0825060084088901e-06, "loss": 14.4531, "step": 14741 }, { "epoch": 0.9790794979079498, "grad_norm": 289.1617126464844, "learning_rate": 1.082398822666433e-06, "loss": 15.6719, "step": 14742 }, { "epoch": 0.9791459122003056, "grad_norm": 319.000244140625, "learning_rate": 1.0822916359708334e-06, "loss": 21.75, "step": 14743 }, { "epoch": 0.9792123264926612, "grad_norm": 229.5111541748047, "learning_rate": 1.082184448323332e-06, "loss": 14.0312, "step": 14744 }, { "epoch": 0.979278740785017, "grad_norm": 197.99754333496094, "learning_rate": 1.0820772597251682e-06, "loss": 19.7188, "step": 14745 }, { "epoch": 0.9793451550773726, "grad_norm": 229.25564575195312, "learning_rate": 1.081970070177582e-06, "loss": 19.625, "step": 14746 }, { "epoch": 0.9794115693697284, "grad_norm": 204.84002685546875, "learning_rate": 1.0818628796818131e-06, "loss": 15.0469, "step": 14747 }, { "epoch": 0.979477983662084, "grad_norm": 169.43356323242188, "learning_rate": 1.0817556882391017e-06, "loss": 15.4531, "step": 14748 }, { "epoch": 0.9795443979544398, "grad_norm": 651.1852416992188, "learning_rate": 1.0816484958506879e-06, "loss": 26.0469, "step": 14749 }, { "epoch": 0.9796108122467955, "grad_norm": 167.54627990722656, "learning_rate": 1.081541302517811e-06, "loss": 10.7344, "step": 14750 }, { "epoch": 0.9796772265391512, "grad_norm": 156.5225067138672, "learning_rate": 1.0814341082417116e-06, "loss": 19.0312, "step": 14751 }, { "epoch": 0.9797436408315069, "grad_norm": 636.9602661132812, "learning_rate": 1.0813269130236295e-06, "loss": 27.3125, "step": 14752 }, { "epoch": 0.9798100551238627, "grad_norm": 114.13219451904297, "learning_rate": 1.0812197168648042e-06, "loss": 11.8672, "step": 14753 }, { "epoch": 0.9798764694162184, "grad_norm": 144.92652893066406, "learning_rate": 1.0811125197664764e-06, "loss": 14.25, "step": 14754 }, { "epoch": 0.9799428837085741, "grad_norm": 442.9191589355469, "learning_rate": 1.0810053217298854e-06, "loss": 18.0781, "step": 14755 }, { "epoch": 0.9800092980009298, "grad_norm": 209.85507202148438, "learning_rate": 1.0808981227562717e-06, "loss": 10.9531, "step": 14756 }, { "epoch": 0.9800757122932855, "grad_norm": 369.4689025878906, "learning_rate": 1.080790922846875e-06, "loss": 18.2188, "step": 14757 }, { "epoch": 0.9801421265856413, "grad_norm": 303.394775390625, "learning_rate": 1.0806837220029357e-06, "loss": 17.7188, "step": 14758 }, { "epoch": 0.9802085408779969, "grad_norm": 236.14581298828125, "learning_rate": 1.0805765202256939e-06, "loss": 23.3125, "step": 14759 }, { "epoch": 0.9802749551703527, "grad_norm": 252.0072021484375, "learning_rate": 1.0804693175163888e-06, "loss": 16.6562, "step": 14760 }, { "epoch": 0.9803413694627083, "grad_norm": 1658.707763671875, "learning_rate": 1.0803621138762613e-06, "loss": 17.375, "step": 14761 }, { "epoch": 0.9804077837550641, "grad_norm": 146.78131103515625, "learning_rate": 1.0802549093065515e-06, "loss": 10.3125, "step": 14762 }, { "epoch": 0.9804741980474198, "grad_norm": 640.7660522460938, "learning_rate": 1.0801477038084986e-06, "loss": 23.375, "step": 14763 }, { "epoch": 0.9805406123397755, "grad_norm": 139.22987365722656, "learning_rate": 1.0800404973833437e-06, "loss": 10.8672, "step": 14764 }, { "epoch": 0.9806070266321313, "grad_norm": 180.35800170898438, "learning_rate": 1.079933290032326e-06, "loss": 16.25, "step": 14765 }, { "epoch": 0.9806734409244869, "grad_norm": 721.0428466796875, "learning_rate": 1.0798260817566868e-06, "loss": 24.875, "step": 14766 }, { "epoch": 0.9807398552168427, "grad_norm": 342.78460693359375, "learning_rate": 1.0797188725576652e-06, "loss": 18.7031, "step": 14767 }, { "epoch": 0.9808062695091984, "grad_norm": 265.2709655761719, "learning_rate": 1.0796116624365014e-06, "loss": 20.5156, "step": 14768 }, { "epoch": 0.9808726838015541, "grad_norm": 164.33041381835938, "learning_rate": 1.079504451394436e-06, "loss": 16.0156, "step": 14769 }, { "epoch": 0.9809390980939098, "grad_norm": 120.49658966064453, "learning_rate": 1.0793972394327094e-06, "loss": 19.5469, "step": 14770 }, { "epoch": 0.9810055123862655, "grad_norm": 148.4968719482422, "learning_rate": 1.0792900265525607e-06, "loss": 15.9688, "step": 14771 }, { "epoch": 0.9810719266786212, "grad_norm": 465.9100341796875, "learning_rate": 1.0791828127552308e-06, "loss": 16.4219, "step": 14772 }, { "epoch": 0.981138340970977, "grad_norm": 230.8925323486328, "learning_rate": 1.07907559804196e-06, "loss": 22.3438, "step": 14773 }, { "epoch": 0.9812047552633326, "grad_norm": 124.78254699707031, "learning_rate": 1.078968382413988e-06, "loss": 17.4062, "step": 14774 }, { "epoch": 0.9812711695556884, "grad_norm": 478.4473876953125, "learning_rate": 1.0788611658725555e-06, "loss": 16.6094, "step": 14775 }, { "epoch": 0.9813375838480441, "grad_norm": 295.2213134765625, "learning_rate": 1.0787539484189022e-06, "loss": 19.0938, "step": 14776 }, { "epoch": 0.9814039981403998, "grad_norm": 270.635986328125, "learning_rate": 1.0786467300542692e-06, "loss": 13.875, "step": 14777 }, { "epoch": 0.9814704124327556, "grad_norm": 160.53536987304688, "learning_rate": 1.0785395107798957e-06, "loss": 14.6562, "step": 14778 }, { "epoch": 0.9815368267251112, "grad_norm": 197.92031860351562, "learning_rate": 1.0784322905970226e-06, "loss": 16.125, "step": 14779 }, { "epoch": 0.981603241017467, "grad_norm": 337.4249572753906, "learning_rate": 1.07832506950689e-06, "loss": 20.7344, "step": 14780 }, { "epoch": 0.9816696553098226, "grad_norm": 165.3182830810547, "learning_rate": 1.0782178475107382e-06, "loss": 24.5469, "step": 14781 }, { "epoch": 0.9817360696021784, "grad_norm": 221.22592163085938, "learning_rate": 1.0781106246098073e-06, "loss": 19.9531, "step": 14782 }, { "epoch": 0.9818024838945341, "grad_norm": 84.10088348388672, "learning_rate": 1.078003400805338e-06, "loss": 13.9375, "step": 14783 }, { "epoch": 0.9818688981868898, "grad_norm": 180.1685028076172, "learning_rate": 1.07789617609857e-06, "loss": 16.9219, "step": 14784 }, { "epoch": 0.9819353124792455, "grad_norm": 229.38345336914062, "learning_rate": 1.0777889504907442e-06, "loss": 17.875, "step": 14785 }, { "epoch": 0.9820017267716012, "grad_norm": 247.4945526123047, "learning_rate": 1.0776817239831005e-06, "loss": 19.2812, "step": 14786 }, { "epoch": 0.982068141063957, "grad_norm": 199.81602478027344, "learning_rate": 1.0775744965768793e-06, "loss": 16.2812, "step": 14787 }, { "epoch": 0.9821345553563127, "grad_norm": 212.0083465576172, "learning_rate": 1.0774672682733216e-06, "loss": 15.3125, "step": 14788 }, { "epoch": 0.9822009696486684, "grad_norm": 200.0011749267578, "learning_rate": 1.0773600390736667e-06, "loss": 19.0312, "step": 14789 }, { "epoch": 0.9822673839410241, "grad_norm": 118.33740234375, "learning_rate": 1.0772528089791554e-06, "loss": 11.4375, "step": 14790 }, { "epoch": 0.9823337982333799, "grad_norm": 328.65924072265625, "learning_rate": 1.0771455779910286e-06, "loss": 18.2188, "step": 14791 }, { "epoch": 0.9824002125257355, "grad_norm": 257.6192932128906, "learning_rate": 1.077038346110526e-06, "loss": 19.6719, "step": 14792 }, { "epoch": 0.9824666268180913, "grad_norm": 536.9754638671875, "learning_rate": 1.076931113338888e-06, "loss": 16.25, "step": 14793 }, { "epoch": 0.9825330411104469, "grad_norm": 125.4428482055664, "learning_rate": 1.0768238796773556e-06, "loss": 17.1406, "step": 14794 }, { "epoch": 0.9825994554028027, "grad_norm": 230.07455444335938, "learning_rate": 1.0767166451271688e-06, "loss": 16.2031, "step": 14795 }, { "epoch": 0.9826658696951583, "grad_norm": 95.0962905883789, "learning_rate": 1.076609409689568e-06, "loss": 15.7188, "step": 14796 }, { "epoch": 0.9827322839875141, "grad_norm": 218.98126220703125, "learning_rate": 1.0765021733657936e-06, "loss": 18.5156, "step": 14797 }, { "epoch": 0.9827986982798699, "grad_norm": 140.70896911621094, "learning_rate": 1.0763949361570865e-06, "loss": 17.6094, "step": 14798 }, { "epoch": 0.9828651125722255, "grad_norm": 172.97335815429688, "learning_rate": 1.0762876980646867e-06, "loss": 19.125, "step": 14799 }, { "epoch": 0.9829315268645813, "grad_norm": 917.5189208984375, "learning_rate": 1.0761804590898347e-06, "loss": 14.75, "step": 14800 }, { "epoch": 0.982997941156937, "grad_norm": 259.5804138183594, "learning_rate": 1.0760732192337713e-06, "loss": 18.4844, "step": 14801 }, { "epoch": 0.9830643554492927, "grad_norm": 2040.4429931640625, "learning_rate": 1.0759659784977367e-06, "loss": 16.2812, "step": 14802 }, { "epoch": 0.9831307697416484, "grad_norm": 155.21482849121094, "learning_rate": 1.0758587368829713e-06, "loss": 16.4219, "step": 14803 }, { "epoch": 0.9831971840340041, "grad_norm": 147.4443817138672, "learning_rate": 1.0757514943907157e-06, "loss": 19.1562, "step": 14804 }, { "epoch": 0.9832635983263598, "grad_norm": 216.79931640625, "learning_rate": 1.0756442510222107e-06, "loss": 19.2188, "step": 14805 }, { "epoch": 0.9833300126187156, "grad_norm": 527.8464965820312, "learning_rate": 1.0755370067786966e-06, "loss": 15.9375, "step": 14806 }, { "epoch": 0.9833964269110712, "grad_norm": 425.64697265625, "learning_rate": 1.075429761661414e-06, "loss": 15.8125, "step": 14807 }, { "epoch": 0.983462841203427, "grad_norm": 419.76641845703125, "learning_rate": 1.0753225156716033e-06, "loss": 17.4375, "step": 14808 }, { "epoch": 0.9835292554957827, "grad_norm": 146.06517028808594, "learning_rate": 1.0752152688105053e-06, "loss": 18.9141, "step": 14809 }, { "epoch": 0.9835956697881384, "grad_norm": 345.9888610839844, "learning_rate": 1.0751080210793603e-06, "loss": 14.7188, "step": 14810 }, { "epoch": 0.9836620840804942, "grad_norm": 499.6360168457031, "learning_rate": 1.075000772479409e-06, "loss": 16.8438, "step": 14811 }, { "epoch": 0.9837284983728498, "grad_norm": 186.0915069580078, "learning_rate": 1.0748935230118921e-06, "loss": 15.6406, "step": 14812 }, { "epoch": 0.9837949126652056, "grad_norm": 144.56480407714844, "learning_rate": 1.0747862726780503e-06, "loss": 14.7969, "step": 14813 }, { "epoch": 0.9838613269575612, "grad_norm": 169.22848510742188, "learning_rate": 1.0746790214791236e-06, "loss": 17.75, "step": 14814 }, { "epoch": 0.983927741249917, "grad_norm": 124.30143737792969, "learning_rate": 1.0745717694163534e-06, "loss": 17.9531, "step": 14815 }, { "epoch": 0.9839941555422727, "grad_norm": 215.01016235351562, "learning_rate": 1.07446451649098e-06, "loss": 15.1719, "step": 14816 }, { "epoch": 0.9840605698346284, "grad_norm": 348.6751403808594, "learning_rate": 1.0743572627042437e-06, "loss": 17.6094, "step": 14817 }, { "epoch": 0.9841269841269841, "grad_norm": 184.4016876220703, "learning_rate": 1.0742500080573856e-06, "loss": 13.5625, "step": 14818 }, { "epoch": 0.9841933984193398, "grad_norm": 445.8621826171875, "learning_rate": 1.0741427525516462e-06, "loss": 13.8906, "step": 14819 }, { "epoch": 0.9842598127116956, "grad_norm": 245.5206298828125, "learning_rate": 1.0740354961882662e-06, "loss": 11.5938, "step": 14820 }, { "epoch": 0.9843262270040513, "grad_norm": 243.08453369140625, "learning_rate": 1.0739282389684864e-06, "loss": 21.0312, "step": 14821 }, { "epoch": 0.984392641296407, "grad_norm": 176.27072143554688, "learning_rate": 1.0738209808935472e-06, "loss": 17.0312, "step": 14822 }, { "epoch": 0.9844590555887627, "grad_norm": 187.63232421875, "learning_rate": 1.0737137219646895e-06, "loss": 16.7812, "step": 14823 }, { "epoch": 0.9845254698811184, "grad_norm": 170.01023864746094, "learning_rate": 1.0736064621831543e-06, "loss": 17.4219, "step": 14824 }, { "epoch": 0.9845918841734741, "grad_norm": 216.77938842773438, "learning_rate": 1.0734992015501813e-06, "loss": 15.3906, "step": 14825 }, { "epoch": 0.9846582984658299, "grad_norm": 5268.31591796875, "learning_rate": 1.0733919400670123e-06, "loss": 16.7344, "step": 14826 }, { "epoch": 0.9847247127581855, "grad_norm": 132.73512268066406, "learning_rate": 1.0732846777348879e-06, "loss": 24.0938, "step": 14827 }, { "epoch": 0.9847911270505413, "grad_norm": 675.4876708984375, "learning_rate": 1.0731774145550484e-06, "loss": 12.2656, "step": 14828 }, { "epoch": 0.9848575413428969, "grad_norm": 338.9500732421875, "learning_rate": 1.0730701505287344e-06, "loss": 18.3906, "step": 14829 }, { "epoch": 0.9849239556352527, "grad_norm": 157.0601348876953, "learning_rate": 1.0729628856571875e-06, "loss": 14.5625, "step": 14830 }, { "epoch": 0.9849903699276085, "grad_norm": 168.82302856445312, "learning_rate": 1.0728556199416478e-06, "loss": 18.6875, "step": 14831 }, { "epoch": 0.9850567842199641, "grad_norm": 386.9909973144531, "learning_rate": 1.0727483533833564e-06, "loss": 14.3125, "step": 14832 }, { "epoch": 0.9851231985123199, "grad_norm": 179.28651428222656, "learning_rate": 1.0726410859835537e-06, "loss": 14.7031, "step": 14833 }, { "epoch": 0.9851896128046755, "grad_norm": 108.75254821777344, "learning_rate": 1.0725338177434813e-06, "loss": 12.1094, "step": 14834 }, { "epoch": 0.9852560270970313, "grad_norm": 343.5047912597656, "learning_rate": 1.0724265486643792e-06, "loss": 21.6562, "step": 14835 }, { "epoch": 0.985322441389387, "grad_norm": 336.49285888671875, "learning_rate": 1.0723192787474887e-06, "loss": 17.2031, "step": 14836 }, { "epoch": 0.9853888556817427, "grad_norm": 130.7655487060547, "learning_rate": 1.0722120079940507e-06, "loss": 18.1094, "step": 14837 }, { "epoch": 0.9854552699740984, "grad_norm": 267.5780334472656, "learning_rate": 1.0721047364053054e-06, "loss": 16.4219, "step": 14838 }, { "epoch": 0.9855216842664541, "grad_norm": 205.72433471679688, "learning_rate": 1.0719974639824943e-06, "loss": 16.5, "step": 14839 }, { "epoch": 0.9855880985588099, "grad_norm": 136.1444091796875, "learning_rate": 1.0718901907268579e-06, "loss": 14.1406, "step": 14840 }, { "epoch": 0.9856545128511656, "grad_norm": 241.2274627685547, "learning_rate": 1.0717829166396375e-06, "loss": 13.3125, "step": 14841 }, { "epoch": 0.9857209271435213, "grad_norm": 131.7313232421875, "learning_rate": 1.0716756417220734e-06, "loss": 14.875, "step": 14842 }, { "epoch": 0.985787341435877, "grad_norm": 140.4283447265625, "learning_rate": 1.0715683659754069e-06, "loss": 14.9062, "step": 14843 }, { "epoch": 0.9858537557282328, "grad_norm": 137.42320251464844, "learning_rate": 1.071461089400879e-06, "loss": 14.8438, "step": 14844 }, { "epoch": 0.9859201700205884, "grad_norm": 168.83755493164062, "learning_rate": 1.0713538119997302e-06, "loss": 15.375, "step": 14845 }, { "epoch": 0.9859865843129442, "grad_norm": 323.74481201171875, "learning_rate": 1.0712465337732017e-06, "loss": 17.5781, "step": 14846 }, { "epoch": 0.9860529986052998, "grad_norm": 412.5177001953125, "learning_rate": 1.0711392547225344e-06, "loss": 14.5469, "step": 14847 }, { "epoch": 0.9861194128976556, "grad_norm": 2264.30517578125, "learning_rate": 1.0710319748489694e-06, "loss": 22.6719, "step": 14848 }, { "epoch": 0.9861858271900112, "grad_norm": 202.80360412597656, "learning_rate": 1.0709246941537473e-06, "loss": 18.2656, "step": 14849 }, { "epoch": 0.986252241482367, "grad_norm": 147.66958618164062, "learning_rate": 1.0708174126381091e-06, "loss": 15.5156, "step": 14850 }, { "epoch": 0.9863186557747228, "grad_norm": 323.5301513671875, "learning_rate": 1.070710130303296e-06, "loss": 19.5312, "step": 14851 }, { "epoch": 0.9863850700670784, "grad_norm": 281.5103759765625, "learning_rate": 1.0706028471505491e-06, "loss": 17.1719, "step": 14852 }, { "epoch": 0.9864514843594342, "grad_norm": 194.8575439453125, "learning_rate": 1.070495563181109e-06, "loss": 19.7969, "step": 14853 }, { "epoch": 0.9865178986517898, "grad_norm": 289.7232666015625, "learning_rate": 1.0703882783962168e-06, "loss": 17.7188, "step": 14854 }, { "epoch": 0.9865843129441456, "grad_norm": 197.3006134033203, "learning_rate": 1.0702809927971137e-06, "loss": 16.8125, "step": 14855 }, { "epoch": 0.9866507272365013, "grad_norm": 207.06857299804688, "learning_rate": 1.0701737063850407e-06, "loss": 15.7344, "step": 14856 }, { "epoch": 0.986717141528857, "grad_norm": 250.3455352783203, "learning_rate": 1.0700664191612384e-06, "loss": 16.5078, "step": 14857 }, { "epoch": 0.9867835558212127, "grad_norm": 171.4378662109375, "learning_rate": 1.0699591311269484e-06, "loss": 18.5156, "step": 14858 }, { "epoch": 0.9868499701135685, "grad_norm": 248.54269409179688, "learning_rate": 1.0698518422834116e-06, "loss": 15.8438, "step": 14859 }, { "epoch": 0.9869163844059241, "grad_norm": 189.97361755371094, "learning_rate": 1.069744552631869e-06, "loss": 14.125, "step": 14860 }, { "epoch": 0.9869827986982799, "grad_norm": 157.48585510253906, "learning_rate": 1.0696372621735612e-06, "loss": 15.5312, "step": 14861 }, { "epoch": 0.9870492129906356, "grad_norm": 175.01187133789062, "learning_rate": 1.06952997090973e-06, "loss": 14.9844, "step": 14862 }, { "epoch": 0.9871156272829913, "grad_norm": 89.98109436035156, "learning_rate": 1.0694226788416164e-06, "loss": 13.9062, "step": 14863 }, { "epoch": 0.9871820415753471, "grad_norm": 249.11761474609375, "learning_rate": 1.069315385970461e-06, "loss": 15.7812, "step": 14864 }, { "epoch": 0.9872484558677027, "grad_norm": 281.4459533691406, "learning_rate": 1.069208092297505e-06, "loss": 17.5938, "step": 14865 }, { "epoch": 0.9873148701600585, "grad_norm": 617.1522827148438, "learning_rate": 1.0691007978239901e-06, "loss": 16.1406, "step": 14866 }, { "epoch": 0.9873812844524141, "grad_norm": 131.91329956054688, "learning_rate": 1.0689935025511567e-06, "loss": 15.5156, "step": 14867 }, { "epoch": 0.9874476987447699, "grad_norm": 173.39633178710938, "learning_rate": 1.0688862064802465e-06, "loss": 15.4375, "step": 14868 }, { "epoch": 0.9875141130371256, "grad_norm": 244.85072326660156, "learning_rate": 1.0687789096125004e-06, "loss": 15.7656, "step": 14869 }, { "epoch": 0.9875805273294813, "grad_norm": 173.97682189941406, "learning_rate": 1.0686716119491591e-06, "loss": 11.8906, "step": 14870 }, { "epoch": 0.987646941621837, "grad_norm": 276.4430236816406, "learning_rate": 1.0685643134914647e-06, "loss": 19.7188, "step": 14871 }, { "epoch": 0.9877133559141927, "grad_norm": 281.599853515625, "learning_rate": 1.0684570142406575e-06, "loss": 16.6719, "step": 14872 }, { "epoch": 0.9877797702065485, "grad_norm": 440.79193115234375, "learning_rate": 1.0683497141979793e-06, "loss": 24.1875, "step": 14873 }, { "epoch": 0.9878461844989042, "grad_norm": 213.96295166015625, "learning_rate": 1.068242413364671e-06, "loss": 16.4062, "step": 14874 }, { "epoch": 0.9879125987912599, "grad_norm": 167.6053009033203, "learning_rate": 1.0681351117419737e-06, "loss": 18.3906, "step": 14875 }, { "epoch": 0.9879790130836156, "grad_norm": 678.8952026367188, "learning_rate": 1.0680278093311286e-06, "loss": 23.5625, "step": 14876 }, { "epoch": 0.9880454273759713, "grad_norm": 279.1181335449219, "learning_rate": 1.0679205061333774e-06, "loss": 18.3594, "step": 14877 }, { "epoch": 0.988111841668327, "grad_norm": 99.21804809570312, "learning_rate": 1.0678132021499606e-06, "loss": 11.5156, "step": 14878 }, { "epoch": 0.9881782559606828, "grad_norm": 324.88494873046875, "learning_rate": 1.0677058973821197e-06, "loss": 19.9766, "step": 14879 }, { "epoch": 0.9882446702530384, "grad_norm": 340.9894714355469, "learning_rate": 1.0675985918310965e-06, "loss": 17.6094, "step": 14880 }, { "epoch": 0.9883110845453942, "grad_norm": 245.26290893554688, "learning_rate": 1.0674912854981316e-06, "loss": 17.8281, "step": 14881 }, { "epoch": 0.9883774988377498, "grad_norm": 139.43931579589844, "learning_rate": 1.0673839783844662e-06, "loss": 12.0312, "step": 14882 }, { "epoch": 0.9884439131301056, "grad_norm": 1156.4609375, "learning_rate": 1.0672766704913418e-06, "loss": 11.875, "step": 14883 }, { "epoch": 0.9885103274224614, "grad_norm": 168.164794921875, "learning_rate": 1.06716936182e-06, "loss": 14.375, "step": 14884 }, { "epoch": 0.988576741714817, "grad_norm": 477.3559265136719, "learning_rate": 1.0670620523716816e-06, "loss": 21.7656, "step": 14885 }, { "epoch": 0.9886431560071728, "grad_norm": 309.1903991699219, "learning_rate": 1.066954742147628e-06, "loss": 24.9375, "step": 14886 }, { "epoch": 0.9887095702995284, "grad_norm": 175.4017791748047, "learning_rate": 1.0668474311490802e-06, "loss": 14.0469, "step": 14887 }, { "epoch": 0.9887759845918842, "grad_norm": 425.16192626953125, "learning_rate": 1.0667401193772804e-06, "loss": 16.7344, "step": 14888 }, { "epoch": 0.9888423988842399, "grad_norm": 125.59088897705078, "learning_rate": 1.0666328068334693e-06, "loss": 13.4531, "step": 14889 }, { "epoch": 0.9889088131765956, "grad_norm": 134.09197998046875, "learning_rate": 1.066525493518888e-06, "loss": 13.4062, "step": 14890 }, { "epoch": 0.9889752274689513, "grad_norm": 179.40077209472656, "learning_rate": 1.0664181794347787e-06, "loss": 15.5469, "step": 14891 }, { "epoch": 0.989041641761307, "grad_norm": 155.40786743164062, "learning_rate": 1.0663108645823818e-06, "loss": 14.2969, "step": 14892 }, { "epoch": 0.9891080560536627, "grad_norm": 158.49392700195312, "learning_rate": 1.066203548962939e-06, "loss": 17.1094, "step": 14893 }, { "epoch": 0.9891744703460185, "grad_norm": 279.5341796875, "learning_rate": 1.0660962325776919e-06, "loss": 19.1562, "step": 14894 }, { "epoch": 0.9892408846383742, "grad_norm": 151.97633361816406, "learning_rate": 1.0659889154278816e-06, "loss": 15.0938, "step": 14895 }, { "epoch": 0.9893072989307299, "grad_norm": 177.81802368164062, "learning_rate": 1.0658815975147496e-06, "loss": 15.3281, "step": 14896 }, { "epoch": 0.9893737132230856, "grad_norm": 192.3904266357422, "learning_rate": 1.0657742788395372e-06, "loss": 14.5, "step": 14897 }, { "epoch": 0.9894401275154413, "grad_norm": 495.9021911621094, "learning_rate": 1.0656669594034862e-06, "loss": 21.75, "step": 14898 }, { "epoch": 0.9895065418077971, "grad_norm": 119.9177017211914, "learning_rate": 1.0655596392078374e-06, "loss": 17.8359, "step": 14899 }, { "epoch": 0.9895729561001527, "grad_norm": 296.9478454589844, "learning_rate": 1.0654523182538323e-06, "loss": 15.5625, "step": 14900 }, { "epoch": 0.9896393703925085, "grad_norm": 162.0088653564453, "learning_rate": 1.0653449965427128e-06, "loss": 18.6562, "step": 14901 }, { "epoch": 0.9897057846848641, "grad_norm": 268.60064697265625, "learning_rate": 1.06523767407572e-06, "loss": 15.5312, "step": 14902 }, { "epoch": 0.9897721989772199, "grad_norm": 169.1877899169922, "learning_rate": 1.0651303508540953e-06, "loss": 22.4688, "step": 14903 }, { "epoch": 0.9898386132695756, "grad_norm": 310.0061340332031, "learning_rate": 1.0650230268790803e-06, "loss": 19.75, "step": 14904 }, { "epoch": 0.9899050275619313, "grad_norm": 377.70184326171875, "learning_rate": 1.0649157021519166e-06, "loss": 25.4375, "step": 14905 }, { "epoch": 0.9899714418542871, "grad_norm": 163.79002380371094, "learning_rate": 1.0648083766738455e-06, "loss": 15.75, "step": 14906 }, { "epoch": 0.9900378561466427, "grad_norm": 152.9989013671875, "learning_rate": 1.0647010504461083e-06, "loss": 12.0469, "step": 14907 }, { "epoch": 0.9901042704389985, "grad_norm": 361.9524230957031, "learning_rate": 1.0645937234699465e-06, "loss": 17.1562, "step": 14908 }, { "epoch": 0.9901706847313542, "grad_norm": 248.20925903320312, "learning_rate": 1.0644863957466022e-06, "loss": 21.1562, "step": 14909 }, { "epoch": 0.9902370990237099, "grad_norm": 246.14395141601562, "learning_rate": 1.0643790672773162e-06, "loss": 17.25, "step": 14910 }, { "epoch": 0.9903035133160656, "grad_norm": 126.3523941040039, "learning_rate": 1.0642717380633302e-06, "loss": 15.875, "step": 14911 }, { "epoch": 0.9903699276084214, "grad_norm": 199.72023010253906, "learning_rate": 1.064164408105886e-06, "loss": 17.6875, "step": 14912 }, { "epoch": 0.990436341900777, "grad_norm": 145.46485900878906, "learning_rate": 1.0640570774062247e-06, "loss": 17.1641, "step": 14913 }, { "epoch": 0.9905027561931328, "grad_norm": 277.1904296875, "learning_rate": 1.0639497459655881e-06, "loss": 21.1562, "step": 14914 }, { "epoch": 0.9905691704854884, "grad_norm": 188.5623321533203, "learning_rate": 1.0638424137852179e-06, "loss": 15.9688, "step": 14915 }, { "epoch": 0.9906355847778442, "grad_norm": 573.5348510742188, "learning_rate": 1.0637350808663555e-06, "loss": 32.5938, "step": 14916 }, { "epoch": 0.9907019990702, "grad_norm": 133.4594268798828, "learning_rate": 1.063627747210242e-06, "loss": 15.9844, "step": 14917 }, { "epoch": 0.9907684133625556, "grad_norm": 168.1111297607422, "learning_rate": 1.06352041281812e-06, "loss": 14.9219, "step": 14918 }, { "epoch": 0.9908348276549114, "grad_norm": 238.5756378173828, "learning_rate": 1.0634130776912303e-06, "loss": 26.7344, "step": 14919 }, { "epoch": 0.990901241947267, "grad_norm": 229.67724609375, "learning_rate": 1.0633057418308144e-06, "loss": 18.5625, "step": 14920 }, { "epoch": 0.9909676562396228, "grad_norm": 169.14990234375, "learning_rate": 1.0631984052381144e-06, "loss": 21.1094, "step": 14921 }, { "epoch": 0.9910340705319785, "grad_norm": 210.88929748535156, "learning_rate": 1.0630910679143716e-06, "loss": 18.625, "step": 14922 }, { "epoch": 0.9911004848243342, "grad_norm": 147.9856719970703, "learning_rate": 1.062983729860828e-06, "loss": 17.6562, "step": 14923 }, { "epoch": 0.9911668991166899, "grad_norm": 162.52584838867188, "learning_rate": 1.0628763910787245e-06, "loss": 15.7656, "step": 14924 }, { "epoch": 0.9912333134090456, "grad_norm": 183.45089721679688, "learning_rate": 1.0627690515693032e-06, "loss": 13.0625, "step": 14925 }, { "epoch": 0.9912997277014013, "grad_norm": 363.5160827636719, "learning_rate": 1.0626617113338059e-06, "loss": 17.8906, "step": 14926 }, { "epoch": 0.991366141993757, "grad_norm": 151.18484497070312, "learning_rate": 1.0625543703734743e-06, "loss": 19.3281, "step": 14927 }, { "epoch": 0.9914325562861128, "grad_norm": 186.7537841796875, "learning_rate": 1.0624470286895495e-06, "loss": 17.375, "step": 14928 }, { "epoch": 0.9914989705784685, "grad_norm": 123.6052474975586, "learning_rate": 1.0623396862832734e-06, "loss": 11.9844, "step": 14929 }, { "epoch": 0.9915653848708242, "grad_norm": 367.85137939453125, "learning_rate": 1.062232343155888e-06, "loss": 20.8906, "step": 14930 }, { "epoch": 0.9916317991631799, "grad_norm": 378.08709716796875, "learning_rate": 1.0621249993086346e-06, "loss": 19.4375, "step": 14931 }, { "epoch": 0.9916982134555357, "grad_norm": 149.37557983398438, "learning_rate": 1.062017654742755e-06, "loss": 12.5547, "step": 14932 }, { "epoch": 0.9917646277478913, "grad_norm": 358.1175537109375, "learning_rate": 1.0619103094594908e-06, "loss": 14.4688, "step": 14933 }, { "epoch": 0.9918310420402471, "grad_norm": 152.85389709472656, "learning_rate": 1.0618029634600842e-06, "loss": 15.875, "step": 14934 }, { "epoch": 0.9918974563326027, "grad_norm": 184.9801483154297, "learning_rate": 1.0616956167457764e-06, "loss": 14.7188, "step": 14935 }, { "epoch": 0.9919638706249585, "grad_norm": 279.6256103515625, "learning_rate": 1.0615882693178091e-06, "loss": 26.0625, "step": 14936 }, { "epoch": 0.9920302849173142, "grad_norm": 131.41702270507812, "learning_rate": 1.0614809211774244e-06, "loss": 16.1094, "step": 14937 }, { "epoch": 0.9920966992096699, "grad_norm": 143.8179168701172, "learning_rate": 1.061373572325864e-06, "loss": 14.7188, "step": 14938 }, { "epoch": 0.9921631135020257, "grad_norm": 653.9718627929688, "learning_rate": 1.0612662227643689e-06, "loss": 21.8594, "step": 14939 }, { "epoch": 0.9922295277943813, "grad_norm": 115.7454833984375, "learning_rate": 1.061158872494182e-06, "loss": 15.1406, "step": 14940 }, { "epoch": 0.9922959420867371, "grad_norm": 211.5092315673828, "learning_rate": 1.0610515215165445e-06, "loss": 14.7969, "step": 14941 }, { "epoch": 0.9923623563790928, "grad_norm": 284.1727294921875, "learning_rate": 1.060944169832698e-06, "loss": 16.4531, "step": 14942 }, { "epoch": 0.9924287706714485, "grad_norm": 244.53086853027344, "learning_rate": 1.0608368174438846e-06, "loss": 17.2812, "step": 14943 }, { "epoch": 0.9924951849638042, "grad_norm": 212.1380615234375, "learning_rate": 1.0607294643513463e-06, "loss": 20.375, "step": 14944 }, { "epoch": 0.9925615992561599, "grad_norm": 277.45709228515625, "learning_rate": 1.0606221105563243e-06, "loss": 18.3906, "step": 14945 }, { "epoch": 0.9926280135485156, "grad_norm": 532.9578247070312, "learning_rate": 1.0605147560600606e-06, "loss": 25.2188, "step": 14946 }, { "epoch": 0.9926944278408714, "grad_norm": 161.4566650390625, "learning_rate": 1.0604074008637968e-06, "loss": 13.9688, "step": 14947 }, { "epoch": 0.992760842133227, "grad_norm": 235.56088256835938, "learning_rate": 1.0603000449687755e-06, "loss": 20.8906, "step": 14948 }, { "epoch": 0.9928272564255828, "grad_norm": 162.45567321777344, "learning_rate": 1.060192688376238e-06, "loss": 14.75, "step": 14949 }, { "epoch": 0.9928936707179385, "grad_norm": 272.6230163574219, "learning_rate": 1.060085331087426e-06, "loss": 16.5312, "step": 14950 }, { "epoch": 0.9929600850102942, "grad_norm": 284.98101806640625, "learning_rate": 1.0599779731035818e-06, "loss": 17.5625, "step": 14951 }, { "epoch": 0.99302649930265, "grad_norm": 189.673583984375, "learning_rate": 1.0598706144259467e-06, "loss": 13.4219, "step": 14952 }, { "epoch": 0.9930929135950056, "grad_norm": 214.3497314453125, "learning_rate": 1.0597632550557632e-06, "loss": 15.2344, "step": 14953 }, { "epoch": 0.9931593278873614, "grad_norm": 195.63365173339844, "learning_rate": 1.0596558949942723e-06, "loss": 15.625, "step": 14954 }, { "epoch": 0.993225742179717, "grad_norm": 459.1214599609375, "learning_rate": 1.0595485342427171e-06, "loss": 15.2812, "step": 14955 }, { "epoch": 0.9932921564720728, "grad_norm": 333.8574523925781, "learning_rate": 1.0594411728023382e-06, "loss": 18.6562, "step": 14956 }, { "epoch": 0.9933585707644285, "grad_norm": 346.8373107910156, "learning_rate": 1.0593338106743784e-06, "loss": 11.5781, "step": 14957 }, { "epoch": 0.9934249850567842, "grad_norm": 196.09085083007812, "learning_rate": 1.059226447860079e-06, "loss": 20.2188, "step": 14958 }, { "epoch": 0.9934913993491399, "grad_norm": 219.27427673339844, "learning_rate": 1.0591190843606824e-06, "loss": 16.8438, "step": 14959 }, { "epoch": 0.9935578136414956, "grad_norm": 275.2781982421875, "learning_rate": 1.0590117201774304e-06, "loss": 14.3594, "step": 14960 }, { "epoch": 0.9936242279338514, "grad_norm": 176.54086303710938, "learning_rate": 1.058904355311565e-06, "loss": 17.6406, "step": 14961 }, { "epoch": 0.9936906422262071, "grad_norm": 133.29806518554688, "learning_rate": 1.0587969897643277e-06, "loss": 15.0156, "step": 14962 }, { "epoch": 0.9937570565185628, "grad_norm": 544.7841186523438, "learning_rate": 1.0586896235369607e-06, "loss": 17.2031, "step": 14963 }, { "epoch": 0.9938234708109185, "grad_norm": 151.34005737304688, "learning_rate": 1.0585822566307062e-06, "loss": 11.5938, "step": 14964 }, { "epoch": 0.9938898851032743, "grad_norm": 184.46470642089844, "learning_rate": 1.0584748890468055e-06, "loss": 15.2969, "step": 14965 }, { "epoch": 0.9939562993956299, "grad_norm": 508.7340393066406, "learning_rate": 1.0583675207865015e-06, "loss": 20.5312, "step": 14966 }, { "epoch": 0.9940227136879857, "grad_norm": 154.02931213378906, "learning_rate": 1.0582601518510355e-06, "loss": 16.3047, "step": 14967 }, { "epoch": 0.9940891279803413, "grad_norm": 237.06405639648438, "learning_rate": 1.0581527822416495e-06, "loss": 17.7188, "step": 14968 }, { "epoch": 0.9941555422726971, "grad_norm": 353.858154296875, "learning_rate": 1.0580454119595858e-06, "loss": 17.1719, "step": 14969 }, { "epoch": 0.9942219565650527, "grad_norm": 126.31645202636719, "learning_rate": 1.0579380410060862e-06, "loss": 13.2344, "step": 14970 }, { "epoch": 0.9942883708574085, "grad_norm": 265.4424133300781, "learning_rate": 1.0578306693823928e-06, "loss": 19.1719, "step": 14971 }, { "epoch": 0.9943547851497643, "grad_norm": 566.3740234375, "learning_rate": 1.0577232970897476e-06, "loss": 21.3594, "step": 14972 }, { "epoch": 0.9944211994421199, "grad_norm": 222.88922119140625, "learning_rate": 1.0576159241293928e-06, "loss": 24.1875, "step": 14973 }, { "epoch": 0.9944876137344757, "grad_norm": 200.6878662109375, "learning_rate": 1.0575085505025696e-06, "loss": 18.3281, "step": 14974 }, { "epoch": 0.9945540280268313, "grad_norm": 147.0557403564453, "learning_rate": 1.057401176210521e-06, "loss": 16.0156, "step": 14975 }, { "epoch": 0.9946204423191871, "grad_norm": 239.0738983154297, "learning_rate": 1.0572938012544888e-06, "loss": 15.0781, "step": 14976 }, { "epoch": 0.9946868566115428, "grad_norm": 205.94415283203125, "learning_rate": 1.0571864256357145e-06, "loss": 15.9844, "step": 14977 }, { "epoch": 0.9947532709038985, "grad_norm": 344.6522521972656, "learning_rate": 1.057079049355441e-06, "loss": 20.3906, "step": 14978 }, { "epoch": 0.9948196851962542, "grad_norm": 237.3756103515625, "learning_rate": 1.0569716724149097e-06, "loss": 20.4062, "step": 14979 }, { "epoch": 0.99488609948861, "grad_norm": 191.35975646972656, "learning_rate": 1.0568642948153635e-06, "loss": 16.7812, "step": 14980 }, { "epoch": 0.9949525137809656, "grad_norm": 388.4723205566406, "learning_rate": 1.0567569165580435e-06, "loss": 15.3438, "step": 14981 }, { "epoch": 0.9950189280733214, "grad_norm": 155.62713623046875, "learning_rate": 1.0566495376441923e-06, "loss": 16.2031, "step": 14982 }, { "epoch": 0.9950853423656771, "grad_norm": 223.6415252685547, "learning_rate": 1.056542158075052e-06, "loss": 18.8125, "step": 14983 }, { "epoch": 0.9951517566580328, "grad_norm": 276.31317138671875, "learning_rate": 1.0564347778518642e-06, "loss": 21.75, "step": 14984 }, { "epoch": 0.9952181709503886, "grad_norm": 192.2698516845703, "learning_rate": 1.056327396975872e-06, "loss": 23.4219, "step": 14985 }, { "epoch": 0.9952845852427442, "grad_norm": 303.5063781738281, "learning_rate": 1.0562200154483166e-06, "loss": 25.6406, "step": 14986 }, { "epoch": 0.9953509995351, "grad_norm": 319.51165771484375, "learning_rate": 1.0561126332704406e-06, "loss": 20.3438, "step": 14987 }, { "epoch": 0.9954174138274556, "grad_norm": 141.10601806640625, "learning_rate": 1.056005250443486e-06, "loss": 9.9062, "step": 14988 }, { "epoch": 0.9954838281198114, "grad_norm": 240.1409912109375, "learning_rate": 1.0558978669686948e-06, "loss": 16.0469, "step": 14989 }, { "epoch": 0.995550242412167, "grad_norm": 301.73065185546875, "learning_rate": 1.0557904828473096e-06, "loss": 21.1562, "step": 14990 }, { "epoch": 0.9956166567045228, "grad_norm": 546.6434936523438, "learning_rate": 1.055683098080572e-06, "loss": 14.7344, "step": 14991 }, { "epoch": 0.9956830709968786, "grad_norm": 151.99366760253906, "learning_rate": 1.0555757126697247e-06, "loss": 16.0312, "step": 14992 }, { "epoch": 0.9957494852892342, "grad_norm": 470.5895690917969, "learning_rate": 1.0554683266160093e-06, "loss": 13.2188, "step": 14993 }, { "epoch": 0.99581589958159, "grad_norm": 352.9528503417969, "learning_rate": 1.0553609399206687e-06, "loss": 13.3125, "step": 14994 }, { "epoch": 0.9958823138739457, "grad_norm": 281.14898681640625, "learning_rate": 1.0552535525849444e-06, "loss": 13.3125, "step": 14995 }, { "epoch": 0.9959487281663014, "grad_norm": 125.04069519042969, "learning_rate": 1.0551461646100787e-06, "loss": 19.1406, "step": 14996 }, { "epoch": 0.9960151424586571, "grad_norm": 244.41485595703125, "learning_rate": 1.0550387759973142e-06, "loss": 21.5625, "step": 14997 }, { "epoch": 0.9960815567510128, "grad_norm": 263.722412109375, "learning_rate": 1.054931386747893e-06, "loss": 16.3906, "step": 14998 }, { "epoch": 0.9961479710433685, "grad_norm": 312.4307556152344, "learning_rate": 1.054823996863057e-06, "loss": 20.1719, "step": 14999 }, { "epoch": 0.9962143853357243, "grad_norm": 561.3172607421875, "learning_rate": 1.0547166063440484e-06, "loss": 18.25, "step": 15000 }, { "epoch": 0.9962807996280799, "grad_norm": 164.34951782226562, "learning_rate": 1.05460921519211e-06, "loss": 15.375, "step": 15001 }, { "epoch": 0.9963472139204357, "grad_norm": 196.8488311767578, "learning_rate": 1.0545018234084837e-06, "loss": 15.7344, "step": 15002 }, { "epoch": 0.9964136282127914, "grad_norm": 244.21502685546875, "learning_rate": 1.0543944309944115e-06, "loss": 20.6406, "step": 15003 }, { "epoch": 0.9964800425051471, "grad_norm": 212.2190399169922, "learning_rate": 1.054287037951136e-06, "loss": 23.75, "step": 15004 }, { "epoch": 0.9965464567975029, "grad_norm": 232.71629333496094, "learning_rate": 1.0541796442798996e-06, "loss": 18.8438, "step": 15005 }, { "epoch": 0.9966128710898585, "grad_norm": 154.31065368652344, "learning_rate": 1.054072249981944e-06, "loss": 14.2656, "step": 15006 }, { "epoch": 0.9966792853822143, "grad_norm": 261.7049255371094, "learning_rate": 1.0539648550585116e-06, "loss": 17.2109, "step": 15007 }, { "epoch": 0.9967456996745699, "grad_norm": 162.74856567382812, "learning_rate": 1.053857459510845e-06, "loss": 15.7734, "step": 15008 }, { "epoch": 0.9968121139669257, "grad_norm": 375.2406311035156, "learning_rate": 1.0537500633401868e-06, "loss": 23.9375, "step": 15009 }, { "epoch": 0.9968785282592814, "grad_norm": 132.68637084960938, "learning_rate": 1.0536426665477785e-06, "loss": 12.6406, "step": 15010 }, { "epoch": 0.9969449425516371, "grad_norm": 165.25399780273438, "learning_rate": 1.0535352691348628e-06, "loss": 15.5469, "step": 15011 }, { "epoch": 0.9970113568439928, "grad_norm": 208.1719970703125, "learning_rate": 1.053427871102682e-06, "loss": 15.1875, "step": 15012 }, { "epoch": 0.9970777711363485, "grad_norm": 128.95870971679688, "learning_rate": 1.0533204724524783e-06, "loss": 13.6562, "step": 15013 }, { "epoch": 0.9971441854287043, "grad_norm": 131.82557678222656, "learning_rate": 1.053213073185494e-06, "loss": 12.4531, "step": 15014 }, { "epoch": 0.99721059972106, "grad_norm": 110.79936218261719, "learning_rate": 1.0531056733029719e-06, "loss": 19.0625, "step": 15015 }, { "epoch": 0.9972770140134157, "grad_norm": 535.8395385742188, "learning_rate": 1.0529982728061539e-06, "loss": 13.8906, "step": 15016 }, { "epoch": 0.9973434283057714, "grad_norm": 298.0881042480469, "learning_rate": 1.0528908716962824e-06, "loss": 25.5938, "step": 15017 }, { "epoch": 0.9974098425981271, "grad_norm": 111.2623291015625, "learning_rate": 1.0527834699745997e-06, "loss": 16.0469, "step": 15018 }, { "epoch": 0.9974762568904828, "grad_norm": 416.74017333984375, "learning_rate": 1.0526760676423485e-06, "loss": 20.6562, "step": 15019 }, { "epoch": 0.9975426711828386, "grad_norm": 245.97450256347656, "learning_rate": 1.0525686647007708e-06, "loss": 14.0469, "step": 15020 }, { "epoch": 0.9976090854751942, "grad_norm": 272.8262939453125, "learning_rate": 1.052461261151109e-06, "loss": 16.7656, "step": 15021 }, { "epoch": 0.99767549976755, "grad_norm": 406.7048645019531, "learning_rate": 1.0523538569946056e-06, "loss": 15.7188, "step": 15022 }, { "epoch": 0.9977419140599056, "grad_norm": 111.59500885009766, "learning_rate": 1.0522464522325031e-06, "loss": 13.1094, "step": 15023 }, { "epoch": 0.9978083283522614, "grad_norm": 169.2578582763672, "learning_rate": 1.0521390468660436e-06, "loss": 17.1094, "step": 15024 }, { "epoch": 0.9978747426446172, "grad_norm": 194.09674072265625, "learning_rate": 1.0520316408964699e-06, "loss": 15.2344, "step": 15025 }, { "epoch": 0.9979411569369728, "grad_norm": 248.14454650878906, "learning_rate": 1.0519242343250242e-06, "loss": 18.4688, "step": 15026 }, { "epoch": 0.9980075712293286, "grad_norm": 283.8114929199219, "learning_rate": 1.0518168271529487e-06, "loss": 12.9688, "step": 15027 }, { "epoch": 0.9980739855216842, "grad_norm": 435.1947937011719, "learning_rate": 1.0517094193814858e-06, "loss": 26.0938, "step": 15028 }, { "epoch": 0.99814039981404, "grad_norm": 171.5227813720703, "learning_rate": 1.0516020110118784e-06, "loss": 15.1875, "step": 15029 }, { "epoch": 0.9982068141063957, "grad_norm": 408.00726318359375, "learning_rate": 1.051494602045369e-06, "loss": 17.4844, "step": 15030 }, { "epoch": 0.9982732283987514, "grad_norm": 233.953369140625, "learning_rate": 1.0513871924831992e-06, "loss": 20.3125, "step": 15031 }, { "epoch": 0.9983396426911071, "grad_norm": 288.5137939453125, "learning_rate": 1.0512797823266122e-06, "loss": 14.8125, "step": 15032 }, { "epoch": 0.9984060569834629, "grad_norm": 365.3363952636719, "learning_rate": 1.0511723715768504e-06, "loss": 22.9844, "step": 15033 }, { "epoch": 0.9984724712758185, "grad_norm": 128.06988525390625, "learning_rate": 1.051064960235156e-06, "loss": 14.9375, "step": 15034 }, { "epoch": 0.9985388855681743, "grad_norm": 499.80755615234375, "learning_rate": 1.0509575483027714e-06, "loss": 22.5312, "step": 15035 }, { "epoch": 0.99860529986053, "grad_norm": 241.16310119628906, "learning_rate": 1.0508501357809396e-06, "loss": 16.5781, "step": 15036 }, { "epoch": 0.9986717141528857, "grad_norm": 142.4352569580078, "learning_rate": 1.0507427226709027e-06, "loss": 13.375, "step": 15037 }, { "epoch": 0.9987381284452415, "grad_norm": 140.42210388183594, "learning_rate": 1.0506353089739031e-06, "loss": 18.8438, "step": 15038 }, { "epoch": 0.9988045427375971, "grad_norm": 198.20303344726562, "learning_rate": 1.0505278946911834e-06, "loss": 16.3438, "step": 15039 }, { "epoch": 0.9988709570299529, "grad_norm": 779.9288330078125, "learning_rate": 1.0504204798239862e-06, "loss": 17.5938, "step": 15040 }, { "epoch": 0.9989373713223085, "grad_norm": 420.93902587890625, "learning_rate": 1.0503130643735543e-06, "loss": 15.4531, "step": 15041 }, { "epoch": 0.9990037856146643, "grad_norm": 387.03375244140625, "learning_rate": 1.0502056483411295e-06, "loss": 19.4531, "step": 15042 }, { "epoch": 0.99907019990702, "grad_norm": 253.489990234375, "learning_rate": 1.0500982317279547e-06, "loss": 17.4062, "step": 15043 }, { "epoch": 0.9991366141993757, "grad_norm": 661.278076171875, "learning_rate": 1.0499908145352726e-06, "loss": 18.5312, "step": 15044 }, { "epoch": 0.9992030284917314, "grad_norm": 223.9980926513672, "learning_rate": 1.0498833967643255e-06, "loss": 14.5469, "step": 15045 }, { "epoch": 0.9992694427840871, "grad_norm": 423.0218200683594, "learning_rate": 1.049775978416356e-06, "loss": 19.7188, "step": 15046 }, { "epoch": 0.9993358570764429, "grad_norm": 229.80825805664062, "learning_rate": 1.0496685594926067e-06, "loss": 16.5312, "step": 15047 }, { "epoch": 0.9994022713687986, "grad_norm": 132.1520538330078, "learning_rate": 1.0495611399943204e-06, "loss": 17.5312, "step": 15048 }, { "epoch": 0.9994686856611543, "grad_norm": 413.3098449707031, "learning_rate": 1.049453719922739e-06, "loss": 14.6406, "step": 15049 }, { "epoch": 0.99953509995351, "grad_norm": 394.10345458984375, "learning_rate": 1.0493462992791056e-06, "loss": 17.5469, "step": 15050 }, { "epoch": 0.9996015142458657, "grad_norm": 445.18499755859375, "learning_rate": 1.0492388780646628e-06, "loss": 14.3281, "step": 15051 }, { "epoch": 0.9996679285382214, "grad_norm": 439.6500244140625, "learning_rate": 1.049131456280653e-06, "loss": 16.1094, "step": 15052 }, { "epoch": 0.9997343428305772, "grad_norm": 341.9541015625, "learning_rate": 1.0490240339283188e-06, "loss": 19.9219, "step": 15053 }, { "epoch": 0.9998007571229328, "grad_norm": 160.63246154785156, "learning_rate": 1.0489166110089028e-06, "loss": 15.9844, "step": 15054 }, { "epoch": 0.9998671714152886, "grad_norm": 658.4697875976562, "learning_rate": 1.0488091875236478e-06, "loss": 15.3594, "step": 15055 }, { "epoch": 0.9999335857076442, "grad_norm": 112.68948364257812, "learning_rate": 1.0487017634737963e-06, "loss": 12.3125, "step": 15056 }, { "epoch": 1.0, "grad_norm": 317.928955078125, "learning_rate": 1.0485943388605908e-06, "loss": 17.7656, "step": 15057 }, { "epoch": 1.0000664142923557, "grad_norm": 1180.0670166015625, "learning_rate": 1.048486913685274e-06, "loss": 13.0625, "step": 15058 }, { "epoch": 1.0001328285847115, "grad_norm": 274.2171936035156, "learning_rate": 1.0483794879490884e-06, "loss": 15.9453, "step": 15059 }, { "epoch": 1.0001992428770672, "grad_norm": 164.62158203125, "learning_rate": 1.0482720616532769e-06, "loss": 17.9062, "step": 15060 }, { "epoch": 1.0002656571694228, "grad_norm": 376.88226318359375, "learning_rate": 1.048164634799082e-06, "loss": 14.5938, "step": 15061 }, { "epoch": 1.0003320714617785, "grad_norm": 152.1481170654297, "learning_rate": 1.0480572073877465e-06, "loss": 23.3906, "step": 15062 }, { "epoch": 1.0003984857541344, "grad_norm": 225.82785034179688, "learning_rate": 1.047949779420513e-06, "loss": 16.4688, "step": 15063 }, { "epoch": 1.00046490004649, "grad_norm": 251.3386688232422, "learning_rate": 1.0478423508986239e-06, "loss": 23.3125, "step": 15064 }, { "epoch": 1.0005313143388457, "grad_norm": 129.3287811279297, "learning_rate": 1.047734921823322e-06, "loss": 14.6094, "step": 15065 }, { "epoch": 1.0005977286312013, "grad_norm": 237.80482482910156, "learning_rate": 1.0476274921958502e-06, "loss": 16.4531, "step": 15066 }, { "epoch": 1.0006641429235572, "grad_norm": 188.9457244873047, "learning_rate": 1.047520062017451e-06, "loss": 17.8906, "step": 15067 }, { "epoch": 1.0007305572159129, "grad_norm": 216.43222045898438, "learning_rate": 1.047412631289367e-06, "loss": 32.0, "step": 15068 }, { "epoch": 1.0007969715082685, "grad_norm": 152.7721710205078, "learning_rate": 1.0473052000128416e-06, "loss": 12.6562, "step": 15069 }, { "epoch": 1.0008633858006244, "grad_norm": 494.27337646484375, "learning_rate": 1.0471977681891163e-06, "loss": 22.75, "step": 15070 }, { "epoch": 1.00092980009298, "grad_norm": 295.6286926269531, "learning_rate": 1.0470903358194346e-06, "loss": 10.3672, "step": 15071 }, { "epoch": 1.0009962143853357, "grad_norm": 149.26304626464844, "learning_rate": 1.046982902905039e-06, "loss": 17.0781, "step": 15072 }, { "epoch": 1.0010626286776914, "grad_norm": 237.8308563232422, "learning_rate": 1.0468754694471723e-06, "loss": 19.4688, "step": 15073 }, { "epoch": 1.0011290429700472, "grad_norm": 216.694580078125, "learning_rate": 1.0467680354470774e-06, "loss": 17.25, "step": 15074 }, { "epoch": 1.0011954572624029, "grad_norm": 170.17349243164062, "learning_rate": 1.0466606009059967e-06, "loss": 13.0938, "step": 15075 }, { "epoch": 1.0012618715547585, "grad_norm": 257.8725280761719, "learning_rate": 1.0465531658251726e-06, "loss": 18.3438, "step": 15076 }, { "epoch": 1.0013282858471142, "grad_norm": 214.3199005126953, "learning_rate": 1.046445730205849e-06, "loss": 14.6875, "step": 15077 }, { "epoch": 1.00139470013947, "grad_norm": 176.20516967773438, "learning_rate": 1.0463382940492677e-06, "loss": 17.0625, "step": 15078 }, { "epoch": 1.0014611144318257, "grad_norm": 439.5146789550781, "learning_rate": 1.0462308573566719e-06, "loss": 23.8281, "step": 15079 }, { "epoch": 1.0015275287241814, "grad_norm": 279.4128723144531, "learning_rate": 1.046123420129304e-06, "loss": 16.2344, "step": 15080 }, { "epoch": 1.0015939430165373, "grad_norm": 422.8970642089844, "learning_rate": 1.0460159823684072e-06, "loss": 27.9375, "step": 15081 }, { "epoch": 1.001660357308893, "grad_norm": 229.82479858398438, "learning_rate": 1.0459085440752238e-06, "loss": 27.0625, "step": 15082 }, { "epoch": 1.0017267716012486, "grad_norm": 246.5252685546875, "learning_rate": 1.045801105250997e-06, "loss": 13.5547, "step": 15083 }, { "epoch": 1.0017931858936042, "grad_norm": 603.4347534179688, "learning_rate": 1.0456936658969696e-06, "loss": 18.2031, "step": 15084 }, { "epoch": 1.00185960018596, "grad_norm": 290.9486083984375, "learning_rate": 1.0455862260143837e-06, "loss": 15.5312, "step": 15085 }, { "epoch": 1.0019260144783158, "grad_norm": 298.5609130859375, "learning_rate": 1.0454787856044827e-06, "loss": 14.75, "step": 15086 }, { "epoch": 1.0019924287706714, "grad_norm": 334.1923828125, "learning_rate": 1.0453713446685095e-06, "loss": 25.25, "step": 15087 }, { "epoch": 1.002058843063027, "grad_norm": 271.2597961425781, "learning_rate": 1.0452639032077069e-06, "loss": 18.875, "step": 15088 }, { "epoch": 1.002125257355383, "grad_norm": 273.0907897949219, "learning_rate": 1.0451564612233172e-06, "loss": 18.6562, "step": 15089 }, { "epoch": 1.0021916716477386, "grad_norm": 390.90740966796875, "learning_rate": 1.045049018716584e-06, "loss": 17.9062, "step": 15090 }, { "epoch": 1.0022580859400942, "grad_norm": 194.7640380859375, "learning_rate": 1.0449415756887497e-06, "loss": 17.9219, "step": 15091 }, { "epoch": 1.0023245002324501, "grad_norm": 145.95423889160156, "learning_rate": 1.044834132141057e-06, "loss": 17.1406, "step": 15092 }, { "epoch": 1.0023909145248058, "grad_norm": 154.83111572265625, "learning_rate": 1.044726688074749e-06, "loss": 16.3438, "step": 15093 }, { "epoch": 1.0024573288171614, "grad_norm": 102.44623565673828, "learning_rate": 1.0446192434910684e-06, "loss": 15.8906, "step": 15094 }, { "epoch": 1.002523743109517, "grad_norm": 190.72821044921875, "learning_rate": 1.0445117983912582e-06, "loss": 15.6406, "step": 15095 }, { "epoch": 1.002590157401873, "grad_norm": 194.82180786132812, "learning_rate": 1.0444043527765609e-06, "loss": 13.1719, "step": 15096 }, { "epoch": 1.0026565716942286, "grad_norm": 239.22451782226562, "learning_rate": 1.0442969066482198e-06, "loss": 16.7031, "step": 15097 }, { "epoch": 1.0027229859865843, "grad_norm": 229.94122314453125, "learning_rate": 1.0441894600074778e-06, "loss": 19.2656, "step": 15098 }, { "epoch": 1.00278940027894, "grad_norm": 167.28758239746094, "learning_rate": 1.0440820128555777e-06, "loss": 29.8438, "step": 15099 }, { "epoch": 1.0028558145712958, "grad_norm": 212.59022521972656, "learning_rate": 1.043974565193762e-06, "loss": 13.375, "step": 15100 }, { "epoch": 1.0029222288636515, "grad_norm": 455.5641174316406, "learning_rate": 1.043867117023274e-06, "loss": 21.75, "step": 15101 }, { "epoch": 1.002988643156007, "grad_norm": 245.35821533203125, "learning_rate": 1.0437596683453564e-06, "loss": 16.4609, "step": 15102 }, { "epoch": 1.003055057448363, "grad_norm": 246.89244079589844, "learning_rate": 1.0436522191612523e-06, "loss": 15.9219, "step": 15103 }, { "epoch": 1.0031214717407186, "grad_norm": 330.5847473144531, "learning_rate": 1.0435447694722045e-06, "loss": 16.0156, "step": 15104 }, { "epoch": 1.0031878860330743, "grad_norm": 336.7348327636719, "learning_rate": 1.0434373192794559e-06, "loss": 12.7734, "step": 15105 }, { "epoch": 1.00325430032543, "grad_norm": 347.42547607421875, "learning_rate": 1.0433298685842494e-06, "loss": 14.2812, "step": 15106 }, { "epoch": 1.0033207146177858, "grad_norm": 417.25152587890625, "learning_rate": 1.043222417387828e-06, "loss": 16.0312, "step": 15107 }, { "epoch": 1.0033871289101415, "grad_norm": 160.1173553466797, "learning_rate": 1.0431149656914345e-06, "loss": 13.9844, "step": 15108 }, { "epoch": 1.0034535432024971, "grad_norm": 199.3600616455078, "learning_rate": 1.043007513496312e-06, "loss": 16.25, "step": 15109 }, { "epoch": 1.0035199574948528, "grad_norm": 147.09014892578125, "learning_rate": 1.0429000608037036e-06, "loss": 14.9688, "step": 15110 }, { "epoch": 1.0035863717872087, "grad_norm": 164.76266479492188, "learning_rate": 1.0427926076148517e-06, "loss": 17.4531, "step": 15111 }, { "epoch": 1.0036527860795643, "grad_norm": 239.7679901123047, "learning_rate": 1.0426851539309998e-06, "loss": 20.375, "step": 15112 }, { "epoch": 1.00371920037192, "grad_norm": 210.0658721923828, "learning_rate": 1.0425776997533906e-06, "loss": 25.625, "step": 15113 }, { "epoch": 1.0037856146642758, "grad_norm": 338.1379089355469, "learning_rate": 1.042470245083267e-06, "loss": 16.9062, "step": 15114 }, { "epoch": 1.0038520289566315, "grad_norm": 330.474365234375, "learning_rate": 1.042362789921872e-06, "loss": 18.0, "step": 15115 }, { "epoch": 1.0039184432489872, "grad_norm": 142.62460327148438, "learning_rate": 1.0422553342704493e-06, "loss": 16.0781, "step": 15116 }, { "epoch": 1.0039848575413428, "grad_norm": 132.16970825195312, "learning_rate": 1.0421478781302406e-06, "loss": 19.9375, "step": 15117 }, { "epoch": 1.0040512718336987, "grad_norm": 312.95465087890625, "learning_rate": 1.04204042150249e-06, "loss": 15.7656, "step": 15118 }, { "epoch": 1.0041176861260543, "grad_norm": 228.92881774902344, "learning_rate": 1.0419329643884399e-06, "loss": 28.875, "step": 15119 }, { "epoch": 1.00418410041841, "grad_norm": 230.5487518310547, "learning_rate": 1.0418255067893332e-06, "loss": 22.9375, "step": 15120 }, { "epoch": 1.0042505147107657, "grad_norm": 241.1062469482422, "learning_rate": 1.0417180487064131e-06, "loss": 16.5469, "step": 15121 }, { "epoch": 1.0043169290031215, "grad_norm": 233.25355529785156, "learning_rate": 1.0416105901409232e-06, "loss": 12.7031, "step": 15122 }, { "epoch": 1.0043833432954772, "grad_norm": 226.96463012695312, "learning_rate": 1.0415031310941053e-06, "loss": 21.6875, "step": 15123 }, { "epoch": 1.0044497575878328, "grad_norm": 415.4456481933594, "learning_rate": 1.0413956715672037e-06, "loss": 21.3594, "step": 15124 }, { "epoch": 1.0045161718801887, "grad_norm": 361.0090026855469, "learning_rate": 1.0412882115614604e-06, "loss": 16.25, "step": 15125 }, { "epoch": 1.0045825861725444, "grad_norm": 275.3906555175781, "learning_rate": 1.0411807510781193e-06, "loss": 16.3594, "step": 15126 }, { "epoch": 1.0046490004649, "grad_norm": 354.9599609375, "learning_rate": 1.041073290118423e-06, "loss": 17.7344, "step": 15127 }, { "epoch": 1.0047154147572557, "grad_norm": 160.83053588867188, "learning_rate": 1.0409658286836142e-06, "loss": 13.2969, "step": 15128 }, { "epoch": 1.0047818290496116, "grad_norm": 227.89129638671875, "learning_rate": 1.0408583667749365e-06, "loss": 17.4688, "step": 15129 }, { "epoch": 1.0048482433419672, "grad_norm": 143.09092712402344, "learning_rate": 1.040750904393633e-06, "loss": 14.2969, "step": 15130 }, { "epoch": 1.0049146576343229, "grad_norm": 270.9931640625, "learning_rate": 1.0406434415409461e-06, "loss": 13.2031, "step": 15131 }, { "epoch": 1.0049810719266785, "grad_norm": 235.30455017089844, "learning_rate": 1.0405359782181196e-06, "loss": 15.7188, "step": 15132 }, { "epoch": 1.0050474862190344, "grad_norm": 141.4430389404297, "learning_rate": 1.0404285144263963e-06, "loss": 14.6719, "step": 15133 }, { "epoch": 1.00511390051139, "grad_norm": 97.00724029541016, "learning_rate": 1.040321050167019e-06, "loss": 11.7812, "step": 15134 }, { "epoch": 1.0051803148037457, "grad_norm": 269.3276672363281, "learning_rate": 1.0402135854412314e-06, "loss": 13.8125, "step": 15135 }, { "epoch": 1.0052467290961016, "grad_norm": 142.6072998046875, "learning_rate": 1.040106120250276e-06, "loss": 17.1094, "step": 15136 }, { "epoch": 1.0053131433884572, "grad_norm": 317.43865966796875, "learning_rate": 1.039998654595396e-06, "loss": 25.5156, "step": 15137 }, { "epoch": 1.0053795576808129, "grad_norm": 217.58937072753906, "learning_rate": 1.039891188477835e-06, "loss": 15.0625, "step": 15138 }, { "epoch": 1.0054459719731685, "grad_norm": 270.9247741699219, "learning_rate": 1.0397837218988353e-06, "loss": 16.875, "step": 15139 }, { "epoch": 1.0055123862655244, "grad_norm": 257.2172546386719, "learning_rate": 1.0396762548596406e-06, "loss": 15.9219, "step": 15140 }, { "epoch": 1.00557880055788, "grad_norm": 332.07110595703125, "learning_rate": 1.0395687873614943e-06, "loss": 19.375, "step": 15141 }, { "epoch": 1.0056452148502357, "grad_norm": 374.4956359863281, "learning_rate": 1.0394613194056385e-06, "loss": 20.75, "step": 15142 }, { "epoch": 1.0057116291425914, "grad_norm": 153.5078887939453, "learning_rate": 1.039353850993317e-06, "loss": 18.5, "step": 15143 }, { "epoch": 1.0057780434349473, "grad_norm": 116.5879898071289, "learning_rate": 1.0392463821257732e-06, "loss": 16.75, "step": 15144 }, { "epoch": 1.005844457727303, "grad_norm": 126.33966064453125, "learning_rate": 1.0391389128042496e-06, "loss": 26.2812, "step": 15145 }, { "epoch": 1.0059108720196586, "grad_norm": 155.9084014892578, "learning_rate": 1.0390314430299896e-06, "loss": 18.6406, "step": 15146 }, { "epoch": 1.0059772863120144, "grad_norm": 104.59862518310547, "learning_rate": 1.0389239728042364e-06, "loss": 18.3906, "step": 15147 }, { "epoch": 1.00604370060437, "grad_norm": 198.57150268554688, "learning_rate": 1.0388165021282334e-06, "loss": 21.9688, "step": 15148 }, { "epoch": 1.0061101148967257, "grad_norm": 340.45880126953125, "learning_rate": 1.0387090310032231e-06, "loss": 15.7188, "step": 15149 }, { "epoch": 1.0061765291890814, "grad_norm": 256.5771484375, "learning_rate": 1.0386015594304494e-06, "loss": 14.7969, "step": 15150 }, { "epoch": 1.0062429434814373, "grad_norm": 214.52096557617188, "learning_rate": 1.038494087411155e-06, "loss": 12.6562, "step": 15151 }, { "epoch": 1.006309357773793, "grad_norm": 146.60585021972656, "learning_rate": 1.038386614946583e-06, "loss": 14.8125, "step": 15152 }, { "epoch": 1.0063757720661486, "grad_norm": 229.6327667236328, "learning_rate": 1.0382791420379767e-06, "loss": 19.875, "step": 15153 }, { "epoch": 1.0064421863585042, "grad_norm": 512.9149780273438, "learning_rate": 1.0381716686865797e-06, "loss": 21.2656, "step": 15154 }, { "epoch": 1.0065086006508601, "grad_norm": 552.2413330078125, "learning_rate": 1.0380641948936348e-06, "loss": 21.5938, "step": 15155 }, { "epoch": 1.0065750149432158, "grad_norm": 200.67672729492188, "learning_rate": 1.0379567206603852e-06, "loss": 19.1875, "step": 15156 }, { "epoch": 1.0066414292355714, "grad_norm": 138.94427490234375, "learning_rate": 1.0378492459880742e-06, "loss": 18.9062, "step": 15157 }, { "epoch": 1.0067078435279273, "grad_norm": 178.75643920898438, "learning_rate": 1.0377417708779445e-06, "loss": 17.2031, "step": 15158 }, { "epoch": 1.006774257820283, "grad_norm": 142.25860595703125, "learning_rate": 1.0376342953312405e-06, "loss": 16.5781, "step": 15159 }, { "epoch": 1.0068406721126386, "grad_norm": 292.7067565917969, "learning_rate": 1.037526819349204e-06, "loss": 17.3594, "step": 15160 }, { "epoch": 1.0069070864049943, "grad_norm": 115.84979248046875, "learning_rate": 1.037419342933079e-06, "loss": 14.4375, "step": 15161 }, { "epoch": 1.0069735006973501, "grad_norm": 156.56752014160156, "learning_rate": 1.037311866084109e-06, "loss": 23.9062, "step": 15162 }, { "epoch": 1.0070399149897058, "grad_norm": 217.6951904296875, "learning_rate": 1.0372043888035364e-06, "loss": 15.625, "step": 15163 }, { "epoch": 1.0071063292820615, "grad_norm": 159.0476837158203, "learning_rate": 1.0370969110926052e-06, "loss": 15.875, "step": 15164 }, { "epoch": 1.007172743574417, "grad_norm": 389.2127685546875, "learning_rate": 1.0369894329525584e-06, "loss": 14.0781, "step": 15165 }, { "epoch": 1.007239157866773, "grad_norm": 173.23202514648438, "learning_rate": 1.036881954384639e-06, "loss": 21.6562, "step": 15166 }, { "epoch": 1.0073055721591286, "grad_norm": 229.03335571289062, "learning_rate": 1.0367744753900902e-06, "loss": 18.6094, "step": 15167 }, { "epoch": 1.0073719864514843, "grad_norm": 391.25701904296875, "learning_rate": 1.0366669959701557e-06, "loss": 20.9062, "step": 15168 }, { "epoch": 1.0074384007438402, "grad_norm": 224.8548126220703, "learning_rate": 1.0365595161260788e-06, "loss": 16.0156, "step": 15169 }, { "epoch": 1.0075048150361958, "grad_norm": 187.4927978515625, "learning_rate": 1.0364520358591022e-06, "loss": 15.3984, "step": 15170 }, { "epoch": 1.0075712293285515, "grad_norm": 313.5338134765625, "learning_rate": 1.0363445551704695e-06, "loss": 16.1406, "step": 15171 }, { "epoch": 1.0076376436209071, "grad_norm": 404.7364501953125, "learning_rate": 1.0362370740614238e-06, "loss": 17.75, "step": 15172 }, { "epoch": 1.007704057913263, "grad_norm": 159.24008178710938, "learning_rate": 1.0361295925332088e-06, "loss": 12.6719, "step": 15173 }, { "epoch": 1.0077704722056187, "grad_norm": 330.4547424316406, "learning_rate": 1.0360221105870674e-06, "loss": 20.125, "step": 15174 }, { "epoch": 1.0078368864979743, "grad_norm": 880.4702758789062, "learning_rate": 1.0359146282242429e-06, "loss": 14.8281, "step": 15175 }, { "epoch": 1.00790330079033, "grad_norm": 227.2270965576172, "learning_rate": 1.0358071454459788e-06, "loss": 16.7812, "step": 15176 }, { "epoch": 1.0079697150826858, "grad_norm": 187.9154815673828, "learning_rate": 1.0356996622535184e-06, "loss": 16.5312, "step": 15177 }, { "epoch": 1.0080361293750415, "grad_norm": 163.33750915527344, "learning_rate": 1.0355921786481046e-06, "loss": 18.4375, "step": 15178 }, { "epoch": 1.0081025436673972, "grad_norm": 227.52383422851562, "learning_rate": 1.035484694630981e-06, "loss": 18.125, "step": 15179 }, { "epoch": 1.008168957959753, "grad_norm": 215.2240447998047, "learning_rate": 1.0353772102033912e-06, "loss": 18.7188, "step": 15180 }, { "epoch": 1.0082353722521087, "grad_norm": 164.4773712158203, "learning_rate": 1.0352697253665781e-06, "loss": 17.75, "step": 15181 }, { "epoch": 1.0083017865444643, "grad_norm": 145.0121307373047, "learning_rate": 1.0351622401217852e-06, "loss": 15.3906, "step": 15182 }, { "epoch": 1.00836820083682, "grad_norm": 181.56552124023438, "learning_rate": 1.035054754470256e-06, "loss": 13.0625, "step": 15183 }, { "epoch": 1.0084346151291759, "grad_norm": 203.55284118652344, "learning_rate": 1.0349472684132333e-06, "loss": 15.0938, "step": 15184 }, { "epoch": 1.0085010294215315, "grad_norm": 174.52439880371094, "learning_rate": 1.0348397819519604e-06, "loss": 17.5312, "step": 15185 }, { "epoch": 1.0085674437138872, "grad_norm": 220.68746948242188, "learning_rate": 1.0347322950876816e-06, "loss": 12.75, "step": 15186 }, { "epoch": 1.0086338580062428, "grad_norm": 140.01568603515625, "learning_rate": 1.0346248078216394e-06, "loss": 13.9375, "step": 15187 }, { "epoch": 1.0087002722985987, "grad_norm": 164.20513916015625, "learning_rate": 1.0345173201550775e-06, "loss": 19.5, "step": 15188 }, { "epoch": 1.0087666865909544, "grad_norm": 924.4891357421875, "learning_rate": 1.034409832089239e-06, "loss": 23.2656, "step": 15189 }, { "epoch": 1.00883310088331, "grad_norm": 118.53543853759766, "learning_rate": 1.0343023436253673e-06, "loss": 14.0234, "step": 15190 }, { "epoch": 1.008899515175666, "grad_norm": 301.17169189453125, "learning_rate": 1.0341948547647064e-06, "loss": 14.9375, "step": 15191 }, { "epoch": 1.0089659294680215, "grad_norm": 236.3602752685547, "learning_rate": 1.0340873655084984e-06, "loss": 19.25, "step": 15192 }, { "epoch": 1.0090323437603772, "grad_norm": 95.56351470947266, "learning_rate": 1.0339798758579878e-06, "loss": 15.2188, "step": 15193 }, { "epoch": 1.0090987580527329, "grad_norm": 559.5105590820312, "learning_rate": 1.0338723858144178e-06, "loss": 13.7031, "step": 15194 }, { "epoch": 1.0091651723450887, "grad_norm": 349.4998474121094, "learning_rate": 1.0337648953790313e-06, "loss": 15.3281, "step": 15195 }, { "epoch": 1.0092315866374444, "grad_norm": 251.47283935546875, "learning_rate": 1.0336574045530719e-06, "loss": 15.125, "step": 15196 }, { "epoch": 1.0092980009298, "grad_norm": 151.85252380371094, "learning_rate": 1.0335499133377832e-06, "loss": 15.1875, "step": 15197 }, { "epoch": 1.009364415222156, "grad_norm": 139.41726684570312, "learning_rate": 1.0334424217344084e-06, "loss": 17.9531, "step": 15198 }, { "epoch": 1.0094308295145116, "grad_norm": 267.8622741699219, "learning_rate": 1.033334929744191e-06, "loss": 13.0625, "step": 15199 }, { "epoch": 1.0094972438068672, "grad_norm": 542.6375122070312, "learning_rate": 1.033227437368374e-06, "loss": 19.0469, "step": 15200 }, { "epoch": 1.0095636580992229, "grad_norm": 167.43655395507812, "learning_rate": 1.0331199446082017e-06, "loss": 13.0469, "step": 15201 }, { "epoch": 1.0096300723915788, "grad_norm": 178.43055725097656, "learning_rate": 1.0330124514649168e-06, "loss": 17.0469, "step": 15202 }, { "epoch": 1.0096964866839344, "grad_norm": 319.75274658203125, "learning_rate": 1.0329049579397627e-06, "loss": 19.6875, "step": 15203 }, { "epoch": 1.00976290097629, "grad_norm": 118.95789337158203, "learning_rate": 1.0327974640339834e-06, "loss": 18.125, "step": 15204 }, { "epoch": 1.0098293152686457, "grad_norm": 161.37896728515625, "learning_rate": 1.0326899697488216e-06, "loss": 13.5391, "step": 15205 }, { "epoch": 1.0098957295610016, "grad_norm": 206.39553833007812, "learning_rate": 1.0325824750855212e-06, "loss": 11.0391, "step": 15206 }, { "epoch": 1.0099621438533573, "grad_norm": 336.8380126953125, "learning_rate": 1.0324749800453253e-06, "loss": 19.5156, "step": 15207 }, { "epoch": 1.010028558145713, "grad_norm": 357.5743713378906, "learning_rate": 1.032367484629478e-06, "loss": 25.5312, "step": 15208 }, { "epoch": 1.0100949724380688, "grad_norm": 104.46463012695312, "learning_rate": 1.032259988839222e-06, "loss": 14.2031, "step": 15209 }, { "epoch": 1.0101613867304244, "grad_norm": 464.9571838378906, "learning_rate": 1.032152492675801e-06, "loss": 16.0, "step": 15210 }, { "epoch": 1.01022780102278, "grad_norm": 572.3201293945312, "learning_rate": 1.0320449961404588e-06, "loss": 13.8281, "step": 15211 }, { "epoch": 1.0102942153151357, "grad_norm": 168.77786254882812, "learning_rate": 1.0319374992344386e-06, "loss": 16.1875, "step": 15212 }, { "epoch": 1.0103606296074916, "grad_norm": 188.86061096191406, "learning_rate": 1.0318300019589835e-06, "loss": 18.3594, "step": 15213 }, { "epoch": 1.0104270438998473, "grad_norm": 169.63970947265625, "learning_rate": 1.0317225043153374e-06, "loss": 15.6094, "step": 15214 }, { "epoch": 1.010493458192203, "grad_norm": 133.7191619873047, "learning_rate": 1.0316150063047438e-06, "loss": 19.0078, "step": 15215 }, { "epoch": 1.0105598724845586, "grad_norm": 167.56053161621094, "learning_rate": 1.031507507928446e-06, "loss": 17.1562, "step": 15216 }, { "epoch": 1.0106262867769145, "grad_norm": 178.89999389648438, "learning_rate": 1.0314000091876872e-06, "loss": 14.9375, "step": 15217 }, { "epoch": 1.0106927010692701, "grad_norm": 178.7849884033203, "learning_rate": 1.0312925100837113e-06, "loss": 23.2969, "step": 15218 }, { "epoch": 1.0107591153616258, "grad_norm": 569.1829223632812, "learning_rate": 1.031185010617762e-06, "loss": 16.4688, "step": 15219 }, { "epoch": 1.0108255296539816, "grad_norm": 271.1813659667969, "learning_rate": 1.0310775107910823e-06, "loss": 21.625, "step": 15220 }, { "epoch": 1.0108919439463373, "grad_norm": 246.98777770996094, "learning_rate": 1.0309700106049158e-06, "loss": 18.7812, "step": 15221 }, { "epoch": 1.010958358238693, "grad_norm": 251.8763885498047, "learning_rate": 1.030862510060506e-06, "loss": 18.1406, "step": 15222 }, { "epoch": 1.0110247725310486, "grad_norm": 514.677001953125, "learning_rate": 1.0307550091590968e-06, "loss": 17.5, "step": 15223 }, { "epoch": 1.0110911868234045, "grad_norm": 150.06204223632812, "learning_rate": 1.0306475079019313e-06, "loss": 11.3906, "step": 15224 }, { "epoch": 1.0111576011157601, "grad_norm": 134.85707092285156, "learning_rate": 1.030540006290253e-06, "loss": 16.4375, "step": 15225 }, { "epoch": 1.0112240154081158, "grad_norm": 233.1807403564453, "learning_rate": 1.0304325043253054e-06, "loss": 19.6094, "step": 15226 }, { "epoch": 1.0112904297004714, "grad_norm": 325.07647705078125, "learning_rate": 1.0303250020083322e-06, "loss": 23.25, "step": 15227 }, { "epoch": 1.0113568439928273, "grad_norm": 354.6261291503906, "learning_rate": 1.030217499340577e-06, "loss": 26.9219, "step": 15228 }, { "epoch": 1.011423258285183, "grad_norm": 1147.7037353515625, "learning_rate": 1.0301099963232832e-06, "loss": 19.2031, "step": 15229 }, { "epoch": 1.0114896725775386, "grad_norm": 340.7445068359375, "learning_rate": 1.0300024929576942e-06, "loss": 16.3125, "step": 15230 }, { "epoch": 1.0115560868698945, "grad_norm": 199.87257385253906, "learning_rate": 1.0298949892450536e-06, "loss": 12.4375, "step": 15231 }, { "epoch": 1.0116225011622502, "grad_norm": 195.849853515625, "learning_rate": 1.0297874851866053e-06, "loss": 14.7188, "step": 15232 }, { "epoch": 1.0116889154546058, "grad_norm": 130.361572265625, "learning_rate": 1.0296799807835924e-06, "loss": 17.4453, "step": 15233 }, { "epoch": 1.0117553297469615, "grad_norm": 151.31585693359375, "learning_rate": 1.0295724760372584e-06, "loss": 13.1406, "step": 15234 }, { "epoch": 1.0118217440393174, "grad_norm": 281.3899230957031, "learning_rate": 1.029464970948847e-06, "loss": 15.3906, "step": 15235 }, { "epoch": 1.011888158331673, "grad_norm": 183.5377655029297, "learning_rate": 1.0293574655196022e-06, "loss": 14.4375, "step": 15236 }, { "epoch": 1.0119545726240287, "grad_norm": 116.0584716796875, "learning_rate": 1.0292499597507671e-06, "loss": 16.8594, "step": 15237 }, { "epoch": 1.0120209869163843, "grad_norm": 194.33712768554688, "learning_rate": 1.0291424536435851e-06, "loss": 15.4531, "step": 15238 }, { "epoch": 1.0120874012087402, "grad_norm": 179.91526794433594, "learning_rate": 1.0290349471993001e-06, "loss": 14.5156, "step": 15239 }, { "epoch": 1.0121538155010958, "grad_norm": 185.71409606933594, "learning_rate": 1.0289274404191558e-06, "loss": 15.625, "step": 15240 }, { "epoch": 1.0122202297934515, "grad_norm": 345.7841796875, "learning_rate": 1.0288199333043953e-06, "loss": 14.5781, "step": 15241 }, { "epoch": 1.0122866440858074, "grad_norm": 151.68629455566406, "learning_rate": 1.0287124258562623e-06, "loss": 18.6562, "step": 15242 }, { "epoch": 1.012353058378163, "grad_norm": 226.14723205566406, "learning_rate": 1.0286049180760005e-06, "loss": 16.5938, "step": 15243 }, { "epoch": 1.0124194726705187, "grad_norm": 90.597900390625, "learning_rate": 1.028497409964854e-06, "loss": 14.75, "step": 15244 }, { "epoch": 1.0124858869628743, "grad_norm": 272.7528076171875, "learning_rate": 1.0283899015240655e-06, "loss": 17.7969, "step": 15245 }, { "epoch": 1.0125523012552302, "grad_norm": 1317.117431640625, "learning_rate": 1.028282392754879e-06, "loss": 13.2031, "step": 15246 }, { "epoch": 1.0126187155475859, "grad_norm": 203.7834930419922, "learning_rate": 1.0281748836585383e-06, "loss": 17.9844, "step": 15247 }, { "epoch": 1.0126851298399415, "grad_norm": 293.6075744628906, "learning_rate": 1.0280673742362866e-06, "loss": 15.75, "step": 15248 }, { "epoch": 1.0127515441322972, "grad_norm": 258.81939697265625, "learning_rate": 1.0279598644893676e-06, "loss": 14.375, "step": 15249 }, { "epoch": 1.012817958424653, "grad_norm": 735.9961547851562, "learning_rate": 1.0278523544190252e-06, "loss": 25.5, "step": 15250 }, { "epoch": 1.0128843727170087, "grad_norm": 150.5749053955078, "learning_rate": 1.0277448440265028e-06, "loss": 12.6406, "step": 15251 }, { "epoch": 1.0129507870093644, "grad_norm": 343.1180114746094, "learning_rate": 1.0276373333130442e-06, "loss": 25.0156, "step": 15252 }, { "epoch": 1.0130172013017202, "grad_norm": 109.96456909179688, "learning_rate": 1.0275298222798926e-06, "loss": 16.8594, "step": 15253 }, { "epoch": 1.013083615594076, "grad_norm": 259.3356018066406, "learning_rate": 1.0274223109282917e-06, "loss": 19.7188, "step": 15254 }, { "epoch": 1.0131500298864315, "grad_norm": 142.70030212402344, "learning_rate": 1.0273147992594859e-06, "loss": 16.125, "step": 15255 }, { "epoch": 1.0132164441787872, "grad_norm": 230.9971160888672, "learning_rate": 1.027207287274718e-06, "loss": 19.9531, "step": 15256 }, { "epoch": 1.013282858471143, "grad_norm": 137.45758056640625, "learning_rate": 1.0270997749752317e-06, "loss": 15.2188, "step": 15257 }, { "epoch": 1.0133492727634987, "grad_norm": 344.74957275390625, "learning_rate": 1.026992262362271e-06, "loss": 15.9531, "step": 15258 }, { "epoch": 1.0134156870558544, "grad_norm": 230.4017333984375, "learning_rate": 1.0268847494370795e-06, "loss": 14.0156, "step": 15259 }, { "epoch": 1.01348210134821, "grad_norm": 185.313232421875, "learning_rate": 1.0267772362009004e-06, "loss": 17.2031, "step": 15260 }, { "epoch": 1.013548515640566, "grad_norm": 286.3517761230469, "learning_rate": 1.0266697226549779e-06, "loss": 16.5156, "step": 15261 }, { "epoch": 1.0136149299329216, "grad_norm": 365.22216796875, "learning_rate": 1.0265622088005554e-06, "loss": 24.5781, "step": 15262 }, { "epoch": 1.0136813442252772, "grad_norm": 417.37725830078125, "learning_rate": 1.0264546946388765e-06, "loss": 16.4219, "step": 15263 }, { "epoch": 1.013747758517633, "grad_norm": 665.63720703125, "learning_rate": 1.0263471801711852e-06, "loss": 14.3125, "step": 15264 }, { "epoch": 1.0138141728099888, "grad_norm": 177.57936096191406, "learning_rate": 1.0262396653987245e-06, "loss": 15.2812, "step": 15265 }, { "epoch": 1.0138805871023444, "grad_norm": 240.0218963623047, "learning_rate": 1.026132150322739e-06, "loss": 19.5625, "step": 15266 }, { "epoch": 1.0139470013947, "grad_norm": 157.00811767578125, "learning_rate": 1.0260246349444714e-06, "loss": 19.5469, "step": 15267 }, { "epoch": 1.014013415687056, "grad_norm": 230.20693969726562, "learning_rate": 1.025917119265166e-06, "loss": 16.5, "step": 15268 }, { "epoch": 1.0140798299794116, "grad_norm": 578.220947265625, "learning_rate": 1.0258096032860663e-06, "loss": 18.3125, "step": 15269 }, { "epoch": 1.0141462442717672, "grad_norm": 196.1869659423828, "learning_rate": 1.0257020870084157e-06, "loss": 11.0781, "step": 15270 }, { "epoch": 1.014212658564123, "grad_norm": 245.82501220703125, "learning_rate": 1.0255945704334585e-06, "loss": 17.3594, "step": 15271 }, { "epoch": 1.0142790728564788, "grad_norm": 452.6330871582031, "learning_rate": 1.0254870535624383e-06, "loss": 25.2344, "step": 15272 }, { "epoch": 1.0143454871488344, "grad_norm": 717.3190307617188, "learning_rate": 1.025379536396598e-06, "loss": 24.9531, "step": 15273 }, { "epoch": 1.01441190144119, "grad_norm": 146.49961853027344, "learning_rate": 1.0252720189371821e-06, "loss": 17.9219, "step": 15274 }, { "epoch": 1.014478315733546, "grad_norm": 181.62193298339844, "learning_rate": 1.0251645011854342e-06, "loss": 16.7188, "step": 15275 }, { "epoch": 1.0145447300259016, "grad_norm": 412.4401550292969, "learning_rate": 1.0250569831425978e-06, "loss": 18.9219, "step": 15276 }, { "epoch": 1.0146111443182573, "grad_norm": 216.40377807617188, "learning_rate": 1.0249494648099166e-06, "loss": 21.8438, "step": 15277 }, { "epoch": 1.014677558610613, "grad_norm": 781.6943359375, "learning_rate": 1.0248419461886345e-06, "loss": 12.1875, "step": 15278 }, { "epoch": 1.0147439729029688, "grad_norm": 209.64654541015625, "learning_rate": 1.024734427279995e-06, "loss": 17.3594, "step": 15279 }, { "epoch": 1.0148103871953245, "grad_norm": 174.46173095703125, "learning_rate": 1.0246269080852419e-06, "loss": 14.9219, "step": 15280 }, { "epoch": 1.0148768014876801, "grad_norm": 112.80441284179688, "learning_rate": 1.024519388605619e-06, "loss": 17.75, "step": 15281 }, { "epoch": 1.0149432157800358, "grad_norm": 97.53333282470703, "learning_rate": 1.0244118688423698e-06, "loss": 14.1875, "step": 15282 }, { "epoch": 1.0150096300723916, "grad_norm": 441.02789306640625, "learning_rate": 1.0243043487967382e-06, "loss": 23.375, "step": 15283 }, { "epoch": 1.0150760443647473, "grad_norm": 433.83819580078125, "learning_rate": 1.024196828469968e-06, "loss": 19.5469, "step": 15284 }, { "epoch": 1.015142458657103, "grad_norm": 168.41680908203125, "learning_rate": 1.024089307863303e-06, "loss": 15.625, "step": 15285 }, { "epoch": 1.0152088729494588, "grad_norm": 119.3080825805664, "learning_rate": 1.0239817869779864e-06, "loss": 16.8125, "step": 15286 }, { "epoch": 1.0152752872418145, "grad_norm": 167.1240997314453, "learning_rate": 1.0238742658152626e-06, "loss": 15.9844, "step": 15287 }, { "epoch": 1.0153417015341701, "grad_norm": 131.22422790527344, "learning_rate": 1.023766744376375e-06, "loss": 13.2656, "step": 15288 }, { "epoch": 1.0154081158265258, "grad_norm": 213.15943908691406, "learning_rate": 1.0236592226625673e-06, "loss": 16.9219, "step": 15289 }, { "epoch": 1.0154745301188817, "grad_norm": 139.7592315673828, "learning_rate": 1.0235517006750835e-06, "loss": 14.1797, "step": 15290 }, { "epoch": 1.0155409444112373, "grad_norm": 170.85592651367188, "learning_rate": 1.023444178415167e-06, "loss": 12.6562, "step": 15291 }, { "epoch": 1.015607358703593, "grad_norm": 326.05596923828125, "learning_rate": 1.023336655884062e-06, "loss": 12.5938, "step": 15292 }, { "epoch": 1.0156737729959486, "grad_norm": 111.20542907714844, "learning_rate": 1.0232291330830119e-06, "loss": 14.2969, "step": 15293 }, { "epoch": 1.0157401872883045, "grad_norm": 263.9806213378906, "learning_rate": 1.0231216100132608e-06, "loss": 15.2188, "step": 15294 }, { "epoch": 1.0158066015806602, "grad_norm": 332.0462341308594, "learning_rate": 1.023014086676052e-06, "loss": 25.4844, "step": 15295 }, { "epoch": 1.0158730158730158, "grad_norm": 482.7290954589844, "learning_rate": 1.0229065630726294e-06, "loss": 16.5156, "step": 15296 }, { "epoch": 1.0159394301653717, "grad_norm": 335.3446960449219, "learning_rate": 1.022799039204237e-06, "loss": 17.8594, "step": 15297 }, { "epoch": 1.0160058444577273, "grad_norm": 702.2491455078125, "learning_rate": 1.0226915150721187e-06, "loss": 18.8438, "step": 15298 }, { "epoch": 1.016072258750083, "grad_norm": 323.8616027832031, "learning_rate": 1.0225839906775178e-06, "loss": 21.75, "step": 15299 }, { "epoch": 1.0161386730424387, "grad_norm": 250.5437774658203, "learning_rate": 1.0224764660216782e-06, "loss": 12.6875, "step": 15300 }, { "epoch": 1.0162050873347945, "grad_norm": 209.76097106933594, "learning_rate": 1.0223689411058442e-06, "loss": 17.9688, "step": 15301 }, { "epoch": 1.0162715016271502, "grad_norm": 250.78871154785156, "learning_rate": 1.0222614159312588e-06, "loss": 20.4219, "step": 15302 }, { "epoch": 1.0163379159195058, "grad_norm": 207.33609008789062, "learning_rate": 1.0221538904991664e-06, "loss": 20.5156, "step": 15303 }, { "epoch": 1.0164043302118615, "grad_norm": 189.59193420410156, "learning_rate": 1.0220463648108104e-06, "loss": 17.8594, "step": 15304 }, { "epoch": 1.0164707445042174, "grad_norm": 181.48048400878906, "learning_rate": 1.021938838867435e-06, "loss": 20.1719, "step": 15305 }, { "epoch": 1.016537158796573, "grad_norm": 124.75469207763672, "learning_rate": 1.0218313126702838e-06, "loss": 12.8438, "step": 15306 }, { "epoch": 1.0166035730889287, "grad_norm": 150.8341522216797, "learning_rate": 1.0217237862206e-06, "loss": 16.7031, "step": 15307 }, { "epoch": 1.0166699873812846, "grad_norm": 193.71592712402344, "learning_rate": 1.0216162595196286e-06, "loss": 14.5781, "step": 15308 }, { "epoch": 1.0167364016736402, "grad_norm": 238.8983612060547, "learning_rate": 1.0215087325686124e-06, "loss": 21.4062, "step": 15309 }, { "epoch": 1.0168028159659959, "grad_norm": 263.4273376464844, "learning_rate": 1.0214012053687957e-06, "loss": 12.6094, "step": 15310 }, { "epoch": 1.0168692302583515, "grad_norm": 142.986328125, "learning_rate": 1.0212936779214222e-06, "loss": 12.75, "step": 15311 }, { "epoch": 1.0169356445507074, "grad_norm": 140.77862548828125, "learning_rate": 1.0211861502277358e-06, "loss": 13.25, "step": 15312 }, { "epoch": 1.017002058843063, "grad_norm": 167.38111877441406, "learning_rate": 1.0210786222889801e-06, "loss": 22.3438, "step": 15313 }, { "epoch": 1.0170684731354187, "grad_norm": 218.19749450683594, "learning_rate": 1.0209710941063992e-06, "loss": 12.8281, "step": 15314 }, { "epoch": 1.0171348874277744, "grad_norm": 261.9910888671875, "learning_rate": 1.0208635656812367e-06, "loss": 17.9531, "step": 15315 }, { "epoch": 1.0172013017201302, "grad_norm": 282.4525146484375, "learning_rate": 1.0207560370147366e-06, "loss": 18.875, "step": 15316 }, { "epoch": 1.017267716012486, "grad_norm": 103.68407440185547, "learning_rate": 1.0206485081081424e-06, "loss": 15.9688, "step": 15317 }, { "epoch": 1.0173341303048415, "grad_norm": 391.89739990234375, "learning_rate": 1.0205409789626983e-06, "loss": 13.7266, "step": 15318 }, { "epoch": 1.0174005445971974, "grad_norm": 163.1072540283203, "learning_rate": 1.020433449579648e-06, "loss": 19.0781, "step": 15319 }, { "epoch": 1.017466958889553, "grad_norm": 245.18753051757812, "learning_rate": 1.0203259199602354e-06, "loss": 18.9375, "step": 15320 }, { "epoch": 1.0175333731819087, "grad_norm": 214.84841918945312, "learning_rate": 1.0202183901057042e-06, "loss": 16.8125, "step": 15321 }, { "epoch": 1.0175997874742644, "grad_norm": 103.27165985107422, "learning_rate": 1.0201108600172985e-06, "loss": 10.8125, "step": 15322 }, { "epoch": 1.0176662017666203, "grad_norm": 164.06234741210938, "learning_rate": 1.0200033296962616e-06, "loss": 16.9219, "step": 15323 }, { "epoch": 1.017732616058976, "grad_norm": 193.8529815673828, "learning_rate": 1.019895799143838e-06, "loss": 19.3281, "step": 15324 }, { "epoch": 1.0177990303513316, "grad_norm": 162.4621124267578, "learning_rate": 1.0197882683612712e-06, "loss": 17.5156, "step": 15325 }, { "epoch": 1.0178654446436872, "grad_norm": 108.07833862304688, "learning_rate": 1.0196807373498052e-06, "loss": 16.9688, "step": 15326 }, { "epoch": 1.017931858936043, "grad_norm": 179.01171875, "learning_rate": 1.0195732061106834e-06, "loss": 17.6562, "step": 15327 }, { "epoch": 1.0179982732283988, "grad_norm": 209.16143798828125, "learning_rate": 1.0194656746451504e-06, "loss": 14.2031, "step": 15328 }, { "epoch": 1.0180646875207544, "grad_norm": 571.7606811523438, "learning_rate": 1.0193581429544496e-06, "loss": 17.3438, "step": 15329 }, { "epoch": 1.0181311018131103, "grad_norm": 111.35997009277344, "learning_rate": 1.0192506110398252e-06, "loss": 13.0938, "step": 15330 }, { "epoch": 1.018197516105466, "grad_norm": 305.1864929199219, "learning_rate": 1.0191430789025207e-06, "loss": 15.6406, "step": 15331 }, { "epoch": 1.0182639303978216, "grad_norm": 191.30795288085938, "learning_rate": 1.0190355465437796e-06, "loss": 16.8438, "step": 15332 }, { "epoch": 1.0183303446901772, "grad_norm": 261.52618408203125, "learning_rate": 1.0189280139648472e-06, "loss": 13.1875, "step": 15333 }, { "epoch": 1.0183967589825331, "grad_norm": 118.5487060546875, "learning_rate": 1.0188204811669656e-06, "loss": 13.75, "step": 15334 }, { "epoch": 1.0184631732748888, "grad_norm": 185.13992309570312, "learning_rate": 1.0187129481513799e-06, "loss": 16.2344, "step": 15335 }, { "epoch": 1.0185295875672444, "grad_norm": 573.7703247070312, "learning_rate": 1.0186054149193336e-06, "loss": 19.6875, "step": 15336 }, { "epoch": 1.0185960018596, "grad_norm": 205.6060333251953, "learning_rate": 1.0184978814720706e-06, "loss": 14.875, "step": 15337 }, { "epoch": 1.018662416151956, "grad_norm": 269.8479309082031, "learning_rate": 1.0183903478108347e-06, "loss": 16.0625, "step": 15338 }, { "epoch": 1.0187288304443116, "grad_norm": 121.22720336914062, "learning_rate": 1.01828281393687e-06, "loss": 19.125, "step": 15339 }, { "epoch": 1.0187952447366673, "grad_norm": 241.97250366210938, "learning_rate": 1.0181752798514202e-06, "loss": 17.0469, "step": 15340 }, { "epoch": 1.0188616590290231, "grad_norm": 104.91741943359375, "learning_rate": 1.0180677455557292e-06, "loss": 14.3281, "step": 15341 }, { "epoch": 1.0189280733213788, "grad_norm": 507.9609069824219, "learning_rate": 1.0179602110510407e-06, "loss": 19.6875, "step": 15342 }, { "epoch": 1.0189944876137345, "grad_norm": 182.18788146972656, "learning_rate": 1.017852676338599e-06, "loss": 20.7656, "step": 15343 }, { "epoch": 1.01906090190609, "grad_norm": 172.40423583984375, "learning_rate": 1.017745141419648e-06, "loss": 16.8438, "step": 15344 }, { "epoch": 1.019127316198446, "grad_norm": 274.7261657714844, "learning_rate": 1.0176376062954314e-06, "loss": 14.0625, "step": 15345 }, { "epoch": 1.0191937304908016, "grad_norm": 242.3865509033203, "learning_rate": 1.017530070967193e-06, "loss": 17.2656, "step": 15346 }, { "epoch": 1.0192601447831573, "grad_norm": 551.9786376953125, "learning_rate": 1.017422535436177e-06, "loss": 15.7969, "step": 15347 }, { "epoch": 1.019326559075513, "grad_norm": 137.9261932373047, "learning_rate": 1.017314999703627e-06, "loss": 15.5312, "step": 15348 }, { "epoch": 1.0193929733678688, "grad_norm": 129.65835571289062, "learning_rate": 1.0172074637707871e-06, "loss": 14.7969, "step": 15349 }, { "epoch": 1.0194593876602245, "grad_norm": 303.13885498046875, "learning_rate": 1.017099927638901e-06, "loss": 15.8828, "step": 15350 }, { "epoch": 1.0195258019525801, "grad_norm": 149.317626953125, "learning_rate": 1.0169923913092134e-06, "loss": 16.9062, "step": 15351 }, { "epoch": 1.019592216244936, "grad_norm": 128.1129608154297, "learning_rate": 1.016884854782967e-06, "loss": 16.4844, "step": 15352 }, { "epoch": 1.0196586305372917, "grad_norm": 401.136962890625, "learning_rate": 1.0167773180614068e-06, "loss": 18.2656, "step": 15353 }, { "epoch": 1.0197250448296473, "grad_norm": 210.63230895996094, "learning_rate": 1.016669781145776e-06, "loss": 17.0781, "step": 15354 }, { "epoch": 1.019791459122003, "grad_norm": 167.3013916015625, "learning_rate": 1.016562244037319e-06, "loss": 23.6094, "step": 15355 }, { "epoch": 1.0198578734143589, "grad_norm": 367.570556640625, "learning_rate": 1.0164547067372792e-06, "loss": 21.1875, "step": 15356 }, { "epoch": 1.0199242877067145, "grad_norm": 219.33084106445312, "learning_rate": 1.016347169246901e-06, "loss": 19.0938, "step": 15357 }, { "epoch": 1.0199907019990702, "grad_norm": 136.12435913085938, "learning_rate": 1.0162396315674282e-06, "loss": 12.3438, "step": 15358 }, { "epoch": 1.0200571162914258, "grad_norm": 150.0523223876953, "learning_rate": 1.0161320937001047e-06, "loss": 16.2422, "step": 15359 }, { "epoch": 1.0201235305837817, "grad_norm": 609.5306396484375, "learning_rate": 1.0160245556461742e-06, "loss": 22.8906, "step": 15360 }, { "epoch": 1.0201899448761373, "grad_norm": 318.7533264160156, "learning_rate": 1.0159170174068813e-06, "loss": 18.2188, "step": 15361 }, { "epoch": 1.020256359168493, "grad_norm": 306.2981262207031, "learning_rate": 1.0158094789834692e-06, "loss": 20.5312, "step": 15362 }, { "epoch": 1.0203227734608489, "grad_norm": 337.08966064453125, "learning_rate": 1.0157019403771822e-06, "loss": 17.4688, "step": 15363 }, { "epoch": 1.0203891877532045, "grad_norm": 109.77288818359375, "learning_rate": 1.0155944015892643e-06, "loss": 13.4219, "step": 15364 }, { "epoch": 1.0204556020455602, "grad_norm": 123.89181518554688, "learning_rate": 1.0154868626209595e-06, "loss": 16.5781, "step": 15365 }, { "epoch": 1.0205220163379158, "grad_norm": 222.139404296875, "learning_rate": 1.0153793234735112e-06, "loss": 21.7812, "step": 15366 }, { "epoch": 1.0205884306302717, "grad_norm": 161.49049377441406, "learning_rate": 1.0152717841481642e-06, "loss": 17.9531, "step": 15367 }, { "epoch": 1.0206548449226274, "grad_norm": 195.9034423828125, "learning_rate": 1.0151642446461618e-06, "loss": 14.9375, "step": 15368 }, { "epoch": 1.020721259214983, "grad_norm": 220.0365753173828, "learning_rate": 1.015056704968748e-06, "loss": 11.5781, "step": 15369 }, { "epoch": 1.0207876735073387, "grad_norm": 177.39337158203125, "learning_rate": 1.014949165117167e-06, "loss": 15.9531, "step": 15370 }, { "epoch": 1.0208540877996946, "grad_norm": 125.76290893554688, "learning_rate": 1.0148416250926628e-06, "loss": 16.0312, "step": 15371 }, { "epoch": 1.0209205020920502, "grad_norm": 452.1662902832031, "learning_rate": 1.0147340848964793e-06, "loss": 19.75, "step": 15372 }, { "epoch": 1.0209869163844059, "grad_norm": 135.9062042236328, "learning_rate": 1.0146265445298603e-06, "loss": 19.3281, "step": 15373 }, { "epoch": 1.0210533306767617, "grad_norm": 140.68038940429688, "learning_rate": 1.01451900399405e-06, "loss": 19.5938, "step": 15374 }, { "epoch": 1.0211197449691174, "grad_norm": 187.80288696289062, "learning_rate": 1.014411463290292e-06, "loss": 15.875, "step": 15375 }, { "epoch": 1.021186159261473, "grad_norm": 275.9442443847656, "learning_rate": 1.0143039224198306e-06, "loss": 14.4062, "step": 15376 }, { "epoch": 1.0212525735538287, "grad_norm": 268.8526916503906, "learning_rate": 1.0141963813839097e-06, "loss": 18.9062, "step": 15377 }, { "epoch": 1.0213189878461846, "grad_norm": 137.79017639160156, "learning_rate": 1.014088840183773e-06, "loss": 17.0312, "step": 15378 }, { "epoch": 1.0213854021385402, "grad_norm": 133.55014038085938, "learning_rate": 1.013981298820665e-06, "loss": 16.375, "step": 15379 }, { "epoch": 1.0214518164308959, "grad_norm": 726.53662109375, "learning_rate": 1.0138737572958296e-06, "loss": 24.3125, "step": 15380 }, { "epoch": 1.0215182307232515, "grad_norm": 138.16094970703125, "learning_rate": 1.01376621561051e-06, "loss": 14.5938, "step": 15381 }, { "epoch": 1.0215846450156074, "grad_norm": 393.8468933105469, "learning_rate": 1.013658673765951e-06, "loss": 17.5938, "step": 15382 }, { "epoch": 1.021651059307963, "grad_norm": 92.86665344238281, "learning_rate": 1.0135511317633965e-06, "loss": 11.1562, "step": 15383 }, { "epoch": 1.0217174736003187, "grad_norm": 180.72523498535156, "learning_rate": 1.0134435896040903e-06, "loss": 12.0781, "step": 15384 }, { "epoch": 1.0217838878926746, "grad_norm": 177.18345642089844, "learning_rate": 1.0133360472892763e-06, "loss": 18.3281, "step": 15385 }, { "epoch": 1.0218503021850303, "grad_norm": 408.0194091796875, "learning_rate": 1.0132285048201987e-06, "loss": 20.6719, "step": 15386 }, { "epoch": 1.021916716477386, "grad_norm": 194.05677795410156, "learning_rate": 1.0131209621981012e-06, "loss": 16.5938, "step": 15387 }, { "epoch": 1.0219831307697416, "grad_norm": 214.9028778076172, "learning_rate": 1.0130134194242281e-06, "loss": 20.0312, "step": 15388 }, { "epoch": 1.0220495450620974, "grad_norm": 229.80935668945312, "learning_rate": 1.0129058764998231e-06, "loss": 20.2812, "step": 15389 }, { "epoch": 1.022115959354453, "grad_norm": 174.99855041503906, "learning_rate": 1.0127983334261307e-06, "loss": 16.6719, "step": 15390 }, { "epoch": 1.0221823736468088, "grad_norm": 972.5267944335938, "learning_rate": 1.0126907902043945e-06, "loss": 17.25, "step": 15391 }, { "epoch": 1.0222487879391644, "grad_norm": 354.546630859375, "learning_rate": 1.012583246835858e-06, "loss": 10.5938, "step": 15392 }, { "epoch": 1.0223152022315203, "grad_norm": 115.74607849121094, "learning_rate": 1.0124757033217663e-06, "loss": 12.8438, "step": 15393 }, { "epoch": 1.022381616523876, "grad_norm": 264.5953369140625, "learning_rate": 1.0123681596633628e-06, "loss": 26.0781, "step": 15394 }, { "epoch": 1.0224480308162316, "grad_norm": 269.7077331542969, "learning_rate": 1.0122606158618916e-06, "loss": 14.1875, "step": 15395 }, { "epoch": 1.0225144451085875, "grad_norm": 309.248779296875, "learning_rate": 1.0121530719185964e-06, "loss": 12.6406, "step": 15396 }, { "epoch": 1.0225808594009431, "grad_norm": 241.5001220703125, "learning_rate": 1.012045527834722e-06, "loss": 13.9375, "step": 15397 }, { "epoch": 1.0226472736932988, "grad_norm": 232.9688720703125, "learning_rate": 1.0119379836115114e-06, "loss": 17.2344, "step": 15398 }, { "epoch": 1.0227136879856544, "grad_norm": 415.0029602050781, "learning_rate": 1.0118304392502093e-06, "loss": 17.3125, "step": 15399 }, { "epoch": 1.0227801022780103, "grad_norm": 136.1306610107422, "learning_rate": 1.0117228947520592e-06, "loss": 15.875, "step": 15400 }, { "epoch": 1.022846516570366, "grad_norm": 347.4319763183594, "learning_rate": 1.0116153501183057e-06, "loss": 20.3125, "step": 15401 }, { "epoch": 1.0229129308627216, "grad_norm": 148.53343200683594, "learning_rate": 1.0115078053501924e-06, "loss": 14.1172, "step": 15402 }, { "epoch": 1.0229793451550773, "grad_norm": 242.94717407226562, "learning_rate": 1.0114002604489635e-06, "loss": 15.8281, "step": 15403 }, { "epoch": 1.0230457594474331, "grad_norm": 621.0496826171875, "learning_rate": 1.0112927154158632e-06, "loss": 19.625, "step": 15404 }, { "epoch": 1.0231121737397888, "grad_norm": 451.18218994140625, "learning_rate": 1.011185170252135e-06, "loss": 12.3984, "step": 15405 }, { "epoch": 1.0231785880321445, "grad_norm": 117.82634735107422, "learning_rate": 1.011077624959023e-06, "loss": 13.9844, "step": 15406 }, { "epoch": 1.0232450023245003, "grad_norm": 285.611328125, "learning_rate": 1.0109700795377718e-06, "loss": 19.3438, "step": 15407 }, { "epoch": 1.023311416616856, "grad_norm": 146.4630126953125, "learning_rate": 1.010862533989625e-06, "loss": 16.0781, "step": 15408 }, { "epoch": 1.0233778309092116, "grad_norm": 618.4935302734375, "learning_rate": 1.0107549883158265e-06, "loss": 25.2812, "step": 15409 }, { "epoch": 1.0234442452015673, "grad_norm": 259.4277648925781, "learning_rate": 1.0106474425176206e-06, "loss": 20.3594, "step": 15410 }, { "epoch": 1.0235106594939232, "grad_norm": 178.1663818359375, "learning_rate": 1.010539896596251e-06, "loss": 22.3125, "step": 15411 }, { "epoch": 1.0235770737862788, "grad_norm": 341.9736022949219, "learning_rate": 1.0104323505529623e-06, "loss": 17.2969, "step": 15412 }, { "epoch": 1.0236434880786345, "grad_norm": 219.99913024902344, "learning_rate": 1.0103248043889983e-06, "loss": 16.1094, "step": 15413 }, { "epoch": 1.0237099023709901, "grad_norm": 172.4320068359375, "learning_rate": 1.0102172581056026e-06, "loss": 17.6719, "step": 15414 }, { "epoch": 1.023776316663346, "grad_norm": 199.1669921875, "learning_rate": 1.0101097117040195e-06, "loss": 16.2812, "step": 15415 }, { "epoch": 1.0238427309557017, "grad_norm": 203.08132934570312, "learning_rate": 1.0100021651854932e-06, "loss": 17.7188, "step": 15416 }, { "epoch": 1.0239091452480573, "grad_norm": 373.8342590332031, "learning_rate": 1.0098946185512675e-06, "loss": 15.1094, "step": 15417 }, { "epoch": 1.0239755595404132, "grad_norm": 178.24868774414062, "learning_rate": 1.0097870718025866e-06, "loss": 14.9062, "step": 15418 }, { "epoch": 1.0240419738327688, "grad_norm": 164.06558227539062, "learning_rate": 1.0096795249406946e-06, "loss": 18.4375, "step": 15419 }, { "epoch": 1.0241083881251245, "grad_norm": 379.71466064453125, "learning_rate": 1.0095719779668355e-06, "loss": 19.1562, "step": 15420 }, { "epoch": 1.0241748024174802, "grad_norm": 212.35455322265625, "learning_rate": 1.0094644308822532e-06, "loss": 16.875, "step": 15421 }, { "epoch": 1.024241216709836, "grad_norm": 242.05796813964844, "learning_rate": 1.0093568836881918e-06, "loss": 17.375, "step": 15422 }, { "epoch": 1.0243076310021917, "grad_norm": 408.0449523925781, "learning_rate": 1.0092493363858953e-06, "loss": 16.1875, "step": 15423 }, { "epoch": 1.0243740452945473, "grad_norm": 190.9580535888672, "learning_rate": 1.0091417889766078e-06, "loss": 16.1406, "step": 15424 }, { "epoch": 1.024440459586903, "grad_norm": 498.49072265625, "learning_rate": 1.0090342414615732e-06, "loss": 31.0938, "step": 15425 }, { "epoch": 1.0245068738792589, "grad_norm": 527.2183227539062, "learning_rate": 1.0089266938420362e-06, "loss": 13.2344, "step": 15426 }, { "epoch": 1.0245732881716145, "grad_norm": 194.12657165527344, "learning_rate": 1.0088191461192402e-06, "loss": 14.7656, "step": 15427 }, { "epoch": 1.0246397024639702, "grad_norm": 242.97592163085938, "learning_rate": 1.0087115982944292e-06, "loss": 21.2188, "step": 15428 }, { "epoch": 1.024706116756326, "grad_norm": 522.3466186523438, "learning_rate": 1.0086040503688477e-06, "loss": 19.4375, "step": 15429 }, { "epoch": 1.0247725310486817, "grad_norm": 189.02198791503906, "learning_rate": 1.0084965023437393e-06, "loss": 19.1562, "step": 15430 }, { "epoch": 1.0248389453410374, "grad_norm": 192.91763305664062, "learning_rate": 1.0083889542203483e-06, "loss": 14.5781, "step": 15431 }, { "epoch": 1.024905359633393, "grad_norm": 314.35894775390625, "learning_rate": 1.0082814059999185e-06, "loss": 14.4688, "step": 15432 }, { "epoch": 1.024971773925749, "grad_norm": 152.161865234375, "learning_rate": 1.0081738576836946e-06, "loss": 15.7812, "step": 15433 }, { "epoch": 1.0250381882181046, "grad_norm": 540.5400390625, "learning_rate": 1.00806630927292e-06, "loss": 14.0, "step": 15434 }, { "epoch": 1.0251046025104602, "grad_norm": 499.85479736328125, "learning_rate": 1.0079587607688391e-06, "loss": 15.2969, "step": 15435 }, { "epoch": 1.0251710168028159, "grad_norm": 335.33905029296875, "learning_rate": 1.0078512121726958e-06, "loss": 13.9688, "step": 15436 }, { "epoch": 1.0252374310951717, "grad_norm": 154.08279418945312, "learning_rate": 1.0077436634857342e-06, "loss": 13.75, "step": 15437 }, { "epoch": 1.0253038453875274, "grad_norm": 186.7697296142578, "learning_rate": 1.0076361147091983e-06, "loss": 16.1094, "step": 15438 }, { "epoch": 1.025370259679883, "grad_norm": 174.47093200683594, "learning_rate": 1.0075285658443322e-06, "loss": 21.6719, "step": 15439 }, { "epoch": 1.025436673972239, "grad_norm": 188.00360107421875, "learning_rate": 1.0074210168923802e-06, "loss": 25.875, "step": 15440 }, { "epoch": 1.0255030882645946, "grad_norm": 257.7789611816406, "learning_rate": 1.0073134678545858e-06, "loss": 16.0469, "step": 15441 }, { "epoch": 1.0255695025569502, "grad_norm": 338.4630126953125, "learning_rate": 1.0072059187321936e-06, "loss": 19.4688, "step": 15442 }, { "epoch": 1.0256359168493059, "grad_norm": 313.0960693359375, "learning_rate": 1.0070983695264475e-06, "loss": 13.0469, "step": 15443 }, { "epoch": 1.0257023311416618, "grad_norm": 267.0044860839844, "learning_rate": 1.0069908202385918e-06, "loss": 17.9688, "step": 15444 }, { "epoch": 1.0257687454340174, "grad_norm": 123.75599670410156, "learning_rate": 1.00688327086987e-06, "loss": 14.9219, "step": 15445 }, { "epoch": 1.025835159726373, "grad_norm": 201.4158172607422, "learning_rate": 1.0067757214215265e-06, "loss": 18.25, "step": 15446 }, { "epoch": 1.0259015740187287, "grad_norm": 108.92467498779297, "learning_rate": 1.0066681718948055e-06, "loss": 12.1953, "step": 15447 }, { "epoch": 1.0259679883110846, "grad_norm": 135.431884765625, "learning_rate": 1.006560622290951e-06, "loss": 13.0, "step": 15448 }, { "epoch": 1.0260344026034403, "grad_norm": 596.4334716796875, "learning_rate": 1.0064530726112067e-06, "loss": 14.8125, "step": 15449 }, { "epoch": 1.026100816895796, "grad_norm": 754.35107421875, "learning_rate": 1.0063455228568172e-06, "loss": 25.5625, "step": 15450 }, { "epoch": 1.0261672311881518, "grad_norm": 220.90463256835938, "learning_rate": 1.0062379730290265e-06, "loss": 19.1875, "step": 15451 }, { "epoch": 1.0262336454805074, "grad_norm": 290.66387939453125, "learning_rate": 1.0061304231290782e-06, "loss": 19.3594, "step": 15452 }, { "epoch": 1.026300059772863, "grad_norm": 214.49156188964844, "learning_rate": 1.0060228731582167e-06, "loss": 16.8125, "step": 15453 }, { "epoch": 1.0263664740652187, "grad_norm": 187.23377990722656, "learning_rate": 1.0059153231176863e-06, "loss": 16.3438, "step": 15454 }, { "epoch": 1.0264328883575746, "grad_norm": 197.1591033935547, "learning_rate": 1.0058077730087307e-06, "loss": 15.5312, "step": 15455 }, { "epoch": 1.0264993026499303, "grad_norm": 121.75050354003906, "learning_rate": 1.0057002228325941e-06, "loss": 17.9688, "step": 15456 }, { "epoch": 1.026565716942286, "grad_norm": 238.05523681640625, "learning_rate": 1.005592672590521e-06, "loss": 16.625, "step": 15457 }, { "epoch": 1.0266321312346416, "grad_norm": 166.0562286376953, "learning_rate": 1.0054851222837545e-06, "loss": 14.2344, "step": 15458 }, { "epoch": 1.0266985455269975, "grad_norm": 221.53123474121094, "learning_rate": 1.0053775719135396e-06, "loss": 20.9375, "step": 15459 }, { "epoch": 1.0267649598193531, "grad_norm": 243.3643798828125, "learning_rate": 1.0052700214811201e-06, "loss": 18.7031, "step": 15460 }, { "epoch": 1.0268313741117088, "grad_norm": 454.7940979003906, "learning_rate": 1.0051624709877399e-06, "loss": 18.6562, "step": 15461 }, { "epoch": 1.0268977884040646, "grad_norm": 168.8018035888672, "learning_rate": 1.0050549204346432e-06, "loss": 17.7969, "step": 15462 }, { "epoch": 1.0269642026964203, "grad_norm": 174.84336853027344, "learning_rate": 1.004947369823074e-06, "loss": 14.25, "step": 15463 }, { "epoch": 1.027030616988776, "grad_norm": 431.7246398925781, "learning_rate": 1.004839819154277e-06, "loss": 21.6094, "step": 15464 }, { "epoch": 1.0270970312811316, "grad_norm": 177.28170776367188, "learning_rate": 1.0047322684294953e-06, "loss": 21.1562, "step": 15465 }, { "epoch": 1.0271634455734875, "grad_norm": 153.17298889160156, "learning_rate": 1.0046247176499736e-06, "loss": 16.0781, "step": 15466 }, { "epoch": 1.0272298598658431, "grad_norm": 220.18141174316406, "learning_rate": 1.0045171668169555e-06, "loss": 15.5156, "step": 15467 }, { "epoch": 1.0272962741581988, "grad_norm": 125.62235260009766, "learning_rate": 1.004409615931686e-06, "loss": 12.9844, "step": 15468 }, { "epoch": 1.0273626884505545, "grad_norm": 233.11941528320312, "learning_rate": 1.004302064995408e-06, "loss": 19.4531, "step": 15469 }, { "epoch": 1.0274291027429103, "grad_norm": 259.82049560546875, "learning_rate": 1.0041945140093666e-06, "loss": 14.2344, "step": 15470 }, { "epoch": 1.027495517035266, "grad_norm": 124.38906860351562, "learning_rate": 1.0040869629748055e-06, "loss": 17.8125, "step": 15471 }, { "epoch": 1.0275619313276216, "grad_norm": 94.60830688476562, "learning_rate": 1.0039794118929688e-06, "loss": 11.0781, "step": 15472 }, { "epoch": 1.0276283456199775, "grad_norm": 149.67376708984375, "learning_rate": 1.0038718607651003e-06, "loss": 18.2969, "step": 15473 }, { "epoch": 1.0276947599123332, "grad_norm": 250.25363159179688, "learning_rate": 1.0037643095924441e-06, "loss": 17.625, "step": 15474 }, { "epoch": 1.0277611742046888, "grad_norm": 250.49766540527344, "learning_rate": 1.003656758376245e-06, "loss": 22.3438, "step": 15475 }, { "epoch": 1.0278275884970445, "grad_norm": 231.2243194580078, "learning_rate": 1.0035492071177468e-06, "loss": 22.0, "step": 15476 }, { "epoch": 1.0278940027894004, "grad_norm": 234.80003356933594, "learning_rate": 1.0034416558181928e-06, "loss": 14.5625, "step": 15477 }, { "epoch": 1.027960417081756, "grad_norm": 277.63287353515625, "learning_rate": 1.0033341044788282e-06, "loss": 15.0781, "step": 15478 }, { "epoch": 1.0280268313741117, "grad_norm": 164.20472717285156, "learning_rate": 1.0032265531008965e-06, "loss": 13.0156, "step": 15479 }, { "epoch": 1.0280932456664673, "grad_norm": 121.29309844970703, "learning_rate": 1.0031190016856418e-06, "loss": 11.7266, "step": 15480 }, { "epoch": 1.0281596599588232, "grad_norm": 206.02413940429688, "learning_rate": 1.0030114502343084e-06, "loss": 20.9688, "step": 15481 }, { "epoch": 1.0282260742511788, "grad_norm": 200.98695373535156, "learning_rate": 1.00290389874814e-06, "loss": 15.125, "step": 15482 }, { "epoch": 1.0282924885435345, "grad_norm": 153.95172119140625, "learning_rate": 1.0027963472283814e-06, "loss": 14.1094, "step": 15483 }, { "epoch": 1.0283589028358904, "grad_norm": 385.84326171875, "learning_rate": 1.002688795676276e-06, "loss": 22.7344, "step": 15484 }, { "epoch": 1.028425317128246, "grad_norm": 222.93214416503906, "learning_rate": 1.002581244093068e-06, "loss": 13.7188, "step": 15485 }, { "epoch": 1.0284917314206017, "grad_norm": 173.16073608398438, "learning_rate": 1.002473692480002e-06, "loss": 15.3672, "step": 15486 }, { "epoch": 1.0285581457129573, "grad_norm": 190.07620239257812, "learning_rate": 1.0023661408383218e-06, "loss": 12.5703, "step": 15487 }, { "epoch": 1.0286245600053132, "grad_norm": 388.34588623046875, "learning_rate": 1.002258589169271e-06, "loss": 18.1406, "step": 15488 }, { "epoch": 1.0286909742976689, "grad_norm": 177.91915893554688, "learning_rate": 1.0021510374740944e-06, "loss": 14.4922, "step": 15489 }, { "epoch": 1.0287573885900245, "grad_norm": 220.9821319580078, "learning_rate": 1.0020434857540361e-06, "loss": 15.4531, "step": 15490 }, { "epoch": 1.0288238028823802, "grad_norm": 345.7200622558594, "learning_rate": 1.0019359340103393e-06, "loss": 22.0156, "step": 15491 }, { "epoch": 1.028890217174736, "grad_norm": 131.468994140625, "learning_rate": 1.001828382244249e-06, "loss": 13.625, "step": 15492 }, { "epoch": 1.0289566314670917, "grad_norm": 187.7880401611328, "learning_rate": 1.0017208304570093e-06, "loss": 15.6562, "step": 15493 }, { "epoch": 1.0290230457594474, "grad_norm": 1834.5062255859375, "learning_rate": 1.001613278649864e-06, "loss": 34.9219, "step": 15494 }, { "epoch": 1.0290894600518032, "grad_norm": 253.3274383544922, "learning_rate": 1.001505726824057e-06, "loss": 16.7188, "step": 15495 }, { "epoch": 1.029155874344159, "grad_norm": 276.5711364746094, "learning_rate": 1.0013981749808326e-06, "loss": 16.6875, "step": 15496 }, { "epoch": 1.0292222886365145, "grad_norm": 277.1357727050781, "learning_rate": 1.0012906231214351e-06, "loss": 17.9688, "step": 15497 }, { "epoch": 1.0292887029288702, "grad_norm": 87.7431869506836, "learning_rate": 1.0011830712471082e-06, "loss": 16.0156, "step": 15498 }, { "epoch": 1.029355117221226, "grad_norm": 465.7588806152344, "learning_rate": 1.0010755193590965e-06, "loss": 14.5, "step": 15499 }, { "epoch": 1.0294215315135817, "grad_norm": 206.57748413085938, "learning_rate": 1.0009679674586438e-06, "loss": 15.0469, "step": 15500 }, { "epoch": 1.0294879458059374, "grad_norm": 250.0601348876953, "learning_rate": 1.0008604155469939e-06, "loss": 17.2656, "step": 15501 }, { "epoch": 1.029554360098293, "grad_norm": 341.993408203125, "learning_rate": 1.0007528636253916e-06, "loss": 13.6094, "step": 15502 }, { "epoch": 1.029620774390649, "grad_norm": 203.24447631835938, "learning_rate": 1.0006453116950803e-06, "loss": 25.6406, "step": 15503 }, { "epoch": 1.0296871886830046, "grad_norm": 131.07003784179688, "learning_rate": 1.0005377597573047e-06, "loss": 13.0781, "step": 15504 }, { "epoch": 1.0297536029753602, "grad_norm": 278.57159423828125, "learning_rate": 1.0004302078133083e-06, "loss": 18.0781, "step": 15505 }, { "epoch": 1.029820017267716, "grad_norm": 234.1533660888672, "learning_rate": 1.0003226558643354e-06, "loss": 19.8906, "step": 15506 }, { "epoch": 1.0298864315600718, "grad_norm": 152.74758911132812, "learning_rate": 1.0002151039116304e-06, "loss": 18.2656, "step": 15507 }, { "epoch": 1.0299528458524274, "grad_norm": 182.90834045410156, "learning_rate": 1.0001075519564373e-06, "loss": 14.8906, "step": 15508 }, { "epoch": 1.030019260144783, "grad_norm": 150.53565979003906, "learning_rate": 1e-06, "loss": 12.5625, "step": 15509 }, { "epoch": 1.030085674437139, "grad_norm": 217.9492950439453, "learning_rate": 9.998924480435624e-07, "loss": 14.3594, "step": 15510 }, { "epoch": 1.0301520887294946, "grad_norm": 318.0152282714844, "learning_rate": 9.997848960883697e-07, "loss": 13.2344, "step": 15511 }, { "epoch": 1.0302185030218503, "grad_norm": 290.5607604980469, "learning_rate": 9.996773441356643e-07, "loss": 19.1016, "step": 15512 }, { "epoch": 1.030284917314206, "grad_norm": 302.8208312988281, "learning_rate": 9.995697921866918e-07, "loss": 18.6094, "step": 15513 }, { "epoch": 1.0303513316065618, "grad_norm": 247.14996337890625, "learning_rate": 9.994622402426956e-07, "loss": 20.0078, "step": 15514 }, { "epoch": 1.0304177458989174, "grad_norm": 196.20391845703125, "learning_rate": 9.993546883049199e-07, "loss": 13.375, "step": 15515 }, { "epoch": 1.030484160191273, "grad_norm": 141.37925720214844, "learning_rate": 9.992471363746084e-07, "loss": 16.0625, "step": 15516 }, { "epoch": 1.030550574483629, "grad_norm": 99.18904876708984, "learning_rate": 9.99139584453006e-07, "loss": 12.9922, "step": 15517 }, { "epoch": 1.0306169887759846, "grad_norm": 299.8081970214844, "learning_rate": 9.990320325413564e-07, "loss": 15.125, "step": 15518 }, { "epoch": 1.0306834030683403, "grad_norm": 607.9892578125, "learning_rate": 9.989244806409036e-07, "loss": 18.5156, "step": 15519 }, { "epoch": 1.030749817360696, "grad_norm": 283.4778137207031, "learning_rate": 9.988169287528914e-07, "loss": 23.0, "step": 15520 }, { "epoch": 1.0308162316530518, "grad_norm": 359.3651428222656, "learning_rate": 9.987093768785648e-07, "loss": 15.5625, "step": 15521 }, { "epoch": 1.0308826459454075, "grad_norm": 631.0772094726562, "learning_rate": 9.986018250191673e-07, "loss": 19.7812, "step": 15522 }, { "epoch": 1.0309490602377631, "grad_norm": 589.0973510742188, "learning_rate": 9.98494273175943e-07, "loss": 14.25, "step": 15523 }, { "epoch": 1.0310154745301188, "grad_norm": 128.4274444580078, "learning_rate": 9.98386721350136e-07, "loss": 13.7969, "step": 15524 }, { "epoch": 1.0310818888224746, "grad_norm": 1492.256103515625, "learning_rate": 9.982791695429906e-07, "loss": 23.2656, "step": 15525 }, { "epoch": 1.0311483031148303, "grad_norm": 250.12603759765625, "learning_rate": 9.981716177557508e-07, "loss": 18.0469, "step": 15526 }, { "epoch": 1.031214717407186, "grad_norm": 380.1217956542969, "learning_rate": 9.980640659896608e-07, "loss": 12.5781, "step": 15527 }, { "epoch": 1.0312811316995418, "grad_norm": 103.77759552001953, "learning_rate": 9.97956514245964e-07, "loss": 16.9844, "step": 15528 }, { "epoch": 1.0313475459918975, "grad_norm": 287.1513366699219, "learning_rate": 9.978489625259055e-07, "loss": 23.2188, "step": 15529 }, { "epoch": 1.0314139602842531, "grad_norm": 138.42678833007812, "learning_rate": 9.97741410830729e-07, "loss": 12.4375, "step": 15530 }, { "epoch": 1.0314803745766088, "grad_norm": 497.8502502441406, "learning_rate": 9.976338591616786e-07, "loss": 13.3281, "step": 15531 }, { "epoch": 1.0315467888689647, "grad_norm": 233.9539031982422, "learning_rate": 9.975263075199982e-07, "loss": 11.8438, "step": 15532 }, { "epoch": 1.0316132031613203, "grad_norm": 167.25868225097656, "learning_rate": 9.97418755906932e-07, "loss": 20.2188, "step": 15533 }, { "epoch": 1.031679617453676, "grad_norm": 289.0813903808594, "learning_rate": 9.97311204323724e-07, "loss": 13.2969, "step": 15534 }, { "epoch": 1.0317460317460316, "grad_norm": 230.7950439453125, "learning_rate": 9.97203652771619e-07, "loss": 26.7344, "step": 15535 }, { "epoch": 1.0318124460383875, "grad_norm": 150.42086791992188, "learning_rate": 9.9709610125186e-07, "loss": 15.3125, "step": 15536 }, { "epoch": 1.0318788603307432, "grad_norm": 222.25428771972656, "learning_rate": 9.96988549765692e-07, "loss": 15.6562, "step": 15537 }, { "epoch": 1.0319452746230988, "grad_norm": 308.9353332519531, "learning_rate": 9.968809983143583e-07, "loss": 14.3047, "step": 15538 }, { "epoch": 1.0320116889154547, "grad_norm": 605.986572265625, "learning_rate": 9.967734468991036e-07, "loss": 12.5625, "step": 15539 }, { "epoch": 1.0320781032078103, "grad_norm": 174.68043518066406, "learning_rate": 9.96665895521172e-07, "loss": 12.4688, "step": 15540 }, { "epoch": 1.032144517500166, "grad_norm": 212.64480590820312, "learning_rate": 9.965583441818074e-07, "loss": 14.7266, "step": 15541 }, { "epoch": 1.0322109317925217, "grad_norm": 137.08956909179688, "learning_rate": 9.964507928822533e-07, "loss": 16.6094, "step": 15542 }, { "epoch": 1.0322773460848775, "grad_norm": 85.40747833251953, "learning_rate": 9.96343241623755e-07, "loss": 13.2969, "step": 15543 }, { "epoch": 1.0323437603772332, "grad_norm": 437.19586181640625, "learning_rate": 9.962356904075558e-07, "loss": 15.0156, "step": 15544 }, { "epoch": 1.0324101746695888, "grad_norm": 196.10205078125, "learning_rate": 9.961281392349e-07, "loss": 16.8906, "step": 15545 }, { "epoch": 1.0324765889619445, "grad_norm": 345.62109375, "learning_rate": 9.960205881070313e-07, "loss": 16.3438, "step": 15546 }, { "epoch": 1.0325430032543004, "grad_norm": 211.85362243652344, "learning_rate": 9.959130370251944e-07, "loss": 13.4844, "step": 15547 }, { "epoch": 1.032609417546656, "grad_norm": 161.88186645507812, "learning_rate": 9.958054859906334e-07, "loss": 14.4688, "step": 15548 }, { "epoch": 1.0326758318390117, "grad_norm": 229.41259765625, "learning_rate": 9.956979350045916e-07, "loss": 14.0078, "step": 15549 }, { "epoch": 1.0327422461313676, "grad_norm": 114.73013305664062, "learning_rate": 9.955903840683142e-07, "loss": 12.7656, "step": 15550 }, { "epoch": 1.0328086604237232, "grad_norm": 250.5292510986328, "learning_rate": 9.954828331830442e-07, "loss": 15.7812, "step": 15551 }, { "epoch": 1.0328750747160789, "grad_norm": 380.8305358886719, "learning_rate": 9.953752823500266e-07, "loss": 16.1562, "step": 15552 }, { "epoch": 1.0329414890084345, "grad_norm": 336.25762939453125, "learning_rate": 9.952677315705046e-07, "loss": 14.9219, "step": 15553 }, { "epoch": 1.0330079033007904, "grad_norm": 272.30316162109375, "learning_rate": 9.951601808457232e-07, "loss": 20.2188, "step": 15554 }, { "epoch": 1.033074317593146, "grad_norm": 150.49517822265625, "learning_rate": 9.950526301769256e-07, "loss": 16.3594, "step": 15555 }, { "epoch": 1.0331407318855017, "grad_norm": 294.37451171875, "learning_rate": 9.949450795653567e-07, "loss": 15.5469, "step": 15556 }, { "epoch": 1.0332071461778574, "grad_norm": 341.56005859375, "learning_rate": 9.9483752901226e-07, "loss": 15.5469, "step": 15557 }, { "epoch": 1.0332735604702132, "grad_norm": 231.7819061279297, "learning_rate": 9.9472997851888e-07, "loss": 13.9219, "step": 15558 }, { "epoch": 1.033339974762569, "grad_norm": 895.9515991210938, "learning_rate": 9.9462242808646e-07, "loss": 16.5625, "step": 15559 }, { "epoch": 1.0334063890549245, "grad_norm": 127.0871810913086, "learning_rate": 9.945148777162454e-07, "loss": 13.7812, "step": 15560 }, { "epoch": 1.0334728033472804, "grad_norm": 126.03234100341797, "learning_rate": 9.94407327409479e-07, "loss": 13.7344, "step": 15561 }, { "epoch": 1.033539217639636, "grad_norm": 174.55174255371094, "learning_rate": 9.942997771674058e-07, "loss": 17.25, "step": 15562 }, { "epoch": 1.0336056319319917, "grad_norm": 272.837646484375, "learning_rate": 9.941922269912692e-07, "loss": 19.4688, "step": 15563 }, { "epoch": 1.0336720462243474, "grad_norm": 185.05299377441406, "learning_rate": 9.940846768823138e-07, "loss": 21.2344, "step": 15564 }, { "epoch": 1.0337384605167033, "grad_norm": 153.659912109375, "learning_rate": 9.939771268417832e-07, "loss": 23.0, "step": 15565 }, { "epoch": 1.033804874809059, "grad_norm": 478.019287109375, "learning_rate": 9.93869576870922e-07, "loss": 17.7344, "step": 15566 }, { "epoch": 1.0338712891014146, "grad_norm": 275.2830810546875, "learning_rate": 9.937620269709736e-07, "loss": 17.6953, "step": 15567 }, { "epoch": 1.0339377033937704, "grad_norm": 132.80101013183594, "learning_rate": 9.93654477143183e-07, "loss": 13.9219, "step": 15568 }, { "epoch": 1.034004117686126, "grad_norm": 854.501708984375, "learning_rate": 9.935469273887932e-07, "loss": 14.9062, "step": 15569 }, { "epoch": 1.0340705319784818, "grad_norm": 236.49293518066406, "learning_rate": 9.934393777090493e-07, "loss": 12.1562, "step": 15570 }, { "epoch": 1.0341369462708374, "grad_norm": 332.51129150390625, "learning_rate": 9.933318281051944e-07, "loss": 20.7188, "step": 15571 }, { "epoch": 1.0342033605631933, "grad_norm": 466.8984375, "learning_rate": 9.932242785784736e-07, "loss": 18.4219, "step": 15572 }, { "epoch": 1.034269774855549, "grad_norm": 201.01339721679688, "learning_rate": 9.9311672913013e-07, "loss": 20.2656, "step": 15573 }, { "epoch": 1.0343361891479046, "grad_norm": 293.0713195800781, "learning_rate": 9.930091797614085e-07, "loss": 17.0938, "step": 15574 }, { "epoch": 1.0344026034402602, "grad_norm": 91.68917083740234, "learning_rate": 9.929016304735524e-07, "loss": 19.875, "step": 15575 }, { "epoch": 1.0344690177326161, "grad_norm": 90.2311782836914, "learning_rate": 9.927940812678067e-07, "loss": 14.7656, "step": 15576 }, { "epoch": 1.0345354320249718, "grad_norm": 199.2088623046875, "learning_rate": 9.92686532145414e-07, "loss": 13.8281, "step": 15577 }, { "epoch": 1.0346018463173274, "grad_norm": 100.78056335449219, "learning_rate": 9.925789831076201e-07, "loss": 16.25, "step": 15578 }, { "epoch": 1.0346682606096833, "grad_norm": 144.5442657470703, "learning_rate": 9.924714341556679e-07, "loss": 15.6094, "step": 15579 }, { "epoch": 1.034734674902039, "grad_norm": 191.10548400878906, "learning_rate": 9.92363885290802e-07, "loss": 15.4062, "step": 15580 }, { "epoch": 1.0348010891943946, "grad_norm": 342.8744201660156, "learning_rate": 9.92256336514266e-07, "loss": 19.9062, "step": 15581 }, { "epoch": 1.0348675034867503, "grad_norm": 115.80569458007812, "learning_rate": 9.921487878273043e-07, "loss": 18.6094, "step": 15582 }, { "epoch": 1.0349339177791061, "grad_norm": 178.1134033203125, "learning_rate": 9.92041239231161e-07, "loss": 15.3281, "step": 15583 }, { "epoch": 1.0350003320714618, "grad_norm": 124.44124603271484, "learning_rate": 9.919336907270797e-07, "loss": 15.7812, "step": 15584 }, { "epoch": 1.0350667463638175, "grad_norm": 127.97490692138672, "learning_rate": 9.918261423163053e-07, "loss": 15.3281, "step": 15585 }, { "epoch": 1.0351331606561731, "grad_norm": 238.39736938476562, "learning_rate": 9.917185940000812e-07, "loss": 20.3438, "step": 15586 }, { "epoch": 1.035199574948529, "grad_norm": 147.5209197998047, "learning_rate": 9.916110457796518e-07, "loss": 14.5156, "step": 15587 }, { "epoch": 1.0352659892408846, "grad_norm": 131.9459686279297, "learning_rate": 9.915034976562604e-07, "loss": 15.5625, "step": 15588 }, { "epoch": 1.0353324035332403, "grad_norm": 102.02193450927734, "learning_rate": 9.913959496311524e-07, "loss": 9.8594, "step": 15589 }, { "epoch": 1.0353988178255962, "grad_norm": 181.48939514160156, "learning_rate": 9.912884017055705e-07, "loss": 16.2344, "step": 15590 }, { "epoch": 1.0354652321179518, "grad_norm": 164.8686981201172, "learning_rate": 9.9118085388076e-07, "loss": 13.9844, "step": 15591 }, { "epoch": 1.0355316464103075, "grad_norm": 249.54905700683594, "learning_rate": 9.910733061579635e-07, "loss": 17.1406, "step": 15592 }, { "epoch": 1.0355980607026631, "grad_norm": 387.9083251953125, "learning_rate": 9.909657585384267e-07, "loss": 18.2344, "step": 15593 }, { "epoch": 1.035664474995019, "grad_norm": 118.83316040039062, "learning_rate": 9.90858211023392e-07, "loss": 17.1719, "step": 15594 }, { "epoch": 1.0357308892873747, "grad_norm": 216.07339477539062, "learning_rate": 9.907506636141048e-07, "loss": 20.2656, "step": 15595 }, { "epoch": 1.0357973035797303, "grad_norm": 172.926025390625, "learning_rate": 9.906431163118083e-07, "loss": 16.5625, "step": 15596 }, { "epoch": 1.035863717872086, "grad_norm": 210.37501525878906, "learning_rate": 9.90535569117747e-07, "loss": 15.0781, "step": 15597 }, { "epoch": 1.0359301321644419, "grad_norm": 2057.436767578125, "learning_rate": 9.904280220331644e-07, "loss": 17.5938, "step": 15598 }, { "epoch": 1.0359965464567975, "grad_norm": 205.05349731445312, "learning_rate": 9.903204750593053e-07, "loss": 15.2812, "step": 15599 }, { "epoch": 1.0360629607491532, "grad_norm": 258.95184326171875, "learning_rate": 9.902129281974133e-07, "loss": 16.7344, "step": 15600 }, { "epoch": 1.036129375041509, "grad_norm": 250.6297149658203, "learning_rate": 9.901053814487326e-07, "loss": 13.3281, "step": 15601 }, { "epoch": 1.0361957893338647, "grad_norm": 959.4876708984375, "learning_rate": 9.89997834814507e-07, "loss": 15.7344, "step": 15602 }, { "epoch": 1.0362622036262203, "grad_norm": 257.9912109375, "learning_rate": 9.898902882959806e-07, "loss": 15.75, "step": 15603 }, { "epoch": 1.036328617918576, "grad_norm": 371.3089599609375, "learning_rate": 9.897827418943976e-07, "loss": 18.1719, "step": 15604 }, { "epoch": 1.0363950322109319, "grad_norm": 100.64866638183594, "learning_rate": 9.89675195611002e-07, "loss": 14.9062, "step": 15605 }, { "epoch": 1.0364614465032875, "grad_norm": 136.5178680419922, "learning_rate": 9.895676494470376e-07, "loss": 14.6328, "step": 15606 }, { "epoch": 1.0365278607956432, "grad_norm": 111.71382904052734, "learning_rate": 9.894601034037489e-07, "loss": 15.2109, "step": 15607 }, { "epoch": 1.0365942750879988, "grad_norm": 134.11558532714844, "learning_rate": 9.893525574823795e-07, "loss": 14.0, "step": 15608 }, { "epoch": 1.0366606893803547, "grad_norm": 237.80731201171875, "learning_rate": 9.892450116841736e-07, "loss": 19.6719, "step": 15609 }, { "epoch": 1.0367271036727104, "grad_norm": 199.60137939453125, "learning_rate": 9.89137466010375e-07, "loss": 15.0156, "step": 15610 }, { "epoch": 1.036793517965066, "grad_norm": 174.70091247558594, "learning_rate": 9.890299204622283e-07, "loss": 14.4219, "step": 15611 }, { "epoch": 1.036859932257422, "grad_norm": 169.95433044433594, "learning_rate": 9.889223750409768e-07, "loss": 14.4531, "step": 15612 }, { "epoch": 1.0369263465497776, "grad_norm": 226.04791259765625, "learning_rate": 9.888148297478652e-07, "loss": 12.3281, "step": 15613 }, { "epoch": 1.0369927608421332, "grad_norm": 231.132080078125, "learning_rate": 9.88707284584137e-07, "loss": 16.8906, "step": 15614 }, { "epoch": 1.0370591751344889, "grad_norm": 151.36476135253906, "learning_rate": 9.885997395510366e-07, "loss": 12.25, "step": 15615 }, { "epoch": 1.0371255894268447, "grad_norm": 110.95672607421875, "learning_rate": 9.884921946498075e-07, "loss": 14.5, "step": 15616 }, { "epoch": 1.0371920037192004, "grad_norm": 142.2882537841797, "learning_rate": 9.883846498816944e-07, "loss": 12.4375, "step": 15617 }, { "epoch": 1.037258418011556, "grad_norm": 357.42205810546875, "learning_rate": 9.88277105247941e-07, "loss": 17.4688, "step": 15618 }, { "epoch": 1.0373248323039117, "grad_norm": 243.0856475830078, "learning_rate": 9.88169560749791e-07, "loss": 18.875, "step": 15619 }, { "epoch": 1.0373912465962676, "grad_norm": 291.0685729980469, "learning_rate": 9.880620163884887e-07, "loss": 14.2969, "step": 15620 }, { "epoch": 1.0374576608886232, "grad_norm": 248.61163330078125, "learning_rate": 9.87954472165278e-07, "loss": 17.0312, "step": 15621 }, { "epoch": 1.0375240751809789, "grad_norm": 94.3946762084961, "learning_rate": 9.878469280814035e-07, "loss": 14.9375, "step": 15622 }, { "epoch": 1.0375904894733348, "grad_norm": 147.78028869628906, "learning_rate": 9.87739384138108e-07, "loss": 13.8125, "step": 15623 }, { "epoch": 1.0376569037656904, "grad_norm": 144.28680419921875, "learning_rate": 9.876318403366371e-07, "loss": 9.9375, "step": 15624 }, { "epoch": 1.037723318058046, "grad_norm": 220.4378204345703, "learning_rate": 9.875242966782334e-07, "loss": 14.6094, "step": 15625 }, { "epoch": 1.0377897323504017, "grad_norm": 221.69203186035156, "learning_rate": 9.874167531641418e-07, "loss": 15.8594, "step": 15626 }, { "epoch": 1.0378561466427576, "grad_norm": 216.52757263183594, "learning_rate": 9.873092097956055e-07, "loss": 16.875, "step": 15627 }, { "epoch": 1.0379225609351133, "grad_norm": 168.5818328857422, "learning_rate": 9.872016665738692e-07, "loss": 16.5938, "step": 15628 }, { "epoch": 1.037988975227469, "grad_norm": 193.5086669921875, "learning_rate": 9.870941235001768e-07, "loss": 13.0547, "step": 15629 }, { "epoch": 1.0380553895198246, "grad_norm": 273.6392517089844, "learning_rate": 9.86986580575772e-07, "loss": 22.8438, "step": 15630 }, { "epoch": 1.0381218038121804, "grad_norm": 192.39727783203125, "learning_rate": 9.868790378018987e-07, "loss": 13.7188, "step": 15631 }, { "epoch": 1.038188218104536, "grad_norm": 150.26638793945312, "learning_rate": 9.867714951798014e-07, "loss": 22.8438, "step": 15632 }, { "epoch": 1.0382546323968918, "grad_norm": 758.7853393554688, "learning_rate": 9.866639527107237e-07, "loss": 21.4688, "step": 15633 }, { "epoch": 1.0383210466892476, "grad_norm": 416.102783203125, "learning_rate": 9.865564103959098e-07, "loss": 18.875, "step": 15634 }, { "epoch": 1.0383874609816033, "grad_norm": 238.43409729003906, "learning_rate": 9.864488682366034e-07, "loss": 14.7188, "step": 15635 }, { "epoch": 1.038453875273959, "grad_norm": 154.48802185058594, "learning_rate": 9.863413262340491e-07, "loss": 19.125, "step": 15636 }, { "epoch": 1.0385202895663146, "grad_norm": 135.65187072753906, "learning_rate": 9.862337843894898e-07, "loss": 17.2969, "step": 15637 }, { "epoch": 1.0385867038586705, "grad_norm": 237.63844299316406, "learning_rate": 9.861262427041708e-07, "loss": 20.625, "step": 15638 }, { "epoch": 1.0386531181510261, "grad_norm": 243.37142944335938, "learning_rate": 9.86018701179335e-07, "loss": 14.4219, "step": 15639 }, { "epoch": 1.0387195324433818, "grad_norm": 136.36827087402344, "learning_rate": 9.85911159816227e-07, "loss": 14.6016, "step": 15640 }, { "epoch": 1.0387859467357374, "grad_norm": 164.62623596191406, "learning_rate": 9.858036186160904e-07, "loss": 13.5391, "step": 15641 }, { "epoch": 1.0388523610280933, "grad_norm": 272.702392578125, "learning_rate": 9.856960775801695e-07, "loss": 17.1719, "step": 15642 }, { "epoch": 1.038918775320449, "grad_norm": 307.6063537597656, "learning_rate": 9.855885367097082e-07, "loss": 15.7969, "step": 15643 }, { "epoch": 1.0389851896128046, "grad_norm": 208.48953247070312, "learning_rate": 9.854809960059504e-07, "loss": 18.3906, "step": 15644 }, { "epoch": 1.0390516039051605, "grad_norm": 327.3642272949219, "learning_rate": 9.853734554701398e-07, "loss": 21.9219, "step": 15645 }, { "epoch": 1.0391180181975161, "grad_norm": 162.66346740722656, "learning_rate": 9.852659151035208e-07, "loss": 17.8594, "step": 15646 }, { "epoch": 1.0391844324898718, "grad_norm": 176.3546600341797, "learning_rate": 9.851583749073371e-07, "loss": 12.0781, "step": 15647 }, { "epoch": 1.0392508467822275, "grad_norm": 138.0027313232422, "learning_rate": 9.85050834882833e-07, "loss": 19.3438, "step": 15648 }, { "epoch": 1.0393172610745833, "grad_norm": 199.61630249023438, "learning_rate": 9.84943295031252e-07, "loss": 17.2188, "step": 15649 }, { "epoch": 1.039383675366939, "grad_norm": 178.2696533203125, "learning_rate": 9.848357553538384e-07, "loss": 15.1562, "step": 15650 }, { "epoch": 1.0394500896592946, "grad_norm": 224.89076232910156, "learning_rate": 9.847282158518359e-07, "loss": 14.7656, "step": 15651 }, { "epoch": 1.0395165039516503, "grad_norm": 181.68539428710938, "learning_rate": 9.84620676526489e-07, "loss": 16.9844, "step": 15652 }, { "epoch": 1.0395829182440062, "grad_norm": 323.47442626953125, "learning_rate": 9.845131373790404e-07, "loss": 17.4844, "step": 15653 }, { "epoch": 1.0396493325363618, "grad_norm": 305.5068664550781, "learning_rate": 9.844055984107358e-07, "loss": 16.4375, "step": 15654 }, { "epoch": 1.0397157468287175, "grad_norm": 369.68902587890625, "learning_rate": 9.842980596228177e-07, "loss": 14.4531, "step": 15655 }, { "epoch": 1.0397821611210734, "grad_norm": 150.25929260253906, "learning_rate": 9.84190521016531e-07, "loss": 20.375, "step": 15656 }, { "epoch": 1.039848575413429, "grad_norm": 531.06396484375, "learning_rate": 9.840829825931189e-07, "loss": 17.5469, "step": 15657 }, { "epoch": 1.0399149897057847, "grad_norm": 179.457275390625, "learning_rate": 9.839754443538255e-07, "loss": 20.1094, "step": 15658 }, { "epoch": 1.0399814039981403, "grad_norm": 119.2546157836914, "learning_rate": 9.838679062998953e-07, "loss": 15.0156, "step": 15659 }, { "epoch": 1.0400478182904962, "grad_norm": 97.64340209960938, "learning_rate": 9.83760368432572e-07, "loss": 15.4062, "step": 15660 }, { "epoch": 1.0401142325828518, "grad_norm": 115.65797424316406, "learning_rate": 9.83652830753099e-07, "loss": 16.4219, "step": 15661 }, { "epoch": 1.0401806468752075, "grad_norm": 342.7167053222656, "learning_rate": 9.835452932627207e-07, "loss": 17.2969, "step": 15662 }, { "epoch": 1.0402470611675632, "grad_norm": 163.833984375, "learning_rate": 9.834377559626811e-07, "loss": 15.5469, "step": 15663 }, { "epoch": 1.040313475459919, "grad_norm": 229.79385375976562, "learning_rate": 9.83330218854224e-07, "loss": 16.3281, "step": 15664 }, { "epoch": 1.0403798897522747, "grad_norm": 139.22943115234375, "learning_rate": 9.832226819385932e-07, "loss": 10.1875, "step": 15665 }, { "epoch": 1.0404463040446303, "grad_norm": 199.04173278808594, "learning_rate": 9.831151452170327e-07, "loss": 20.9688, "step": 15666 }, { "epoch": 1.0405127183369862, "grad_norm": 263.0077209472656, "learning_rate": 9.830076086907865e-07, "loss": 14.8438, "step": 15667 }, { "epoch": 1.0405791326293419, "grad_norm": 216.45162963867188, "learning_rate": 9.829000723610986e-07, "loss": 16.1797, "step": 15668 }, { "epoch": 1.0406455469216975, "grad_norm": 121.76887512207031, "learning_rate": 9.827925362292128e-07, "loss": 12.4375, "step": 15669 }, { "epoch": 1.0407119612140532, "grad_norm": 194.71908569335938, "learning_rate": 9.826850002963727e-07, "loss": 16.125, "step": 15670 }, { "epoch": 1.040778375506409, "grad_norm": 211.32273864746094, "learning_rate": 9.825774645638232e-07, "loss": 21.0156, "step": 15671 }, { "epoch": 1.0408447897987647, "grad_norm": 271.9544982910156, "learning_rate": 9.824699290328068e-07, "loss": 16.5469, "step": 15672 }, { "epoch": 1.0409112040911204, "grad_norm": 220.20777893066406, "learning_rate": 9.823623937045687e-07, "loss": 15.0625, "step": 15673 }, { "epoch": 1.040977618383476, "grad_norm": 595.0569458007812, "learning_rate": 9.822548585803519e-07, "loss": 24.4531, "step": 15674 }, { "epoch": 1.041044032675832, "grad_norm": 172.2211151123047, "learning_rate": 9.82147323661401e-07, "loss": 19.2031, "step": 15675 }, { "epoch": 1.0411104469681876, "grad_norm": 139.56349182128906, "learning_rate": 9.820397889489592e-07, "loss": 17.8281, "step": 15676 }, { "epoch": 1.0411768612605432, "grad_norm": 631.6290893554688, "learning_rate": 9.81932254444271e-07, "loss": 23.3906, "step": 15677 }, { "epoch": 1.041243275552899, "grad_norm": 337.5711975097656, "learning_rate": 9.818247201485797e-07, "loss": 15.1719, "step": 15678 }, { "epoch": 1.0413096898452547, "grad_norm": 337.7759704589844, "learning_rate": 9.817171860631304e-07, "loss": 16.8438, "step": 15679 }, { "epoch": 1.0413761041376104, "grad_norm": 161.3271942138672, "learning_rate": 9.816096521891652e-07, "loss": 18.0469, "step": 15680 }, { "epoch": 1.041442518429966, "grad_norm": 289.9089050292969, "learning_rate": 9.815021185279295e-07, "loss": 12.6719, "step": 15681 }, { "epoch": 1.041508932722322, "grad_norm": 184.7423553466797, "learning_rate": 9.813945850806665e-07, "loss": 20.7188, "step": 15682 }, { "epoch": 1.0415753470146776, "grad_norm": 159.2779541015625, "learning_rate": 9.812870518486205e-07, "loss": 15.375, "step": 15683 }, { "epoch": 1.0416417613070332, "grad_norm": 207.96788024902344, "learning_rate": 9.811795188330343e-07, "loss": 16.0156, "step": 15684 }, { "epoch": 1.0417081755993889, "grad_norm": 135.7547607421875, "learning_rate": 9.810719860351534e-07, "loss": 15.4688, "step": 15685 }, { "epoch": 1.0417745898917448, "grad_norm": 398.4416198730469, "learning_rate": 9.809644534562203e-07, "loss": 25.2812, "step": 15686 }, { "epoch": 1.0418410041841004, "grad_norm": 135.8651580810547, "learning_rate": 9.808569210974798e-07, "loss": 19.4219, "step": 15687 }, { "epoch": 1.041907418476456, "grad_norm": 398.6253967285156, "learning_rate": 9.80749388960175e-07, "loss": 17.2656, "step": 15688 }, { "epoch": 1.041973832768812, "grad_norm": 119.9359130859375, "learning_rate": 9.806418570455506e-07, "loss": 12.125, "step": 15689 }, { "epoch": 1.0420402470611676, "grad_norm": 195.76515197753906, "learning_rate": 9.805343253548495e-07, "loss": 16.4219, "step": 15690 }, { "epoch": 1.0421066613535233, "grad_norm": 182.5797882080078, "learning_rate": 9.804267938893167e-07, "loss": 18.5156, "step": 15691 }, { "epoch": 1.042173075645879, "grad_norm": 206.6014862060547, "learning_rate": 9.80319262650195e-07, "loss": 14.4844, "step": 15692 }, { "epoch": 1.0422394899382348, "grad_norm": 135.0846710205078, "learning_rate": 9.802117316387287e-07, "loss": 16.4375, "step": 15693 }, { "epoch": 1.0423059042305904, "grad_norm": 131.40513610839844, "learning_rate": 9.80104200856162e-07, "loss": 14.9531, "step": 15694 }, { "epoch": 1.042372318522946, "grad_norm": 371.3923034667969, "learning_rate": 9.79996670303738e-07, "loss": 17.1406, "step": 15695 }, { "epoch": 1.0424387328153017, "grad_norm": 139.94195556640625, "learning_rate": 9.798891399827016e-07, "loss": 14.2969, "step": 15696 }, { "epoch": 1.0425051471076576, "grad_norm": 189.96527099609375, "learning_rate": 9.797816098942957e-07, "loss": 21.6875, "step": 15697 }, { "epoch": 1.0425715614000133, "grad_norm": 139.75787353515625, "learning_rate": 9.796740800397647e-07, "loss": 14.4219, "step": 15698 }, { "epoch": 1.042637975692369, "grad_norm": 117.43487548828125, "learning_rate": 9.795665504203516e-07, "loss": 12.8906, "step": 15699 }, { "epoch": 1.0427043899847248, "grad_norm": 222.88912963867188, "learning_rate": 9.794590210373019e-07, "loss": 11.8984, "step": 15700 }, { "epoch": 1.0427708042770805, "grad_norm": 128.167724609375, "learning_rate": 9.793514918918573e-07, "loss": 13.7656, "step": 15701 }, { "epoch": 1.0428372185694361, "grad_norm": 362.6629333496094, "learning_rate": 9.792439629852635e-07, "loss": 13.8125, "step": 15702 }, { "epoch": 1.0429036328617918, "grad_norm": 240.909912109375, "learning_rate": 9.791364343187632e-07, "loss": 16.2031, "step": 15703 }, { "epoch": 1.0429700471541477, "grad_norm": 230.4791717529297, "learning_rate": 9.79028905893601e-07, "loss": 19.7812, "step": 15704 }, { "epoch": 1.0430364614465033, "grad_norm": 509.2139587402344, "learning_rate": 9.789213777110198e-07, "loss": 22.125, "step": 15705 }, { "epoch": 1.043102875738859, "grad_norm": 180.152587890625, "learning_rate": 9.788138497722643e-07, "loss": 15.8281, "step": 15706 }, { "epoch": 1.0431692900312146, "grad_norm": 434.9666442871094, "learning_rate": 9.787063220785777e-07, "loss": 18.4062, "step": 15707 }, { "epoch": 1.0432357043235705, "grad_norm": 169.727783203125, "learning_rate": 9.785987946312044e-07, "loss": 12.7031, "step": 15708 }, { "epoch": 1.0433021186159261, "grad_norm": 1317.478271484375, "learning_rate": 9.784912674313875e-07, "loss": 10.6562, "step": 15709 }, { "epoch": 1.0433685329082818, "grad_norm": 327.7931213378906, "learning_rate": 9.783837404803716e-07, "loss": 15.25, "step": 15710 }, { "epoch": 1.0434349472006377, "grad_norm": 223.06895446777344, "learning_rate": 9.782762137794e-07, "loss": 15.9219, "step": 15711 }, { "epoch": 1.0435013614929933, "grad_norm": 237.69610595703125, "learning_rate": 9.781686873297165e-07, "loss": 18.9375, "step": 15712 }, { "epoch": 1.043567775785349, "grad_norm": 191.184326171875, "learning_rate": 9.78061161132565e-07, "loss": 10.6641, "step": 15713 }, { "epoch": 1.0436341900777046, "grad_norm": 133.01783752441406, "learning_rate": 9.779536351891895e-07, "loss": 17.8125, "step": 15714 }, { "epoch": 1.0437006043700605, "grad_norm": 134.77981567382812, "learning_rate": 9.778461095008337e-07, "loss": 15.4844, "step": 15715 }, { "epoch": 1.0437670186624162, "grad_norm": 328.0467224121094, "learning_rate": 9.777385840687413e-07, "loss": 17.2969, "step": 15716 }, { "epoch": 1.0438334329547718, "grad_norm": 158.14993286132812, "learning_rate": 9.77631058894156e-07, "loss": 17.4375, "step": 15717 }, { "epoch": 1.0438998472471275, "grad_norm": 197.37229919433594, "learning_rate": 9.775235339783217e-07, "loss": 20.4531, "step": 15718 }, { "epoch": 1.0439662615394834, "grad_norm": 128.65065002441406, "learning_rate": 9.774160093224823e-07, "loss": 15.5625, "step": 15719 }, { "epoch": 1.044032675831839, "grad_norm": 771.1585693359375, "learning_rate": 9.773084849278816e-07, "loss": 14.3125, "step": 15720 }, { "epoch": 1.0440990901241947, "grad_norm": 134.49188232421875, "learning_rate": 9.772009607957631e-07, "loss": 12.6875, "step": 15721 }, { "epoch": 1.0441655044165505, "grad_norm": 167.76199340820312, "learning_rate": 9.770934369273709e-07, "loss": 12.6562, "step": 15722 }, { "epoch": 1.0442319187089062, "grad_norm": 109.17849731445312, "learning_rate": 9.769859133239481e-07, "loss": 12.9062, "step": 15723 }, { "epoch": 1.0442983330012618, "grad_norm": 144.23194885253906, "learning_rate": 9.768783899867396e-07, "loss": 16.8438, "step": 15724 }, { "epoch": 1.0443647472936175, "grad_norm": 383.77325439453125, "learning_rate": 9.767708669169882e-07, "loss": 27.2031, "step": 15725 }, { "epoch": 1.0444311615859734, "grad_norm": 128.6307373046875, "learning_rate": 9.766633441159384e-07, "loss": 16.25, "step": 15726 }, { "epoch": 1.044497575878329, "grad_norm": 218.8885498046875, "learning_rate": 9.765558215848329e-07, "loss": 20.4531, "step": 15727 }, { "epoch": 1.0445639901706847, "grad_norm": 379.9872131347656, "learning_rate": 9.764482993249166e-07, "loss": 19.9375, "step": 15728 }, { "epoch": 1.0446304044630403, "grad_norm": 309.0635681152344, "learning_rate": 9.763407773374328e-07, "loss": 17.3906, "step": 15729 }, { "epoch": 1.0446968187553962, "grad_norm": 246.8901824951172, "learning_rate": 9.76233255623625e-07, "loss": 12.8047, "step": 15730 }, { "epoch": 1.0447632330477519, "grad_norm": 220.83395385742188, "learning_rate": 9.761257341847375e-07, "loss": 14.625, "step": 15731 }, { "epoch": 1.0448296473401075, "grad_norm": 180.82521057128906, "learning_rate": 9.760182130220135e-07, "loss": 12.5469, "step": 15732 }, { "epoch": 1.0448960616324634, "grad_norm": 770.0709838867188, "learning_rate": 9.759106921366972e-07, "loss": 26.3594, "step": 15733 }, { "epoch": 1.044962475924819, "grad_norm": 115.81199645996094, "learning_rate": 9.758031715300317e-07, "loss": 13.5781, "step": 15734 }, { "epoch": 1.0450288902171747, "grad_norm": 397.6253356933594, "learning_rate": 9.756956512032617e-07, "loss": 21.25, "step": 15735 }, { "epoch": 1.0450953045095304, "grad_norm": 228.3644561767578, "learning_rate": 9.755881311576301e-07, "loss": 21.9375, "step": 15736 }, { "epoch": 1.0451617188018862, "grad_norm": 507.9584655761719, "learning_rate": 9.754806113943812e-07, "loss": 12.6562, "step": 15737 }, { "epoch": 1.045228133094242, "grad_norm": 170.7726287841797, "learning_rate": 9.75373091914758e-07, "loss": 12.4844, "step": 15738 }, { "epoch": 1.0452945473865976, "grad_norm": 178.95777893066406, "learning_rate": 9.75265572720005e-07, "loss": 16.9688, "step": 15739 }, { "epoch": 1.0453609616789532, "grad_norm": 237.2075653076172, "learning_rate": 9.751580538113654e-07, "loss": 18.1719, "step": 15740 }, { "epoch": 1.045427375971309, "grad_norm": 197.0717010498047, "learning_rate": 9.750505351900833e-07, "loss": 15.125, "step": 15741 }, { "epoch": 1.0454937902636647, "grad_norm": 766.370361328125, "learning_rate": 9.749430168574023e-07, "loss": 27.9688, "step": 15742 }, { "epoch": 1.0455602045560204, "grad_norm": 205.12167358398438, "learning_rate": 9.74835498814566e-07, "loss": 19.2344, "step": 15743 }, { "epoch": 1.0456266188483763, "grad_norm": 486.7780456542969, "learning_rate": 9.747279810628176e-07, "loss": 14.6641, "step": 15744 }, { "epoch": 1.045693033140732, "grad_norm": 1556.208251953125, "learning_rate": 9.74620463603402e-07, "loss": 15.5469, "step": 15745 }, { "epoch": 1.0457594474330876, "grad_norm": 211.08865356445312, "learning_rate": 9.745129464375619e-07, "loss": 13.1406, "step": 15746 }, { "epoch": 1.0458258617254432, "grad_norm": 169.89190673828125, "learning_rate": 9.744054295665416e-07, "loss": 15.8906, "step": 15747 }, { "epoch": 1.045892276017799, "grad_norm": 542.4044189453125, "learning_rate": 9.74297912991584e-07, "loss": 24.0781, "step": 15748 }, { "epoch": 1.0459586903101548, "grad_norm": 136.8244171142578, "learning_rate": 9.741903967139338e-07, "loss": 15.9375, "step": 15749 }, { "epoch": 1.0460251046025104, "grad_norm": 254.43170166015625, "learning_rate": 9.740828807348342e-07, "loss": 22.2812, "step": 15750 }, { "epoch": 1.046091518894866, "grad_norm": 234.01747131347656, "learning_rate": 9.73975365055529e-07, "loss": 15.9844, "step": 15751 }, { "epoch": 1.046157933187222, "grad_norm": 331.87432861328125, "learning_rate": 9.738678496772611e-07, "loss": 15.6406, "step": 15752 }, { "epoch": 1.0462243474795776, "grad_norm": 246.46450805664062, "learning_rate": 9.737603346012754e-07, "loss": 22.4688, "step": 15753 }, { "epoch": 1.0462907617719333, "grad_norm": 393.8890686035156, "learning_rate": 9.73652819828815e-07, "loss": 18.0156, "step": 15754 }, { "epoch": 1.0463571760642891, "grad_norm": 295.6360168457031, "learning_rate": 9.735453053611236e-07, "loss": 16.8125, "step": 15755 }, { "epoch": 1.0464235903566448, "grad_norm": 223.6762237548828, "learning_rate": 9.734377911994445e-07, "loss": 18.8906, "step": 15756 }, { "epoch": 1.0464900046490004, "grad_norm": 309.17486572265625, "learning_rate": 9.733302773450222e-07, "loss": 17.7656, "step": 15757 }, { "epoch": 1.046556418941356, "grad_norm": 410.9020690917969, "learning_rate": 9.732227637990995e-07, "loss": 21.1719, "step": 15758 }, { "epoch": 1.046622833233712, "grad_norm": 390.9070129394531, "learning_rate": 9.731152505629208e-07, "loss": 17.9375, "step": 15759 }, { "epoch": 1.0466892475260676, "grad_norm": 173.31884765625, "learning_rate": 9.730077376377288e-07, "loss": 13.9062, "step": 15760 }, { "epoch": 1.0467556618184233, "grad_norm": 211.1187744140625, "learning_rate": 9.729002250247686e-07, "loss": 18.5938, "step": 15761 }, { "epoch": 1.0468220761107792, "grad_norm": 252.29051208496094, "learning_rate": 9.72792712725282e-07, "loss": 17.8906, "step": 15762 }, { "epoch": 1.0468884904031348, "grad_norm": 125.31259155273438, "learning_rate": 9.726852007405142e-07, "loss": 15.5781, "step": 15763 }, { "epoch": 1.0469549046954905, "grad_norm": 101.00634002685547, "learning_rate": 9.725776890717082e-07, "loss": 11.5, "step": 15764 }, { "epoch": 1.0470213189878461, "grad_norm": 290.5904541015625, "learning_rate": 9.724701777201077e-07, "loss": 27.0625, "step": 15765 }, { "epoch": 1.047087733280202, "grad_norm": 137.04991149902344, "learning_rate": 9.723626666869559e-07, "loss": 14.1562, "step": 15766 }, { "epoch": 1.0471541475725576, "grad_norm": 232.32315063476562, "learning_rate": 9.722551559734971e-07, "loss": 14.4375, "step": 15767 }, { "epoch": 1.0472205618649133, "grad_norm": 128.74795532226562, "learning_rate": 9.72147645580975e-07, "loss": 16.0938, "step": 15768 }, { "epoch": 1.047286976157269, "grad_norm": 269.94915771484375, "learning_rate": 9.72040135510632e-07, "loss": 11.5, "step": 15769 }, { "epoch": 1.0473533904496248, "grad_norm": 184.00535583496094, "learning_rate": 9.719326257637133e-07, "loss": 12.2031, "step": 15770 }, { "epoch": 1.0474198047419805, "grad_norm": 146.22630310058594, "learning_rate": 9.718251163414616e-07, "loss": 13.3281, "step": 15771 }, { "epoch": 1.0474862190343361, "grad_norm": 129.60333251953125, "learning_rate": 9.717176072451211e-07, "loss": 12.9375, "step": 15772 }, { "epoch": 1.047552633326692, "grad_norm": 155.48150634765625, "learning_rate": 9.716100984759344e-07, "loss": 15.9844, "step": 15773 }, { "epoch": 1.0476190476190477, "grad_norm": 526.9658203125, "learning_rate": 9.715025900351462e-07, "loss": 16.9062, "step": 15774 }, { "epoch": 1.0476854619114033, "grad_norm": 246.22779846191406, "learning_rate": 9.713950819239992e-07, "loss": 16.5938, "step": 15775 }, { "epoch": 1.047751876203759, "grad_norm": 245.0790252685547, "learning_rate": 9.712875741437378e-07, "loss": 22.5938, "step": 15776 }, { "epoch": 1.0478182904961149, "grad_norm": 280.09954833984375, "learning_rate": 9.711800666956047e-07, "loss": 14.6875, "step": 15777 }, { "epoch": 1.0478847047884705, "grad_norm": 335.3205871582031, "learning_rate": 9.710725595808444e-07, "loss": 19.4844, "step": 15778 }, { "epoch": 1.0479511190808262, "grad_norm": 377.5218505859375, "learning_rate": 9.709650528006998e-07, "loss": 20.5, "step": 15779 }, { "epoch": 1.0480175333731818, "grad_norm": 186.81993103027344, "learning_rate": 9.70857546356415e-07, "loss": 12.2344, "step": 15780 }, { "epoch": 1.0480839476655377, "grad_norm": 146.3345184326172, "learning_rate": 9.707500402492328e-07, "loss": 13.3438, "step": 15781 }, { "epoch": 1.0481503619578934, "grad_norm": 176.16371154785156, "learning_rate": 9.706425344803977e-07, "loss": 15.5781, "step": 15782 }, { "epoch": 1.048216776250249, "grad_norm": 367.4129943847656, "learning_rate": 9.705350290511527e-07, "loss": 13.9219, "step": 15783 }, { "epoch": 1.0482831905426049, "grad_norm": 208.4423370361328, "learning_rate": 9.704275239627417e-07, "loss": 14.8906, "step": 15784 }, { "epoch": 1.0483496048349605, "grad_norm": 705.852294921875, "learning_rate": 9.703200192164077e-07, "loss": 15.0469, "step": 15785 }, { "epoch": 1.0484160191273162, "grad_norm": 215.33908081054688, "learning_rate": 9.70212514813395e-07, "loss": 16.7969, "step": 15786 }, { "epoch": 1.0484824334196718, "grad_norm": 317.5788269042969, "learning_rate": 9.701050107549463e-07, "loss": 15.5625, "step": 15787 }, { "epoch": 1.0485488477120277, "grad_norm": 114.18425750732422, "learning_rate": 9.69997507042306e-07, "loss": 14.5234, "step": 15788 }, { "epoch": 1.0486152620043834, "grad_norm": 135.02713012695312, "learning_rate": 9.69890003676717e-07, "loss": 20.6719, "step": 15789 }, { "epoch": 1.048681676296739, "grad_norm": 164.1778106689453, "learning_rate": 9.697825006594233e-07, "loss": 20.7344, "step": 15790 }, { "epoch": 1.0487480905890947, "grad_norm": 264.9660949707031, "learning_rate": 9.696749979916677e-07, "loss": 16.9688, "step": 15791 }, { "epoch": 1.0488145048814506, "grad_norm": 339.29443359375, "learning_rate": 9.695674956746947e-07, "loss": 19.625, "step": 15792 }, { "epoch": 1.0488809191738062, "grad_norm": 151.0435333251953, "learning_rate": 9.694599937097471e-07, "loss": 18.2344, "step": 15793 }, { "epoch": 1.0489473334661619, "grad_norm": 129.17552185058594, "learning_rate": 9.69352492098069e-07, "loss": 12.6562, "step": 15794 }, { "epoch": 1.0490137477585177, "grad_norm": 292.1891174316406, "learning_rate": 9.69244990840903e-07, "loss": 21.5781, "step": 15795 }, { "epoch": 1.0490801620508734, "grad_norm": 233.49488830566406, "learning_rate": 9.69137489939494e-07, "loss": 22.5312, "step": 15796 }, { "epoch": 1.049146576343229, "grad_norm": 207.8606719970703, "learning_rate": 9.690299893950843e-07, "loss": 14.2969, "step": 15797 }, { "epoch": 1.0492129906355847, "grad_norm": 395.5960388183594, "learning_rate": 9.68922489208918e-07, "loss": 19.875, "step": 15798 }, { "epoch": 1.0492794049279406, "grad_norm": 304.1568603515625, "learning_rate": 9.68814989382238e-07, "loss": 17.4531, "step": 15799 }, { "epoch": 1.0493458192202962, "grad_norm": 1142.4517822265625, "learning_rate": 9.687074899162888e-07, "loss": 20.2812, "step": 15800 }, { "epoch": 1.049412233512652, "grad_norm": 147.7388916015625, "learning_rate": 9.68599990812313e-07, "loss": 17.9219, "step": 15801 }, { "epoch": 1.0494786478050075, "grad_norm": 122.80154418945312, "learning_rate": 9.684924920715544e-07, "loss": 20.1562, "step": 15802 }, { "epoch": 1.0495450620973634, "grad_norm": 133.09959411621094, "learning_rate": 9.683849936952563e-07, "loss": 13.1094, "step": 15803 }, { "epoch": 1.049611476389719, "grad_norm": 229.4424591064453, "learning_rate": 9.682774956846626e-07, "loss": 14.5078, "step": 15804 }, { "epoch": 1.0496778906820747, "grad_norm": 201.26193237304688, "learning_rate": 9.681699980410166e-07, "loss": 11.9375, "step": 15805 }, { "epoch": 1.0497443049744306, "grad_norm": 230.50772094726562, "learning_rate": 9.680625007655615e-07, "loss": 19.2969, "step": 15806 }, { "epoch": 1.0498107192667863, "grad_norm": 129.05706787109375, "learning_rate": 9.679550038595413e-07, "loss": 18.4375, "step": 15807 }, { "epoch": 1.049877133559142, "grad_norm": 316.25628662109375, "learning_rate": 9.678475073241986e-07, "loss": 18.7969, "step": 15808 }, { "epoch": 1.0499435478514976, "grad_norm": 307.29278564453125, "learning_rate": 9.67740011160778e-07, "loss": 20.4219, "step": 15809 }, { "epoch": 1.0500099621438534, "grad_norm": 640.0818481445312, "learning_rate": 9.67632515370522e-07, "loss": 16.75, "step": 15810 }, { "epoch": 1.050076376436209, "grad_norm": 379.7258605957031, "learning_rate": 9.675250199546746e-07, "loss": 19.7969, "step": 15811 }, { "epoch": 1.0501427907285648, "grad_norm": 201.00466918945312, "learning_rate": 9.674175249144787e-07, "loss": 12.5156, "step": 15812 }, { "epoch": 1.0502092050209204, "grad_norm": 192.50743103027344, "learning_rate": 9.673100302511785e-07, "loss": 18.1406, "step": 15813 }, { "epoch": 1.0502756193132763, "grad_norm": 129.2185821533203, "learning_rate": 9.672025359660167e-07, "loss": 14.4688, "step": 15814 }, { "epoch": 1.050342033605632, "grad_norm": 200.6854248046875, "learning_rate": 9.670950420602372e-07, "loss": 18.2344, "step": 15815 }, { "epoch": 1.0504084478979876, "grad_norm": 334.17218017578125, "learning_rate": 9.669875485350832e-07, "loss": 20.0781, "step": 15816 }, { "epoch": 1.0504748621903435, "grad_norm": 407.1122741699219, "learning_rate": 9.668800553917982e-07, "loss": 13.375, "step": 15817 }, { "epoch": 1.0505412764826991, "grad_norm": 135.15525817871094, "learning_rate": 9.667725626316257e-07, "loss": 13.1719, "step": 15818 }, { "epoch": 1.0506076907750548, "grad_norm": 300.6287841796875, "learning_rate": 9.666650702558092e-07, "loss": 18.3125, "step": 15819 }, { "epoch": 1.0506741050674104, "grad_norm": 117.9216537475586, "learning_rate": 9.665575782655915e-07, "loss": 12.625, "step": 15820 }, { "epoch": 1.0507405193597663, "grad_norm": 390.8520202636719, "learning_rate": 9.664500866622169e-07, "loss": 14.0391, "step": 15821 }, { "epoch": 1.050806933652122, "grad_norm": 369.3218078613281, "learning_rate": 9.66342595446928e-07, "loss": 16.625, "step": 15822 }, { "epoch": 1.0508733479444776, "grad_norm": 165.59844970703125, "learning_rate": 9.66235104620969e-07, "loss": 14.9219, "step": 15823 }, { "epoch": 1.0509397622368333, "grad_norm": 140.84329223632812, "learning_rate": 9.661276141855821e-07, "loss": 16.7812, "step": 15824 }, { "epoch": 1.0510061765291892, "grad_norm": 312.8877258300781, "learning_rate": 9.660201241420124e-07, "loss": 13.5, "step": 15825 }, { "epoch": 1.0510725908215448, "grad_norm": 180.06390380859375, "learning_rate": 9.659126344915015e-07, "loss": 16.7969, "step": 15826 }, { "epoch": 1.0511390051139005, "grad_norm": 141.618408203125, "learning_rate": 9.65805145235294e-07, "loss": 14.1094, "step": 15827 }, { "epoch": 1.0512054194062563, "grad_norm": 134.42677307128906, "learning_rate": 9.656976563746326e-07, "loss": 11.2344, "step": 15828 }, { "epoch": 1.051271833698612, "grad_norm": 156.8391571044922, "learning_rate": 9.655901679107613e-07, "loss": 16.0625, "step": 15829 }, { "epoch": 1.0513382479909676, "grad_norm": 165.3582000732422, "learning_rate": 9.654826798449226e-07, "loss": 16.2734, "step": 15830 }, { "epoch": 1.0514046622833233, "grad_norm": 195.67288208007812, "learning_rate": 9.653751921783607e-07, "loss": 17.0781, "step": 15831 }, { "epoch": 1.0514710765756792, "grad_norm": 133.4060821533203, "learning_rate": 9.652677049123185e-07, "loss": 14.8906, "step": 15832 }, { "epoch": 1.0515374908680348, "grad_norm": 200.06585693359375, "learning_rate": 9.651602180480397e-07, "loss": 15.6562, "step": 15833 }, { "epoch": 1.0516039051603905, "grad_norm": 438.20745849609375, "learning_rate": 9.650527315867669e-07, "loss": 17.9844, "step": 15834 }, { "epoch": 1.0516703194527461, "grad_norm": 109.23661041259766, "learning_rate": 9.649452455297443e-07, "loss": 15.3281, "step": 15835 }, { "epoch": 1.051736733745102, "grad_norm": 96.81568908691406, "learning_rate": 9.64837759878215e-07, "loss": 14.6094, "step": 15836 }, { "epoch": 1.0518031480374577, "grad_norm": 159.8877410888672, "learning_rate": 9.64730274633422e-07, "loss": 13.0703, "step": 15837 }, { "epoch": 1.0518695623298133, "grad_norm": 208.6575469970703, "learning_rate": 9.646227897966087e-07, "loss": 17.5469, "step": 15838 }, { "epoch": 1.0519359766221692, "grad_norm": 235.7657012939453, "learning_rate": 9.64515305369019e-07, "loss": 22.0469, "step": 15839 }, { "epoch": 1.0520023909145249, "grad_norm": 300.7801513671875, "learning_rate": 9.644078213518955e-07, "loss": 22.5312, "step": 15840 }, { "epoch": 1.0520688052068805, "grad_norm": 160.65382385253906, "learning_rate": 9.643003377464815e-07, "loss": 17.2969, "step": 15841 }, { "epoch": 1.0521352194992362, "grad_norm": 102.87995910644531, "learning_rate": 9.64192854554021e-07, "loss": 18.7031, "step": 15842 }, { "epoch": 1.052201633791592, "grad_norm": 141.0023956298828, "learning_rate": 9.640853717757568e-07, "loss": 17.3594, "step": 15843 }, { "epoch": 1.0522680480839477, "grad_norm": 332.2563781738281, "learning_rate": 9.639778894129327e-07, "loss": 14.0781, "step": 15844 }, { "epoch": 1.0523344623763033, "grad_norm": 193.3275909423828, "learning_rate": 9.638704074667909e-07, "loss": 19.4219, "step": 15845 }, { "epoch": 1.052400876668659, "grad_norm": 223.30889892578125, "learning_rate": 9.637629259385763e-07, "loss": 14.7031, "step": 15846 }, { "epoch": 1.0524672909610149, "grad_norm": 273.9702453613281, "learning_rate": 9.636554448295304e-07, "loss": 14.2812, "step": 15847 }, { "epoch": 1.0525337052533705, "grad_norm": 118.73443603515625, "learning_rate": 9.635479641408979e-07, "loss": 13.5781, "step": 15848 }, { "epoch": 1.0526001195457262, "grad_norm": 163.0055389404297, "learning_rate": 9.634404838739213e-07, "loss": 15.7656, "step": 15849 }, { "epoch": 1.052666533838082, "grad_norm": 154.10008239746094, "learning_rate": 9.633330040298444e-07, "loss": 13.3594, "step": 15850 }, { "epoch": 1.0527329481304377, "grad_norm": 152.50213623046875, "learning_rate": 9.632255246099095e-07, "loss": 13.0781, "step": 15851 }, { "epoch": 1.0527993624227934, "grad_norm": 142.51312255859375, "learning_rate": 9.63118045615361e-07, "loss": 18.5938, "step": 15852 }, { "epoch": 1.052865776715149, "grad_norm": 213.20526123046875, "learning_rate": 9.630105670474417e-07, "loss": 11.75, "step": 15853 }, { "epoch": 1.052932191007505, "grad_norm": 178.8824920654297, "learning_rate": 9.629030889073947e-07, "loss": 15.9062, "step": 15854 }, { "epoch": 1.0529986052998606, "grad_norm": 227.045166015625, "learning_rate": 9.627956111964633e-07, "loss": 18.3594, "step": 15855 }, { "epoch": 1.0530650195922162, "grad_norm": 118.27508544921875, "learning_rate": 9.626881339158912e-07, "loss": 13.2344, "step": 15856 }, { "epoch": 1.0531314338845719, "grad_norm": 139.892333984375, "learning_rate": 9.625806570669208e-07, "loss": 16.0703, "step": 15857 }, { "epoch": 1.0531978481769277, "grad_norm": 255.20225524902344, "learning_rate": 9.624731806507962e-07, "loss": 13.7188, "step": 15858 }, { "epoch": 1.0532642624692834, "grad_norm": 206.74935913085938, "learning_rate": 9.623657046687596e-07, "loss": 16.5156, "step": 15859 }, { "epoch": 1.053330676761639, "grad_norm": 165.4824676513672, "learning_rate": 9.622582291220554e-07, "loss": 17.6562, "step": 15860 }, { "epoch": 1.053397091053995, "grad_norm": 374.6329650878906, "learning_rate": 9.62150754011926e-07, "loss": 20.4531, "step": 15861 }, { "epoch": 1.0534635053463506, "grad_norm": 175.2999267578125, "learning_rate": 9.620432793396151e-07, "loss": 18.6562, "step": 15862 }, { "epoch": 1.0535299196387062, "grad_norm": 275.85028076171875, "learning_rate": 9.619358051063651e-07, "loss": 15.6328, "step": 15863 }, { "epoch": 1.053596333931062, "grad_norm": 177.88943481445312, "learning_rate": 9.618283313134204e-07, "loss": 17.2344, "step": 15864 }, { "epoch": 1.0536627482234178, "grad_norm": 454.2546691894531, "learning_rate": 9.617208579620232e-07, "loss": 14.9531, "step": 15865 }, { "epoch": 1.0537291625157734, "grad_norm": 144.45535278320312, "learning_rate": 9.61613385053417e-07, "loss": 13.5938, "step": 15866 }, { "epoch": 1.053795576808129, "grad_norm": 244.3921661376953, "learning_rate": 9.615059125888451e-07, "loss": 16.3594, "step": 15867 }, { "epoch": 1.0538619911004847, "grad_norm": 161.33909606933594, "learning_rate": 9.61398440569551e-07, "loss": 14.1094, "step": 15868 }, { "epoch": 1.0539284053928406, "grad_norm": 419.6810302734375, "learning_rate": 9.612909689967768e-07, "loss": 20.8438, "step": 15869 }, { "epoch": 1.0539948196851963, "grad_norm": 226.73602294921875, "learning_rate": 9.61183497871767e-07, "loss": 15.3906, "step": 15870 }, { "epoch": 1.054061233977552, "grad_norm": 208.22727966308594, "learning_rate": 9.610760271957637e-07, "loss": 19.2188, "step": 15871 }, { "epoch": 1.0541276482699078, "grad_norm": 296.87567138671875, "learning_rate": 9.609685569700105e-07, "loss": 22.6094, "step": 15872 }, { "epoch": 1.0541940625622634, "grad_norm": 149.92848205566406, "learning_rate": 9.608610871957503e-07, "loss": 15.9844, "step": 15873 }, { "epoch": 1.054260476854619, "grad_norm": 239.32504272460938, "learning_rate": 9.607536178742271e-07, "loss": 23.1094, "step": 15874 }, { "epoch": 1.0543268911469748, "grad_norm": 235.06173706054688, "learning_rate": 9.606461490066828e-07, "loss": 15.5312, "step": 15875 }, { "epoch": 1.0543933054393306, "grad_norm": 217.55043029785156, "learning_rate": 9.605386805943616e-07, "loss": 14.5469, "step": 15876 }, { "epoch": 1.0544597197316863, "grad_norm": 222.75320434570312, "learning_rate": 9.604312126385058e-07, "loss": 17.3438, "step": 15877 }, { "epoch": 1.054526134024042, "grad_norm": 136.69972229003906, "learning_rate": 9.603237451403593e-07, "loss": 13.9375, "step": 15878 }, { "epoch": 1.0545925483163976, "grad_norm": 344.06890869140625, "learning_rate": 9.602162781011646e-07, "loss": 13.4688, "step": 15879 }, { "epoch": 1.0546589626087535, "grad_norm": 150.4987335205078, "learning_rate": 9.60108811522165e-07, "loss": 14.125, "step": 15880 }, { "epoch": 1.0547253769011091, "grad_norm": 165.4474639892578, "learning_rate": 9.60001345404604e-07, "loss": 23.1875, "step": 15881 }, { "epoch": 1.0547917911934648, "grad_norm": 1164.7493896484375, "learning_rate": 9.59893879749724e-07, "loss": 18.2812, "step": 15882 }, { "epoch": 1.0548582054858207, "grad_norm": 245.25247192382812, "learning_rate": 9.59786414558769e-07, "loss": 19.3906, "step": 15883 }, { "epoch": 1.0549246197781763, "grad_norm": 76.9760971069336, "learning_rate": 9.596789498329808e-07, "loss": 11.2656, "step": 15884 }, { "epoch": 1.054991034070532, "grad_norm": 182.79953002929688, "learning_rate": 9.595714855736038e-07, "loss": 13.4375, "step": 15885 }, { "epoch": 1.0550574483628876, "grad_norm": 520.297607421875, "learning_rate": 9.594640217818804e-07, "loss": 19.7188, "step": 15886 }, { "epoch": 1.0551238626552435, "grad_norm": 170.9327850341797, "learning_rate": 9.59356558459054e-07, "loss": 13.0234, "step": 15887 }, { "epoch": 1.0551902769475991, "grad_norm": 137.7685089111328, "learning_rate": 9.59249095606367e-07, "loss": 17.2344, "step": 15888 }, { "epoch": 1.0552566912399548, "grad_norm": 155.04837036132812, "learning_rate": 9.591416332250636e-07, "loss": 17.4844, "step": 15889 }, { "epoch": 1.0553231055323105, "grad_norm": 222.34713745117188, "learning_rate": 9.590341713163857e-07, "loss": 26.25, "step": 15890 }, { "epoch": 1.0553895198246663, "grad_norm": 146.76229858398438, "learning_rate": 9.589267098815771e-07, "loss": 20.6406, "step": 15891 }, { "epoch": 1.055455934117022, "grad_norm": 174.99822998046875, "learning_rate": 9.588192489218808e-07, "loss": 16.9062, "step": 15892 }, { "epoch": 1.0555223484093776, "grad_norm": 113.17711639404297, "learning_rate": 9.587117884385397e-07, "loss": 14.2344, "step": 15893 }, { "epoch": 1.0555887627017335, "grad_norm": 226.4447479248047, "learning_rate": 9.586043284327962e-07, "loss": 15.2188, "step": 15894 }, { "epoch": 1.0556551769940892, "grad_norm": 93.07332611083984, "learning_rate": 9.584968689058946e-07, "loss": 11.5312, "step": 15895 }, { "epoch": 1.0557215912864448, "grad_norm": 202.7590789794922, "learning_rate": 9.58389409859077e-07, "loss": 16.875, "step": 15896 }, { "epoch": 1.0557880055788005, "grad_norm": 302.8049011230469, "learning_rate": 9.58281951293587e-07, "loss": 15.2812, "step": 15897 }, { "epoch": 1.0558544198711564, "grad_norm": 209.53970336914062, "learning_rate": 9.581744932106667e-07, "loss": 15.7656, "step": 15898 }, { "epoch": 1.055920834163512, "grad_norm": 153.2389678955078, "learning_rate": 9.580670356115605e-07, "loss": 12.5703, "step": 15899 }, { "epoch": 1.0559872484558677, "grad_norm": 138.96621704101562, "learning_rate": 9.579595784975102e-07, "loss": 14.3125, "step": 15900 }, { "epoch": 1.0560536627482233, "grad_norm": 238.1380157470703, "learning_rate": 9.578521218697595e-07, "loss": 19.2656, "step": 15901 }, { "epoch": 1.0561200770405792, "grad_norm": 370.10845947265625, "learning_rate": 9.577446657295508e-07, "loss": 9.5469, "step": 15902 }, { "epoch": 1.0561864913329349, "grad_norm": 277.880859375, "learning_rate": 9.576372100781278e-07, "loss": 15.9844, "step": 15903 }, { "epoch": 1.0562529056252905, "grad_norm": 220.8607940673828, "learning_rate": 9.57529754916733e-07, "loss": 12.5625, "step": 15904 }, { "epoch": 1.0563193199176464, "grad_norm": 166.11036682128906, "learning_rate": 9.574223002466096e-07, "loss": 17.3906, "step": 15905 }, { "epoch": 1.056385734210002, "grad_norm": 378.54571533203125, "learning_rate": 9.573148460690001e-07, "loss": 20.9688, "step": 15906 }, { "epoch": 1.0564521485023577, "grad_norm": 436.9350280761719, "learning_rate": 9.572073923851483e-07, "loss": 14.8594, "step": 15907 }, { "epoch": 1.0565185627947133, "grad_norm": 200.76287841796875, "learning_rate": 9.570999391962965e-07, "loss": 21.9688, "step": 15908 }, { "epoch": 1.0565849770870692, "grad_norm": 333.40679931640625, "learning_rate": 9.56992486503688e-07, "loss": 10.7969, "step": 15909 }, { "epoch": 1.0566513913794249, "grad_norm": 203.79856872558594, "learning_rate": 9.568850343085657e-07, "loss": 26.3438, "step": 15910 }, { "epoch": 1.0567178056717805, "grad_norm": 134.005859375, "learning_rate": 9.567775826121724e-07, "loss": 17.1562, "step": 15911 }, { "epoch": 1.0567842199641362, "grad_norm": 175.8936004638672, "learning_rate": 9.566701314157505e-07, "loss": 16.2344, "step": 15912 }, { "epoch": 1.056850634256492, "grad_norm": 184.4605712890625, "learning_rate": 9.565626807205443e-07, "loss": 14.0391, "step": 15913 }, { "epoch": 1.0569170485488477, "grad_norm": 260.2624206542969, "learning_rate": 9.564552305277956e-07, "loss": 16.4531, "step": 15914 }, { "epoch": 1.0569834628412034, "grad_norm": 226.7474822998047, "learning_rate": 9.563477808387474e-07, "loss": 16.3906, "step": 15915 }, { "epoch": 1.0570498771335592, "grad_norm": 152.80984497070312, "learning_rate": 9.562403316546435e-07, "loss": 14.9688, "step": 15916 }, { "epoch": 1.057116291425915, "grad_norm": 206.98072814941406, "learning_rate": 9.561328829767259e-07, "loss": 17.7656, "step": 15917 }, { "epoch": 1.0571827057182706, "grad_norm": 235.39700317382812, "learning_rate": 9.560254348062381e-07, "loss": 13.9062, "step": 15918 }, { "epoch": 1.0572491200106262, "grad_norm": 279.4226989746094, "learning_rate": 9.559179871444222e-07, "loss": 19.7656, "step": 15919 }, { "epoch": 1.057315534302982, "grad_norm": 231.5633087158203, "learning_rate": 9.558105399925221e-07, "loss": 15.1875, "step": 15920 }, { "epoch": 1.0573819485953377, "grad_norm": 133.0511474609375, "learning_rate": 9.557030933517799e-07, "loss": 11.7969, "step": 15921 }, { "epoch": 1.0574483628876934, "grad_norm": 448.4429931640625, "learning_rate": 9.55595647223439e-07, "loss": 16.6406, "step": 15922 }, { "epoch": 1.057514777180049, "grad_norm": 143.7496795654297, "learning_rate": 9.554882016087417e-07, "loss": 14.4062, "step": 15923 }, { "epoch": 1.057581191472405, "grad_norm": 139.43603515625, "learning_rate": 9.553807565089316e-07, "loss": 23.5625, "step": 15924 }, { "epoch": 1.0576476057647606, "grad_norm": 192.1803741455078, "learning_rate": 9.55273311925251e-07, "loss": 17.3594, "step": 15925 }, { "epoch": 1.0577140200571162, "grad_norm": 212.7152099609375, "learning_rate": 9.55165867858943e-07, "loss": 14.6875, "step": 15926 }, { "epoch": 1.057780434349472, "grad_norm": 192.34922790527344, "learning_rate": 9.550584243112502e-07, "loss": 19.7656, "step": 15927 }, { "epoch": 1.0578468486418278, "grad_norm": 248.2017364501953, "learning_rate": 9.549509812834159e-07, "loss": 18.1719, "step": 15928 }, { "epoch": 1.0579132629341834, "grad_norm": 182.74415588378906, "learning_rate": 9.548435387766825e-07, "loss": 14.1875, "step": 15929 }, { "epoch": 1.057979677226539, "grad_norm": 263.4790344238281, "learning_rate": 9.547360967922932e-07, "loss": 12.9844, "step": 15930 }, { "epoch": 1.058046091518895, "grad_norm": 451.77899169921875, "learning_rate": 9.546286553314902e-07, "loss": 21.1406, "step": 15931 }, { "epoch": 1.0581125058112506, "grad_norm": 200.5897216796875, "learning_rate": 9.545212143955174e-07, "loss": 17.1875, "step": 15932 }, { "epoch": 1.0581789201036063, "grad_norm": 277.2668762207031, "learning_rate": 9.544137739856162e-07, "loss": 16.5156, "step": 15933 }, { "epoch": 1.058245334395962, "grad_norm": 345.4430847167969, "learning_rate": 9.543063341030308e-07, "loss": 23.5, "step": 15934 }, { "epoch": 1.0583117486883178, "grad_norm": 113.98522186279297, "learning_rate": 9.541988947490032e-07, "loss": 18.7812, "step": 15935 }, { "epoch": 1.0583781629806734, "grad_norm": 204.74659729003906, "learning_rate": 9.540914559247765e-07, "loss": 18.9062, "step": 15936 }, { "epoch": 1.058444577273029, "grad_norm": 104.86709594726562, "learning_rate": 9.53984017631593e-07, "loss": 13.5625, "step": 15937 }, { "epoch": 1.058510991565385, "grad_norm": 124.92382049560547, "learning_rate": 9.53876579870696e-07, "loss": 13.2188, "step": 15938 }, { "epoch": 1.0585774058577406, "grad_norm": 385.0640869140625, "learning_rate": 9.537691426433283e-07, "loss": 21.4219, "step": 15939 }, { "epoch": 1.0586438201500963, "grad_norm": 748.35009765625, "learning_rate": 9.536617059507325e-07, "loss": 15.0781, "step": 15940 }, { "epoch": 1.058710234442452, "grad_norm": 223.56112670898438, "learning_rate": 9.535542697941509e-07, "loss": 18.4062, "step": 15941 }, { "epoch": 1.0587766487348078, "grad_norm": 507.6341857910156, "learning_rate": 9.534468341748272e-07, "loss": 17.0781, "step": 15942 }, { "epoch": 1.0588430630271635, "grad_norm": 343.42193603515625, "learning_rate": 9.533393990940035e-07, "loss": 15.1562, "step": 15943 }, { "epoch": 1.0589094773195191, "grad_norm": 307.2028503417969, "learning_rate": 9.532319645529229e-07, "loss": 17.125, "step": 15944 }, { "epoch": 1.0589758916118748, "grad_norm": 317.74163818359375, "learning_rate": 9.531245305528276e-07, "loss": 17.5625, "step": 15945 }, { "epoch": 1.0590423059042307, "grad_norm": 216.7128143310547, "learning_rate": 9.530170970949612e-07, "loss": 19.0, "step": 15946 }, { "epoch": 1.0591087201965863, "grad_norm": 337.0850830078125, "learning_rate": 9.529096641805655e-07, "loss": 16.4688, "step": 15947 }, { "epoch": 1.059175134488942, "grad_norm": 283.22711181640625, "learning_rate": 9.52802231810884e-07, "loss": 16.3125, "step": 15948 }, { "epoch": 1.0592415487812978, "grad_norm": 514.5187377929688, "learning_rate": 9.526947999871586e-07, "loss": 13.6094, "step": 15949 }, { "epoch": 1.0593079630736535, "grad_norm": 261.8975830078125, "learning_rate": 9.525873687106329e-07, "loss": 16.2031, "step": 15950 }, { "epoch": 1.0593743773660091, "grad_norm": 606.9324951171875, "learning_rate": 9.52479937982549e-07, "loss": 18.1094, "step": 15951 }, { "epoch": 1.0594407916583648, "grad_norm": 135.58041381835938, "learning_rate": 9.523725078041495e-07, "loss": 13.3125, "step": 15952 }, { "epoch": 1.0595072059507207, "grad_norm": 299.2630310058594, "learning_rate": 9.522650781766781e-07, "loss": 25.9062, "step": 15953 }, { "epoch": 1.0595736202430763, "grad_norm": 294.93359375, "learning_rate": 9.521576491013759e-07, "loss": 19.5938, "step": 15954 }, { "epoch": 1.059640034535432, "grad_norm": 144.68405151367188, "learning_rate": 9.520502205794871e-07, "loss": 13.875, "step": 15955 }, { "epoch": 1.0597064488277876, "grad_norm": 168.97763061523438, "learning_rate": 9.519427926122534e-07, "loss": 15.6562, "step": 15956 }, { "epoch": 1.0597728631201435, "grad_norm": 237.10862731933594, "learning_rate": 9.51835365200918e-07, "loss": 18.8594, "step": 15957 }, { "epoch": 1.0598392774124992, "grad_norm": 117.1468505859375, "learning_rate": 9.517279383467228e-07, "loss": 16.1875, "step": 15958 }, { "epoch": 1.0599056917048548, "grad_norm": 281.4808349609375, "learning_rate": 9.516205120509115e-07, "loss": 19.5625, "step": 15959 }, { "epoch": 1.0599721059972107, "grad_norm": 243.22779846191406, "learning_rate": 9.51513086314726e-07, "loss": 19.5312, "step": 15960 }, { "epoch": 1.0600385202895664, "grad_norm": 155.84103393554688, "learning_rate": 9.514056611394093e-07, "loss": 15.5312, "step": 15961 }, { "epoch": 1.060104934581922, "grad_norm": 124.70914459228516, "learning_rate": 9.512982365262036e-07, "loss": 16.4062, "step": 15962 }, { "epoch": 1.0601713488742777, "grad_norm": 94.16912841796875, "learning_rate": 9.511908124763522e-07, "loss": 12.25, "step": 15963 }, { "epoch": 1.0602377631666335, "grad_norm": 1043.6802978515625, "learning_rate": 9.510833889910969e-07, "loss": 29.125, "step": 15964 }, { "epoch": 1.0603041774589892, "grad_norm": 148.60098266601562, "learning_rate": 9.509759660716813e-07, "loss": 13.6406, "step": 15965 }, { "epoch": 1.0603705917513448, "grad_norm": 120.1201171875, "learning_rate": 9.508685437193469e-07, "loss": 14.5625, "step": 15966 }, { "epoch": 1.0604370060437005, "grad_norm": 175.84730529785156, "learning_rate": 9.507611219353372e-07, "loss": 19.2969, "step": 15967 }, { "epoch": 1.0605034203360564, "grad_norm": 121.68985748291016, "learning_rate": 9.506537007208942e-07, "loss": 17.9375, "step": 15968 }, { "epoch": 1.060569834628412, "grad_norm": 103.61437225341797, "learning_rate": 9.505462800772612e-07, "loss": 17.4688, "step": 15969 }, { "epoch": 1.0606362489207677, "grad_norm": 88.8518295288086, "learning_rate": 9.504388600056796e-07, "loss": 17.4688, "step": 15970 }, { "epoch": 1.0607026632131236, "grad_norm": 210.25595092773438, "learning_rate": 9.503314405073933e-07, "loss": 17.3125, "step": 15971 }, { "epoch": 1.0607690775054792, "grad_norm": 1197.7401123046875, "learning_rate": 9.50224021583644e-07, "loss": 16.4219, "step": 15972 }, { "epoch": 1.0608354917978349, "grad_norm": 133.88671875, "learning_rate": 9.501166032356746e-07, "loss": 16.2812, "step": 15973 }, { "epoch": 1.0609019060901905, "grad_norm": 153.4811248779297, "learning_rate": 9.500091854647276e-07, "loss": 14.3281, "step": 15974 }, { "epoch": 1.0609683203825464, "grad_norm": 216.78424072265625, "learning_rate": 9.499017682720456e-07, "loss": 17.375, "step": 15975 }, { "epoch": 1.061034734674902, "grad_norm": 260.0699462890625, "learning_rate": 9.497943516588706e-07, "loss": 15.6406, "step": 15976 }, { "epoch": 1.0611011489672577, "grad_norm": 160.8428955078125, "learning_rate": 9.496869356264461e-07, "loss": 14.5469, "step": 15977 }, { "epoch": 1.0611675632596134, "grad_norm": 192.75428771972656, "learning_rate": 9.495795201760137e-07, "loss": 9.8125, "step": 15978 }, { "epoch": 1.0612339775519692, "grad_norm": 91.77561950683594, "learning_rate": 9.494721053088168e-07, "loss": 12.7266, "step": 15979 }, { "epoch": 1.061300391844325, "grad_norm": 320.8747253417969, "learning_rate": 9.493646910260969e-07, "loss": 16.9062, "step": 15980 }, { "epoch": 1.0613668061366806, "grad_norm": 123.34283447265625, "learning_rate": 9.492572773290976e-07, "loss": 14.125, "step": 15981 }, { "epoch": 1.0614332204290364, "grad_norm": 217.758056640625, "learning_rate": 9.491498642190604e-07, "loss": 20.1719, "step": 15982 }, { "epoch": 1.061499634721392, "grad_norm": 341.4192199707031, "learning_rate": 9.490424516972286e-07, "loss": 16.75, "step": 15983 }, { "epoch": 1.0615660490137477, "grad_norm": 335.36566162109375, "learning_rate": 9.48935039764844e-07, "loss": 19.6719, "step": 15984 }, { "epoch": 1.0616324633061034, "grad_norm": 310.64093017578125, "learning_rate": 9.488276284231498e-07, "loss": 20.2656, "step": 15985 }, { "epoch": 1.0616988775984593, "grad_norm": 109.45275115966797, "learning_rate": 9.487202176733877e-07, "loss": 15.0156, "step": 15986 }, { "epoch": 1.061765291890815, "grad_norm": 176.37423706054688, "learning_rate": 9.486128075168009e-07, "loss": 18.1406, "step": 15987 }, { "epoch": 1.0618317061831706, "grad_norm": 535.0554809570312, "learning_rate": 9.485053979546311e-07, "loss": 16.6719, "step": 15988 }, { "epoch": 1.0618981204755262, "grad_norm": 119.86526489257812, "learning_rate": 9.483979889881214e-07, "loss": 16.8594, "step": 15989 }, { "epoch": 1.061964534767882, "grad_norm": 191.89776611328125, "learning_rate": 9.482905806185142e-07, "loss": 18.2188, "step": 15990 }, { "epoch": 1.0620309490602378, "grad_norm": 263.5254211425781, "learning_rate": 9.481831728470513e-07, "loss": 17.8906, "step": 15991 }, { "epoch": 1.0620973633525934, "grad_norm": 372.3282470703125, "learning_rate": 9.480757656749759e-07, "loss": 22.7812, "step": 15992 }, { "epoch": 1.0621637776449493, "grad_norm": 323.0196838378906, "learning_rate": 9.4796835910353e-07, "loss": 13.9844, "step": 15993 }, { "epoch": 1.062230191937305, "grad_norm": 182.75611877441406, "learning_rate": 9.478609531339564e-07, "loss": 15.4531, "step": 15994 }, { "epoch": 1.0622966062296606, "grad_norm": 170.19895935058594, "learning_rate": 9.47753547767497e-07, "loss": 22.0781, "step": 15995 }, { "epoch": 1.0623630205220163, "grad_norm": 189.49444580078125, "learning_rate": 9.476461430053945e-07, "loss": 13.8438, "step": 15996 }, { "epoch": 1.0624294348143721, "grad_norm": 327.63433837890625, "learning_rate": 9.475387388488909e-07, "loss": 20.5781, "step": 15997 }, { "epoch": 1.0624958491067278, "grad_norm": 281.8195495605469, "learning_rate": 9.474313352992294e-07, "loss": 17.0938, "step": 15998 }, { "epoch": 1.0625622633990834, "grad_norm": 344.2239074707031, "learning_rate": 9.473239323576515e-07, "loss": 16.2812, "step": 15999 }, { "epoch": 1.062628677691439, "grad_norm": 157.8995819091797, "learning_rate": 9.472165300254003e-07, "loss": 21.3438, "step": 16000 }, { "epoch": 1.062695091983795, "grad_norm": 335.29388427734375, "learning_rate": 9.471091283037175e-07, "loss": 18.6875, "step": 16001 }, { "epoch": 1.0627615062761506, "grad_norm": 420.61297607421875, "learning_rate": 9.47001727193846e-07, "loss": 15.2812, "step": 16002 }, { "epoch": 1.0628279205685063, "grad_norm": 172.74346923828125, "learning_rate": 9.46894326697028e-07, "loss": 21.3125, "step": 16003 }, { "epoch": 1.0628943348608622, "grad_norm": 157.9315185546875, "learning_rate": 9.46786926814506e-07, "loss": 11.9062, "step": 16004 }, { "epoch": 1.0629607491532178, "grad_norm": 220.38803100585938, "learning_rate": 9.466795275475216e-07, "loss": 12.4531, "step": 16005 }, { "epoch": 1.0630271634455735, "grad_norm": 231.62661743164062, "learning_rate": 9.465721288973182e-07, "loss": 13.1562, "step": 16006 }, { "epoch": 1.0630935777379291, "grad_norm": 363.3004150390625, "learning_rate": 9.464647308651372e-07, "loss": 17.1094, "step": 16007 }, { "epoch": 1.063159992030285, "grad_norm": 129.68191528320312, "learning_rate": 9.463573334522218e-07, "loss": 17.3125, "step": 16008 }, { "epoch": 1.0632264063226406, "grad_norm": 235.6477813720703, "learning_rate": 9.462499366598133e-07, "loss": 13.2812, "step": 16009 }, { "epoch": 1.0632928206149963, "grad_norm": 250.83277893066406, "learning_rate": 9.461425404891548e-07, "loss": 10.9688, "step": 16010 }, { "epoch": 1.063359234907352, "grad_norm": 777.1777954101562, "learning_rate": 9.460351449414884e-07, "loss": 24.125, "step": 16011 }, { "epoch": 1.0634256491997078, "grad_norm": 509.1921691894531, "learning_rate": 9.459277500180564e-07, "loss": 16.2812, "step": 16012 }, { "epoch": 1.0634920634920635, "grad_norm": 341.707763671875, "learning_rate": 9.458203557201006e-07, "loss": 19.9688, "step": 16013 }, { "epoch": 1.0635584777844191, "grad_norm": 262.09716796875, "learning_rate": 9.457129620488643e-07, "loss": 22.1719, "step": 16014 }, { "epoch": 1.063624892076775, "grad_norm": 135.3363037109375, "learning_rate": 9.456055690055885e-07, "loss": 9.8594, "step": 16015 }, { "epoch": 1.0636913063691307, "grad_norm": 354.3305969238281, "learning_rate": 9.454981765915165e-07, "loss": 20.7656, "step": 16016 }, { "epoch": 1.0637577206614863, "grad_norm": 323.8862609863281, "learning_rate": 9.453907848078901e-07, "loss": 20.5625, "step": 16017 }, { "epoch": 1.063824134953842, "grad_norm": 220.31202697753906, "learning_rate": 9.452833936559517e-07, "loss": 22.3438, "step": 16018 }, { "epoch": 1.0638905492461979, "grad_norm": 215.80738830566406, "learning_rate": 9.451760031369432e-07, "loss": 15.5703, "step": 16019 }, { "epoch": 1.0639569635385535, "grad_norm": 136.14642333984375, "learning_rate": 9.450686132521073e-07, "loss": 13.0312, "step": 16020 }, { "epoch": 1.0640233778309092, "grad_norm": 343.7671813964844, "learning_rate": 9.449612240026858e-07, "loss": 21.375, "step": 16021 }, { "epoch": 1.0640897921232648, "grad_norm": 169.79733276367188, "learning_rate": 9.448538353899214e-07, "loss": 10.9844, "step": 16022 }, { "epoch": 1.0641562064156207, "grad_norm": 166.4469451904297, "learning_rate": 9.447464474150556e-07, "loss": 14.75, "step": 16023 }, { "epoch": 1.0642226207079764, "grad_norm": 213.7169952392578, "learning_rate": 9.446390600793315e-07, "loss": 16.875, "step": 16024 }, { "epoch": 1.064289035000332, "grad_norm": 235.55038452148438, "learning_rate": 9.445316733839906e-07, "loss": 16.0781, "step": 16025 }, { "epoch": 1.0643554492926879, "grad_norm": 183.89993286132812, "learning_rate": 9.44424287330275e-07, "loss": 12.8906, "step": 16026 }, { "epoch": 1.0644218635850435, "grad_norm": 220.7615203857422, "learning_rate": 9.443169019194278e-07, "loss": 17.7031, "step": 16027 }, { "epoch": 1.0644882778773992, "grad_norm": 213.3682403564453, "learning_rate": 9.442095171526903e-07, "loss": 19.0312, "step": 16028 }, { "epoch": 1.0645546921697548, "grad_norm": 114.74654388427734, "learning_rate": 9.441021330313052e-07, "loss": 17.0156, "step": 16029 }, { "epoch": 1.0646211064621107, "grad_norm": 182.869384765625, "learning_rate": 9.439947495565138e-07, "loss": 11.5938, "step": 16030 }, { "epoch": 1.0646875207544664, "grad_norm": 267.6246643066406, "learning_rate": 9.438873667295594e-07, "loss": 15.6719, "step": 16031 }, { "epoch": 1.064753935046822, "grad_norm": 167.21925354003906, "learning_rate": 9.437799845516832e-07, "loss": 18.9219, "step": 16032 }, { "epoch": 1.0648203493391777, "grad_norm": 724.951416015625, "learning_rate": 9.43672603024128e-07, "loss": 14.3594, "step": 16033 }, { "epoch": 1.0648867636315336, "grad_norm": 226.5558319091797, "learning_rate": 9.435652221481354e-07, "loss": 16.2812, "step": 16034 }, { "epoch": 1.0649531779238892, "grad_norm": 286.3089904785156, "learning_rate": 9.434578419249481e-07, "loss": 18.9219, "step": 16035 }, { "epoch": 1.0650195922162449, "grad_norm": 120.7099609375, "learning_rate": 9.433504623558076e-07, "loss": 12.1562, "step": 16036 }, { "epoch": 1.0650860065086007, "grad_norm": 241.97463989257812, "learning_rate": 9.432430834419566e-07, "loss": 16.6875, "step": 16037 }, { "epoch": 1.0651524208009564, "grad_norm": 229.6714324951172, "learning_rate": 9.431357051846366e-07, "loss": 16.1562, "step": 16038 }, { "epoch": 1.065218835093312, "grad_norm": 177.89588928222656, "learning_rate": 9.430283275850902e-07, "loss": 15.1562, "step": 16039 }, { "epoch": 1.0652852493856677, "grad_norm": 216.07176208496094, "learning_rate": 9.429209506445588e-07, "loss": 14.0, "step": 16040 }, { "epoch": 1.0653516636780236, "grad_norm": 132.3146514892578, "learning_rate": 9.428135743642855e-07, "loss": 14.4609, "step": 16041 }, { "epoch": 1.0654180779703792, "grad_norm": 133.28817749023438, "learning_rate": 9.427061987455114e-07, "loss": 18.5625, "step": 16042 }, { "epoch": 1.065484492262735, "grad_norm": 280.4554138183594, "learning_rate": 9.425988237894792e-07, "loss": 16.6094, "step": 16043 }, { "epoch": 1.0655509065550905, "grad_norm": 265.30047607421875, "learning_rate": 9.424914494974303e-07, "loss": 14.6875, "step": 16044 }, { "epoch": 1.0656173208474464, "grad_norm": 234.61187744140625, "learning_rate": 9.423840758706077e-07, "loss": 17.6719, "step": 16045 }, { "epoch": 1.065683735139802, "grad_norm": 150.28509521484375, "learning_rate": 9.422767029102526e-07, "loss": 14.875, "step": 16046 }, { "epoch": 1.0657501494321577, "grad_norm": 235.03622436523438, "learning_rate": 9.421693306176074e-07, "loss": 25.6875, "step": 16047 }, { "epoch": 1.0658165637245136, "grad_norm": 130.23292541503906, "learning_rate": 9.420619589939138e-07, "loss": 15.2812, "step": 16048 }, { "epoch": 1.0658829780168693, "grad_norm": 385.932861328125, "learning_rate": 9.419545880404142e-07, "loss": 14.2969, "step": 16049 }, { "epoch": 1.065949392309225, "grad_norm": 201.66921997070312, "learning_rate": 9.418472177583505e-07, "loss": 18.9375, "step": 16050 }, { "epoch": 1.0660158066015806, "grad_norm": 264.0574951171875, "learning_rate": 9.417398481489648e-07, "loss": 20.5938, "step": 16051 }, { "epoch": 1.0660822208939364, "grad_norm": 167.64614868164062, "learning_rate": 9.416324792134986e-07, "loss": 19.3359, "step": 16052 }, { "epoch": 1.066148635186292, "grad_norm": 216.63380432128906, "learning_rate": 9.415251109531944e-07, "loss": 18.4375, "step": 16053 }, { "epoch": 1.0662150494786478, "grad_norm": 132.33868408203125, "learning_rate": 9.414177433692941e-07, "loss": 18.0, "step": 16054 }, { "epoch": 1.0662814637710034, "grad_norm": 727.8587036132812, "learning_rate": 9.413103764630395e-07, "loss": 16.125, "step": 16055 }, { "epoch": 1.0663478780633593, "grad_norm": 411.6907653808594, "learning_rate": 9.412030102356723e-07, "loss": 19.2656, "step": 16056 }, { "epoch": 1.066414292355715, "grad_norm": 158.74745178222656, "learning_rate": 9.410956446884355e-07, "loss": 17.25, "step": 16057 }, { "epoch": 1.0664807066480706, "grad_norm": 147.4766845703125, "learning_rate": 9.409882798225696e-07, "loss": 14.0469, "step": 16058 }, { "epoch": 1.0665471209404265, "grad_norm": 227.11953735351562, "learning_rate": 9.408809156393176e-07, "loss": 16.9844, "step": 16059 }, { "epoch": 1.0666135352327821, "grad_norm": 524.2285766601562, "learning_rate": 9.407735521399209e-07, "loss": 15.9844, "step": 16060 }, { "epoch": 1.0666799495251378, "grad_norm": 303.10699462890625, "learning_rate": 9.406661893256215e-07, "loss": 19.625, "step": 16061 }, { "epoch": 1.0667463638174934, "grad_norm": 202.26344299316406, "learning_rate": 9.405588271976618e-07, "loss": 16.9062, "step": 16062 }, { "epoch": 1.0668127781098493, "grad_norm": 135.05311584472656, "learning_rate": 9.404514657572831e-07, "loss": 10.1094, "step": 16063 }, { "epoch": 1.066879192402205, "grad_norm": 199.88082885742188, "learning_rate": 9.403441050057276e-07, "loss": 14.6719, "step": 16064 }, { "epoch": 1.0669456066945606, "grad_norm": 167.67263793945312, "learning_rate": 9.402367449442368e-07, "loss": 18.8281, "step": 16065 }, { "epoch": 1.0670120209869163, "grad_norm": 306.0230712890625, "learning_rate": 9.401293855740532e-07, "loss": 22.4375, "step": 16066 }, { "epoch": 1.0670784352792722, "grad_norm": 122.56903839111328, "learning_rate": 9.400220268964182e-07, "loss": 16.6094, "step": 16067 }, { "epoch": 1.0671448495716278, "grad_norm": 144.7952117919922, "learning_rate": 9.39914668912574e-07, "loss": 13.5703, "step": 16068 }, { "epoch": 1.0672112638639835, "grad_norm": 360.7085876464844, "learning_rate": 9.398073116237618e-07, "loss": 17.7969, "step": 16069 }, { "epoch": 1.0672776781563393, "grad_norm": 126.71710205078125, "learning_rate": 9.396999550312243e-07, "loss": 14.25, "step": 16070 }, { "epoch": 1.067344092448695, "grad_norm": 180.68418884277344, "learning_rate": 9.395925991362029e-07, "loss": 17.7969, "step": 16071 }, { "epoch": 1.0674105067410506, "grad_norm": 305.734130859375, "learning_rate": 9.394852439399396e-07, "loss": 13.2969, "step": 16072 }, { "epoch": 1.0674769210334063, "grad_norm": 183.22433471679688, "learning_rate": 9.393778894436756e-07, "loss": 15.0156, "step": 16073 }, { "epoch": 1.0675433353257622, "grad_norm": 121.28425598144531, "learning_rate": 9.392705356486538e-07, "loss": 10.25, "step": 16074 }, { "epoch": 1.0676097496181178, "grad_norm": 194.49819946289062, "learning_rate": 9.391631825561151e-07, "loss": 19.2344, "step": 16075 }, { "epoch": 1.0676761639104735, "grad_norm": 125.93961334228516, "learning_rate": 9.390558301673019e-07, "loss": 19.2188, "step": 16076 }, { "epoch": 1.0677425782028291, "grad_norm": 1402.8912353515625, "learning_rate": 9.389484784834553e-07, "loss": 16.8125, "step": 16077 }, { "epoch": 1.067808992495185, "grad_norm": 305.5538024902344, "learning_rate": 9.38841127505818e-07, "loss": 14.9531, "step": 16078 }, { "epoch": 1.0678754067875407, "grad_norm": 96.57259368896484, "learning_rate": 9.387337772356307e-07, "loss": 13.7266, "step": 16079 }, { "epoch": 1.0679418210798963, "grad_norm": 492.0015563964844, "learning_rate": 9.386264276741362e-07, "loss": 26.0, "step": 16080 }, { "epoch": 1.0680082353722522, "grad_norm": 121.15571594238281, "learning_rate": 9.385190788225757e-07, "loss": 13.3281, "step": 16081 }, { "epoch": 1.0680746496646079, "grad_norm": 138.66366577148438, "learning_rate": 9.38411730682191e-07, "loss": 16.0469, "step": 16082 }, { "epoch": 1.0681410639569635, "grad_norm": 157.6082763671875, "learning_rate": 9.383043832542237e-07, "loss": 15.8594, "step": 16083 }, { "epoch": 1.0682074782493192, "grad_norm": 140.12347412109375, "learning_rate": 9.38197036539916e-07, "loss": 13.8125, "step": 16084 }, { "epoch": 1.068273892541675, "grad_norm": 163.4954071044922, "learning_rate": 9.380896905405091e-07, "loss": 16.4375, "step": 16085 }, { "epoch": 1.0683403068340307, "grad_norm": 257.58319091796875, "learning_rate": 9.379823452572453e-07, "loss": 17.7031, "step": 16086 }, { "epoch": 1.0684067211263863, "grad_norm": 268.8889465332031, "learning_rate": 9.378750006913654e-07, "loss": 15.6406, "step": 16087 }, { "epoch": 1.068473135418742, "grad_norm": 193.62774658203125, "learning_rate": 9.377676568441123e-07, "loss": 22.8438, "step": 16088 }, { "epoch": 1.0685395497110979, "grad_norm": 250.51290893554688, "learning_rate": 9.376603137167267e-07, "loss": 21.1875, "step": 16089 }, { "epoch": 1.0686059640034535, "grad_norm": 375.3530578613281, "learning_rate": 9.375529713104508e-07, "loss": 14.6094, "step": 16090 }, { "epoch": 1.0686723782958092, "grad_norm": 275.9169006347656, "learning_rate": 9.374456296265259e-07, "loss": 18.4219, "step": 16091 }, { "epoch": 1.068738792588165, "grad_norm": 276.7448425292969, "learning_rate": 9.373382886661942e-07, "loss": 15.3125, "step": 16092 }, { "epoch": 1.0688052068805207, "grad_norm": 153.70718383789062, "learning_rate": 9.372309484306967e-07, "loss": 12.1094, "step": 16093 }, { "epoch": 1.0688716211728764, "grad_norm": 127.46222686767578, "learning_rate": 9.371236089212758e-07, "loss": 13.0312, "step": 16094 }, { "epoch": 1.068938035465232, "grad_norm": 322.4841613769531, "learning_rate": 9.370162701391722e-07, "loss": 13.6875, "step": 16095 }, { "epoch": 1.069004449757588, "grad_norm": 173.24879455566406, "learning_rate": 9.369089320856284e-07, "loss": 15.0312, "step": 16096 }, { "epoch": 1.0690708640499436, "grad_norm": 148.2786865234375, "learning_rate": 9.368015947618857e-07, "loss": 19.4219, "step": 16097 }, { "epoch": 1.0691372783422992, "grad_norm": 215.25355529785156, "learning_rate": 9.366942581691853e-07, "loss": 24.6562, "step": 16098 }, { "epoch": 1.0692036926346549, "grad_norm": 177.68174743652344, "learning_rate": 9.365869223087698e-07, "loss": 21.7656, "step": 16099 }, { "epoch": 1.0692701069270107, "grad_norm": 211.09075927734375, "learning_rate": 9.3647958718188e-07, "loss": 16.8438, "step": 16100 }, { "epoch": 1.0693365212193664, "grad_norm": 154.27468872070312, "learning_rate": 9.363722527897578e-07, "loss": 18.9375, "step": 16101 }, { "epoch": 1.069402935511722, "grad_norm": 494.5660705566406, "learning_rate": 9.362649191336446e-07, "loss": 11.4219, "step": 16102 }, { "epoch": 1.069469349804078, "grad_norm": 270.8438415527344, "learning_rate": 9.361575862147823e-07, "loss": 12.3438, "step": 16103 }, { "epoch": 1.0695357640964336, "grad_norm": 200.87596130371094, "learning_rate": 9.360502540344117e-07, "loss": 19.5469, "step": 16104 }, { "epoch": 1.0696021783887892, "grad_norm": 360.75762939453125, "learning_rate": 9.359429225937752e-07, "loss": 14.6719, "step": 16105 }, { "epoch": 1.069668592681145, "grad_norm": 348.2312316894531, "learning_rate": 9.35835591894114e-07, "loss": 21.9062, "step": 16106 }, { "epoch": 1.0697350069735008, "grad_norm": 830.4961547851562, "learning_rate": 9.357282619366699e-07, "loss": 18.2656, "step": 16107 }, { "epoch": 1.0698014212658564, "grad_norm": 703.1248168945312, "learning_rate": 9.356209327226837e-07, "loss": 18.4688, "step": 16108 }, { "epoch": 1.069867835558212, "grad_norm": 204.85011291503906, "learning_rate": 9.355136042533978e-07, "loss": 19.8125, "step": 16109 }, { "epoch": 1.0699342498505677, "grad_norm": 221.4340057373047, "learning_rate": 9.354062765300533e-07, "loss": 17.0312, "step": 16110 }, { "epoch": 1.0700006641429236, "grad_norm": 105.44312286376953, "learning_rate": 9.352989495538919e-07, "loss": 14.9219, "step": 16111 }, { "epoch": 1.0700670784352793, "grad_norm": 198.3497772216797, "learning_rate": 9.351916233261544e-07, "loss": 17.7344, "step": 16112 }, { "epoch": 1.070133492727635, "grad_norm": 210.71176147460938, "learning_rate": 9.350842978480834e-07, "loss": 21.5156, "step": 16113 }, { "epoch": 1.0701999070199908, "grad_norm": 206.284912109375, "learning_rate": 9.349769731209195e-07, "loss": 10.7188, "step": 16114 }, { "epoch": 1.0702663213123464, "grad_norm": 244.06117248535156, "learning_rate": 9.348696491459048e-07, "loss": 12.8125, "step": 16115 }, { "epoch": 1.070332735604702, "grad_norm": 147.95884704589844, "learning_rate": 9.347623259242799e-07, "loss": 18.1094, "step": 16116 }, { "epoch": 1.0703991498970578, "grad_norm": 206.17002868652344, "learning_rate": 9.346550034572872e-07, "loss": 11.875, "step": 16117 }, { "epoch": 1.0704655641894136, "grad_norm": 198.73477172851562, "learning_rate": 9.345476817461676e-07, "loss": 14.9844, "step": 16118 }, { "epoch": 1.0705319784817693, "grad_norm": 223.38381958007812, "learning_rate": 9.34440360792163e-07, "loss": 18.2656, "step": 16119 }, { "epoch": 1.070598392774125, "grad_norm": 189.2378692626953, "learning_rate": 9.343330405965138e-07, "loss": 17.8438, "step": 16120 }, { "epoch": 1.0706648070664806, "grad_norm": 356.78271484375, "learning_rate": 9.34225721160463e-07, "loss": 20.3438, "step": 16121 }, { "epoch": 1.0707312213588365, "grad_norm": 270.5723571777344, "learning_rate": 9.341184024852504e-07, "loss": 24.1875, "step": 16122 }, { "epoch": 1.0707976356511921, "grad_norm": 173.3078155517578, "learning_rate": 9.340110845721185e-07, "loss": 11.8906, "step": 16123 }, { "epoch": 1.0708640499435478, "grad_norm": 181.17025756835938, "learning_rate": 9.339037674223083e-07, "loss": 14.0312, "step": 16124 }, { "epoch": 1.0709304642359037, "grad_norm": 240.7349395751953, "learning_rate": 9.337964510370613e-07, "loss": 16.3281, "step": 16125 }, { "epoch": 1.0709968785282593, "grad_norm": 169.37217712402344, "learning_rate": 9.336891354176183e-07, "loss": 17.4062, "step": 16126 }, { "epoch": 1.071063292820615, "grad_norm": 450.699462890625, "learning_rate": 9.335818205652217e-07, "loss": 19.5, "step": 16127 }, { "epoch": 1.0711297071129706, "grad_norm": 191.7657012939453, "learning_rate": 9.33474506481112e-07, "loss": 16.5781, "step": 16128 }, { "epoch": 1.0711961214053265, "grad_norm": 249.6672821044922, "learning_rate": 9.33367193166531e-07, "loss": 20.4688, "step": 16129 }, { "epoch": 1.0712625356976822, "grad_norm": 155.80528259277344, "learning_rate": 9.332598806227194e-07, "loss": 11.2656, "step": 16130 }, { "epoch": 1.0713289499900378, "grad_norm": 475.2540588378906, "learning_rate": 9.331525688509197e-07, "loss": 16.6875, "step": 16131 }, { "epoch": 1.0713953642823935, "grad_norm": 171.49627685546875, "learning_rate": 9.330452578523722e-07, "loss": 15.6406, "step": 16132 }, { "epoch": 1.0714617785747493, "grad_norm": 257.6046142578125, "learning_rate": 9.329379476283187e-07, "loss": 13.5938, "step": 16133 }, { "epoch": 1.071528192867105, "grad_norm": 137.4243927001953, "learning_rate": 9.3283063818e-07, "loss": 15.6719, "step": 16134 }, { "epoch": 1.0715946071594606, "grad_norm": 135.29052734375, "learning_rate": 9.327233295086579e-07, "loss": 17.5938, "step": 16135 }, { "epoch": 1.0716610214518165, "grad_norm": 217.6533966064453, "learning_rate": 9.326160216155337e-07, "loss": 21.9688, "step": 16136 }, { "epoch": 1.0717274357441722, "grad_norm": 155.9487762451172, "learning_rate": 9.325087145018683e-07, "loss": 18.4375, "step": 16137 }, { "epoch": 1.0717938500365278, "grad_norm": 381.04583740234375, "learning_rate": 9.324014081689034e-07, "loss": 18.6094, "step": 16138 }, { "epoch": 1.0718602643288835, "grad_norm": 345.4969787597656, "learning_rate": 9.322941026178799e-07, "loss": 26.0625, "step": 16139 }, { "epoch": 1.0719266786212394, "grad_norm": 656.9705810546875, "learning_rate": 9.321867978500394e-07, "loss": 13.0625, "step": 16140 }, { "epoch": 1.071993092913595, "grad_norm": 150.13385009765625, "learning_rate": 9.320794938666225e-07, "loss": 13.1719, "step": 16141 }, { "epoch": 1.0720595072059507, "grad_norm": 161.78929138183594, "learning_rate": 9.319721906688714e-07, "loss": 13.375, "step": 16142 }, { "epoch": 1.0721259214983063, "grad_norm": 152.59642028808594, "learning_rate": 9.318648882580263e-07, "loss": 14.6875, "step": 16143 }, { "epoch": 1.0721923357906622, "grad_norm": 305.7656555175781, "learning_rate": 9.317575866353291e-07, "loss": 18.8594, "step": 16144 }, { "epoch": 1.0722587500830179, "grad_norm": 207.4468994140625, "learning_rate": 9.316502858020206e-07, "loss": 19.1875, "step": 16145 }, { "epoch": 1.0723251643753735, "grad_norm": 187.12098693847656, "learning_rate": 9.315429857593426e-07, "loss": 11.1953, "step": 16146 }, { "epoch": 1.0723915786677294, "grad_norm": 169.5135955810547, "learning_rate": 9.314356865085352e-07, "loss": 17.4688, "step": 16147 }, { "epoch": 1.072457992960085, "grad_norm": 132.5657501220703, "learning_rate": 9.313283880508408e-07, "loss": 17.4062, "step": 16148 }, { "epoch": 1.0725244072524407, "grad_norm": 246.6159210205078, "learning_rate": 9.312210903874997e-07, "loss": 15.625, "step": 16149 }, { "epoch": 1.0725908215447963, "grad_norm": 157.77687072753906, "learning_rate": 9.311137935197537e-07, "loss": 14.1562, "step": 16150 }, { "epoch": 1.0726572358371522, "grad_norm": 219.4398193359375, "learning_rate": 9.310064974488431e-07, "loss": 18.6562, "step": 16151 }, { "epoch": 1.0727236501295079, "grad_norm": 583.2601928710938, "learning_rate": 9.308992021760101e-07, "loss": 19.6875, "step": 16152 }, { "epoch": 1.0727900644218635, "grad_norm": 144.22035217285156, "learning_rate": 9.30791907702495e-07, "loss": 16.7188, "step": 16153 }, { "epoch": 1.0728564787142192, "grad_norm": 286.7287902832031, "learning_rate": 9.306846140295393e-07, "loss": 15.3438, "step": 16154 }, { "epoch": 1.072922893006575, "grad_norm": 226.13031005859375, "learning_rate": 9.305773211583838e-07, "loss": 18.0625, "step": 16155 }, { "epoch": 1.0729893072989307, "grad_norm": 171.50582885742188, "learning_rate": 9.3047002909027e-07, "loss": 16.2188, "step": 16156 }, { "epoch": 1.0730557215912864, "grad_norm": 206.6575164794922, "learning_rate": 9.303627378264388e-07, "loss": 16.5938, "step": 16157 }, { "epoch": 1.0731221358836422, "grad_norm": 164.03045654296875, "learning_rate": 9.302554473681314e-07, "loss": 18.8906, "step": 16158 }, { "epoch": 1.073188550175998, "grad_norm": 165.3299560546875, "learning_rate": 9.301481577165884e-07, "loss": 12.375, "step": 16159 }, { "epoch": 1.0732549644683536, "grad_norm": 172.43785095214844, "learning_rate": 9.300408688730517e-07, "loss": 22.0, "step": 16160 }, { "epoch": 1.0733213787607092, "grad_norm": 173.59304809570312, "learning_rate": 9.299335808387616e-07, "loss": 16.4062, "step": 16161 }, { "epoch": 1.073387793053065, "grad_norm": 241.3094940185547, "learning_rate": 9.298262936149596e-07, "loss": 16.8594, "step": 16162 }, { "epoch": 1.0734542073454207, "grad_norm": 280.9022216796875, "learning_rate": 9.297190072028864e-07, "loss": 17.6094, "step": 16163 }, { "epoch": 1.0735206216377764, "grad_norm": 639.3889770507812, "learning_rate": 9.296117216037834e-07, "loss": 24.6094, "step": 16164 }, { "epoch": 1.073587035930132, "grad_norm": 178.66131591796875, "learning_rate": 9.295044368188911e-07, "loss": 18.5312, "step": 16165 }, { "epoch": 1.073653450222488, "grad_norm": 206.7062225341797, "learning_rate": 9.293971528494512e-07, "loss": 13.4531, "step": 16166 }, { "epoch": 1.0737198645148436, "grad_norm": 383.55694580078125, "learning_rate": 9.29289869696704e-07, "loss": 23.125, "step": 16167 }, { "epoch": 1.0737862788071992, "grad_norm": 143.375732421875, "learning_rate": 9.291825873618911e-07, "loss": 14.8125, "step": 16168 }, { "epoch": 1.0738526930995551, "grad_norm": 142.65353393554688, "learning_rate": 9.290753058462529e-07, "loss": 17.9688, "step": 16169 }, { "epoch": 1.0739191073919108, "grad_norm": 352.7154235839844, "learning_rate": 9.289680251510309e-07, "loss": 13.9844, "step": 16170 }, { "epoch": 1.0739855216842664, "grad_norm": 132.26097106933594, "learning_rate": 9.288607452774657e-07, "loss": 18.9219, "step": 16171 }, { "epoch": 1.074051935976622, "grad_norm": 577.03466796875, "learning_rate": 9.28753466226798e-07, "loss": 13.2891, "step": 16172 }, { "epoch": 1.074118350268978, "grad_norm": 271.2689514160156, "learning_rate": 9.286461880002698e-07, "loss": 12.4062, "step": 16173 }, { "epoch": 1.0741847645613336, "grad_norm": 451.7845153808594, "learning_rate": 9.28538910599121e-07, "loss": 16.5703, "step": 16174 }, { "epoch": 1.0742511788536893, "grad_norm": 283.597412109375, "learning_rate": 9.284316340245931e-07, "loss": 13.5938, "step": 16175 }, { "epoch": 1.074317593146045, "grad_norm": 285.35675048828125, "learning_rate": 9.283243582779265e-07, "loss": 19.4375, "step": 16176 }, { "epoch": 1.0743840074384008, "grad_norm": 212.91839599609375, "learning_rate": 9.282170833603626e-07, "loss": 16.6562, "step": 16177 }, { "epoch": 1.0744504217307564, "grad_norm": 127.40943908691406, "learning_rate": 9.281098092731419e-07, "loss": 10.2031, "step": 16178 }, { "epoch": 1.074516836023112, "grad_norm": 254.97508239746094, "learning_rate": 9.280025360175059e-07, "loss": 18.6562, "step": 16179 }, { "epoch": 1.074583250315468, "grad_norm": 317.78472900390625, "learning_rate": 9.278952635946944e-07, "loss": 15.5, "step": 16180 }, { "epoch": 1.0746496646078236, "grad_norm": 176.3265838623047, "learning_rate": 9.277879920059494e-07, "loss": 13.1562, "step": 16181 }, { "epoch": 1.0747160789001793, "grad_norm": 181.93746948242188, "learning_rate": 9.276807212525111e-07, "loss": 9.6328, "step": 16182 }, { "epoch": 1.074782493192535, "grad_norm": 197.46878051757812, "learning_rate": 9.275734513356207e-07, "loss": 15.7344, "step": 16183 }, { "epoch": 1.0748489074848908, "grad_norm": 268.68695068359375, "learning_rate": 9.274661822565184e-07, "loss": 16.5312, "step": 16184 }, { "epoch": 1.0749153217772465, "grad_norm": 232.41119384765625, "learning_rate": 9.273589140164461e-07, "loss": 18.8281, "step": 16185 }, { "epoch": 1.0749817360696021, "grad_norm": 358.7488708496094, "learning_rate": 9.272516466166434e-07, "loss": 17.0781, "step": 16186 }, { "epoch": 1.0750481503619578, "grad_norm": 3290.532958984375, "learning_rate": 9.271443800583522e-07, "loss": 10.4375, "step": 16187 }, { "epoch": 1.0751145646543137, "grad_norm": 157.1856231689453, "learning_rate": 9.270371143428125e-07, "loss": 16.7656, "step": 16188 }, { "epoch": 1.0751809789466693, "grad_norm": 104.3530044555664, "learning_rate": 9.269298494712657e-07, "loss": 9.1406, "step": 16189 }, { "epoch": 1.075247393239025, "grad_norm": 368.68133544921875, "learning_rate": 9.268225854449517e-07, "loss": 21.7344, "step": 16190 }, { "epoch": 1.0753138075313808, "grad_norm": 231.40440368652344, "learning_rate": 9.267153222651124e-07, "loss": 18.0938, "step": 16191 }, { "epoch": 1.0753802218237365, "grad_norm": 481.2742614746094, "learning_rate": 9.266080599329876e-07, "loss": 17.3438, "step": 16192 }, { "epoch": 1.0754466361160921, "grad_norm": 179.408447265625, "learning_rate": 9.265007984498187e-07, "loss": 16.3438, "step": 16193 }, { "epoch": 1.0755130504084478, "grad_norm": 218.31594848632812, "learning_rate": 9.26393537816846e-07, "loss": 15.2031, "step": 16194 }, { "epoch": 1.0755794647008037, "grad_norm": 191.9982147216797, "learning_rate": 9.262862780353106e-07, "loss": 13.5781, "step": 16195 }, { "epoch": 1.0756458789931593, "grad_norm": 115.74254608154297, "learning_rate": 9.261790191064528e-07, "loss": 10.4688, "step": 16196 }, { "epoch": 1.075712293285515, "grad_norm": 183.06546020507812, "learning_rate": 9.260717610315138e-07, "loss": 13.9531, "step": 16197 }, { "epoch": 1.0757787075778706, "grad_norm": 220.48922729492188, "learning_rate": 9.259645038117336e-07, "loss": 14.4062, "step": 16198 }, { "epoch": 1.0758451218702265, "grad_norm": 138.82769775390625, "learning_rate": 9.258572474483539e-07, "loss": 12.5469, "step": 16199 }, { "epoch": 1.0759115361625822, "grad_norm": 294.75140380859375, "learning_rate": 9.257499919426145e-07, "loss": 16.0625, "step": 16200 }, { "epoch": 1.0759779504549378, "grad_norm": 122.69493865966797, "learning_rate": 9.256427372957565e-07, "loss": 13.5781, "step": 16201 }, { "epoch": 1.0760443647472937, "grad_norm": 250.45860290527344, "learning_rate": 9.255354835090201e-07, "loss": 19.6719, "step": 16202 }, { "epoch": 1.0761107790396494, "grad_norm": 109.03031921386719, "learning_rate": 9.254282305836468e-07, "loss": 12.6719, "step": 16203 }, { "epoch": 1.076177193332005, "grad_norm": 165.11459350585938, "learning_rate": 9.253209785208763e-07, "loss": 11.8438, "step": 16204 }, { "epoch": 1.0762436076243607, "grad_norm": 259.5111083984375, "learning_rate": 9.2521372732195e-07, "loss": 15.4062, "step": 16205 }, { "epoch": 1.0763100219167165, "grad_norm": 127.0837631225586, "learning_rate": 9.251064769881079e-07, "loss": 14.2969, "step": 16206 }, { "epoch": 1.0763764362090722, "grad_norm": 93.67768859863281, "learning_rate": 9.249992275205912e-07, "loss": 12.6562, "step": 16207 }, { "epoch": 1.0764428505014279, "grad_norm": 174.80552673339844, "learning_rate": 9.248919789206397e-07, "loss": 12.4688, "step": 16208 }, { "epoch": 1.0765092647937835, "grad_norm": 243.60179138183594, "learning_rate": 9.247847311894947e-07, "loss": 24.9844, "step": 16209 }, { "epoch": 1.0765756790861394, "grad_norm": 114.050048828125, "learning_rate": 9.246774843283968e-07, "loss": 11.0312, "step": 16210 }, { "epoch": 1.076642093378495, "grad_norm": 241.4981231689453, "learning_rate": 9.245702383385858e-07, "loss": 24.1562, "step": 16211 }, { "epoch": 1.0767085076708507, "grad_norm": 360.4912414550781, "learning_rate": 9.244629932213034e-07, "loss": 17.4062, "step": 16212 }, { "epoch": 1.0767749219632066, "grad_norm": 160.2305450439453, "learning_rate": 9.243557489777891e-07, "loss": 13.875, "step": 16213 }, { "epoch": 1.0768413362555622, "grad_norm": 148.5599822998047, "learning_rate": 9.242485056092842e-07, "loss": 15.4062, "step": 16214 }, { "epoch": 1.0769077505479179, "grad_norm": 133.0035400390625, "learning_rate": 9.241412631170286e-07, "loss": 16.2188, "step": 16215 }, { "epoch": 1.0769741648402735, "grad_norm": 371.1972961425781, "learning_rate": 9.240340215022634e-07, "loss": 19.7969, "step": 16216 }, { "epoch": 1.0770405791326294, "grad_norm": 323.4873046875, "learning_rate": 9.239267807662287e-07, "loss": 22.3438, "step": 16217 }, { "epoch": 1.077106993424985, "grad_norm": 205.1898193359375, "learning_rate": 9.238195409101653e-07, "loss": 13.0938, "step": 16218 }, { "epoch": 1.0771734077173407, "grad_norm": 1667.2303466796875, "learning_rate": 9.23712301935313e-07, "loss": 20.3594, "step": 16219 }, { "epoch": 1.0772398220096966, "grad_norm": 229.8838348388672, "learning_rate": 9.236050638429134e-07, "loss": 17.7812, "step": 16220 }, { "epoch": 1.0773062363020522, "grad_norm": 215.8916015625, "learning_rate": 9.23497826634206e-07, "loss": 20.9219, "step": 16221 }, { "epoch": 1.077372650594408, "grad_norm": 256.4420166015625, "learning_rate": 9.233905903104319e-07, "loss": 14.9219, "step": 16222 }, { "epoch": 1.0774390648867636, "grad_norm": 343.1720886230469, "learning_rate": 9.232833548728309e-07, "loss": 19.9531, "step": 16223 }, { "epoch": 1.0775054791791194, "grad_norm": 192.06944274902344, "learning_rate": 9.231761203226443e-07, "loss": 14.5156, "step": 16224 }, { "epoch": 1.077571893471475, "grad_norm": 221.1736602783203, "learning_rate": 9.230688866611118e-07, "loss": 18.4531, "step": 16225 }, { "epoch": 1.0776383077638307, "grad_norm": 239.28456115722656, "learning_rate": 9.229616538894741e-07, "loss": 16.6719, "step": 16226 }, { "epoch": 1.0777047220561864, "grad_norm": 168.76589965820312, "learning_rate": 9.228544220089716e-07, "loss": 13.5781, "step": 16227 }, { "epoch": 1.0777711363485423, "grad_norm": 187.36880493164062, "learning_rate": 9.227471910208447e-07, "loss": 14.8672, "step": 16228 }, { "epoch": 1.077837550640898, "grad_norm": 221.90890502929688, "learning_rate": 9.226399609263333e-07, "loss": 13.6484, "step": 16229 }, { "epoch": 1.0779039649332536, "grad_norm": 425.9596252441406, "learning_rate": 9.225327317266787e-07, "loss": 12.0312, "step": 16230 }, { "epoch": 1.0779703792256095, "grad_norm": 146.19357299804688, "learning_rate": 9.224255034231206e-07, "loss": 17.8906, "step": 16231 }, { "epoch": 1.078036793517965, "grad_norm": 304.14520263671875, "learning_rate": 9.223182760168997e-07, "loss": 18.9062, "step": 16232 }, { "epoch": 1.0781032078103208, "grad_norm": 148.86959838867188, "learning_rate": 9.222110495092559e-07, "loss": 13.8906, "step": 16233 }, { "epoch": 1.0781696221026764, "grad_norm": 162.82626342773438, "learning_rate": 9.221038239014302e-07, "loss": 15.2188, "step": 16234 }, { "epoch": 1.0782360363950323, "grad_norm": 152.57713317871094, "learning_rate": 9.219965991946622e-07, "loss": 17.8438, "step": 16235 }, { "epoch": 1.078302450687388, "grad_norm": 486.4727783203125, "learning_rate": 9.218893753901928e-07, "loss": 16.3906, "step": 16236 }, { "epoch": 1.0783688649797436, "grad_norm": 110.50665283203125, "learning_rate": 9.217821524892619e-07, "loss": 14.2188, "step": 16237 }, { "epoch": 1.0784352792720993, "grad_norm": 117.84947204589844, "learning_rate": 9.216749304931101e-07, "loss": 13.7344, "step": 16238 }, { "epoch": 1.0785016935644551, "grad_norm": 368.6373291015625, "learning_rate": 9.215677094029775e-07, "loss": 26.4062, "step": 16239 }, { "epoch": 1.0785681078568108, "grad_norm": 248.59042358398438, "learning_rate": 9.214604892201044e-07, "loss": 19.6719, "step": 16240 }, { "epoch": 1.0786345221491664, "grad_norm": 201.26683044433594, "learning_rate": 9.213532699457308e-07, "loss": 21.1562, "step": 16241 }, { "epoch": 1.0787009364415223, "grad_norm": 170.7799530029297, "learning_rate": 9.212460515810977e-07, "loss": 16.0625, "step": 16242 }, { "epoch": 1.078767350733878, "grad_norm": 646.20947265625, "learning_rate": 9.211388341274445e-07, "loss": 18.9375, "step": 16243 }, { "epoch": 1.0788337650262336, "grad_norm": 1082.0306396484375, "learning_rate": 9.210316175860121e-07, "loss": 17.3594, "step": 16244 }, { "epoch": 1.0789001793185893, "grad_norm": 135.779541015625, "learning_rate": 9.2092440195804e-07, "loss": 15.8438, "step": 16245 }, { "epoch": 1.0789665936109452, "grad_norm": 468.3500671386719, "learning_rate": 9.208171872447689e-07, "loss": 13.5312, "step": 16246 }, { "epoch": 1.0790330079033008, "grad_norm": 225.67677307128906, "learning_rate": 9.207099734474394e-07, "loss": 15.125, "step": 16247 }, { "epoch": 1.0790994221956565, "grad_norm": 87.98237609863281, "learning_rate": 9.206027605672908e-07, "loss": 15.3594, "step": 16248 }, { "epoch": 1.0791658364880121, "grad_norm": 145.98086547851562, "learning_rate": 9.204955486055639e-07, "loss": 14.625, "step": 16249 }, { "epoch": 1.079232250780368, "grad_norm": 314.4940490722656, "learning_rate": 9.203883375634983e-07, "loss": 18.3438, "step": 16250 }, { "epoch": 1.0792986650727237, "grad_norm": 135.25050354003906, "learning_rate": 9.202811274423348e-07, "loss": 18.6719, "step": 16251 }, { "epoch": 1.0793650793650793, "grad_norm": 598.6892700195312, "learning_rate": 9.201739182433131e-07, "loss": 20.5, "step": 16252 }, { "epoch": 1.0794314936574352, "grad_norm": 163.3117218017578, "learning_rate": 9.200667099676738e-07, "loss": 8.1562, "step": 16253 }, { "epoch": 1.0794979079497908, "grad_norm": 243.91253662109375, "learning_rate": 9.199595026166562e-07, "loss": 18.7031, "step": 16254 }, { "epoch": 1.0795643222421465, "grad_norm": 283.1393127441406, "learning_rate": 9.198522961915013e-07, "loss": 16.6094, "step": 16255 }, { "epoch": 1.0796307365345021, "grad_norm": 141.74813842773438, "learning_rate": 9.197450906934487e-07, "loss": 15.1172, "step": 16256 }, { "epoch": 1.079697150826858, "grad_norm": 176.29820251464844, "learning_rate": 9.196378861237388e-07, "loss": 14.7969, "step": 16257 }, { "epoch": 1.0797635651192137, "grad_norm": 174.89291381835938, "learning_rate": 9.19530682483611e-07, "loss": 13.4219, "step": 16258 }, { "epoch": 1.0798299794115693, "grad_norm": 161.26004028320312, "learning_rate": 9.194234797743064e-07, "loss": 26.7812, "step": 16259 }, { "epoch": 1.079896393703925, "grad_norm": 162.83351135253906, "learning_rate": 9.193162779970643e-07, "loss": 12.7188, "step": 16260 }, { "epoch": 1.0799628079962809, "grad_norm": 315.47308349609375, "learning_rate": 9.19209077153125e-07, "loss": 20.8906, "step": 16261 }, { "epoch": 1.0800292222886365, "grad_norm": 209.6862335205078, "learning_rate": 9.191018772437283e-07, "loss": 15.5156, "step": 16262 }, { "epoch": 1.0800956365809922, "grad_norm": 134.40435791015625, "learning_rate": 9.189946782701147e-07, "loss": 15.625, "step": 16263 }, { "epoch": 1.080162050873348, "grad_norm": 464.5975341796875, "learning_rate": 9.188874802335238e-07, "loss": 15.7031, "step": 16264 }, { "epoch": 1.0802284651657037, "grad_norm": 263.1384582519531, "learning_rate": 9.187802831351959e-07, "loss": 14.7969, "step": 16265 }, { "epoch": 1.0802948794580594, "grad_norm": 117.2386245727539, "learning_rate": 9.186730869763706e-07, "loss": 13.4062, "step": 16266 }, { "epoch": 1.080361293750415, "grad_norm": 139.08798217773438, "learning_rate": 9.185658917582887e-07, "loss": 13.7656, "step": 16267 }, { "epoch": 1.0804277080427709, "grad_norm": 136.62135314941406, "learning_rate": 9.184586974821889e-07, "loss": 13.5625, "step": 16268 }, { "epoch": 1.0804941223351265, "grad_norm": 205.26522827148438, "learning_rate": 9.183515041493124e-07, "loss": 19.6406, "step": 16269 }, { "epoch": 1.0805605366274822, "grad_norm": 304.16131591796875, "learning_rate": 9.182443117608983e-07, "loss": 14.8125, "step": 16270 }, { "epoch": 1.0806269509198378, "grad_norm": 263.16815185546875, "learning_rate": 9.181371203181871e-07, "loss": 24.1562, "step": 16271 }, { "epoch": 1.0806933652121937, "grad_norm": 472.6746826171875, "learning_rate": 9.180299298224181e-07, "loss": 20.6562, "step": 16272 }, { "epoch": 1.0807597795045494, "grad_norm": 220.9324188232422, "learning_rate": 9.17922740274832e-07, "loss": 16.3203, "step": 16273 }, { "epoch": 1.080826193796905, "grad_norm": 176.63710021972656, "learning_rate": 9.178155516766682e-07, "loss": 20.8594, "step": 16274 }, { "epoch": 1.080892608089261, "grad_norm": 230.69920349121094, "learning_rate": 9.177083640291667e-07, "loss": 17.7188, "step": 16275 }, { "epoch": 1.0809590223816166, "grad_norm": 318.6575012207031, "learning_rate": 9.176011773335671e-07, "loss": 19.0781, "step": 16276 }, { "epoch": 1.0810254366739722, "grad_norm": 196.88204956054688, "learning_rate": 9.174939915911101e-07, "loss": 20.4844, "step": 16277 }, { "epoch": 1.0810918509663279, "grad_norm": 137.08279418945312, "learning_rate": 9.173868068030346e-07, "loss": 14.7031, "step": 16278 }, { "epoch": 1.0811582652586837, "grad_norm": 131.98731994628906, "learning_rate": 9.172796229705812e-07, "loss": 13.625, "step": 16279 }, { "epoch": 1.0812246795510394, "grad_norm": 147.28036499023438, "learning_rate": 9.171724400949891e-07, "loss": 14.7031, "step": 16280 }, { "epoch": 1.081291093843395, "grad_norm": 390.302490234375, "learning_rate": 9.170652581774987e-07, "loss": 18.6406, "step": 16281 }, { "epoch": 1.0813575081357507, "grad_norm": 170.50732421875, "learning_rate": 9.169580772193495e-07, "loss": 16.5781, "step": 16282 }, { "epoch": 1.0814239224281066, "grad_norm": 327.2966003417969, "learning_rate": 9.16850897221781e-07, "loss": 12.4375, "step": 16283 }, { "epoch": 1.0814903367204622, "grad_norm": 255.01715087890625, "learning_rate": 9.167437181860339e-07, "loss": 17.1094, "step": 16284 }, { "epoch": 1.081556751012818, "grad_norm": 110.69646453857422, "learning_rate": 9.166365401133472e-07, "loss": 16.6094, "step": 16285 }, { "epoch": 1.0816231653051738, "grad_norm": 568.9490966796875, "learning_rate": 9.165293630049613e-07, "loss": 14.4219, "step": 16286 }, { "epoch": 1.0816895795975294, "grad_norm": 511.9656066894531, "learning_rate": 9.16422186862115e-07, "loss": 23.7188, "step": 16287 }, { "epoch": 1.081755993889885, "grad_norm": 382.02508544921875, "learning_rate": 9.163150116860492e-07, "loss": 22.1719, "step": 16288 }, { "epoch": 1.0818224081822407, "grad_norm": 241.1410675048828, "learning_rate": 9.162078374780028e-07, "loss": 18.5156, "step": 16289 }, { "epoch": 1.0818888224745966, "grad_norm": 121.65205383300781, "learning_rate": 9.161006642392161e-07, "loss": 15.7188, "step": 16290 }, { "epoch": 1.0819552367669523, "grad_norm": 169.79818725585938, "learning_rate": 9.159934919709282e-07, "loss": 16.1719, "step": 16291 }, { "epoch": 1.082021651059308, "grad_norm": 97.69673919677734, "learning_rate": 9.158863206743794e-07, "loss": 15.6562, "step": 16292 }, { "epoch": 1.0820880653516636, "grad_norm": 103.19246673583984, "learning_rate": 9.157791503508088e-07, "loss": 14.5312, "step": 16293 }, { "epoch": 1.0821544796440195, "grad_norm": 313.40850830078125, "learning_rate": 9.156719810014568e-07, "loss": 13.3281, "step": 16294 }, { "epoch": 1.082220893936375, "grad_norm": 248.4535675048828, "learning_rate": 9.155648126275625e-07, "loss": 17.8438, "step": 16295 }, { "epoch": 1.0822873082287308, "grad_norm": 178.57369995117188, "learning_rate": 9.154576452303658e-07, "loss": 27.5781, "step": 16296 }, { "epoch": 1.0823537225210866, "grad_norm": 237.8695526123047, "learning_rate": 9.153504788111061e-07, "loss": 23.125, "step": 16297 }, { "epoch": 1.0824201368134423, "grad_norm": 198.91622924804688, "learning_rate": 9.152433133710235e-07, "loss": 14.9375, "step": 16298 }, { "epoch": 1.082486551105798, "grad_norm": 289.3861083984375, "learning_rate": 9.151361489113573e-07, "loss": 16.6875, "step": 16299 }, { "epoch": 1.0825529653981536, "grad_norm": 337.0046081542969, "learning_rate": 9.150289854333475e-07, "loss": 19.375, "step": 16300 }, { "epoch": 1.0826193796905095, "grad_norm": 366.0798645019531, "learning_rate": 9.149218229382329e-07, "loss": 25.6875, "step": 16301 }, { "epoch": 1.0826857939828651, "grad_norm": 163.74734497070312, "learning_rate": 9.148146614272538e-07, "loss": 22.3906, "step": 16302 }, { "epoch": 1.0827522082752208, "grad_norm": 783.9427490234375, "learning_rate": 9.147075009016496e-07, "loss": 17.7188, "step": 16303 }, { "epoch": 1.0828186225675764, "grad_norm": 284.0868835449219, "learning_rate": 9.1460034136266e-07, "loss": 20.0938, "step": 16304 }, { "epoch": 1.0828850368599323, "grad_norm": 130.84262084960938, "learning_rate": 9.14493182811524e-07, "loss": 14.9844, "step": 16305 }, { "epoch": 1.082951451152288, "grad_norm": 191.22035217285156, "learning_rate": 9.143860252494819e-07, "loss": 17.1875, "step": 16306 }, { "epoch": 1.0830178654446436, "grad_norm": 533.5783081054688, "learning_rate": 9.142788686777729e-07, "loss": 15.8438, "step": 16307 }, { "epoch": 1.0830842797369995, "grad_norm": 142.09329223632812, "learning_rate": 9.141717130976364e-07, "loss": 14.4844, "step": 16308 }, { "epoch": 1.0831506940293552, "grad_norm": 296.831298828125, "learning_rate": 9.140645585103119e-07, "loss": 14.3594, "step": 16309 }, { "epoch": 1.0832171083217108, "grad_norm": 523.2372436523438, "learning_rate": 9.139574049170394e-07, "loss": 23.9688, "step": 16310 }, { "epoch": 1.0832835226140665, "grad_norm": 264.154052734375, "learning_rate": 9.138502523190575e-07, "loss": 19.9219, "step": 16311 }, { "epoch": 1.0833499369064223, "grad_norm": 149.90699768066406, "learning_rate": 9.137431007176067e-07, "loss": 15.4375, "step": 16312 }, { "epoch": 1.083416351198778, "grad_norm": 147.4290008544922, "learning_rate": 9.136359501139258e-07, "loss": 13.2656, "step": 16313 }, { "epoch": 1.0834827654911336, "grad_norm": 121.18224334716797, "learning_rate": 9.135288005092545e-07, "loss": 14.0469, "step": 16314 }, { "epoch": 1.0835491797834895, "grad_norm": 201.5596923828125, "learning_rate": 9.134216519048318e-07, "loss": 20.0, "step": 16315 }, { "epoch": 1.0836155940758452, "grad_norm": 214.76443481445312, "learning_rate": 9.133145043018979e-07, "loss": 10.4844, "step": 16316 }, { "epoch": 1.0836820083682008, "grad_norm": 154.4945831298828, "learning_rate": 9.132073577016917e-07, "loss": 16.3594, "step": 16317 }, { "epoch": 1.0837484226605565, "grad_norm": 323.5417175292969, "learning_rate": 9.13100212105453e-07, "loss": 15.5234, "step": 16318 }, { "epoch": 1.0838148369529124, "grad_norm": 189.483154296875, "learning_rate": 9.129930675144204e-07, "loss": 16.8594, "step": 16319 }, { "epoch": 1.083881251245268, "grad_norm": 288.2443542480469, "learning_rate": 9.12885923929834e-07, "loss": 16.0, "step": 16320 }, { "epoch": 1.0839476655376237, "grad_norm": 191.34266662597656, "learning_rate": 9.127787813529333e-07, "loss": 20.5, "step": 16321 }, { "epoch": 1.0840140798299793, "grad_norm": 99.60126495361328, "learning_rate": 9.126716397849569e-07, "loss": 15.75, "step": 16322 }, { "epoch": 1.0840804941223352, "grad_norm": 242.56944274902344, "learning_rate": 9.12564499227145e-07, "loss": 13.4219, "step": 16323 }, { "epoch": 1.0841469084146909, "grad_norm": 505.1687927246094, "learning_rate": 9.124573596807364e-07, "loss": 16.7188, "step": 16324 }, { "epoch": 1.0842133227070465, "grad_norm": 179.37203979492188, "learning_rate": 9.123502211469708e-07, "loss": 15.7812, "step": 16325 }, { "epoch": 1.0842797369994024, "grad_norm": 200.69142150878906, "learning_rate": 9.122430836270869e-07, "loss": 15.8906, "step": 16326 }, { "epoch": 1.084346151291758, "grad_norm": 183.4383087158203, "learning_rate": 9.121359471223248e-07, "loss": 25.7188, "step": 16327 }, { "epoch": 1.0844125655841137, "grad_norm": 289.290283203125, "learning_rate": 9.120288116339231e-07, "loss": 15.7812, "step": 16328 }, { "epoch": 1.0844789798764694, "grad_norm": 126.91263580322266, "learning_rate": 9.119216771631218e-07, "loss": 20.3438, "step": 16329 }, { "epoch": 1.0845453941688252, "grad_norm": 567.6624755859375, "learning_rate": 9.11814543711159e-07, "loss": 22.7031, "step": 16330 }, { "epoch": 1.0846118084611809, "grad_norm": 106.28923034667969, "learning_rate": 9.117074112792756e-07, "loss": 17.3438, "step": 16331 }, { "epoch": 1.0846782227535365, "grad_norm": 304.9417724609375, "learning_rate": 9.116002798687092e-07, "loss": 23.2656, "step": 16332 }, { "epoch": 1.0847446370458922, "grad_norm": 253.5721893310547, "learning_rate": 9.114931494807003e-07, "loss": 21.4375, "step": 16333 }, { "epoch": 1.084811051338248, "grad_norm": 112.75165557861328, "learning_rate": 9.113860201164873e-07, "loss": 16.2344, "step": 16334 }, { "epoch": 1.0848774656306037, "grad_norm": 128.06985473632812, "learning_rate": 9.112788917773101e-07, "loss": 15.5625, "step": 16335 }, { "epoch": 1.0849438799229594, "grad_norm": 216.75555419921875, "learning_rate": 9.11171764464407e-07, "loss": 13.0156, "step": 16336 }, { "epoch": 1.0850102942153153, "grad_norm": 252.55235290527344, "learning_rate": 9.110646381790181e-07, "loss": 21.75, "step": 16337 }, { "epoch": 1.085076708507671, "grad_norm": 517.8705444335938, "learning_rate": 9.10957512922382e-07, "loss": 29.6562, "step": 16338 }, { "epoch": 1.0851431228000266, "grad_norm": 555.9598999023438, "learning_rate": 9.108503886957383e-07, "loss": 17.7969, "step": 16339 }, { "epoch": 1.0852095370923822, "grad_norm": 161.2393035888672, "learning_rate": 9.107432655003254e-07, "loss": 17.125, "step": 16340 }, { "epoch": 1.085275951384738, "grad_norm": 77.97663116455078, "learning_rate": 9.106361433373833e-07, "loss": 12.4531, "step": 16341 }, { "epoch": 1.0853423656770937, "grad_norm": 269.5697021484375, "learning_rate": 9.105290222081508e-07, "loss": 19.4375, "step": 16342 }, { "epoch": 1.0854087799694494, "grad_norm": 166.5648193359375, "learning_rate": 9.104219021138669e-07, "loss": 16.5781, "step": 16343 }, { "epoch": 1.085475194261805, "grad_norm": 530.0556640625, "learning_rate": 9.103147830557705e-07, "loss": 30.5312, "step": 16344 }, { "epoch": 1.085541608554161, "grad_norm": 185.2373504638672, "learning_rate": 9.102076650351014e-07, "loss": 16.875, "step": 16345 }, { "epoch": 1.0856080228465166, "grad_norm": 169.17974853515625, "learning_rate": 9.101005480530981e-07, "loss": 13.9844, "step": 16346 }, { "epoch": 1.0856744371388722, "grad_norm": 129.1804656982422, "learning_rate": 9.099934321109999e-07, "loss": 14.4844, "step": 16347 }, { "epoch": 1.0857408514312281, "grad_norm": 418.2706298828125, "learning_rate": 9.098863172100454e-07, "loss": 16.2188, "step": 16348 }, { "epoch": 1.0858072657235838, "grad_norm": 377.4563293457031, "learning_rate": 9.097792033514745e-07, "loss": 17.3906, "step": 16349 }, { "epoch": 1.0858736800159394, "grad_norm": 178.1102294921875, "learning_rate": 9.096720905365255e-07, "loss": 14.6406, "step": 16350 }, { "epoch": 1.085940094308295, "grad_norm": 214.30140686035156, "learning_rate": 9.095649787664379e-07, "loss": 13.6875, "step": 16351 }, { "epoch": 1.086006508600651, "grad_norm": 186.68917846679688, "learning_rate": 9.0945786804245e-07, "loss": 12.875, "step": 16352 }, { "epoch": 1.0860729228930066, "grad_norm": 164.63624572753906, "learning_rate": 9.09350758365802e-07, "loss": 15.2344, "step": 16353 }, { "epoch": 1.0861393371853623, "grad_norm": 244.09605407714844, "learning_rate": 9.092436497377313e-07, "loss": 14.4844, "step": 16354 }, { "epoch": 1.086205751477718, "grad_norm": 118.67916870117188, "learning_rate": 9.091365421594784e-07, "loss": 13.6797, "step": 16355 }, { "epoch": 1.0862721657700738, "grad_norm": 205.72659301757812, "learning_rate": 9.090294356322813e-07, "loss": 14.6562, "step": 16356 }, { "epoch": 1.0863385800624294, "grad_norm": 153.89224243164062, "learning_rate": 9.089223301573789e-07, "loss": 18.1562, "step": 16357 }, { "epoch": 1.086404994354785, "grad_norm": 115.78285217285156, "learning_rate": 9.088152257360111e-07, "loss": 16.7812, "step": 16358 }, { "epoch": 1.086471408647141, "grad_norm": 209.8332061767578, "learning_rate": 9.087081223694158e-07, "loss": 25.0625, "step": 16359 }, { "epoch": 1.0865378229394966, "grad_norm": 496.5682678222656, "learning_rate": 9.086010200588326e-07, "loss": 26.1875, "step": 16360 }, { "epoch": 1.0866042372318523, "grad_norm": 318.39202880859375, "learning_rate": 9.084939188054998e-07, "loss": 17.3438, "step": 16361 }, { "epoch": 1.086670651524208, "grad_norm": 171.92295837402344, "learning_rate": 9.083868186106568e-07, "loss": 17.0469, "step": 16362 }, { "epoch": 1.0867370658165638, "grad_norm": 551.1101684570312, "learning_rate": 9.08279719475542e-07, "loss": 23.5, "step": 16363 }, { "epoch": 1.0868034801089195, "grad_norm": 176.33737182617188, "learning_rate": 9.08172621401395e-07, "loss": 12.9062, "step": 16364 }, { "epoch": 1.0868698944012751, "grad_norm": 193.7661895751953, "learning_rate": 9.080655243894535e-07, "loss": 20.6094, "step": 16365 }, { "epoch": 1.0869363086936308, "grad_norm": 229.00611877441406, "learning_rate": 9.079584284409575e-07, "loss": 16.0625, "step": 16366 }, { "epoch": 1.0870027229859867, "grad_norm": 82.95240020751953, "learning_rate": 9.078513335571451e-07, "loss": 18.2344, "step": 16367 }, { "epoch": 1.0870691372783423, "grad_norm": 282.3662109375, "learning_rate": 9.077442397392556e-07, "loss": 25.4375, "step": 16368 }, { "epoch": 1.087135551570698, "grad_norm": 213.4114227294922, "learning_rate": 9.07637146988527e-07, "loss": 15.1719, "step": 16369 }, { "epoch": 1.0872019658630538, "grad_norm": 372.52215576171875, "learning_rate": 9.075300553061992e-07, "loss": 18.3906, "step": 16370 }, { "epoch": 1.0872683801554095, "grad_norm": 177.61318969726562, "learning_rate": 9.074229646935101e-07, "loss": 10.9688, "step": 16371 }, { "epoch": 1.0873347944477652, "grad_norm": 545.9898681640625, "learning_rate": 9.073158751516989e-07, "loss": 25.3125, "step": 16372 }, { "epoch": 1.0874012087401208, "grad_norm": 143.2882537841797, "learning_rate": 9.07208786682004e-07, "loss": 24.3438, "step": 16373 }, { "epoch": 1.0874676230324767, "grad_norm": 309.3597412109375, "learning_rate": 9.071016992856648e-07, "loss": 14.1016, "step": 16374 }, { "epoch": 1.0875340373248323, "grad_norm": 448.3187255859375, "learning_rate": 9.069946129639189e-07, "loss": 23.0, "step": 16375 }, { "epoch": 1.087600451617188, "grad_norm": 163.58224487304688, "learning_rate": 9.068875277180062e-07, "loss": 13.6719, "step": 16376 }, { "epoch": 1.0876668659095436, "grad_norm": 209.88009643554688, "learning_rate": 9.067804435491649e-07, "loss": 22.7188, "step": 16377 }, { "epoch": 1.0877332802018995, "grad_norm": 299.51953125, "learning_rate": 9.066733604586335e-07, "loss": 12.6406, "step": 16378 }, { "epoch": 1.0877996944942552, "grad_norm": 295.41937255859375, "learning_rate": 9.065662784476507e-07, "loss": 21.3125, "step": 16379 }, { "epoch": 1.0878661087866108, "grad_norm": 965.758544921875, "learning_rate": 9.064591975174556e-07, "loss": 13.0312, "step": 16380 }, { "epoch": 1.0879325230789667, "grad_norm": 115.7275390625, "learning_rate": 9.063521176692863e-07, "loss": 12.7656, "step": 16381 }, { "epoch": 1.0879989373713224, "grad_norm": 220.06585693359375, "learning_rate": 9.062450389043821e-07, "loss": 17.1719, "step": 16382 }, { "epoch": 1.088065351663678, "grad_norm": 186.919189453125, "learning_rate": 9.061379612239807e-07, "loss": 18.2812, "step": 16383 }, { "epoch": 1.0881317659560337, "grad_norm": 136.79335021972656, "learning_rate": 9.060308846293217e-07, "loss": 16.1562, "step": 16384 }, { "epoch": 1.0881981802483895, "grad_norm": 193.79090881347656, "learning_rate": 9.059238091216429e-07, "loss": 17.2656, "step": 16385 }, { "epoch": 1.0882645945407452, "grad_norm": 458.13525390625, "learning_rate": 9.058167347021835e-07, "loss": 14.9688, "step": 16386 }, { "epoch": 1.0883310088331009, "grad_norm": 327.0042419433594, "learning_rate": 9.057096613721815e-07, "loss": 16.1406, "step": 16387 }, { "epoch": 1.0883974231254565, "grad_norm": 192.47097778320312, "learning_rate": 9.056025891328761e-07, "loss": 17.2969, "step": 16388 }, { "epoch": 1.0884638374178124, "grad_norm": 422.97265625, "learning_rate": 9.054955179855053e-07, "loss": 21.1562, "step": 16389 }, { "epoch": 1.088530251710168, "grad_norm": 316.59014892578125, "learning_rate": 9.053884479313081e-07, "loss": 20.2812, "step": 16390 }, { "epoch": 1.0885966660025237, "grad_norm": 161.1140594482422, "learning_rate": 9.052813789715224e-07, "loss": 23.3125, "step": 16391 }, { "epoch": 1.0886630802948796, "grad_norm": 159.79408264160156, "learning_rate": 9.051743111073872e-07, "loss": 17.8438, "step": 16392 }, { "epoch": 1.0887294945872352, "grad_norm": 256.49468994140625, "learning_rate": 9.050672443401411e-07, "loss": 21.1875, "step": 16393 }, { "epoch": 1.0887959088795909, "grad_norm": 257.2696533203125, "learning_rate": 9.04960178671022e-07, "loss": 10.8594, "step": 16394 }, { "epoch": 1.0888623231719465, "grad_norm": 566.9310913085938, "learning_rate": 9.048531141012694e-07, "loss": 15.1094, "step": 16395 }, { "epoch": 1.0889287374643024, "grad_norm": 214.4571075439453, "learning_rate": 9.047460506321204e-07, "loss": 13.25, "step": 16396 }, { "epoch": 1.088995151756658, "grad_norm": 179.7542266845703, "learning_rate": 9.046389882648146e-07, "loss": 14.7266, "step": 16397 }, { "epoch": 1.0890615660490137, "grad_norm": 215.093505859375, "learning_rate": 9.045319270005899e-07, "loss": 17.4062, "step": 16398 }, { "epoch": 1.0891279803413694, "grad_norm": 181.11578369140625, "learning_rate": 9.044248668406849e-07, "loss": 13.4375, "step": 16399 }, { "epoch": 1.0891943946337252, "grad_norm": 417.22015380859375, "learning_rate": 9.043178077863376e-07, "loss": 19.5, "step": 16400 }, { "epoch": 1.089260808926081, "grad_norm": 191.6075439453125, "learning_rate": 9.042107498387871e-07, "loss": 17.625, "step": 16401 }, { "epoch": 1.0893272232184366, "grad_norm": 425.81292724609375, "learning_rate": 9.041036929992711e-07, "loss": 21.9844, "step": 16402 }, { "epoch": 1.0893936375107924, "grad_norm": 148.34417724609375, "learning_rate": 9.039966372690286e-07, "loss": 13.0156, "step": 16403 }, { "epoch": 1.089460051803148, "grad_norm": 202.00607299804688, "learning_rate": 9.038895826492973e-07, "loss": 18.3906, "step": 16404 }, { "epoch": 1.0895264660955037, "grad_norm": 408.6070861816406, "learning_rate": 9.037825291413163e-07, "loss": 22.5625, "step": 16405 }, { "epoch": 1.0895928803878594, "grad_norm": 140.5023956298828, "learning_rate": 9.036754767463232e-07, "loss": 16.5078, "step": 16406 }, { "epoch": 1.0896592946802153, "grad_norm": 344.6851806640625, "learning_rate": 9.035684254655569e-07, "loss": 13.0625, "step": 16407 }, { "epoch": 1.089725708972571, "grad_norm": 94.74759674072266, "learning_rate": 9.034613753002549e-07, "loss": 12.8594, "step": 16408 }, { "epoch": 1.0897921232649266, "grad_norm": 194.04258728027344, "learning_rate": 9.033543262516567e-07, "loss": 18.125, "step": 16409 }, { "epoch": 1.0898585375572822, "grad_norm": 274.57147216796875, "learning_rate": 9.032472783209995e-07, "loss": 23.6875, "step": 16410 }, { "epoch": 1.0899249518496381, "grad_norm": 207.06500244140625, "learning_rate": 9.031402315095223e-07, "loss": 28.0312, "step": 16411 }, { "epoch": 1.0899913661419938, "grad_norm": 138.1768798828125, "learning_rate": 9.030331858184627e-07, "loss": 15.4062, "step": 16412 }, { "epoch": 1.0900577804343494, "grad_norm": 209.1012725830078, "learning_rate": 9.029261412490595e-07, "loss": 15.6562, "step": 16413 }, { "epoch": 1.0901241947267053, "grad_norm": 2081.17822265625, "learning_rate": 9.028190978025507e-07, "loss": 17.0469, "step": 16414 }, { "epoch": 1.090190609019061, "grad_norm": 180.02597045898438, "learning_rate": 9.027120554801747e-07, "loss": 13.9062, "step": 16415 }, { "epoch": 1.0902570233114166, "grad_norm": 348.5206298828125, "learning_rate": 9.026050142831694e-07, "loss": 17.5625, "step": 16416 }, { "epoch": 1.0903234376037723, "grad_norm": 95.10228729248047, "learning_rate": 9.024979742127732e-07, "loss": 14.5469, "step": 16417 }, { "epoch": 1.0903898518961281, "grad_norm": 303.78900146484375, "learning_rate": 9.023909352702239e-07, "loss": 15.25, "step": 16418 }, { "epoch": 1.0904562661884838, "grad_norm": 119.19649505615234, "learning_rate": 9.022838974567604e-07, "loss": 12.8281, "step": 16419 }, { "epoch": 1.0905226804808394, "grad_norm": 262.3196716308594, "learning_rate": 9.021768607736201e-07, "loss": 11.9922, "step": 16420 }, { "epoch": 1.090589094773195, "grad_norm": 164.97140502929688, "learning_rate": 9.020698252220418e-07, "loss": 14.7656, "step": 16421 }, { "epoch": 1.090655509065551, "grad_norm": 196.53485107421875, "learning_rate": 9.019627908032628e-07, "loss": 18.8125, "step": 16422 }, { "epoch": 1.0907219233579066, "grad_norm": 155.82553100585938, "learning_rate": 9.018557575185221e-07, "loss": 12.75, "step": 16423 }, { "epoch": 1.0907883376502623, "grad_norm": 183.13575744628906, "learning_rate": 9.017487253690574e-07, "loss": 12.75, "step": 16424 }, { "epoch": 1.0908547519426182, "grad_norm": 107.82496643066406, "learning_rate": 9.016416943561068e-07, "loss": 10.6875, "step": 16425 }, { "epoch": 1.0909211662349738, "grad_norm": 158.37303161621094, "learning_rate": 9.01534664480908e-07, "loss": 14.2812, "step": 16426 }, { "epoch": 1.0909875805273295, "grad_norm": 483.19793701171875, "learning_rate": 9.014276357446999e-07, "loss": 22.2812, "step": 16427 }, { "epoch": 1.0910539948196851, "grad_norm": 131.0116729736328, "learning_rate": 9.013206081487197e-07, "loss": 18.0625, "step": 16428 }, { "epoch": 1.091120409112041, "grad_norm": 224.08538818359375, "learning_rate": 9.012135816942057e-07, "loss": 15.9062, "step": 16429 }, { "epoch": 1.0911868234043967, "grad_norm": 217.77915954589844, "learning_rate": 9.011065563823964e-07, "loss": 16.5, "step": 16430 }, { "epoch": 1.0912532376967523, "grad_norm": 105.23603820800781, "learning_rate": 9.009995322145292e-07, "loss": 14.4219, "step": 16431 }, { "epoch": 1.091319651989108, "grad_norm": 204.13661193847656, "learning_rate": 9.008925091918427e-07, "loss": 16.4062, "step": 16432 }, { "epoch": 1.0913860662814638, "grad_norm": 311.99188232421875, "learning_rate": 9.007854873155739e-07, "loss": 18.5156, "step": 16433 }, { "epoch": 1.0914524805738195, "grad_norm": 305.10858154296875, "learning_rate": 9.006784665869618e-07, "loss": 23.0, "step": 16434 }, { "epoch": 1.0915188948661751, "grad_norm": 263.5401306152344, "learning_rate": 9.005714470072438e-07, "loss": 14.9219, "step": 16435 }, { "epoch": 1.091585309158531, "grad_norm": 237.08627319335938, "learning_rate": 9.004644285776582e-07, "loss": 14.3594, "step": 16436 }, { "epoch": 1.0916517234508867, "grad_norm": 213.3809356689453, "learning_rate": 9.003574112994422e-07, "loss": 20.5625, "step": 16437 }, { "epoch": 1.0917181377432423, "grad_norm": 219.57565307617188, "learning_rate": 9.002503951738349e-07, "loss": 19.1719, "step": 16438 }, { "epoch": 1.091784552035598, "grad_norm": 261.5805969238281, "learning_rate": 9.001433802020728e-07, "loss": 18.4062, "step": 16439 }, { "epoch": 1.0918509663279539, "grad_norm": 186.5822296142578, "learning_rate": 9.00036366385395e-07, "loss": 17.0, "step": 16440 }, { "epoch": 1.0919173806203095, "grad_norm": 748.5916137695312, "learning_rate": 8.999293537250386e-07, "loss": 16.7344, "step": 16441 }, { "epoch": 1.0919837949126652, "grad_norm": 253.76304626464844, "learning_rate": 8.99822342222242e-07, "loss": 15.5625, "step": 16442 }, { "epoch": 1.0920502092050208, "grad_norm": 241.88087463378906, "learning_rate": 8.997153318782424e-07, "loss": 14.9375, "step": 16443 }, { "epoch": 1.0921166234973767, "grad_norm": 434.6732482910156, "learning_rate": 8.996083226942783e-07, "loss": 22.5312, "step": 16444 }, { "epoch": 1.0921830377897324, "grad_norm": 323.7601013183594, "learning_rate": 8.99501314671587e-07, "loss": 17.4844, "step": 16445 }, { "epoch": 1.092249452082088, "grad_norm": 169.10366821289062, "learning_rate": 8.993943078114069e-07, "loss": 20.0, "step": 16446 }, { "epoch": 1.092315866374444, "grad_norm": 171.98902893066406, "learning_rate": 8.99287302114975e-07, "loss": 14.9375, "step": 16447 }, { "epoch": 1.0923822806667995, "grad_norm": 339.76385498046875, "learning_rate": 8.991802975835299e-07, "loss": 15.9844, "step": 16448 }, { "epoch": 1.0924486949591552, "grad_norm": 220.08096313476562, "learning_rate": 8.990732942183087e-07, "loss": 17.6719, "step": 16449 }, { "epoch": 1.0925151092515109, "grad_norm": 260.09173583984375, "learning_rate": 8.989662920205498e-07, "loss": 14.5234, "step": 16450 }, { "epoch": 1.0925815235438667, "grad_norm": 186.80853271484375, "learning_rate": 8.9885929099149e-07, "loss": 19.2344, "step": 16451 }, { "epoch": 1.0926479378362224, "grad_norm": 146.38902282714844, "learning_rate": 8.987522911323681e-07, "loss": 14.2344, "step": 16452 }, { "epoch": 1.092714352128578, "grad_norm": 476.3466796875, "learning_rate": 8.98645292444421e-07, "loss": 28.0938, "step": 16453 }, { "epoch": 1.0927807664209337, "grad_norm": 188.81646728515625, "learning_rate": 8.98538294928887e-07, "loss": 14.4688, "step": 16454 }, { "epoch": 1.0928471807132896, "grad_norm": 142.26937866210938, "learning_rate": 8.98431298587003e-07, "loss": 20.75, "step": 16455 }, { "epoch": 1.0929135950056452, "grad_norm": 223.22071838378906, "learning_rate": 8.983243034200078e-07, "loss": 11.4844, "step": 16456 }, { "epoch": 1.0929800092980009, "grad_norm": 323.17974853515625, "learning_rate": 8.982173094291381e-07, "loss": 16.5, "step": 16457 }, { "epoch": 1.0930464235903568, "grad_norm": 262.3351135253906, "learning_rate": 8.981103166156321e-07, "loss": 16.7188, "step": 16458 }, { "epoch": 1.0931128378827124, "grad_norm": 147.6505889892578, "learning_rate": 8.980033249807268e-07, "loss": 14.4062, "step": 16459 }, { "epoch": 1.093179252175068, "grad_norm": 188.63934326171875, "learning_rate": 8.978963345256608e-07, "loss": 13.9219, "step": 16460 }, { "epoch": 1.0932456664674237, "grad_norm": 140.24143981933594, "learning_rate": 8.977893452516704e-07, "loss": 18.8594, "step": 16461 }, { "epoch": 1.0933120807597796, "grad_norm": 170.66490173339844, "learning_rate": 8.976823571599944e-07, "loss": 15.0859, "step": 16462 }, { "epoch": 1.0933784950521352, "grad_norm": 167.6591033935547, "learning_rate": 8.975753702518698e-07, "loss": 13.25, "step": 16463 }, { "epoch": 1.093444909344491, "grad_norm": 157.82467651367188, "learning_rate": 8.974683845285343e-07, "loss": 13.9688, "step": 16464 }, { "epoch": 1.0935113236368466, "grad_norm": 89.01476287841797, "learning_rate": 8.973613999912251e-07, "loss": 14.4062, "step": 16465 }, { "epoch": 1.0935777379292024, "grad_norm": 171.26129150390625, "learning_rate": 8.972544166411803e-07, "loss": 15.0312, "step": 16466 }, { "epoch": 1.093644152221558, "grad_norm": 414.08441162109375, "learning_rate": 8.971474344796372e-07, "loss": 11.4688, "step": 16467 }, { "epoch": 1.0937105665139137, "grad_norm": 213.43919372558594, "learning_rate": 8.970404535078329e-07, "loss": 14.9531, "step": 16468 }, { "epoch": 1.0937769808062696, "grad_norm": 222.70269775390625, "learning_rate": 8.969334737270058e-07, "loss": 24.4375, "step": 16469 }, { "epoch": 1.0938433950986253, "grad_norm": 423.09490966796875, "learning_rate": 8.968264951383926e-07, "loss": 13.1641, "step": 16470 }, { "epoch": 1.093909809390981, "grad_norm": 133.2134552001953, "learning_rate": 8.967195177432312e-07, "loss": 13.7969, "step": 16471 }, { "epoch": 1.0939762236833366, "grad_norm": 255.70693969726562, "learning_rate": 8.966125415427584e-07, "loss": 16.9062, "step": 16472 }, { "epoch": 1.0940426379756925, "grad_norm": 279.5254821777344, "learning_rate": 8.965055665382126e-07, "loss": 15.4375, "step": 16473 }, { "epoch": 1.094109052268048, "grad_norm": 136.25375366210938, "learning_rate": 8.963985927308304e-07, "loss": 18.4375, "step": 16474 }, { "epoch": 1.0941754665604038, "grad_norm": 161.24380493164062, "learning_rate": 8.962916201218499e-07, "loss": 14.3125, "step": 16475 }, { "epoch": 1.0942418808527594, "grad_norm": 352.0204772949219, "learning_rate": 8.961846487125076e-07, "loss": 17.8125, "step": 16476 }, { "epoch": 1.0943082951451153, "grad_norm": 130.2764129638672, "learning_rate": 8.960776785040418e-07, "loss": 13.2812, "step": 16477 }, { "epoch": 1.094374709437471, "grad_norm": 356.40216064453125, "learning_rate": 8.959707094976894e-07, "loss": 21.7812, "step": 16478 }, { "epoch": 1.0944411237298266, "grad_norm": 229.4128875732422, "learning_rate": 8.958637416946881e-07, "loss": 23.9531, "step": 16479 }, { "epoch": 1.0945075380221825, "grad_norm": 414.12841796875, "learning_rate": 8.957567750962746e-07, "loss": 20.9375, "step": 16480 }, { "epoch": 1.0945739523145381, "grad_norm": 337.2123107910156, "learning_rate": 8.95649809703687e-07, "loss": 20.2188, "step": 16481 }, { "epoch": 1.0946403666068938, "grad_norm": 218.69134521484375, "learning_rate": 8.955428455181616e-07, "loss": 17.8594, "step": 16482 }, { "epoch": 1.0947067808992494, "grad_norm": 115.51134490966797, "learning_rate": 8.954358825409369e-07, "loss": 15.5781, "step": 16483 }, { "epoch": 1.0947731951916053, "grad_norm": 432.49932861328125, "learning_rate": 8.953289207732494e-07, "loss": 18.2812, "step": 16484 }, { "epoch": 1.094839609483961, "grad_norm": 678.375244140625, "learning_rate": 8.952219602163368e-07, "loss": 11.5469, "step": 16485 }, { "epoch": 1.0949060237763166, "grad_norm": 170.04986572265625, "learning_rate": 8.951150008714357e-07, "loss": 21.9844, "step": 16486 }, { "epoch": 1.0949724380686723, "grad_norm": 171.9573211669922, "learning_rate": 8.950080427397841e-07, "loss": 17.6094, "step": 16487 }, { "epoch": 1.0950388523610282, "grad_norm": 412.2090759277344, "learning_rate": 8.949010858226189e-07, "loss": 19.6875, "step": 16488 }, { "epoch": 1.0951052666533838, "grad_norm": 244.78753662109375, "learning_rate": 8.947941301211775e-07, "loss": 22.375, "step": 16489 }, { "epoch": 1.0951716809457395, "grad_norm": 166.25819396972656, "learning_rate": 8.946871756366965e-07, "loss": 14.1094, "step": 16490 }, { "epoch": 1.0952380952380953, "grad_norm": 432.4112854003906, "learning_rate": 8.945802223704139e-07, "loss": 18.0938, "step": 16491 }, { "epoch": 1.095304509530451, "grad_norm": 148.18589782714844, "learning_rate": 8.944732703235664e-07, "loss": 17.9844, "step": 16492 }, { "epoch": 1.0953709238228067, "grad_norm": 225.13848876953125, "learning_rate": 8.943663194973914e-07, "loss": 18.625, "step": 16493 }, { "epoch": 1.0954373381151623, "grad_norm": 416.00213623046875, "learning_rate": 8.942593698931254e-07, "loss": 19.7812, "step": 16494 }, { "epoch": 1.0955037524075182, "grad_norm": 103.80288696289062, "learning_rate": 8.941524215120066e-07, "loss": 14.2031, "step": 16495 }, { "epoch": 1.0955701666998738, "grad_norm": 241.87783813476562, "learning_rate": 8.940454743552713e-07, "loss": 19.5625, "step": 16496 }, { "epoch": 1.0956365809922295, "grad_norm": 296.4524230957031, "learning_rate": 8.93938528424157e-07, "loss": 17.0469, "step": 16497 }, { "epoch": 1.0957029952845851, "grad_norm": 216.63421630859375, "learning_rate": 8.938315837199002e-07, "loss": 10.8828, "step": 16498 }, { "epoch": 1.095769409576941, "grad_norm": 131.87306213378906, "learning_rate": 8.937246402437391e-07, "loss": 14.7031, "step": 16499 }, { "epoch": 1.0958358238692967, "grad_norm": 194.70106506347656, "learning_rate": 8.936176979969094e-07, "loss": 16.9375, "step": 16500 }, { "epoch": 1.0959022381616523, "grad_norm": 395.6005554199219, "learning_rate": 8.935107569806494e-07, "loss": 13.8594, "step": 16501 }, { "epoch": 1.0959686524540082, "grad_norm": 295.4632873535156, "learning_rate": 8.934038171961952e-07, "loss": 12.9766, "step": 16502 }, { "epoch": 1.0960350667463639, "grad_norm": 303.738525390625, "learning_rate": 8.932968786447841e-07, "loss": 27.1562, "step": 16503 }, { "epoch": 1.0961014810387195, "grad_norm": 108.08613586425781, "learning_rate": 8.931899413276534e-07, "loss": 15.1719, "step": 16504 }, { "epoch": 1.0961678953310752, "grad_norm": 182.6719970703125, "learning_rate": 8.930830052460398e-07, "loss": 16.5469, "step": 16505 }, { "epoch": 1.096234309623431, "grad_norm": 192.6095428466797, "learning_rate": 8.929760704011806e-07, "loss": 14.4531, "step": 16506 }, { "epoch": 1.0963007239157867, "grad_norm": 128.34921264648438, "learning_rate": 8.928691367943119e-07, "loss": 13.7812, "step": 16507 }, { "epoch": 1.0963671382081424, "grad_norm": 176.80125427246094, "learning_rate": 8.927622044266718e-07, "loss": 14.2031, "step": 16508 }, { "epoch": 1.096433552500498, "grad_norm": 352.7082214355469, "learning_rate": 8.926552732994966e-07, "loss": 13.4062, "step": 16509 }, { "epoch": 1.0964999667928539, "grad_norm": 409.0644836425781, "learning_rate": 8.925483434140233e-07, "loss": 9.5312, "step": 16510 }, { "epoch": 1.0965663810852095, "grad_norm": 324.8294982910156, "learning_rate": 8.924414147714885e-07, "loss": 19.7969, "step": 16511 }, { "epoch": 1.0966327953775652, "grad_norm": 214.77330017089844, "learning_rate": 8.923344873731297e-07, "loss": 16.4844, "step": 16512 }, { "epoch": 1.096699209669921, "grad_norm": 154.6839141845703, "learning_rate": 8.922275612201834e-07, "loss": 13.0312, "step": 16513 }, { "epoch": 1.0967656239622767, "grad_norm": 183.2167510986328, "learning_rate": 8.921206363138866e-07, "loss": 16.4844, "step": 16514 }, { "epoch": 1.0968320382546324, "grad_norm": 158.0415496826172, "learning_rate": 8.920137126554757e-07, "loss": 19.2188, "step": 16515 }, { "epoch": 1.096898452546988, "grad_norm": 299.7941589355469, "learning_rate": 8.919067902461883e-07, "loss": 24.3438, "step": 16516 }, { "epoch": 1.096964866839344, "grad_norm": 245.6236114501953, "learning_rate": 8.917998690872607e-07, "loss": 17.7812, "step": 16517 }, { "epoch": 1.0970312811316996, "grad_norm": 397.8664855957031, "learning_rate": 8.9169294917993e-07, "loss": 13.1094, "step": 16518 }, { "epoch": 1.0970976954240552, "grad_norm": 228.89398193359375, "learning_rate": 8.915860305254323e-07, "loss": 17.2969, "step": 16519 }, { "epoch": 1.0971641097164109, "grad_norm": 145.4934539794922, "learning_rate": 8.914791131250056e-07, "loss": 18.5938, "step": 16520 }, { "epoch": 1.0972305240087667, "grad_norm": 216.6407012939453, "learning_rate": 8.913721969798853e-07, "loss": 16.2656, "step": 16521 }, { "epoch": 1.0972969383011224, "grad_norm": 159.43150329589844, "learning_rate": 8.912652820913092e-07, "loss": 12.9531, "step": 16522 }, { "epoch": 1.097363352593478, "grad_norm": 128.52020263671875, "learning_rate": 8.911583684605134e-07, "loss": 15.8281, "step": 16523 }, { "epoch": 1.097429766885834, "grad_norm": 186.46624755859375, "learning_rate": 8.910514560887351e-07, "loss": 18.2969, "step": 16524 }, { "epoch": 1.0974961811781896, "grad_norm": 160.07652282714844, "learning_rate": 8.909445449772102e-07, "loss": 23.0781, "step": 16525 }, { "epoch": 1.0975625954705452, "grad_norm": 1217.1375732421875, "learning_rate": 8.908376351271764e-07, "loss": 18.9375, "step": 16526 }, { "epoch": 1.097629009762901, "grad_norm": 175.05833435058594, "learning_rate": 8.907307265398697e-07, "loss": 13.6094, "step": 16527 }, { "epoch": 1.0976954240552568, "grad_norm": 138.0218505859375, "learning_rate": 8.906238192165271e-07, "loss": 13.4844, "step": 16528 }, { "epoch": 1.0977618383476124, "grad_norm": 137.7305145263672, "learning_rate": 8.905169131583847e-07, "loss": 17.4531, "step": 16529 }, { "epoch": 1.097828252639968, "grad_norm": 357.7722473144531, "learning_rate": 8.9041000836668e-07, "loss": 21.9844, "step": 16530 }, { "epoch": 1.0978946669323237, "grad_norm": 117.91565704345703, "learning_rate": 8.903031048426488e-07, "loss": 13.5, "step": 16531 }, { "epoch": 1.0979610812246796, "grad_norm": 356.8074951171875, "learning_rate": 8.901962025875284e-07, "loss": 19.875, "step": 16532 }, { "epoch": 1.0980274955170353, "grad_norm": 415.20184326171875, "learning_rate": 8.900893016025545e-07, "loss": 25.7188, "step": 16533 }, { "epoch": 1.098093909809391, "grad_norm": 144.33038330078125, "learning_rate": 8.899824018889645e-07, "loss": 12.9688, "step": 16534 }, { "epoch": 1.0981603241017468, "grad_norm": 148.83499145507812, "learning_rate": 8.898755034479946e-07, "loss": 16.875, "step": 16535 }, { "epoch": 1.0982267383941025, "grad_norm": 536.9830932617188, "learning_rate": 8.897686062808815e-07, "loss": 19.0, "step": 16536 }, { "epoch": 1.098293152686458, "grad_norm": 166.5380859375, "learning_rate": 8.896617103888611e-07, "loss": 15.3281, "step": 16537 }, { "epoch": 1.0983595669788138, "grad_norm": 242.7857666015625, "learning_rate": 8.89554815773171e-07, "loss": 17.0625, "step": 16538 }, { "epoch": 1.0984259812711696, "grad_norm": 244.2813720703125, "learning_rate": 8.894479224350468e-07, "loss": 16.1562, "step": 16539 }, { "epoch": 1.0984923955635253, "grad_norm": 315.624267578125, "learning_rate": 8.89341030375725e-07, "loss": 15.8906, "step": 16540 }, { "epoch": 1.098558809855881, "grad_norm": 123.18913269042969, "learning_rate": 8.892341395964429e-07, "loss": 11.2344, "step": 16541 }, { "epoch": 1.0986252241482366, "grad_norm": 135.6481475830078, "learning_rate": 8.891272500984362e-07, "loss": 21.2344, "step": 16542 }, { "epoch": 1.0986916384405925, "grad_norm": 430.2320251464844, "learning_rate": 8.890203618829417e-07, "loss": 20.2188, "step": 16543 }, { "epoch": 1.0987580527329481, "grad_norm": 249.4479217529297, "learning_rate": 8.889134749511954e-07, "loss": 31.2188, "step": 16544 }, { "epoch": 1.0988244670253038, "grad_norm": 308.2723388671875, "learning_rate": 8.888065893044342e-07, "loss": 21.1562, "step": 16545 }, { "epoch": 1.0988908813176597, "grad_norm": 288.70330810546875, "learning_rate": 8.886997049438939e-07, "loss": 27.1719, "step": 16546 }, { "epoch": 1.0989572956100153, "grad_norm": 182.1138458251953, "learning_rate": 8.885928218708117e-07, "loss": 14.6953, "step": 16547 }, { "epoch": 1.099023709902371, "grad_norm": 434.64453125, "learning_rate": 8.884859400864231e-07, "loss": 15.6094, "step": 16548 }, { "epoch": 1.0990901241947266, "grad_norm": 118.74131774902344, "learning_rate": 8.883790595919652e-07, "loss": 16.1641, "step": 16549 }, { "epoch": 1.0991565384870825, "grad_norm": 264.3023986816406, "learning_rate": 8.882721803886735e-07, "loss": 30.9844, "step": 16550 }, { "epoch": 1.0992229527794382, "grad_norm": 213.79910278320312, "learning_rate": 8.881653024777852e-07, "loss": 11.6719, "step": 16551 }, { "epoch": 1.0992893670717938, "grad_norm": 140.5330352783203, "learning_rate": 8.880584258605359e-07, "loss": 13.1094, "step": 16552 }, { "epoch": 1.0993557813641495, "grad_norm": 171.20448303222656, "learning_rate": 8.879515505381625e-07, "loss": 20.5, "step": 16553 }, { "epoch": 1.0994221956565053, "grad_norm": 256.0195617675781, "learning_rate": 8.878446765119005e-07, "loss": 14.3438, "step": 16554 }, { "epoch": 1.099488609948861, "grad_norm": 243.54400634765625, "learning_rate": 8.877378037829871e-07, "loss": 15.8125, "step": 16555 }, { "epoch": 1.0995550242412166, "grad_norm": 259.607177734375, "learning_rate": 8.876309323526579e-07, "loss": 14.7188, "step": 16556 }, { "epoch": 1.0996214385335725, "grad_norm": 174.1615753173828, "learning_rate": 8.875240622221492e-07, "loss": 17.8281, "step": 16557 }, { "epoch": 1.0996878528259282, "grad_norm": 101.91706848144531, "learning_rate": 8.874171933926972e-07, "loss": 13.4062, "step": 16558 }, { "epoch": 1.0997542671182838, "grad_norm": 168.81130981445312, "learning_rate": 8.873103258655384e-07, "loss": 16.1719, "step": 16559 }, { "epoch": 1.0998206814106395, "grad_norm": 133.6903076171875, "learning_rate": 8.872034596419087e-07, "loss": 11.3281, "step": 16560 }, { "epoch": 1.0998870957029954, "grad_norm": 282.4571228027344, "learning_rate": 8.870965947230445e-07, "loss": 16.6875, "step": 16561 }, { "epoch": 1.099953509995351, "grad_norm": 127.46701049804688, "learning_rate": 8.869897311101813e-07, "loss": 16.0156, "step": 16562 }, { "epoch": 1.1000199242877067, "grad_norm": 142.22177124023438, "learning_rate": 8.868828688045563e-07, "loss": 13.5312, "step": 16563 }, { "epoch": 1.1000863385800623, "grad_norm": 150.51443481445312, "learning_rate": 8.867760078074044e-07, "loss": 15.8594, "step": 16564 }, { "epoch": 1.1001527528724182, "grad_norm": 399.3026123046875, "learning_rate": 8.86669148119963e-07, "loss": 14.5312, "step": 16565 }, { "epoch": 1.1002191671647739, "grad_norm": 205.40916442871094, "learning_rate": 8.865622897434673e-07, "loss": 17.4219, "step": 16566 }, { "epoch": 1.1002855814571295, "grad_norm": 253.28292846679688, "learning_rate": 8.864554326791537e-07, "loss": 17.0156, "step": 16567 }, { "epoch": 1.1003519957494854, "grad_norm": 174.54071044921875, "learning_rate": 8.863485769282577e-07, "loss": 21.7188, "step": 16568 }, { "epoch": 1.100418410041841, "grad_norm": 224.04209899902344, "learning_rate": 8.862417224920165e-07, "loss": 17.3281, "step": 16569 }, { "epoch": 1.1004848243341967, "grad_norm": 451.56951904296875, "learning_rate": 8.861348693716651e-07, "loss": 19.3906, "step": 16570 }, { "epoch": 1.1005512386265524, "grad_norm": 207.29827880859375, "learning_rate": 8.860280175684401e-07, "loss": 15.6094, "step": 16571 }, { "epoch": 1.1006176529189082, "grad_norm": 283.9063720703125, "learning_rate": 8.859211670835767e-07, "loss": 13.7344, "step": 16572 }, { "epoch": 1.1006840672112639, "grad_norm": 230.42153930664062, "learning_rate": 8.85814317918312e-07, "loss": 15.7344, "step": 16573 }, { "epoch": 1.1007504815036195, "grad_norm": 213.08547973632812, "learning_rate": 8.857074700738812e-07, "loss": 19.5938, "step": 16574 }, { "epoch": 1.1008168957959752, "grad_norm": 264.1019287109375, "learning_rate": 8.856006235515208e-07, "loss": 15.9219, "step": 16575 }, { "epoch": 1.100883310088331, "grad_norm": 205.3666534423828, "learning_rate": 8.854937783524659e-07, "loss": 21.2812, "step": 16576 }, { "epoch": 1.1009497243806867, "grad_norm": 148.8829803466797, "learning_rate": 8.853869344779532e-07, "loss": 19.0625, "step": 16577 }, { "epoch": 1.1010161386730424, "grad_norm": 158.45277404785156, "learning_rate": 8.852800919292184e-07, "loss": 15.8125, "step": 16578 }, { "epoch": 1.1010825529653983, "grad_norm": 227.27450561523438, "learning_rate": 8.851732507074971e-07, "loss": 15.7188, "step": 16579 }, { "epoch": 1.101148967257754, "grad_norm": 260.6698303222656, "learning_rate": 8.850664108140259e-07, "loss": 13.3594, "step": 16580 }, { "epoch": 1.1012153815501096, "grad_norm": 476.0931701660156, "learning_rate": 8.849595722500398e-07, "loss": 14.5312, "step": 16581 }, { "epoch": 1.1012817958424652, "grad_norm": 222.99537658691406, "learning_rate": 8.848527350167753e-07, "loss": 15.2344, "step": 16582 }, { "epoch": 1.101348210134821, "grad_norm": 207.6565704345703, "learning_rate": 8.847458991154677e-07, "loss": 16.4531, "step": 16583 }, { "epoch": 1.1014146244271767, "grad_norm": 193.93389892578125, "learning_rate": 8.846390645473536e-07, "loss": 15.3906, "step": 16584 }, { "epoch": 1.1014810387195324, "grad_norm": 196.52317810058594, "learning_rate": 8.845322313136676e-07, "loss": 14.8438, "step": 16585 }, { "epoch": 1.101547453011888, "grad_norm": 240.33648681640625, "learning_rate": 8.844253994156466e-07, "loss": 12.5625, "step": 16586 }, { "epoch": 1.101613867304244, "grad_norm": 177.7179412841797, "learning_rate": 8.843185688545258e-07, "loss": 14.9688, "step": 16587 }, { "epoch": 1.1016802815965996, "grad_norm": 156.6068115234375, "learning_rate": 8.842117396315412e-07, "loss": 15.25, "step": 16588 }, { "epoch": 1.1017466958889552, "grad_norm": 1559.2010498046875, "learning_rate": 8.84104911747928e-07, "loss": 14.8125, "step": 16589 }, { "epoch": 1.1018131101813111, "grad_norm": 169.51992797851562, "learning_rate": 8.839980852049229e-07, "loss": 18.7188, "step": 16590 }, { "epoch": 1.1018795244736668, "grad_norm": 112.0367202758789, "learning_rate": 8.838912600037607e-07, "loss": 15.6406, "step": 16591 }, { "epoch": 1.1019459387660224, "grad_norm": 185.6940460205078, "learning_rate": 8.837844361456777e-07, "loss": 13.7812, "step": 16592 }, { "epoch": 1.102012353058378, "grad_norm": 184.6626739501953, "learning_rate": 8.836776136319089e-07, "loss": 13.5625, "step": 16593 }, { "epoch": 1.102078767350734, "grad_norm": 193.46478271484375, "learning_rate": 8.83570792463691e-07, "loss": 21.3516, "step": 16594 }, { "epoch": 1.1021451816430896, "grad_norm": 102.789794921875, "learning_rate": 8.834639726422586e-07, "loss": 14.375, "step": 16595 }, { "epoch": 1.1022115959354453, "grad_norm": 219.58949279785156, "learning_rate": 8.833571541688481e-07, "loss": 16.4219, "step": 16596 }, { "epoch": 1.102278010227801, "grad_norm": 215.44483947753906, "learning_rate": 8.832503370446943e-07, "loss": 14.875, "step": 16597 }, { "epoch": 1.1023444245201568, "grad_norm": 355.9505615234375, "learning_rate": 8.831435212710337e-07, "loss": 19.9219, "step": 16598 }, { "epoch": 1.1024108388125125, "grad_norm": 149.027099609375, "learning_rate": 8.830367068491012e-07, "loss": 14.5781, "step": 16599 }, { "epoch": 1.102477253104868, "grad_norm": 166.49252319335938, "learning_rate": 8.82929893780133e-07, "loss": 13.2734, "step": 16600 }, { "epoch": 1.102543667397224, "grad_norm": 231.17893981933594, "learning_rate": 8.828230820653638e-07, "loss": 21.3281, "step": 16601 }, { "epoch": 1.1026100816895796, "grad_norm": 95.88373565673828, "learning_rate": 8.827162717060299e-07, "loss": 11.7344, "step": 16602 }, { "epoch": 1.1026764959819353, "grad_norm": 126.45299530029297, "learning_rate": 8.826094627033665e-07, "loss": 14.4844, "step": 16603 }, { "epoch": 1.102742910274291, "grad_norm": 289.25030517578125, "learning_rate": 8.825026550586092e-07, "loss": 13.5469, "step": 16604 }, { "epoch": 1.1028093245666468, "grad_norm": 191.66610717773438, "learning_rate": 8.823958487729931e-07, "loss": 15.2344, "step": 16605 }, { "epoch": 1.1028757388590025, "grad_norm": 249.81195068359375, "learning_rate": 8.822890438477546e-07, "loss": 12.3125, "step": 16606 }, { "epoch": 1.1029421531513581, "grad_norm": 307.1065368652344, "learning_rate": 8.82182240284128e-07, "loss": 15.6875, "step": 16607 }, { "epoch": 1.1030085674437138, "grad_norm": 174.6040496826172, "learning_rate": 8.820754380833497e-07, "loss": 16.1641, "step": 16608 }, { "epoch": 1.1030749817360697, "grad_norm": 140.22181701660156, "learning_rate": 8.819686372466547e-07, "loss": 16.7188, "step": 16609 }, { "epoch": 1.1031413960284253, "grad_norm": 146.1769256591797, "learning_rate": 8.818618377752784e-07, "loss": 15.9375, "step": 16610 }, { "epoch": 1.103207810320781, "grad_norm": 310.48333740234375, "learning_rate": 8.817550396704558e-07, "loss": 15.2969, "step": 16611 }, { "epoch": 1.1032742246131368, "grad_norm": 218.2689208984375, "learning_rate": 8.816482429334233e-07, "loss": 16.1094, "step": 16612 }, { "epoch": 1.1033406389054925, "grad_norm": 150.4641876220703, "learning_rate": 8.815414475654154e-07, "loss": 13.4531, "step": 16613 }, { "epoch": 1.1034070531978482, "grad_norm": 391.45294189453125, "learning_rate": 8.814346535676675e-07, "loss": 26.7812, "step": 16614 }, { "epoch": 1.1034734674902038, "grad_norm": 279.1976623535156, "learning_rate": 8.813278609414155e-07, "loss": 17.6719, "step": 16615 }, { "epoch": 1.1035398817825597, "grad_norm": 248.0396728515625, "learning_rate": 8.812210696878943e-07, "loss": 14.8906, "step": 16616 }, { "epoch": 1.1036062960749153, "grad_norm": 124.51411437988281, "learning_rate": 8.811142798083396e-07, "loss": 15.8281, "step": 16617 }, { "epoch": 1.103672710367271, "grad_norm": 226.50889587402344, "learning_rate": 8.810074913039857e-07, "loss": 18.4531, "step": 16618 }, { "epoch": 1.1037391246596266, "grad_norm": 132.9857940673828, "learning_rate": 8.809007041760689e-07, "loss": 14.9062, "step": 16619 }, { "epoch": 1.1038055389519825, "grad_norm": 251.80645751953125, "learning_rate": 8.807939184258241e-07, "loss": 19.0078, "step": 16620 }, { "epoch": 1.1038719532443382, "grad_norm": 198.91082763671875, "learning_rate": 8.806871340544866e-07, "loss": 13.1719, "step": 16621 }, { "epoch": 1.1039383675366938, "grad_norm": 149.6944580078125, "learning_rate": 8.805803510632911e-07, "loss": 15.2969, "step": 16622 }, { "epoch": 1.1040047818290497, "grad_norm": 138.20361328125, "learning_rate": 8.804735694534737e-07, "loss": 12.4844, "step": 16623 }, { "epoch": 1.1040711961214054, "grad_norm": 138.30349731445312, "learning_rate": 8.80366789226269e-07, "loss": 12.125, "step": 16624 }, { "epoch": 1.104137610413761, "grad_norm": 191.31402587890625, "learning_rate": 8.802600103829123e-07, "loss": 10.9453, "step": 16625 }, { "epoch": 1.1042040247061167, "grad_norm": 92.55276489257812, "learning_rate": 8.801532329246385e-07, "loss": 14.2812, "step": 16626 }, { "epoch": 1.1042704389984725, "grad_norm": 128.84913635253906, "learning_rate": 8.800464568526835e-07, "loss": 13.3906, "step": 16627 }, { "epoch": 1.1043368532908282, "grad_norm": 122.4549560546875, "learning_rate": 8.799396821682814e-07, "loss": 17.4219, "step": 16628 }, { "epoch": 1.1044032675831839, "grad_norm": 357.3239440917969, "learning_rate": 8.798329088726683e-07, "loss": 23.4844, "step": 16629 }, { "epoch": 1.1044696818755395, "grad_norm": 268.0614929199219, "learning_rate": 8.797261369670784e-07, "loss": 19.4688, "step": 16630 }, { "epoch": 1.1045360961678954, "grad_norm": 219.26734924316406, "learning_rate": 8.796193664527475e-07, "loss": 19.5938, "step": 16631 }, { "epoch": 1.104602510460251, "grad_norm": 361.543212890625, "learning_rate": 8.7951259733091e-07, "loss": 13.3594, "step": 16632 }, { "epoch": 1.1046689247526067, "grad_norm": 442.6802062988281, "learning_rate": 8.794058296028018e-07, "loss": 13.1094, "step": 16633 }, { "epoch": 1.1047353390449626, "grad_norm": 136.1439971923828, "learning_rate": 8.792990632696569e-07, "loss": 19.2031, "step": 16634 }, { "epoch": 1.1048017533373182, "grad_norm": 114.88264465332031, "learning_rate": 8.791922983327114e-07, "loss": 14.3438, "step": 16635 }, { "epoch": 1.1048681676296739, "grad_norm": 236.05088806152344, "learning_rate": 8.79085534793199e-07, "loss": 16.3125, "step": 16636 }, { "epoch": 1.1049345819220295, "grad_norm": 212.284912109375, "learning_rate": 8.78978772652356e-07, "loss": 16.5391, "step": 16637 }, { "epoch": 1.1050009962143854, "grad_norm": 210.86557006835938, "learning_rate": 8.788720119114165e-07, "loss": 16.9531, "step": 16638 }, { "epoch": 1.105067410506741, "grad_norm": 536.346923828125, "learning_rate": 8.787652525716161e-07, "loss": 14.4375, "step": 16639 }, { "epoch": 1.1051338247990967, "grad_norm": 122.35015869140625, "learning_rate": 8.786584946341888e-07, "loss": 15.0625, "step": 16640 }, { "epoch": 1.1052002390914524, "grad_norm": 162.881591796875, "learning_rate": 8.785517381003704e-07, "loss": 14.4531, "step": 16641 }, { "epoch": 1.1052666533838083, "grad_norm": 803.5029296875, "learning_rate": 8.784449829713953e-07, "loss": 22.0, "step": 16642 }, { "epoch": 1.105333067676164, "grad_norm": 1288.2283935546875, "learning_rate": 8.783382292484987e-07, "loss": 34.3594, "step": 16643 }, { "epoch": 1.1053994819685196, "grad_norm": 438.7184753417969, "learning_rate": 8.782314769329152e-07, "loss": 15.0938, "step": 16644 }, { "epoch": 1.1054658962608754, "grad_norm": 133.87696838378906, "learning_rate": 8.781247260258798e-07, "loss": 14.25, "step": 16645 }, { "epoch": 1.105532310553231, "grad_norm": 130.61976623535156, "learning_rate": 8.780179765286273e-07, "loss": 17.1562, "step": 16646 }, { "epoch": 1.1055987248455867, "grad_norm": 322.22686767578125, "learning_rate": 8.779112284423927e-07, "loss": 17.6406, "step": 16647 }, { "epoch": 1.1056651391379424, "grad_norm": 132.4329071044922, "learning_rate": 8.778044817684102e-07, "loss": 14.7812, "step": 16648 }, { "epoch": 1.1057315534302983, "grad_norm": 407.0594177246094, "learning_rate": 8.776977365079155e-07, "loss": 17.5781, "step": 16649 }, { "epoch": 1.105797967722654, "grad_norm": 202.53558349609375, "learning_rate": 8.775909926621422e-07, "loss": 18.375, "step": 16650 }, { "epoch": 1.1058643820150096, "grad_norm": 175.1272430419922, "learning_rate": 8.774842502323261e-07, "loss": 20.3438, "step": 16651 }, { "epoch": 1.1059307963073652, "grad_norm": 131.5644989013672, "learning_rate": 8.773775092197017e-07, "loss": 11.6484, "step": 16652 }, { "epoch": 1.1059972105997211, "grad_norm": 302.8008117675781, "learning_rate": 8.77270769625503e-07, "loss": 14.6094, "step": 16653 }, { "epoch": 1.1060636248920768, "grad_norm": 333.0092468261719, "learning_rate": 8.771640314509659e-07, "loss": 16.5469, "step": 16654 }, { "epoch": 1.1061300391844324, "grad_norm": 129.51565551757812, "learning_rate": 8.77057294697324e-07, "loss": 19.7188, "step": 16655 }, { "epoch": 1.1061964534767883, "grad_norm": 488.8129577636719, "learning_rate": 8.769505593658128e-07, "loss": 17.4531, "step": 16656 }, { "epoch": 1.106262867769144, "grad_norm": 154.7448272705078, "learning_rate": 8.768438254576663e-07, "loss": 10.5469, "step": 16657 }, { "epoch": 1.1063292820614996, "grad_norm": 132.17459106445312, "learning_rate": 8.767370929741195e-07, "loss": 14.5469, "step": 16658 }, { "epoch": 1.1063956963538553, "grad_norm": 509.0732421875, "learning_rate": 8.76630361916407e-07, "loss": 18.7344, "step": 16659 }, { "epoch": 1.1064621106462111, "grad_norm": 238.2891845703125, "learning_rate": 8.765236322857635e-07, "loss": 17.875, "step": 16660 }, { "epoch": 1.1065285249385668, "grad_norm": 188.1786346435547, "learning_rate": 8.764169040834228e-07, "loss": 23.0625, "step": 16661 }, { "epoch": 1.1065949392309224, "grad_norm": 304.34405517578125, "learning_rate": 8.763101773106207e-07, "loss": 19.0, "step": 16662 }, { "epoch": 1.106661353523278, "grad_norm": 218.4871826171875, "learning_rate": 8.76203451968591e-07, "loss": 13.9375, "step": 16663 }, { "epoch": 1.106727767815634, "grad_norm": 189.89675903320312, "learning_rate": 8.760967280585684e-07, "loss": 17.4844, "step": 16664 }, { "epoch": 1.1067941821079896, "grad_norm": 142.6123809814453, "learning_rate": 8.75990005581787e-07, "loss": 18.5312, "step": 16665 }, { "epoch": 1.1068605964003453, "grad_norm": 147.265625, "learning_rate": 8.758832845394823e-07, "loss": 14.4844, "step": 16666 }, { "epoch": 1.1069270106927012, "grad_norm": 132.10714721679688, "learning_rate": 8.757765649328877e-07, "loss": 13.9844, "step": 16667 }, { "epoch": 1.1069934249850568, "grad_norm": 167.82656860351562, "learning_rate": 8.756698467632387e-07, "loss": 14.7969, "step": 16668 }, { "epoch": 1.1070598392774125, "grad_norm": 323.55877685546875, "learning_rate": 8.755631300317687e-07, "loss": 15.5156, "step": 16669 }, { "epoch": 1.1071262535697681, "grad_norm": 380.4949645996094, "learning_rate": 8.754564147397131e-07, "loss": 26.375, "step": 16670 }, { "epoch": 1.107192667862124, "grad_norm": 387.7162170410156, "learning_rate": 8.753497008883054e-07, "loss": 16.0, "step": 16671 }, { "epoch": 1.1072590821544797, "grad_norm": 985.164794921875, "learning_rate": 8.752429884787809e-07, "loss": 14.9375, "step": 16672 }, { "epoch": 1.1073254964468353, "grad_norm": 215.01792907714844, "learning_rate": 8.751362775123733e-07, "loss": 17.1562, "step": 16673 }, { "epoch": 1.107391910739191, "grad_norm": 240.98548889160156, "learning_rate": 8.750295679903174e-07, "loss": 18.1875, "step": 16674 }, { "epoch": 1.1074583250315468, "grad_norm": 523.17041015625, "learning_rate": 8.749228599138472e-07, "loss": 17.125, "step": 16675 }, { "epoch": 1.1075247393239025, "grad_norm": 520.3170776367188, "learning_rate": 8.748161532841976e-07, "loss": 20.1562, "step": 16676 }, { "epoch": 1.1075911536162582, "grad_norm": 140.26744079589844, "learning_rate": 8.747094481026022e-07, "loss": 12.7656, "step": 16677 }, { "epoch": 1.107657567908614, "grad_norm": 149.46377563476562, "learning_rate": 8.746027443702959e-07, "loss": 13.2812, "step": 16678 }, { "epoch": 1.1077239822009697, "grad_norm": 274.089599609375, "learning_rate": 8.744960420885124e-07, "loss": 15.2656, "step": 16679 }, { "epoch": 1.1077903964933253, "grad_norm": 321.7341003417969, "learning_rate": 8.743893412584866e-07, "loss": 12.5938, "step": 16680 }, { "epoch": 1.107856810785681, "grad_norm": 172.10328674316406, "learning_rate": 8.742826418814524e-07, "loss": 19.2812, "step": 16681 }, { "epoch": 1.1079232250780369, "grad_norm": 301.5436706542969, "learning_rate": 8.741759439586443e-07, "loss": 13.9766, "step": 16682 }, { "epoch": 1.1079896393703925, "grad_norm": 222.6929168701172, "learning_rate": 8.740692474912958e-07, "loss": 24.7969, "step": 16683 }, { "epoch": 1.1080560536627482, "grad_norm": 231.16033935546875, "learning_rate": 8.739625524806422e-07, "loss": 22.0312, "step": 16684 }, { "epoch": 1.1081224679551038, "grad_norm": 161.71826171875, "learning_rate": 8.738558589279168e-07, "loss": 17.1094, "step": 16685 }, { "epoch": 1.1081888822474597, "grad_norm": 200.39247131347656, "learning_rate": 8.737491668343544e-07, "loss": 17.1875, "step": 16686 }, { "epoch": 1.1082552965398154, "grad_norm": 114.58659362792969, "learning_rate": 8.736424762011882e-07, "loss": 13.8594, "step": 16687 }, { "epoch": 1.108321710832171, "grad_norm": 165.62220764160156, "learning_rate": 8.735357870296533e-07, "loss": 13.625, "step": 16688 }, { "epoch": 1.108388125124527, "grad_norm": 163.27175903320312, "learning_rate": 8.734290993209838e-07, "loss": 15.25, "step": 16689 }, { "epoch": 1.1084545394168825, "grad_norm": 143.6446990966797, "learning_rate": 8.73322413076413e-07, "loss": 15.0, "step": 16690 }, { "epoch": 1.1085209537092382, "grad_norm": 402.6606750488281, "learning_rate": 8.732157282971762e-07, "loss": 22.1094, "step": 16691 }, { "epoch": 1.1085873680015939, "grad_norm": 257.95550537109375, "learning_rate": 8.73109044984506e-07, "loss": 21.5625, "step": 16692 }, { "epoch": 1.1086537822939497, "grad_norm": 236.8621063232422, "learning_rate": 8.730023631396376e-07, "loss": 29.0938, "step": 16693 }, { "epoch": 1.1087201965863054, "grad_norm": 198.52969360351562, "learning_rate": 8.728956827638045e-07, "loss": 13.7188, "step": 16694 }, { "epoch": 1.108786610878661, "grad_norm": 139.25074768066406, "learning_rate": 8.72789003858241e-07, "loss": 20.7188, "step": 16695 }, { "epoch": 1.1088530251710167, "grad_norm": 152.3163604736328, "learning_rate": 8.726823264241806e-07, "loss": 14.3672, "step": 16696 }, { "epoch": 1.1089194394633726, "grad_norm": 106.85684967041016, "learning_rate": 8.72575650462858e-07, "loss": 15.7969, "step": 16697 }, { "epoch": 1.1089858537557282, "grad_norm": 665.3717041015625, "learning_rate": 8.724689759755066e-07, "loss": 16.9844, "step": 16698 }, { "epoch": 1.1090522680480839, "grad_norm": 205.55435180664062, "learning_rate": 8.723623029633608e-07, "loss": 18.3281, "step": 16699 }, { "epoch": 1.1091186823404398, "grad_norm": 287.3592529296875, "learning_rate": 8.722556314276537e-07, "loss": 23.6875, "step": 16700 }, { "epoch": 1.1091850966327954, "grad_norm": 243.35350036621094, "learning_rate": 8.721489613696203e-07, "loss": 16.8438, "step": 16701 }, { "epoch": 1.109251510925151, "grad_norm": 552.53955078125, "learning_rate": 8.720422927904939e-07, "loss": 24.0312, "step": 16702 }, { "epoch": 1.1093179252175067, "grad_norm": 323.7908630371094, "learning_rate": 8.719356256915086e-07, "loss": 16.0312, "step": 16703 }, { "epoch": 1.1093843395098626, "grad_norm": 358.4339599609375, "learning_rate": 8.718289600738977e-07, "loss": 23.3125, "step": 16704 }, { "epoch": 1.1094507538022182, "grad_norm": 139.87570190429688, "learning_rate": 8.717222959388959e-07, "loss": 12.4688, "step": 16705 }, { "epoch": 1.109517168094574, "grad_norm": 322.00592041015625, "learning_rate": 8.716156332877365e-07, "loss": 20.375, "step": 16706 }, { "epoch": 1.1095835823869296, "grad_norm": 127.54247283935547, "learning_rate": 8.715089721216535e-07, "loss": 18.25, "step": 16707 }, { "epoch": 1.1096499966792854, "grad_norm": 330.462890625, "learning_rate": 8.714023124418802e-07, "loss": 18.3281, "step": 16708 }, { "epoch": 1.109716410971641, "grad_norm": 135.7189483642578, "learning_rate": 8.712956542496512e-07, "loss": 17.4688, "step": 16709 }, { "epoch": 1.1097828252639967, "grad_norm": 128.97216796875, "learning_rate": 8.711889975461998e-07, "loss": 19.0781, "step": 16710 }, { "epoch": 1.1098492395563526, "grad_norm": 108.50296783447266, "learning_rate": 8.710823423327599e-07, "loss": 12.2422, "step": 16711 }, { "epoch": 1.1099156538487083, "grad_norm": 161.51194763183594, "learning_rate": 8.70975688610565e-07, "loss": 16.1406, "step": 16712 }, { "epoch": 1.109982068141064, "grad_norm": 264.2538146972656, "learning_rate": 8.708690363808491e-07, "loss": 19.1562, "step": 16713 }, { "epoch": 1.1100484824334196, "grad_norm": 488.71563720703125, "learning_rate": 8.707623856448451e-07, "loss": 20.5938, "step": 16714 }, { "epoch": 1.1101148967257755, "grad_norm": 317.41094970703125, "learning_rate": 8.706557364037879e-07, "loss": 15.2969, "step": 16715 }, { "epoch": 1.1101813110181311, "grad_norm": 245.7240753173828, "learning_rate": 8.705490886589104e-07, "loss": 14.0, "step": 16716 }, { "epoch": 1.1102477253104868, "grad_norm": 215.1116180419922, "learning_rate": 8.704424424114466e-07, "loss": 12.7969, "step": 16717 }, { "epoch": 1.1103141396028424, "grad_norm": 518.5419311523438, "learning_rate": 8.703357976626294e-07, "loss": 11.8438, "step": 16718 }, { "epoch": 1.1103805538951983, "grad_norm": 426.5107116699219, "learning_rate": 8.702291544136935e-07, "loss": 19.0, "step": 16719 }, { "epoch": 1.110446968187554, "grad_norm": 131.65225219726562, "learning_rate": 8.701225126658715e-07, "loss": 13.5469, "step": 16720 }, { "epoch": 1.1105133824799096, "grad_norm": 365.2404479980469, "learning_rate": 8.700158724203977e-07, "loss": 23.8438, "step": 16721 }, { "epoch": 1.1105797967722655, "grad_norm": 97.8497314453125, "learning_rate": 8.69909233678505e-07, "loss": 19.2031, "step": 16722 }, { "epoch": 1.1106462110646211, "grad_norm": 521.9620971679688, "learning_rate": 8.698025964414275e-07, "loss": 12.9922, "step": 16723 }, { "epoch": 1.1107126253569768, "grad_norm": 139.8857421875, "learning_rate": 8.696959607103985e-07, "loss": 16.8594, "step": 16724 }, { "epoch": 1.1107790396493324, "grad_norm": 722.6178588867188, "learning_rate": 8.695893264866511e-07, "loss": 12.0625, "step": 16725 }, { "epoch": 1.1108454539416883, "grad_norm": 650.2688598632812, "learning_rate": 8.694826937714196e-07, "loss": 14.2969, "step": 16726 }, { "epoch": 1.110911868234044, "grad_norm": 210.0203399658203, "learning_rate": 8.693760625659369e-07, "loss": 17.2656, "step": 16727 }, { "epoch": 1.1109782825263996, "grad_norm": 432.9938049316406, "learning_rate": 8.692694328714368e-07, "loss": 19.3594, "step": 16728 }, { "epoch": 1.1110446968187553, "grad_norm": 203.71839904785156, "learning_rate": 8.691628046891521e-07, "loss": 17.7188, "step": 16729 }, { "epoch": 1.1111111111111112, "grad_norm": 139.3466339111328, "learning_rate": 8.69056178020317e-07, "loss": 15.1094, "step": 16730 }, { "epoch": 1.1111775254034668, "grad_norm": 161.51907348632812, "learning_rate": 8.689495528661644e-07, "loss": 17.2344, "step": 16731 }, { "epoch": 1.1112439396958225, "grad_norm": 169.6062469482422, "learning_rate": 8.688429292279278e-07, "loss": 16.8281, "step": 16732 }, { "epoch": 1.1113103539881783, "grad_norm": 130.09860229492188, "learning_rate": 8.687363071068405e-07, "loss": 12.9219, "step": 16733 }, { "epoch": 1.111376768280534, "grad_norm": 549.9080810546875, "learning_rate": 8.686296865041362e-07, "loss": 28.0312, "step": 16734 }, { "epoch": 1.1114431825728897, "grad_norm": 206.84776306152344, "learning_rate": 8.685230674210474e-07, "loss": 16.875, "step": 16735 }, { "epoch": 1.1115095968652453, "grad_norm": 320.4802551269531, "learning_rate": 8.684164498588085e-07, "loss": 14.9688, "step": 16736 }, { "epoch": 1.1115760111576012, "grad_norm": 111.37104034423828, "learning_rate": 8.683098338186518e-07, "loss": 13.3438, "step": 16737 }, { "epoch": 1.1116424254499568, "grad_norm": 130.18739318847656, "learning_rate": 8.682032193018114e-07, "loss": 20.6406, "step": 16738 }, { "epoch": 1.1117088397423125, "grad_norm": 271.73431396484375, "learning_rate": 8.680966063095197e-07, "loss": 15.8906, "step": 16739 }, { "epoch": 1.1117752540346681, "grad_norm": 201.47817993164062, "learning_rate": 8.679899948430107e-07, "loss": 18.5312, "step": 16740 }, { "epoch": 1.111841668327024, "grad_norm": 142.4240264892578, "learning_rate": 8.678833849035172e-07, "loss": 17.25, "step": 16741 }, { "epoch": 1.1119080826193797, "grad_norm": 100.5375747680664, "learning_rate": 8.677767764922728e-07, "loss": 12.3125, "step": 16742 }, { "epoch": 1.1119744969117353, "grad_norm": 101.41900634765625, "learning_rate": 8.676701696105099e-07, "loss": 13.1172, "step": 16743 }, { "epoch": 1.1120409112040912, "grad_norm": 128.4810791015625, "learning_rate": 8.675635642594626e-07, "loss": 15.4375, "step": 16744 }, { "epoch": 1.1121073254964469, "grad_norm": 167.75970458984375, "learning_rate": 8.674569604403634e-07, "loss": 17.8438, "step": 16745 }, { "epoch": 1.1121737397888025, "grad_norm": 150.88055419921875, "learning_rate": 8.673503581544459e-07, "loss": 17.25, "step": 16746 }, { "epoch": 1.1122401540811582, "grad_norm": 119.7342758178711, "learning_rate": 8.672437574029425e-07, "loss": 14.8438, "step": 16747 }, { "epoch": 1.112306568373514, "grad_norm": 229.53367614746094, "learning_rate": 8.671371581870872e-07, "loss": 14.6719, "step": 16748 }, { "epoch": 1.1123729826658697, "grad_norm": 263.6995849609375, "learning_rate": 8.670305605081125e-07, "loss": 13.9922, "step": 16749 }, { "epoch": 1.1124393969582254, "grad_norm": 204.250732421875, "learning_rate": 8.669239643672518e-07, "loss": 21.7969, "step": 16750 }, { "epoch": 1.112505811250581, "grad_norm": 255.2661590576172, "learning_rate": 8.668173697657375e-07, "loss": 13.7656, "step": 16751 }, { "epoch": 1.1125722255429369, "grad_norm": 193.20916748046875, "learning_rate": 8.667107767048036e-07, "loss": 15.4688, "step": 16752 }, { "epoch": 1.1126386398352925, "grad_norm": 146.3156280517578, "learning_rate": 8.66604185185682e-07, "loss": 16.0, "step": 16753 }, { "epoch": 1.1127050541276482, "grad_norm": 177.15280151367188, "learning_rate": 8.664975952096067e-07, "loss": 15.0781, "step": 16754 }, { "epoch": 1.112771468420004, "grad_norm": 168.9071807861328, "learning_rate": 8.663910067778102e-07, "loss": 17.7812, "step": 16755 }, { "epoch": 1.1128378827123597, "grad_norm": 257.32977294921875, "learning_rate": 8.662844198915255e-07, "loss": 17.7812, "step": 16756 }, { "epoch": 1.1129042970047154, "grad_norm": 280.7327880859375, "learning_rate": 8.661778345519851e-07, "loss": 17.7031, "step": 16757 }, { "epoch": 1.112970711297071, "grad_norm": 281.6246032714844, "learning_rate": 8.660712507604228e-07, "loss": 18.0469, "step": 16758 }, { "epoch": 1.113037125589427, "grad_norm": 153.88877868652344, "learning_rate": 8.659646685180709e-07, "loss": 13.875, "step": 16759 }, { "epoch": 1.1131035398817826, "grad_norm": 225.4148406982422, "learning_rate": 8.658580878261621e-07, "loss": 13.8125, "step": 16760 }, { "epoch": 1.1131699541741382, "grad_norm": 369.1506652832031, "learning_rate": 8.6575150868593e-07, "loss": 16.5469, "step": 16761 }, { "epoch": 1.1132363684664939, "grad_norm": 196.79249572753906, "learning_rate": 8.656449310986069e-07, "loss": 18.2812, "step": 16762 }, { "epoch": 1.1133027827588498, "grad_norm": 379.0257568359375, "learning_rate": 8.65538355065426e-07, "loss": 13.875, "step": 16763 }, { "epoch": 1.1133691970512054, "grad_norm": 239.3893585205078, "learning_rate": 8.654317805876195e-07, "loss": 17.2812, "step": 16764 }, { "epoch": 1.113435611343561, "grad_norm": 209.3502655029297, "learning_rate": 8.653252076664208e-07, "loss": 11.25, "step": 16765 }, { "epoch": 1.113502025635917, "grad_norm": 124.32556915283203, "learning_rate": 8.652186363030625e-07, "loss": 14.5469, "step": 16766 }, { "epoch": 1.1135684399282726, "grad_norm": 216.92098999023438, "learning_rate": 8.651120664987772e-07, "loss": 14.4688, "step": 16767 }, { "epoch": 1.1136348542206282, "grad_norm": 206.1227264404297, "learning_rate": 8.650054982547975e-07, "loss": 14.5938, "step": 16768 }, { "epoch": 1.113701268512984, "grad_norm": 148.76434326171875, "learning_rate": 8.648989315723567e-07, "loss": 17.75, "step": 16769 }, { "epoch": 1.1137676828053398, "grad_norm": 263.679931640625, "learning_rate": 8.64792366452687e-07, "loss": 31.0938, "step": 16770 }, { "epoch": 1.1138340970976954, "grad_norm": 124.32276916503906, "learning_rate": 8.646858028970215e-07, "loss": 12.75, "step": 16771 }, { "epoch": 1.113900511390051, "grad_norm": 594.5094604492188, "learning_rate": 8.645792409065921e-07, "loss": 21.9375, "step": 16772 }, { "epoch": 1.1139669256824067, "grad_norm": 92.63166809082031, "learning_rate": 8.644726804826325e-07, "loss": 13.5781, "step": 16773 }, { "epoch": 1.1140333399747626, "grad_norm": 419.2049560546875, "learning_rate": 8.643661216263743e-07, "loss": 13.1172, "step": 16774 }, { "epoch": 1.1140997542671183, "grad_norm": 473.57244873046875, "learning_rate": 8.642595643390509e-07, "loss": 18.5156, "step": 16775 }, { "epoch": 1.114166168559474, "grad_norm": 182.89498901367188, "learning_rate": 8.641530086218945e-07, "loss": 14.2969, "step": 16776 }, { "epoch": 1.1142325828518298, "grad_norm": 225.44305419921875, "learning_rate": 8.640464544761379e-07, "loss": 15.0625, "step": 16777 }, { "epoch": 1.1142989971441855, "grad_norm": 213.2639923095703, "learning_rate": 8.639399019030131e-07, "loss": 13.8438, "step": 16778 }, { "epoch": 1.114365411436541, "grad_norm": 228.14682006835938, "learning_rate": 8.638333509037535e-07, "loss": 23.0625, "step": 16779 }, { "epoch": 1.1144318257288968, "grad_norm": 212.7213592529297, "learning_rate": 8.63726801479591e-07, "loss": 13.8438, "step": 16780 }, { "epoch": 1.1144982400212526, "grad_norm": 175.0293731689453, "learning_rate": 8.636202536317584e-07, "loss": 19.75, "step": 16781 }, { "epoch": 1.1145646543136083, "grad_norm": 211.74261474609375, "learning_rate": 8.635137073614877e-07, "loss": 20.6719, "step": 16782 }, { "epoch": 1.114631068605964, "grad_norm": 1006.2785034179688, "learning_rate": 8.634071626700122e-07, "loss": 15.2031, "step": 16783 }, { "epoch": 1.1146974828983196, "grad_norm": 388.1869201660156, "learning_rate": 8.633006195585635e-07, "loss": 24.4062, "step": 16784 }, { "epoch": 1.1147638971906755, "grad_norm": 308.5697326660156, "learning_rate": 8.631940780283747e-07, "loss": 16.8906, "step": 16785 }, { "epoch": 1.1148303114830311, "grad_norm": 250.30322265625, "learning_rate": 8.630875380806776e-07, "loss": 16.7031, "step": 16786 }, { "epoch": 1.1148967257753868, "grad_norm": 156.89218139648438, "learning_rate": 8.629809997167052e-07, "loss": 17.6562, "step": 16787 }, { "epoch": 1.1149631400677427, "grad_norm": 233.10525512695312, "learning_rate": 8.628744629376895e-07, "loss": 15.8594, "step": 16788 }, { "epoch": 1.1150295543600983, "grad_norm": 160.91233825683594, "learning_rate": 8.62767927744863e-07, "loss": 17.5156, "step": 16789 }, { "epoch": 1.115095968652454, "grad_norm": 308.0180969238281, "learning_rate": 8.626613941394577e-07, "loss": 13.6562, "step": 16790 }, { "epoch": 1.1151623829448096, "grad_norm": 193.32386779785156, "learning_rate": 8.625548621227066e-07, "loss": 15.2969, "step": 16791 }, { "epoch": 1.1152287972371655, "grad_norm": 176.83633422851562, "learning_rate": 8.624483316958412e-07, "loss": 16.25, "step": 16792 }, { "epoch": 1.1152952115295212, "grad_norm": 340.9513854980469, "learning_rate": 8.623418028600946e-07, "loss": 13.1328, "step": 16793 }, { "epoch": 1.1153616258218768, "grad_norm": 270.1194152832031, "learning_rate": 8.622352756166981e-07, "loss": 15.3281, "step": 16794 }, { "epoch": 1.1154280401142325, "grad_norm": 250.31565856933594, "learning_rate": 8.621287499668853e-07, "loss": 17.75, "step": 16795 }, { "epoch": 1.1154944544065883, "grad_norm": 296.32525634765625, "learning_rate": 8.620222259118869e-07, "loss": 20.25, "step": 16796 }, { "epoch": 1.115560868698944, "grad_norm": 298.6339111328125, "learning_rate": 8.619157034529361e-07, "loss": 16.7344, "step": 16797 }, { "epoch": 1.1156272829912997, "grad_norm": 336.06854248046875, "learning_rate": 8.618091825912649e-07, "loss": 19.8906, "step": 16798 }, { "epoch": 1.1156936972836555, "grad_norm": 293.41436767578125, "learning_rate": 8.617026633281051e-07, "loss": 18.4922, "step": 16799 }, { "epoch": 1.1157601115760112, "grad_norm": 632.8198852539062, "learning_rate": 8.615961456646894e-07, "loss": 16.1562, "step": 16800 }, { "epoch": 1.1158265258683668, "grad_norm": 134.81280517578125, "learning_rate": 8.614896296022495e-07, "loss": 13.6875, "step": 16801 }, { "epoch": 1.1158929401607225, "grad_norm": 216.55970764160156, "learning_rate": 8.61383115142018e-07, "loss": 19.5469, "step": 16802 }, { "epoch": 1.1159593544530784, "grad_norm": 191.2196807861328, "learning_rate": 8.612766022852262e-07, "loss": 19.7031, "step": 16803 }, { "epoch": 1.116025768745434, "grad_norm": 161.34991455078125, "learning_rate": 8.611700910331071e-07, "loss": 13.8125, "step": 16804 }, { "epoch": 1.1160921830377897, "grad_norm": 141.42262268066406, "learning_rate": 8.610635813868922e-07, "loss": 14.2969, "step": 16805 }, { "epoch": 1.1161585973301453, "grad_norm": 135.5784454345703, "learning_rate": 8.609570733478139e-07, "loss": 15.1562, "step": 16806 }, { "epoch": 1.1162250116225012, "grad_norm": 252.08482360839844, "learning_rate": 8.608505669171035e-07, "loss": 12.8203, "step": 16807 }, { "epoch": 1.1162914259148569, "grad_norm": 165.68113708496094, "learning_rate": 8.607440620959939e-07, "loss": 16.1719, "step": 16808 }, { "epoch": 1.1163578402072125, "grad_norm": 147.66481018066406, "learning_rate": 8.606375588857165e-07, "loss": 15.1562, "step": 16809 }, { "epoch": 1.1164242544995684, "grad_norm": 247.19126892089844, "learning_rate": 8.605310572875036e-07, "loss": 15.9219, "step": 16810 }, { "epoch": 1.116490668791924, "grad_norm": 169.46084594726562, "learning_rate": 8.604245573025868e-07, "loss": 15.9219, "step": 16811 }, { "epoch": 1.1165570830842797, "grad_norm": 343.0413513183594, "learning_rate": 8.603180589321984e-07, "loss": 16.6562, "step": 16812 }, { "epoch": 1.1166234973766354, "grad_norm": 188.05906677246094, "learning_rate": 8.6021156217757e-07, "loss": 15.5312, "step": 16813 }, { "epoch": 1.1166899116689912, "grad_norm": 649.5828857421875, "learning_rate": 8.60105067039934e-07, "loss": 12.1719, "step": 16814 }, { "epoch": 1.1167563259613469, "grad_norm": 127.37870025634766, "learning_rate": 8.599985735205214e-07, "loss": 11.3125, "step": 16815 }, { "epoch": 1.1168227402537025, "grad_norm": 614.9644775390625, "learning_rate": 8.598920816205652e-07, "loss": 25.3438, "step": 16816 }, { "epoch": 1.1168891545460584, "grad_norm": 281.97076416015625, "learning_rate": 8.59785591341296e-07, "loss": 14.1094, "step": 16817 }, { "epoch": 1.116955568838414, "grad_norm": 147.37933349609375, "learning_rate": 8.596791026839467e-07, "loss": 15.2812, "step": 16818 }, { "epoch": 1.1170219831307697, "grad_norm": 169.8866424560547, "learning_rate": 8.595726156497484e-07, "loss": 12.0625, "step": 16819 }, { "epoch": 1.1170883974231254, "grad_norm": 158.43618774414062, "learning_rate": 8.594661302399331e-07, "loss": 13.6094, "step": 16820 }, { "epoch": 1.1171548117154813, "grad_norm": 203.71624755859375, "learning_rate": 8.593596464557324e-07, "loss": 16.3203, "step": 16821 }, { "epoch": 1.117221226007837, "grad_norm": 137.6654510498047, "learning_rate": 8.592531642983786e-07, "loss": 16.3125, "step": 16822 }, { "epoch": 1.1172876403001926, "grad_norm": 188.5518035888672, "learning_rate": 8.591466837691028e-07, "loss": 15.7188, "step": 16823 }, { "epoch": 1.1173540545925482, "grad_norm": 150.329345703125, "learning_rate": 8.590402048691372e-07, "loss": 13.6094, "step": 16824 }, { "epoch": 1.117420468884904, "grad_norm": 303.8216857910156, "learning_rate": 8.589337275997126e-07, "loss": 15.25, "step": 16825 }, { "epoch": 1.1174868831772597, "grad_norm": 1145.1558837890625, "learning_rate": 8.588272519620619e-07, "loss": 28.2344, "step": 16826 }, { "epoch": 1.1175532974696154, "grad_norm": 256.4826354980469, "learning_rate": 8.58720777957416e-07, "loss": 16.2188, "step": 16827 }, { "epoch": 1.1176197117619713, "grad_norm": 177.78221130371094, "learning_rate": 8.586143055870067e-07, "loss": 15.875, "step": 16828 }, { "epoch": 1.117686126054327, "grad_norm": 105.67941284179688, "learning_rate": 8.585078348520652e-07, "loss": 16.25, "step": 16829 }, { "epoch": 1.1177525403466826, "grad_norm": 183.21434020996094, "learning_rate": 8.584013657538239e-07, "loss": 18.6875, "step": 16830 }, { "epoch": 1.1178189546390382, "grad_norm": 197.76234436035156, "learning_rate": 8.582948982935139e-07, "loss": 20.75, "step": 16831 }, { "epoch": 1.1178853689313941, "grad_norm": 177.09439086914062, "learning_rate": 8.581884324723668e-07, "loss": 13.2969, "step": 16832 }, { "epoch": 1.1179517832237498, "grad_norm": 324.0691223144531, "learning_rate": 8.580819682916139e-07, "loss": 16.4844, "step": 16833 }, { "epoch": 1.1180181975161054, "grad_norm": 285.1216125488281, "learning_rate": 8.57975505752487e-07, "loss": 18.6875, "step": 16834 }, { "epoch": 1.118084611808461, "grad_norm": 247.46762084960938, "learning_rate": 8.578690448562178e-07, "loss": 15.5781, "step": 16835 }, { "epoch": 1.118151026100817, "grad_norm": 146.65101623535156, "learning_rate": 8.577625856040373e-07, "loss": 15.5, "step": 16836 }, { "epoch": 1.1182174403931726, "grad_norm": 150.2501678466797, "learning_rate": 8.576561279971775e-07, "loss": 22.3125, "step": 16837 }, { "epoch": 1.1182838546855283, "grad_norm": 121.49688720703125, "learning_rate": 8.575496720368691e-07, "loss": 17.4375, "step": 16838 }, { "epoch": 1.1183502689778841, "grad_norm": 217.38043212890625, "learning_rate": 8.574432177243445e-07, "loss": 16.9062, "step": 16839 }, { "epoch": 1.1184166832702398, "grad_norm": 468.99273681640625, "learning_rate": 8.573367650608341e-07, "loss": 17.8906, "step": 16840 }, { "epoch": 1.1184830975625955, "grad_norm": 232.1966552734375, "learning_rate": 8.572303140475701e-07, "loss": 19.6406, "step": 16841 }, { "epoch": 1.118549511854951, "grad_norm": 477.1638488769531, "learning_rate": 8.57123864685783e-07, "loss": 16.3438, "step": 16842 }, { "epoch": 1.118615926147307, "grad_norm": 180.22483825683594, "learning_rate": 8.570174169767051e-07, "loss": 16.7969, "step": 16843 }, { "epoch": 1.1186823404396626, "grad_norm": 238.77586364746094, "learning_rate": 8.56910970921567e-07, "loss": 22.875, "step": 16844 }, { "epoch": 1.1187487547320183, "grad_norm": 219.06703186035156, "learning_rate": 8.568045265216007e-07, "loss": 23.0938, "step": 16845 }, { "epoch": 1.118815169024374, "grad_norm": 169.4340057373047, "learning_rate": 8.566980837780363e-07, "loss": 13.2969, "step": 16846 }, { "epoch": 1.1188815833167298, "grad_norm": 379.4667053222656, "learning_rate": 8.565916426921064e-07, "loss": 17.7344, "step": 16847 }, { "epoch": 1.1189479976090855, "grad_norm": 156.2442169189453, "learning_rate": 8.564852032650416e-07, "loss": 12.4219, "step": 16848 }, { "epoch": 1.1190144119014411, "grad_norm": 163.1094207763672, "learning_rate": 8.563787654980733e-07, "loss": 12.2422, "step": 16849 }, { "epoch": 1.119080826193797, "grad_norm": 156.62942504882812, "learning_rate": 8.562723293924322e-07, "loss": 16.3594, "step": 16850 }, { "epoch": 1.1191472404861527, "grad_norm": 315.74969482421875, "learning_rate": 8.561658949493504e-07, "loss": 17.4531, "step": 16851 }, { "epoch": 1.1192136547785083, "grad_norm": 181.46945190429688, "learning_rate": 8.560594621700584e-07, "loss": 17.3906, "step": 16852 }, { "epoch": 1.119280069070864, "grad_norm": 182.47552490234375, "learning_rate": 8.559530310557876e-07, "loss": 14.0625, "step": 16853 }, { "epoch": 1.1193464833632198, "grad_norm": 196.73037719726562, "learning_rate": 8.558466016077688e-07, "loss": 12.75, "step": 16854 }, { "epoch": 1.1194128976555755, "grad_norm": 536.0621337890625, "learning_rate": 8.557401738272338e-07, "loss": 21.3438, "step": 16855 }, { "epoch": 1.1194793119479312, "grad_norm": 135.06935119628906, "learning_rate": 8.556337477154131e-07, "loss": 10.5547, "step": 16856 }, { "epoch": 1.1195457262402868, "grad_norm": 151.5023956298828, "learning_rate": 8.555273232735381e-07, "loss": 16.1719, "step": 16857 }, { "epoch": 1.1196121405326427, "grad_norm": 191.18609619140625, "learning_rate": 8.554209005028392e-07, "loss": 21.4688, "step": 16858 }, { "epoch": 1.1196785548249983, "grad_norm": 237.16915893554688, "learning_rate": 8.553144794045489e-07, "loss": 21.9219, "step": 16859 }, { "epoch": 1.119744969117354, "grad_norm": 209.76162719726562, "learning_rate": 8.552080599798963e-07, "loss": 17.3438, "step": 16860 }, { "epoch": 1.1198113834097099, "grad_norm": 104.04032897949219, "learning_rate": 8.55101642230114e-07, "loss": 19.6406, "step": 16861 }, { "epoch": 1.1198777977020655, "grad_norm": 183.90635681152344, "learning_rate": 8.54995226156432e-07, "loss": 16.4688, "step": 16862 }, { "epoch": 1.1199442119944212, "grad_norm": 310.965087890625, "learning_rate": 8.548888117600819e-07, "loss": 20.4844, "step": 16863 }, { "epoch": 1.1200106262867768, "grad_norm": 329.9076843261719, "learning_rate": 8.547823990422939e-07, "loss": 14.7188, "step": 16864 }, { "epoch": 1.1200770405791327, "grad_norm": 336.076416015625, "learning_rate": 8.546759880042997e-07, "loss": 22.9375, "step": 16865 }, { "epoch": 1.1201434548714884, "grad_norm": 121.03485870361328, "learning_rate": 8.545695786473298e-07, "loss": 12.1406, "step": 16866 }, { "epoch": 1.120209869163844, "grad_norm": 205.6685333251953, "learning_rate": 8.544631709726154e-07, "loss": 11.5625, "step": 16867 }, { "epoch": 1.1202762834561997, "grad_norm": 127.26763916015625, "learning_rate": 8.543567649813866e-07, "loss": 12.6094, "step": 16868 }, { "epoch": 1.1203426977485555, "grad_norm": 256.8840637207031, "learning_rate": 8.542503606748752e-07, "loss": 15.9297, "step": 16869 }, { "epoch": 1.1204091120409112, "grad_norm": 307.6120300292969, "learning_rate": 8.541439580543113e-07, "loss": 23.9688, "step": 16870 }, { "epoch": 1.1204755263332669, "grad_norm": 207.3007354736328, "learning_rate": 8.54037557120926e-07, "loss": 13.0625, "step": 16871 }, { "epoch": 1.1205419406256227, "grad_norm": 186.13180541992188, "learning_rate": 8.539311578759503e-07, "loss": 21.6562, "step": 16872 }, { "epoch": 1.1206083549179784, "grad_norm": 209.47549438476562, "learning_rate": 8.538247603206146e-07, "loss": 20.9688, "step": 16873 }, { "epoch": 1.120674769210334, "grad_norm": 85.89959716796875, "learning_rate": 8.5371836445615e-07, "loss": 12.9531, "step": 16874 }, { "epoch": 1.1207411835026897, "grad_norm": 232.08189392089844, "learning_rate": 8.536119702837865e-07, "loss": 19.375, "step": 16875 }, { "epoch": 1.1208075977950456, "grad_norm": 156.42623901367188, "learning_rate": 8.535055778047557e-07, "loss": 12.9219, "step": 16876 }, { "epoch": 1.1208740120874012, "grad_norm": 158.41802978515625, "learning_rate": 8.533991870202879e-07, "loss": 12.9531, "step": 16877 }, { "epoch": 1.1209404263797569, "grad_norm": 937.5208740234375, "learning_rate": 8.532927979316138e-07, "loss": 14.9688, "step": 16878 }, { "epoch": 1.1210068406721125, "grad_norm": 211.5042724609375, "learning_rate": 8.531864105399638e-07, "loss": 17.4219, "step": 16879 }, { "epoch": 1.1210732549644684, "grad_norm": 314.12811279296875, "learning_rate": 8.530800248465692e-07, "loss": 18.2812, "step": 16880 }, { "epoch": 1.121139669256824, "grad_norm": 138.48207092285156, "learning_rate": 8.529736408526596e-07, "loss": 14.4375, "step": 16881 }, { "epoch": 1.1212060835491797, "grad_norm": 199.9908447265625, "learning_rate": 8.528672585594665e-07, "loss": 15.7031, "step": 16882 }, { "epoch": 1.1212724978415356, "grad_norm": 867.6240234375, "learning_rate": 8.527608779682198e-07, "loss": 14.7812, "step": 16883 }, { "epoch": 1.1213389121338913, "grad_norm": 413.1380920410156, "learning_rate": 8.526544990801507e-07, "loss": 17.8906, "step": 16884 }, { "epoch": 1.121405326426247, "grad_norm": 315.438720703125, "learning_rate": 8.52548121896489e-07, "loss": 20.125, "step": 16885 }, { "epoch": 1.1214717407186026, "grad_norm": 657.84521484375, "learning_rate": 8.52441746418466e-07, "loss": 26.3281, "step": 16886 }, { "epoch": 1.1215381550109584, "grad_norm": 278.2157287597656, "learning_rate": 8.523353726473115e-07, "loss": 20.875, "step": 16887 }, { "epoch": 1.121604569303314, "grad_norm": 166.2617645263672, "learning_rate": 8.522290005842566e-07, "loss": 17.4219, "step": 16888 }, { "epoch": 1.1216709835956697, "grad_norm": 150.9235382080078, "learning_rate": 8.52122630230531e-07, "loss": 15.7031, "step": 16889 }, { "epoch": 1.1217373978880254, "grad_norm": 211.83424377441406, "learning_rate": 8.520162615873659e-07, "loss": 19.2969, "step": 16890 }, { "epoch": 1.1218038121803813, "grad_norm": 265.245849609375, "learning_rate": 8.519098946559911e-07, "loss": 16.6562, "step": 16891 }, { "epoch": 1.121870226472737, "grad_norm": 220.7855987548828, "learning_rate": 8.518035294376376e-07, "loss": 26.0, "step": 16892 }, { "epoch": 1.1219366407650926, "grad_norm": 319.7036437988281, "learning_rate": 8.516971659335348e-07, "loss": 17.0625, "step": 16893 }, { "epoch": 1.1220030550574485, "grad_norm": 158.38577270507812, "learning_rate": 8.515908041449141e-07, "loss": 16.1719, "step": 16894 }, { "epoch": 1.1220694693498041, "grad_norm": 147.68495178222656, "learning_rate": 8.514844440730054e-07, "loss": 18.3906, "step": 16895 }, { "epoch": 1.1221358836421598, "grad_norm": 118.37702178955078, "learning_rate": 8.51378085719039e-07, "loss": 14.5469, "step": 16896 }, { "epoch": 1.1222022979345154, "grad_norm": 225.0634002685547, "learning_rate": 8.512717290842448e-07, "loss": 16.1875, "step": 16897 }, { "epoch": 1.1222687122268713, "grad_norm": 206.39268493652344, "learning_rate": 8.51165374169854e-07, "loss": 21.0625, "step": 16898 }, { "epoch": 1.122335126519227, "grad_norm": 290.86395263671875, "learning_rate": 8.510590209770961e-07, "loss": 16.7812, "step": 16899 }, { "epoch": 1.1224015408115826, "grad_norm": 239.31060791015625, "learning_rate": 8.509526695072018e-07, "loss": 11.6406, "step": 16900 }, { "epoch": 1.1224679551039383, "grad_norm": 244.85958862304688, "learning_rate": 8.508463197614008e-07, "loss": 19.375, "step": 16901 }, { "epoch": 1.1225343693962941, "grad_norm": 131.41876220703125, "learning_rate": 8.507399717409239e-07, "loss": 13.3125, "step": 16902 }, { "epoch": 1.1226007836886498, "grad_norm": 144.8700714111328, "learning_rate": 8.506336254470003e-07, "loss": 11.7344, "step": 16903 }, { "epoch": 1.1226671979810054, "grad_norm": 334.8257141113281, "learning_rate": 8.505272808808613e-07, "loss": 22.0, "step": 16904 }, { "epoch": 1.1227336122733613, "grad_norm": 141.8282928466797, "learning_rate": 8.504209380437364e-07, "loss": 17.2812, "step": 16905 }, { "epoch": 1.122800026565717, "grad_norm": 309.0475158691406, "learning_rate": 8.503145969368561e-07, "loss": 26.6562, "step": 16906 }, { "epoch": 1.1228664408580726, "grad_norm": 318.0658264160156, "learning_rate": 8.502082575614496e-07, "loss": 18.2344, "step": 16907 }, { "epoch": 1.1229328551504283, "grad_norm": 301.9139099121094, "learning_rate": 8.501019199187479e-07, "loss": 15.0, "step": 16908 }, { "epoch": 1.1229992694427842, "grad_norm": 142.3475341796875, "learning_rate": 8.49995584009981e-07, "loss": 17.7344, "step": 16909 }, { "epoch": 1.1230656837351398, "grad_norm": 197.8833770751953, "learning_rate": 8.498892498363782e-07, "loss": 14.0, "step": 16910 }, { "epoch": 1.1231320980274955, "grad_norm": 108.86505126953125, "learning_rate": 8.497829173991705e-07, "loss": 16.0625, "step": 16911 }, { "epoch": 1.1231985123198513, "grad_norm": 388.9773864746094, "learning_rate": 8.49676586699587e-07, "loss": 16.4531, "step": 16912 }, { "epoch": 1.123264926612207, "grad_norm": 174.1903076171875, "learning_rate": 8.495702577388586e-07, "loss": 18.7031, "step": 16913 }, { "epoch": 1.1233313409045627, "grad_norm": 217.71453857421875, "learning_rate": 8.494639305182141e-07, "loss": 13.8438, "step": 16914 }, { "epoch": 1.1233977551969183, "grad_norm": 279.633056640625, "learning_rate": 8.493576050388844e-07, "loss": 19.4844, "step": 16915 }, { "epoch": 1.1234641694892742, "grad_norm": 300.8330383300781, "learning_rate": 8.492512813020989e-07, "loss": 17.2188, "step": 16916 }, { "epoch": 1.1235305837816298, "grad_norm": 143.80592346191406, "learning_rate": 8.491449593090879e-07, "loss": 12.2812, "step": 16917 }, { "epoch": 1.1235969980739855, "grad_norm": 576.6347045898438, "learning_rate": 8.490386390610806e-07, "loss": 18.4844, "step": 16918 }, { "epoch": 1.1236634123663412, "grad_norm": 186.1205596923828, "learning_rate": 8.489323205593078e-07, "loss": 16.1719, "step": 16919 }, { "epoch": 1.123729826658697, "grad_norm": 217.06333923339844, "learning_rate": 8.488260038049983e-07, "loss": 17.0625, "step": 16920 }, { "epoch": 1.1237962409510527, "grad_norm": 154.3000946044922, "learning_rate": 8.48719688799383e-07, "loss": 15.5938, "step": 16921 }, { "epoch": 1.1238626552434083, "grad_norm": 190.8108673095703, "learning_rate": 8.486133755436906e-07, "loss": 14.0312, "step": 16922 }, { "epoch": 1.1239290695357642, "grad_norm": 210.32261657714844, "learning_rate": 8.485070640391518e-07, "loss": 14.2969, "step": 16923 }, { "epoch": 1.1239954838281199, "grad_norm": 264.20513916015625, "learning_rate": 8.484007542869953e-07, "loss": 28.1406, "step": 16924 }, { "epoch": 1.1240618981204755, "grad_norm": 337.3126220703125, "learning_rate": 8.482944462884521e-07, "loss": 16.3906, "step": 16925 }, { "epoch": 1.1241283124128312, "grad_norm": 252.0529327392578, "learning_rate": 8.481881400447508e-07, "loss": 16.1719, "step": 16926 }, { "epoch": 1.124194726705187, "grad_norm": 228.58839416503906, "learning_rate": 8.480818355571219e-07, "loss": 17.8125, "step": 16927 }, { "epoch": 1.1242611409975427, "grad_norm": 187.64468383789062, "learning_rate": 8.479755328267943e-07, "loss": 14.4922, "step": 16928 }, { "epoch": 1.1243275552898984, "grad_norm": 497.0110778808594, "learning_rate": 8.478692318549985e-07, "loss": 15.8281, "step": 16929 }, { "epoch": 1.124393969582254, "grad_norm": 353.951904296875, "learning_rate": 8.477629326429635e-07, "loss": 13.75, "step": 16930 }, { "epoch": 1.12446038387461, "grad_norm": 144.75045776367188, "learning_rate": 8.476566351919193e-07, "loss": 19.8438, "step": 16931 }, { "epoch": 1.1245267981669655, "grad_norm": 744.8181762695312, "learning_rate": 8.47550339503095e-07, "loss": 17.7188, "step": 16932 }, { "epoch": 1.1245932124593212, "grad_norm": 230.506103515625, "learning_rate": 8.474440455777207e-07, "loss": 12.25, "step": 16933 }, { "epoch": 1.124659626751677, "grad_norm": 119.24050903320312, "learning_rate": 8.473377534170255e-07, "loss": 13.0625, "step": 16934 }, { "epoch": 1.1247260410440327, "grad_norm": 3741.7236328125, "learning_rate": 8.472314630222395e-07, "loss": 15.2969, "step": 16935 }, { "epoch": 1.1247924553363884, "grad_norm": 407.2509460449219, "learning_rate": 8.471251743945913e-07, "loss": 15.4688, "step": 16936 }, { "epoch": 1.124858869628744, "grad_norm": 216.62173461914062, "learning_rate": 8.470188875353115e-07, "loss": 21.875, "step": 16937 }, { "epoch": 1.1249252839211, "grad_norm": 340.5565185546875, "learning_rate": 8.469126024456286e-07, "loss": 12.375, "step": 16938 }, { "epoch": 1.1249916982134556, "grad_norm": 93.65553283691406, "learning_rate": 8.468063191267727e-07, "loss": 11.5312, "step": 16939 }, { "epoch": 1.1250581125058112, "grad_norm": 262.74774169921875, "learning_rate": 8.467000375799727e-07, "loss": 16.8125, "step": 16940 }, { "epoch": 1.1251245267981669, "grad_norm": 296.2126770019531, "learning_rate": 8.465937578064587e-07, "loss": 18.4141, "step": 16941 }, { "epoch": 1.1251909410905228, "grad_norm": 228.24270629882812, "learning_rate": 8.46487479807459e-07, "loss": 15.75, "step": 16942 }, { "epoch": 1.1252573553828784, "grad_norm": 164.89877319335938, "learning_rate": 8.463812035842042e-07, "loss": 15.9844, "step": 16943 }, { "epoch": 1.125323769675234, "grad_norm": 178.36322021484375, "learning_rate": 8.462749291379228e-07, "loss": 13.4531, "step": 16944 }, { "epoch": 1.12539018396759, "grad_norm": 181.10592651367188, "learning_rate": 8.46168656469844e-07, "loss": 12.0625, "step": 16945 }, { "epoch": 1.1254565982599456, "grad_norm": 114.14218139648438, "learning_rate": 8.460623855811982e-07, "loss": 13.4141, "step": 16946 }, { "epoch": 1.1255230125523012, "grad_norm": 150.36134338378906, "learning_rate": 8.459561164732136e-07, "loss": 12.4688, "step": 16947 }, { "epoch": 1.125589426844657, "grad_norm": 365.0210266113281, "learning_rate": 8.4584984914712e-07, "loss": 20.1719, "step": 16948 }, { "epoch": 1.1256558411370128, "grad_norm": 193.89608764648438, "learning_rate": 8.45743583604146e-07, "loss": 15.8594, "step": 16949 }, { "epoch": 1.1257222554293684, "grad_norm": 122.2121810913086, "learning_rate": 8.456373198455218e-07, "loss": 13.6719, "step": 16950 }, { "epoch": 1.125788669721724, "grad_norm": 255.36708068847656, "learning_rate": 8.455310578724757e-07, "loss": 11.4062, "step": 16951 }, { "epoch": 1.1258550840140797, "grad_norm": 112.48672485351562, "learning_rate": 8.454247976862377e-07, "loss": 13.7812, "step": 16952 }, { "epoch": 1.1259214983064356, "grad_norm": 88.81465148925781, "learning_rate": 8.453185392880359e-07, "loss": 10.6094, "step": 16953 }, { "epoch": 1.1259879125987913, "grad_norm": 165.3690643310547, "learning_rate": 8.452122826791005e-07, "loss": 16.0625, "step": 16954 }, { "epoch": 1.126054326891147, "grad_norm": 269.99127197265625, "learning_rate": 8.4510602786066e-07, "loss": 17.2188, "step": 16955 }, { "epoch": 1.1261207411835028, "grad_norm": 106.60526275634766, "learning_rate": 8.449997748339438e-07, "loss": 13.6406, "step": 16956 }, { "epoch": 1.1261871554758585, "grad_norm": 152.88314819335938, "learning_rate": 8.448935236001804e-07, "loss": 16.5312, "step": 16957 }, { "epoch": 1.1262535697682141, "grad_norm": 312.3384094238281, "learning_rate": 8.447872741605998e-07, "loss": 16.0156, "step": 16958 }, { "epoch": 1.1263199840605698, "grad_norm": 190.4004669189453, "learning_rate": 8.446810265164301e-07, "loss": 14.9141, "step": 16959 }, { "epoch": 1.1263863983529256, "grad_norm": 269.7235412597656, "learning_rate": 8.445747806689012e-07, "loss": 17.7188, "step": 16960 }, { "epoch": 1.1264528126452813, "grad_norm": 179.44786071777344, "learning_rate": 8.44468536619241e-07, "loss": 17.2344, "step": 16961 }, { "epoch": 1.126519226937637, "grad_norm": 298.8189392089844, "learning_rate": 8.443622943686797e-07, "loss": 14.0781, "step": 16962 }, { "epoch": 1.1265856412299926, "grad_norm": 212.71690368652344, "learning_rate": 8.442560539184453e-07, "loss": 21.2812, "step": 16963 }, { "epoch": 1.1266520555223485, "grad_norm": 207.72836303710938, "learning_rate": 8.441498152697671e-07, "loss": 15.625, "step": 16964 }, { "epoch": 1.1267184698147041, "grad_norm": 337.6517333984375, "learning_rate": 8.44043578423874e-07, "loss": 23.0312, "step": 16965 }, { "epoch": 1.1267848841070598, "grad_norm": 418.83319091796875, "learning_rate": 8.439373433819949e-07, "loss": 23.5625, "step": 16966 }, { "epoch": 1.1268512983994157, "grad_norm": 1181.8177490234375, "learning_rate": 8.438311101453583e-07, "loss": 16.0781, "step": 16967 }, { "epoch": 1.1269177126917713, "grad_norm": 400.6572265625, "learning_rate": 8.437248787151939e-07, "loss": 19.9531, "step": 16968 }, { "epoch": 1.126984126984127, "grad_norm": 115.53910827636719, "learning_rate": 8.436186490927296e-07, "loss": 9.9609, "step": 16969 }, { "epoch": 1.1270505412764826, "grad_norm": 697.7265014648438, "learning_rate": 8.435124212791949e-07, "loss": 36.4062, "step": 16970 }, { "epoch": 1.1271169555688385, "grad_norm": 179.613037109375, "learning_rate": 8.434061952758178e-07, "loss": 20.7188, "step": 16971 }, { "epoch": 1.1271833698611942, "grad_norm": 165.94854736328125, "learning_rate": 8.43299971083828e-07, "loss": 13.4219, "step": 16972 }, { "epoch": 1.1272497841535498, "grad_norm": 129.79367065429688, "learning_rate": 8.431937487044535e-07, "loss": 13.4844, "step": 16973 }, { "epoch": 1.1273161984459055, "grad_norm": 439.6216735839844, "learning_rate": 8.430875281389235e-07, "loss": 19.1719, "step": 16974 }, { "epoch": 1.1273826127382613, "grad_norm": 170.61959838867188, "learning_rate": 8.429813093884661e-07, "loss": 14.9688, "step": 16975 }, { "epoch": 1.127449027030617, "grad_norm": 333.57305908203125, "learning_rate": 8.428750924543109e-07, "loss": 23.8594, "step": 16976 }, { "epoch": 1.1275154413229727, "grad_norm": 204.4817657470703, "learning_rate": 8.427688773376856e-07, "loss": 16.0938, "step": 16977 }, { "epoch": 1.1275818556153285, "grad_norm": 122.59380340576172, "learning_rate": 8.426626640398196e-07, "loss": 14.8438, "step": 16978 }, { "epoch": 1.1276482699076842, "grad_norm": 605.7463989257812, "learning_rate": 8.425564525619407e-07, "loss": 14.7188, "step": 16979 }, { "epoch": 1.1277146842000398, "grad_norm": 191.38026428222656, "learning_rate": 8.424502429052785e-07, "loss": 20.7812, "step": 16980 }, { "epoch": 1.1277810984923955, "grad_norm": 174.81117248535156, "learning_rate": 8.423440350710608e-07, "loss": 19.0469, "step": 16981 }, { "epoch": 1.1278475127847514, "grad_norm": 240.17723083496094, "learning_rate": 8.422378290605161e-07, "loss": 13.875, "step": 16982 }, { "epoch": 1.127913927077107, "grad_norm": 160.00904846191406, "learning_rate": 8.421316248748738e-07, "loss": 16.2344, "step": 16983 }, { "epoch": 1.1279803413694627, "grad_norm": 178.23260498046875, "learning_rate": 8.420254225153615e-07, "loss": 17.5625, "step": 16984 }, { "epoch": 1.1280467556618183, "grad_norm": 141.9241180419922, "learning_rate": 8.419192219832082e-07, "loss": 14.9844, "step": 16985 }, { "epoch": 1.1281131699541742, "grad_norm": 410.41363525390625, "learning_rate": 8.418130232796419e-07, "loss": 19.0156, "step": 16986 }, { "epoch": 1.1281795842465299, "grad_norm": 230.3223876953125, "learning_rate": 8.417068264058917e-07, "loss": 17.8125, "step": 16987 }, { "epoch": 1.1282459985388855, "grad_norm": 104.23890686035156, "learning_rate": 8.416006313631853e-07, "loss": 16.625, "step": 16988 }, { "epoch": 1.1283124128312414, "grad_norm": 196.20407104492188, "learning_rate": 8.414944381527518e-07, "loss": 18.6094, "step": 16989 }, { "epoch": 1.128378827123597, "grad_norm": 103.24414825439453, "learning_rate": 8.41388246775819e-07, "loss": 14.875, "step": 16990 }, { "epoch": 1.1284452414159527, "grad_norm": 275.2757873535156, "learning_rate": 8.412820572336156e-07, "loss": 12.8125, "step": 16991 }, { "epoch": 1.1285116557083084, "grad_norm": 444.03692626953125, "learning_rate": 8.411758695273696e-07, "loss": 24.5625, "step": 16992 }, { "epoch": 1.1285780700006642, "grad_norm": 174.43240356445312, "learning_rate": 8.410696836583099e-07, "loss": 18.5625, "step": 16993 }, { "epoch": 1.12864448429302, "grad_norm": 242.0133514404297, "learning_rate": 8.409634996276642e-07, "loss": 17.0, "step": 16994 }, { "epoch": 1.1287108985853755, "grad_norm": 216.24505615234375, "learning_rate": 8.408573174366613e-07, "loss": 12.6719, "step": 16995 }, { "epoch": 1.1287773128777312, "grad_norm": 254.07476806640625, "learning_rate": 8.407511370865288e-07, "loss": 14.7812, "step": 16996 }, { "epoch": 1.128843727170087, "grad_norm": 463.006103515625, "learning_rate": 8.406449585784957e-07, "loss": 20.1562, "step": 16997 }, { "epoch": 1.1289101414624427, "grad_norm": 242.29664611816406, "learning_rate": 8.405387819137897e-07, "loss": 19.0312, "step": 16998 }, { "epoch": 1.1289765557547984, "grad_norm": 339.78515625, "learning_rate": 8.404326070936391e-07, "loss": 17.0, "step": 16999 }, { "epoch": 1.1290429700471543, "grad_norm": 231.1496124267578, "learning_rate": 8.40326434119272e-07, "loss": 18.6094, "step": 17000 }, { "epoch": 1.12910938433951, "grad_norm": 190.3947296142578, "learning_rate": 8.402202629919168e-07, "loss": 14.0938, "step": 17001 }, { "epoch": 1.1291757986318656, "grad_norm": 369.1614990234375, "learning_rate": 8.401140937128013e-07, "loss": 17.3594, "step": 17002 }, { "epoch": 1.1292422129242212, "grad_norm": 358.6080322265625, "learning_rate": 8.400079262831541e-07, "loss": 17.125, "step": 17003 }, { "epoch": 1.129308627216577, "grad_norm": 103.39234161376953, "learning_rate": 8.399017607042024e-07, "loss": 12.2969, "step": 17004 }, { "epoch": 1.1293750415089328, "grad_norm": 275.64410400390625, "learning_rate": 8.397955969771755e-07, "loss": 16.5625, "step": 17005 }, { "epoch": 1.1294414558012884, "grad_norm": 282.6098327636719, "learning_rate": 8.396894351033001e-07, "loss": 15.625, "step": 17006 }, { "epoch": 1.129507870093644, "grad_norm": 172.4097137451172, "learning_rate": 8.395832750838053e-07, "loss": 13.5938, "step": 17007 }, { "epoch": 1.129574284386, "grad_norm": 112.85791778564453, "learning_rate": 8.394771169199184e-07, "loss": 13.2344, "step": 17008 }, { "epoch": 1.1296406986783556, "grad_norm": 257.1070251464844, "learning_rate": 8.39370960612868e-07, "loss": 17.4219, "step": 17009 }, { "epoch": 1.1297071129707112, "grad_norm": 194.50233459472656, "learning_rate": 8.392648061638812e-07, "loss": 19.3906, "step": 17010 }, { "epoch": 1.1297735272630671, "grad_norm": 159.99525451660156, "learning_rate": 8.391586535741868e-07, "loss": 17.4844, "step": 17011 }, { "epoch": 1.1298399415554228, "grad_norm": 253.16864013671875, "learning_rate": 8.390525028450123e-07, "loss": 18.3594, "step": 17012 }, { "epoch": 1.1299063558477784, "grad_norm": 590.1323852539062, "learning_rate": 8.389463539775857e-07, "loss": 12.5156, "step": 17013 }, { "epoch": 1.129972770140134, "grad_norm": 285.8017272949219, "learning_rate": 8.388402069731344e-07, "loss": 14.125, "step": 17014 }, { "epoch": 1.13003918443249, "grad_norm": 288.99859619140625, "learning_rate": 8.387340618328872e-07, "loss": 16.625, "step": 17015 }, { "epoch": 1.1301055987248456, "grad_norm": 263.3305358886719, "learning_rate": 8.386279185580711e-07, "loss": 25.7188, "step": 17016 }, { "epoch": 1.1301720130172013, "grad_norm": 515.9423217773438, "learning_rate": 8.385217771499144e-07, "loss": 28.6406, "step": 17017 }, { "epoch": 1.130238427309557, "grad_norm": 295.8516845703125, "learning_rate": 8.384156376096442e-07, "loss": 16.1562, "step": 17018 }, { "epoch": 1.1303048416019128, "grad_norm": 173.19741821289062, "learning_rate": 8.38309499938489e-07, "loss": 14.2812, "step": 17019 }, { "epoch": 1.1303712558942685, "grad_norm": 110.78089904785156, "learning_rate": 8.382033641376764e-07, "loss": 12.0469, "step": 17020 }, { "epoch": 1.130437670186624, "grad_norm": 170.33363342285156, "learning_rate": 8.380972302084337e-07, "loss": 20.25, "step": 17021 }, { "epoch": 1.13050408447898, "grad_norm": 191.97442626953125, "learning_rate": 8.379910981519892e-07, "loss": 17.9531, "step": 17022 }, { "epoch": 1.1305704987713356, "grad_norm": 125.51073455810547, "learning_rate": 8.378849679695701e-07, "loss": 12.4219, "step": 17023 }, { "epoch": 1.1306369130636913, "grad_norm": 190.06031799316406, "learning_rate": 8.377788396624044e-07, "loss": 15.25, "step": 17024 }, { "epoch": 1.130703327356047, "grad_norm": 350.9651184082031, "learning_rate": 8.37672713231719e-07, "loss": 11.4531, "step": 17025 }, { "epoch": 1.1307697416484028, "grad_norm": 274.3808288574219, "learning_rate": 8.375665886787426e-07, "loss": 18.1484, "step": 17026 }, { "epoch": 1.1308361559407585, "grad_norm": 219.4838104248047, "learning_rate": 8.37460466004702e-07, "loss": 13.8125, "step": 17027 }, { "epoch": 1.1309025702331141, "grad_norm": 270.58868408203125, "learning_rate": 8.373543452108252e-07, "loss": 14.5312, "step": 17028 }, { "epoch": 1.1309689845254698, "grad_norm": 214.2495880126953, "learning_rate": 8.372482262983393e-07, "loss": 17.4844, "step": 17029 }, { "epoch": 1.1310353988178257, "grad_norm": 523.8204345703125, "learning_rate": 8.371421092684725e-07, "loss": 19.0625, "step": 17030 }, { "epoch": 1.1311018131101813, "grad_norm": 458.144775390625, "learning_rate": 8.370359941224512e-07, "loss": 16.0781, "step": 17031 }, { "epoch": 1.131168227402537, "grad_norm": 335.9231872558594, "learning_rate": 8.36929880861504e-07, "loss": 19.8438, "step": 17032 }, { "epoch": 1.1312346416948929, "grad_norm": 99.35057830810547, "learning_rate": 8.368237694868578e-07, "loss": 12.3906, "step": 17033 }, { "epoch": 1.1313010559872485, "grad_norm": 267.6676330566406, "learning_rate": 8.367176599997404e-07, "loss": 16.2969, "step": 17034 }, { "epoch": 1.1313674702796042, "grad_norm": 171.22056579589844, "learning_rate": 8.366115524013784e-07, "loss": 18.7969, "step": 17035 }, { "epoch": 1.1314338845719598, "grad_norm": 130.4682159423828, "learning_rate": 8.365054466930001e-07, "loss": 13.5312, "step": 17036 }, { "epoch": 1.1315002988643157, "grad_norm": 277.4872741699219, "learning_rate": 8.363993428758325e-07, "loss": 16.5, "step": 17037 }, { "epoch": 1.1315667131566713, "grad_norm": 168.9728546142578, "learning_rate": 8.362932409511031e-07, "loss": 14.2188, "step": 17038 }, { "epoch": 1.131633127449027, "grad_norm": 116.38379669189453, "learning_rate": 8.361871409200386e-07, "loss": 14.4219, "step": 17039 }, { "epoch": 1.1316995417413827, "grad_norm": 263.3050231933594, "learning_rate": 8.360810427838673e-07, "loss": 14.625, "step": 17040 }, { "epoch": 1.1317659560337385, "grad_norm": 175.65982055664062, "learning_rate": 8.359749465438158e-07, "loss": 18.9219, "step": 17041 }, { "epoch": 1.1318323703260942, "grad_norm": 317.6545715332031, "learning_rate": 8.358688522011117e-07, "loss": 19.2188, "step": 17042 }, { "epoch": 1.1318987846184498, "grad_norm": 113.65638732910156, "learning_rate": 8.357627597569816e-07, "loss": 13.5469, "step": 17043 }, { "epoch": 1.1319651989108057, "grad_norm": 202.4790496826172, "learning_rate": 8.356566692126537e-07, "loss": 15.25, "step": 17044 }, { "epoch": 1.1320316132031614, "grad_norm": 135.03810119628906, "learning_rate": 8.355505805693543e-07, "loss": 15.1719, "step": 17045 }, { "epoch": 1.132098027495517, "grad_norm": 316.67755126953125, "learning_rate": 8.354444938283114e-07, "loss": 18.0938, "step": 17046 }, { "epoch": 1.1321644417878727, "grad_norm": 145.8456268310547, "learning_rate": 8.353384089907511e-07, "loss": 18.2656, "step": 17047 }, { "epoch": 1.1322308560802286, "grad_norm": 82.59465026855469, "learning_rate": 8.352323260579019e-07, "loss": 12.2812, "step": 17048 }, { "epoch": 1.1322972703725842, "grad_norm": 240.16754150390625, "learning_rate": 8.351262450309895e-07, "loss": 14.0156, "step": 17049 }, { "epoch": 1.1323636846649399, "grad_norm": 214.54049682617188, "learning_rate": 8.350201659112419e-07, "loss": 15.1094, "step": 17050 }, { "epoch": 1.1324300989572955, "grad_norm": 184.40415954589844, "learning_rate": 8.349140886998858e-07, "loss": 19.4844, "step": 17051 }, { "epoch": 1.1324965132496514, "grad_norm": 172.16763305664062, "learning_rate": 8.348080133981486e-07, "loss": 13.5312, "step": 17052 }, { "epoch": 1.132562927542007, "grad_norm": 217.75762939453125, "learning_rate": 8.347019400072565e-07, "loss": 16.9688, "step": 17053 }, { "epoch": 1.1326293418343627, "grad_norm": 158.68560791015625, "learning_rate": 8.345958685284376e-07, "loss": 13.4375, "step": 17054 }, { "epoch": 1.1326957561267186, "grad_norm": 364.32373046875, "learning_rate": 8.344897989629179e-07, "loss": 16.9375, "step": 17055 }, { "epoch": 1.1327621704190742, "grad_norm": 209.6809539794922, "learning_rate": 8.343837313119246e-07, "loss": 16.1406, "step": 17056 }, { "epoch": 1.1328285847114299, "grad_norm": 791.5152587890625, "learning_rate": 8.342776655766853e-07, "loss": 20.5625, "step": 17057 }, { "epoch": 1.1328949990037855, "grad_norm": 344.494140625, "learning_rate": 8.341716017584262e-07, "loss": 20.5781, "step": 17058 }, { "epoch": 1.1329614132961414, "grad_norm": 169.1805877685547, "learning_rate": 8.340655398583745e-07, "loss": 15.0156, "step": 17059 }, { "epoch": 1.133027827588497, "grad_norm": 171.176025390625, "learning_rate": 8.339594798777567e-07, "loss": 23.9375, "step": 17060 }, { "epoch": 1.1330942418808527, "grad_norm": 135.69569396972656, "learning_rate": 8.338534218178001e-07, "loss": 13.9688, "step": 17061 }, { "epoch": 1.1331606561732084, "grad_norm": 144.9289093017578, "learning_rate": 8.337473656797314e-07, "loss": 14.7188, "step": 17062 }, { "epoch": 1.1332270704655643, "grad_norm": 125.53462982177734, "learning_rate": 8.336413114647773e-07, "loss": 14.3594, "step": 17063 }, { "epoch": 1.13329348475792, "grad_norm": 220.91159057617188, "learning_rate": 8.335352591741642e-07, "loss": 16.4844, "step": 17064 }, { "epoch": 1.1333598990502756, "grad_norm": 245.1351776123047, "learning_rate": 8.334292088091197e-07, "loss": 21.5156, "step": 17065 }, { "epoch": 1.1334263133426314, "grad_norm": 132.77467346191406, "learning_rate": 8.333231603708696e-07, "loss": 11.3047, "step": 17066 }, { "epoch": 1.133492727634987, "grad_norm": 182.02713012695312, "learning_rate": 8.332171138606416e-07, "loss": 15.2188, "step": 17067 }, { "epoch": 1.1335591419273428, "grad_norm": 358.5211181640625, "learning_rate": 8.331110692796613e-07, "loss": 21.7969, "step": 17068 }, { "epoch": 1.1336255562196984, "grad_norm": 337.1881408691406, "learning_rate": 8.330050266291566e-07, "loss": 17.1562, "step": 17069 }, { "epoch": 1.1336919705120543, "grad_norm": 135.72412109375, "learning_rate": 8.328989859103528e-07, "loss": 11.6953, "step": 17070 }, { "epoch": 1.13375838480441, "grad_norm": 188.9627685546875, "learning_rate": 8.327929471244776e-07, "loss": 13.1719, "step": 17071 }, { "epoch": 1.1338247990967656, "grad_norm": 146.29324340820312, "learning_rate": 8.326869102727569e-07, "loss": 20.6875, "step": 17072 }, { "epoch": 1.1338912133891212, "grad_norm": 124.7835464477539, "learning_rate": 8.325808753564179e-07, "loss": 15.8281, "step": 17073 }, { "epoch": 1.1339576276814771, "grad_norm": 289.7815856933594, "learning_rate": 8.324748423766862e-07, "loss": 18.0469, "step": 17074 }, { "epoch": 1.1340240419738328, "grad_norm": 154.803466796875, "learning_rate": 8.323688113347895e-07, "loss": 21.0, "step": 17075 }, { "epoch": 1.1340904562661884, "grad_norm": 163.1384735107422, "learning_rate": 8.322627822319534e-07, "loss": 16.4062, "step": 17076 }, { "epoch": 1.1341568705585443, "grad_norm": 116.26031494140625, "learning_rate": 8.321567550694048e-07, "loss": 13.25, "step": 17077 }, { "epoch": 1.1342232848509, "grad_norm": 570.6826782226562, "learning_rate": 8.320507298483699e-07, "loss": 14.3906, "step": 17078 }, { "epoch": 1.1342896991432556, "grad_norm": 211.30271911621094, "learning_rate": 8.319447065700756e-07, "loss": 18.1562, "step": 17079 }, { "epoch": 1.1343561134356113, "grad_norm": 99.38389587402344, "learning_rate": 8.31838685235748e-07, "loss": 12.6875, "step": 17080 }, { "epoch": 1.1344225277279671, "grad_norm": 170.68055725097656, "learning_rate": 8.317326658466135e-07, "loss": 16.2969, "step": 17081 }, { "epoch": 1.1344889420203228, "grad_norm": 126.29235076904297, "learning_rate": 8.316266484038981e-07, "loss": 13.2891, "step": 17082 }, { "epoch": 1.1345553563126785, "grad_norm": 258.0787658691406, "learning_rate": 8.315206329088291e-07, "loss": 21.3125, "step": 17083 }, { "epoch": 1.134621770605034, "grad_norm": 271.9181823730469, "learning_rate": 8.31414619362632e-07, "loss": 15.875, "step": 17084 }, { "epoch": 1.13468818489739, "grad_norm": 142.8695068359375, "learning_rate": 8.313086077665336e-07, "loss": 12.9062, "step": 17085 }, { "epoch": 1.1347545991897456, "grad_norm": 136.4069061279297, "learning_rate": 8.312025981217594e-07, "loss": 21.3125, "step": 17086 }, { "epoch": 1.1348210134821013, "grad_norm": 180.6735076904297, "learning_rate": 8.310965904295369e-07, "loss": 14.6875, "step": 17087 }, { "epoch": 1.1348874277744572, "grad_norm": 117.94061279296875, "learning_rate": 8.309905846910912e-07, "loss": 12.9531, "step": 17088 }, { "epoch": 1.1349538420668128, "grad_norm": 157.68760681152344, "learning_rate": 8.308845809076492e-07, "loss": 16.3438, "step": 17089 }, { "epoch": 1.1350202563591685, "grad_norm": 559.9027709960938, "learning_rate": 8.307785790804366e-07, "loss": 12.4219, "step": 17090 }, { "epoch": 1.1350866706515241, "grad_norm": 236.2079315185547, "learning_rate": 8.306725792106797e-07, "loss": 13.8281, "step": 17091 }, { "epoch": 1.13515308494388, "grad_norm": 206.11338806152344, "learning_rate": 8.305665812996052e-07, "loss": 16.3047, "step": 17092 }, { "epoch": 1.1352194992362357, "grad_norm": 869.9733276367188, "learning_rate": 8.304605853484385e-07, "loss": 24.2344, "step": 17093 }, { "epoch": 1.1352859135285913, "grad_norm": 320.1970520019531, "learning_rate": 8.303545913584064e-07, "loss": 15.1719, "step": 17094 }, { "epoch": 1.135352327820947, "grad_norm": 299.2796936035156, "learning_rate": 8.30248599330734e-07, "loss": 20.0938, "step": 17095 }, { "epoch": 1.1354187421133028, "grad_norm": 213.47332763671875, "learning_rate": 8.301426092666483e-07, "loss": 15.0, "step": 17096 }, { "epoch": 1.1354851564056585, "grad_norm": 115.16649627685547, "learning_rate": 8.300366211673747e-07, "loss": 13.6016, "step": 17097 }, { "epoch": 1.1355515706980142, "grad_norm": 112.70270538330078, "learning_rate": 8.299306350341397e-07, "loss": 15.7188, "step": 17098 }, { "epoch": 1.13561798499037, "grad_norm": 760.325927734375, "learning_rate": 8.298246508681688e-07, "loss": 13.9062, "step": 17099 }, { "epoch": 1.1356843992827257, "grad_norm": 317.85107421875, "learning_rate": 8.297186686706884e-07, "loss": 17.6719, "step": 17100 }, { "epoch": 1.1357508135750813, "grad_norm": 229.15199279785156, "learning_rate": 8.296126884429242e-07, "loss": 23.0312, "step": 17101 }, { "epoch": 1.135817227867437, "grad_norm": 163.5894012451172, "learning_rate": 8.295067101861023e-07, "loss": 14.7344, "step": 17102 }, { "epoch": 1.1358836421597929, "grad_norm": 214.5677032470703, "learning_rate": 8.29400733901448e-07, "loss": 15.9219, "step": 17103 }, { "epoch": 1.1359500564521485, "grad_norm": 363.61895751953125, "learning_rate": 8.292947595901881e-07, "loss": 17.0938, "step": 17104 }, { "epoch": 1.1360164707445042, "grad_norm": 177.50070190429688, "learning_rate": 8.291887872535479e-07, "loss": 16.1875, "step": 17105 }, { "epoch": 1.1360828850368598, "grad_norm": 178.35986328125, "learning_rate": 8.290828168927534e-07, "loss": 17.2031, "step": 17106 }, { "epoch": 1.1361492993292157, "grad_norm": 129.962890625, "learning_rate": 8.289768485090299e-07, "loss": 13.25, "step": 17107 }, { "epoch": 1.1362157136215714, "grad_norm": 476.62493896484375, "learning_rate": 8.28870882103604e-07, "loss": 17.5469, "step": 17108 }, { "epoch": 1.136282127913927, "grad_norm": 288.8219909667969, "learning_rate": 8.287649176777008e-07, "loss": 14.7188, "step": 17109 }, { "epoch": 1.136348542206283, "grad_norm": 701.0527954101562, "learning_rate": 8.286589552325467e-07, "loss": 32.0312, "step": 17110 }, { "epoch": 1.1364149564986386, "grad_norm": 205.141357421875, "learning_rate": 8.285529947693664e-07, "loss": 16.0938, "step": 17111 }, { "epoch": 1.1364813707909942, "grad_norm": 120.78765869140625, "learning_rate": 8.284470362893869e-07, "loss": 15.0156, "step": 17112 }, { "epoch": 1.1365477850833499, "grad_norm": 221.70970153808594, "learning_rate": 8.283410797938326e-07, "loss": 18.4688, "step": 17113 }, { "epoch": 1.1366141993757057, "grad_norm": 215.2913360595703, "learning_rate": 8.2823512528393e-07, "loss": 15.1562, "step": 17114 }, { "epoch": 1.1366806136680614, "grad_norm": 96.05309295654297, "learning_rate": 8.281291727609043e-07, "loss": 12.6172, "step": 17115 }, { "epoch": 1.136747027960417, "grad_norm": 158.72119140625, "learning_rate": 8.280232222259813e-07, "loss": 15.625, "step": 17116 }, { "epoch": 1.1368134422527727, "grad_norm": 191.453369140625, "learning_rate": 8.279172736803862e-07, "loss": 15.2969, "step": 17117 }, { "epoch": 1.1368798565451286, "grad_norm": 185.68873596191406, "learning_rate": 8.278113271253453e-07, "loss": 16.6562, "step": 17118 }, { "epoch": 1.1369462708374842, "grad_norm": 135.23606872558594, "learning_rate": 8.277053825620834e-07, "loss": 16.1719, "step": 17119 }, { "epoch": 1.1370126851298399, "grad_norm": 350.5794982910156, "learning_rate": 8.275994399918265e-07, "loss": 21.5625, "step": 17120 }, { "epoch": 1.1370790994221958, "grad_norm": 292.3879089355469, "learning_rate": 8.274934994157994e-07, "loss": 18.5156, "step": 17121 }, { "epoch": 1.1371455137145514, "grad_norm": 190.98849487304688, "learning_rate": 8.273875608352286e-07, "loss": 16.8281, "step": 17122 }, { "epoch": 1.137211928006907, "grad_norm": 267.2025451660156, "learning_rate": 8.272816242513385e-07, "loss": 16.0625, "step": 17123 }, { "epoch": 1.1372783422992627, "grad_norm": 178.805419921875, "learning_rate": 8.271756896653554e-07, "loss": 12.625, "step": 17124 }, { "epoch": 1.1373447565916186, "grad_norm": 293.57012939453125, "learning_rate": 8.270697570785038e-07, "loss": 21.4531, "step": 17125 }, { "epoch": 1.1374111708839743, "grad_norm": 443.8506164550781, "learning_rate": 8.269638264920099e-07, "loss": 23.8281, "step": 17126 }, { "epoch": 1.13747758517633, "grad_norm": 279.4477844238281, "learning_rate": 8.268578979070986e-07, "loss": 16.75, "step": 17127 }, { "epoch": 1.1375439994686856, "grad_norm": 155.7620391845703, "learning_rate": 8.267519713249948e-07, "loss": 18.25, "step": 17128 }, { "epoch": 1.1376104137610414, "grad_norm": 256.3530578613281, "learning_rate": 8.26646046746925e-07, "loss": 14.7344, "step": 17129 }, { "epoch": 1.137676828053397, "grad_norm": 191.32867431640625, "learning_rate": 8.265401241741135e-07, "loss": 12.4844, "step": 17130 }, { "epoch": 1.1377432423457527, "grad_norm": 202.2811737060547, "learning_rate": 8.264342036077861e-07, "loss": 14.6719, "step": 17131 }, { "epoch": 1.1378096566381086, "grad_norm": 271.8924560546875, "learning_rate": 8.26328285049167e-07, "loss": 13.5469, "step": 17132 }, { "epoch": 1.1378760709304643, "grad_norm": 291.2768249511719, "learning_rate": 8.262223684994831e-07, "loss": 18.8594, "step": 17133 }, { "epoch": 1.13794248522282, "grad_norm": 890.7680053710938, "learning_rate": 8.261164539599579e-07, "loss": 15.4531, "step": 17134 }, { "epoch": 1.1380088995151756, "grad_norm": 244.66110229492188, "learning_rate": 8.260105414318177e-07, "loss": 13.2656, "step": 17135 }, { "epoch": 1.1380753138075315, "grad_norm": 491.8146057128906, "learning_rate": 8.259046309162872e-07, "loss": 14.625, "step": 17136 }, { "epoch": 1.1381417280998871, "grad_norm": 299.013427734375, "learning_rate": 8.257987224145915e-07, "loss": 19.875, "step": 17137 }, { "epoch": 1.1382081423922428, "grad_norm": 264.2525329589844, "learning_rate": 8.256928159279554e-07, "loss": 13.0, "step": 17138 }, { "epoch": 1.1382745566845984, "grad_norm": 178.4418182373047, "learning_rate": 8.255869114576047e-07, "loss": 19.2422, "step": 17139 }, { "epoch": 1.1383409709769543, "grad_norm": 158.6263885498047, "learning_rate": 8.254810090047638e-07, "loss": 15.4062, "step": 17140 }, { "epoch": 1.13840738526931, "grad_norm": 296.7228698730469, "learning_rate": 8.253751085706583e-07, "loss": 23.4375, "step": 17141 }, { "epoch": 1.1384737995616656, "grad_norm": 132.56446838378906, "learning_rate": 8.252692101565123e-07, "loss": 14.5625, "step": 17142 }, { "epoch": 1.1385402138540215, "grad_norm": 293.4795227050781, "learning_rate": 8.251633137635518e-07, "loss": 15.5156, "step": 17143 }, { "epoch": 1.1386066281463771, "grad_norm": 220.47434997558594, "learning_rate": 8.25057419393001e-07, "loss": 18.4062, "step": 17144 }, { "epoch": 1.1386730424387328, "grad_norm": 235.96095275878906, "learning_rate": 8.249515270460854e-07, "loss": 18.9141, "step": 17145 }, { "epoch": 1.1387394567310885, "grad_norm": 105.16824340820312, "learning_rate": 8.24845636724029e-07, "loss": 11.3281, "step": 17146 }, { "epoch": 1.1388058710234443, "grad_norm": 188.28648376464844, "learning_rate": 8.247397484280578e-07, "loss": 18.3125, "step": 17147 }, { "epoch": 1.1388722853158, "grad_norm": 140.5703887939453, "learning_rate": 8.24633862159396e-07, "loss": 14.75, "step": 17148 }, { "epoch": 1.1389386996081556, "grad_norm": 189.76461791992188, "learning_rate": 8.245279779192686e-07, "loss": 16.3984, "step": 17149 }, { "epoch": 1.1390051139005113, "grad_norm": 214.3081512451172, "learning_rate": 8.244220957089001e-07, "loss": 18.2812, "step": 17150 }, { "epoch": 1.1390715281928672, "grad_norm": 204.8388214111328, "learning_rate": 8.24316215529516e-07, "loss": 18.875, "step": 17151 }, { "epoch": 1.1391379424852228, "grad_norm": 428.29547119140625, "learning_rate": 8.242103373823404e-07, "loss": 16.0938, "step": 17152 }, { "epoch": 1.1392043567775785, "grad_norm": 282.4804382324219, "learning_rate": 8.241044612685983e-07, "loss": 15.7031, "step": 17153 }, { "epoch": 1.1392707710699344, "grad_norm": 123.56965637207031, "learning_rate": 8.239985871895143e-07, "loss": 14.5, "step": 17154 }, { "epoch": 1.13933718536229, "grad_norm": 381.25177001953125, "learning_rate": 8.238927151463133e-07, "loss": 14.4688, "step": 17155 }, { "epoch": 1.1394035996546457, "grad_norm": 177.52452087402344, "learning_rate": 8.237868451402193e-07, "loss": 20.9688, "step": 17156 }, { "epoch": 1.1394700139470013, "grad_norm": 110.02488708496094, "learning_rate": 8.236809771724581e-07, "loss": 14.0781, "step": 17157 }, { "epoch": 1.1395364282393572, "grad_norm": 150.6873779296875, "learning_rate": 8.235751112442532e-07, "loss": 16.4062, "step": 17158 }, { "epoch": 1.1396028425317128, "grad_norm": 319.8907470703125, "learning_rate": 8.234692473568302e-07, "loss": 22.6562, "step": 17159 }, { "epoch": 1.1396692568240685, "grad_norm": 230.7367401123047, "learning_rate": 8.233633855114126e-07, "loss": 20.7656, "step": 17160 }, { "epoch": 1.1397356711164242, "grad_norm": 150.4793701171875, "learning_rate": 8.232575257092259e-07, "loss": 15.2188, "step": 17161 }, { "epoch": 1.13980208540878, "grad_norm": 206.85821533203125, "learning_rate": 8.23151667951494e-07, "loss": 15.0625, "step": 17162 }, { "epoch": 1.1398684997011357, "grad_norm": 252.26895141601562, "learning_rate": 8.230458122394419e-07, "loss": 14.1562, "step": 17163 }, { "epoch": 1.1399349139934913, "grad_norm": 181.9475860595703, "learning_rate": 8.229399585742933e-07, "loss": 13.7812, "step": 17164 }, { "epoch": 1.1400013282858472, "grad_norm": 213.92628479003906, "learning_rate": 8.228341069572735e-07, "loss": 20.875, "step": 17165 }, { "epoch": 1.1400677425782029, "grad_norm": 294.2215576171875, "learning_rate": 8.227282573896068e-07, "loss": 15.3594, "step": 17166 }, { "epoch": 1.1401341568705585, "grad_norm": 145.3199462890625, "learning_rate": 8.226224098725168e-07, "loss": 15.75, "step": 17167 }, { "epoch": 1.1402005711629142, "grad_norm": 292.8584899902344, "learning_rate": 8.225165644072291e-07, "loss": 16.2969, "step": 17168 }, { "epoch": 1.14026698545527, "grad_norm": 207.91358947753906, "learning_rate": 8.224107209949671e-07, "loss": 19.3125, "step": 17169 }, { "epoch": 1.1403333997476257, "grad_norm": 314.93145751953125, "learning_rate": 8.223048796369557e-07, "loss": 19.7344, "step": 17170 }, { "epoch": 1.1403998140399814, "grad_norm": 132.86131286621094, "learning_rate": 8.221990403344185e-07, "loss": 12.7188, "step": 17171 }, { "epoch": 1.140466228332337, "grad_norm": 169.8621063232422, "learning_rate": 8.220932030885808e-07, "loss": 14.5078, "step": 17172 }, { "epoch": 1.140532642624693, "grad_norm": 129.93638610839844, "learning_rate": 8.219873679006662e-07, "loss": 13.0, "step": 17173 }, { "epoch": 1.1405990569170485, "grad_norm": 261.0923156738281, "learning_rate": 8.218815347718993e-07, "loss": 15.9219, "step": 17174 }, { "epoch": 1.1406654712094042, "grad_norm": 207.37741088867188, "learning_rate": 8.217757037035037e-07, "loss": 12.5469, "step": 17175 }, { "epoch": 1.14073188550176, "grad_norm": 352.2001037597656, "learning_rate": 8.216698746967044e-07, "loss": 17.875, "step": 17176 }, { "epoch": 1.1407982997941157, "grad_norm": 165.10511779785156, "learning_rate": 8.215640477527246e-07, "loss": 15.4844, "step": 17177 }, { "epoch": 1.1408647140864714, "grad_norm": 143.5587921142578, "learning_rate": 8.214582228727895e-07, "loss": 18.0781, "step": 17178 }, { "epoch": 1.140931128378827, "grad_norm": 190.88514709472656, "learning_rate": 8.213524000581223e-07, "loss": 14.6875, "step": 17179 }, { "epoch": 1.140997542671183, "grad_norm": 154.6764373779297, "learning_rate": 8.21246579309948e-07, "loss": 17.8281, "step": 17180 }, { "epoch": 1.1410639569635386, "grad_norm": 581.4891357421875, "learning_rate": 8.211407606294897e-07, "loss": 24.7812, "step": 17181 }, { "epoch": 1.1411303712558942, "grad_norm": 204.0880889892578, "learning_rate": 8.210349440179722e-07, "loss": 17.0625, "step": 17182 }, { "epoch": 1.1411967855482499, "grad_norm": 167.9873809814453, "learning_rate": 8.209291294766193e-07, "loss": 13.0469, "step": 17183 }, { "epoch": 1.1412631998406058, "grad_norm": 113.92766571044922, "learning_rate": 8.208233170066551e-07, "loss": 10.875, "step": 17184 }, { "epoch": 1.1413296141329614, "grad_norm": 322.72747802734375, "learning_rate": 8.207175066093031e-07, "loss": 22.5, "step": 17185 }, { "epoch": 1.141396028425317, "grad_norm": 217.6896209716797, "learning_rate": 8.206116982857878e-07, "loss": 15.6094, "step": 17186 }, { "epoch": 1.141462442717673, "grad_norm": 244.53309631347656, "learning_rate": 8.205058920373329e-07, "loss": 18.2656, "step": 17187 }, { "epoch": 1.1415288570100286, "grad_norm": 169.14332580566406, "learning_rate": 8.204000878651624e-07, "loss": 15.8125, "step": 17188 }, { "epoch": 1.1415952713023843, "grad_norm": 172.5889434814453, "learning_rate": 8.202942857705e-07, "loss": 16.8125, "step": 17189 }, { "epoch": 1.14166168559474, "grad_norm": 275.5201110839844, "learning_rate": 8.201884857545698e-07, "loss": 21.1562, "step": 17190 }, { "epoch": 1.1417280998870958, "grad_norm": 209.91673278808594, "learning_rate": 8.200826878185955e-07, "loss": 18.4062, "step": 17191 }, { "epoch": 1.1417945141794514, "grad_norm": 419.142333984375, "learning_rate": 8.19976891963801e-07, "loss": 14.0781, "step": 17192 }, { "epoch": 1.141860928471807, "grad_norm": 125.24259185791016, "learning_rate": 8.198710981914096e-07, "loss": 14.9688, "step": 17193 }, { "epoch": 1.1419273427641627, "grad_norm": 153.98007202148438, "learning_rate": 8.197653065026462e-07, "loss": 12.6406, "step": 17194 }, { "epoch": 1.1419937570565186, "grad_norm": 169.26988220214844, "learning_rate": 8.19659516898733e-07, "loss": 17.4688, "step": 17195 }, { "epoch": 1.1420601713488743, "grad_norm": 224.44473266601562, "learning_rate": 8.195537293808951e-07, "loss": 16.1094, "step": 17196 }, { "epoch": 1.14212658564123, "grad_norm": 605.567626953125, "learning_rate": 8.194479439503553e-07, "loss": 21.1562, "step": 17197 }, { "epoch": 1.1421929999335858, "grad_norm": 245.24530029296875, "learning_rate": 8.193421606083379e-07, "loss": 19.4062, "step": 17198 }, { "epoch": 1.1422594142259415, "grad_norm": 264.4737243652344, "learning_rate": 8.192363793560656e-07, "loss": 14.7109, "step": 17199 }, { "epoch": 1.1423258285182971, "grad_norm": 192.42295837402344, "learning_rate": 8.191306001947632e-07, "loss": 13.9062, "step": 17200 }, { "epoch": 1.1423922428106528, "grad_norm": 244.68112182617188, "learning_rate": 8.190248231256535e-07, "loss": 16.1562, "step": 17201 }, { "epoch": 1.1424586571030086, "grad_norm": 132.70751953125, "learning_rate": 8.1891904814996e-07, "loss": 16.2344, "step": 17202 }, { "epoch": 1.1425250713953643, "grad_norm": 176.7168426513672, "learning_rate": 8.18813275268907e-07, "loss": 17.3438, "step": 17203 }, { "epoch": 1.14259148568772, "grad_norm": 163.13140869140625, "learning_rate": 8.187075044837174e-07, "loss": 19.8125, "step": 17204 }, { "epoch": 1.1426578999800756, "grad_norm": 265.2502136230469, "learning_rate": 8.18601735795615e-07, "loss": 17.3125, "step": 17205 }, { "epoch": 1.1427243142724315, "grad_norm": 231.609130859375, "learning_rate": 8.184959692058226e-07, "loss": 22.1719, "step": 17206 }, { "epoch": 1.1427907285647871, "grad_norm": 110.12841033935547, "learning_rate": 8.183902047155648e-07, "loss": 12.9062, "step": 17207 }, { "epoch": 1.1428571428571428, "grad_norm": 253.90103149414062, "learning_rate": 8.182844423260641e-07, "loss": 15.0625, "step": 17208 }, { "epoch": 1.1429235571494987, "grad_norm": 434.3230285644531, "learning_rate": 8.181786820385444e-07, "loss": 19.0156, "step": 17209 }, { "epoch": 1.1429899714418543, "grad_norm": 144.9958038330078, "learning_rate": 8.180729238542284e-07, "loss": 14.4531, "step": 17210 }, { "epoch": 1.14305638573421, "grad_norm": 464.9582214355469, "learning_rate": 8.179671677743403e-07, "loss": 22.2344, "step": 17211 }, { "epoch": 1.1431228000265656, "grad_norm": 209.07919311523438, "learning_rate": 8.178614138001029e-07, "loss": 19.9531, "step": 17212 }, { "epoch": 1.1431892143189215, "grad_norm": 102.76451873779297, "learning_rate": 8.177556619327398e-07, "loss": 12.375, "step": 17213 }, { "epoch": 1.1432556286112772, "grad_norm": 217.11520385742188, "learning_rate": 8.176499121734736e-07, "loss": 21.9844, "step": 17214 }, { "epoch": 1.1433220429036328, "grad_norm": 115.36080169677734, "learning_rate": 8.175441645235287e-07, "loss": 13.6562, "step": 17215 }, { "epoch": 1.1433884571959885, "grad_norm": 147.77313232421875, "learning_rate": 8.174384189841274e-07, "loss": 13.4062, "step": 17216 }, { "epoch": 1.1434548714883443, "grad_norm": 211.77764892578125, "learning_rate": 8.173326755564933e-07, "loss": 22.4844, "step": 17217 }, { "epoch": 1.1435212857807, "grad_norm": 148.77474975585938, "learning_rate": 8.172269342418494e-07, "loss": 14.1094, "step": 17218 }, { "epoch": 1.1435877000730557, "grad_norm": 413.1478576660156, "learning_rate": 8.171211950414192e-07, "loss": 25.0625, "step": 17219 }, { "epoch": 1.1436541143654115, "grad_norm": 202.7871856689453, "learning_rate": 8.170154579564251e-07, "loss": 16.4062, "step": 17220 }, { "epoch": 1.1437205286577672, "grad_norm": 261.0797119140625, "learning_rate": 8.169097229880909e-07, "loss": 17.7188, "step": 17221 }, { "epoch": 1.1437869429501228, "grad_norm": 152.29119873046875, "learning_rate": 8.168039901376394e-07, "loss": 13.1094, "step": 17222 }, { "epoch": 1.1438533572424785, "grad_norm": 125.05915069580078, "learning_rate": 8.166982594062938e-07, "loss": 12.7344, "step": 17223 }, { "epoch": 1.1439197715348344, "grad_norm": 89.07508850097656, "learning_rate": 8.165925307952767e-07, "loss": 11.7188, "step": 17224 }, { "epoch": 1.14398618582719, "grad_norm": 92.56771087646484, "learning_rate": 8.16486804305812e-07, "loss": 15.0156, "step": 17225 }, { "epoch": 1.1440526001195457, "grad_norm": 106.9585189819336, "learning_rate": 8.163810799391217e-07, "loss": 15.1406, "step": 17226 }, { "epoch": 1.1441190144119013, "grad_norm": 311.39788818359375, "learning_rate": 8.162753576964295e-07, "loss": 17.9375, "step": 17227 }, { "epoch": 1.1441854287042572, "grad_norm": 240.46878051757812, "learning_rate": 8.161696375789576e-07, "loss": 18.2344, "step": 17228 }, { "epoch": 1.1442518429966129, "grad_norm": 162.05111694335938, "learning_rate": 8.160639195879298e-07, "loss": 20.5391, "step": 17229 }, { "epoch": 1.1443182572889685, "grad_norm": 165.5643768310547, "learning_rate": 8.159582037245683e-07, "loss": 16.0469, "step": 17230 }, { "epoch": 1.1443846715813244, "grad_norm": 346.92620849609375, "learning_rate": 8.158524899900963e-07, "loss": 19.2188, "step": 17231 }, { "epoch": 1.14445108587368, "grad_norm": 291.897705078125, "learning_rate": 8.15746778385736e-07, "loss": 24.9062, "step": 17232 }, { "epoch": 1.1445175001660357, "grad_norm": 212.966552734375, "learning_rate": 8.156410689127113e-07, "loss": 16.7344, "step": 17233 }, { "epoch": 1.1445839144583914, "grad_norm": 150.4563446044922, "learning_rate": 8.155353615722441e-07, "loss": 13.5312, "step": 17234 }, { "epoch": 1.1446503287507472, "grad_norm": 125.3262939453125, "learning_rate": 8.154296563655578e-07, "loss": 15.8281, "step": 17235 }, { "epoch": 1.144716743043103, "grad_norm": 217.74156188964844, "learning_rate": 8.153239532938743e-07, "loss": 13.9062, "step": 17236 }, { "epoch": 1.1447831573354585, "grad_norm": 117.21728515625, "learning_rate": 8.152182523584174e-07, "loss": 14.6406, "step": 17237 }, { "epoch": 1.1448495716278142, "grad_norm": 103.37491607666016, "learning_rate": 8.151125535604087e-07, "loss": 12.5859, "step": 17238 }, { "epoch": 1.14491598592017, "grad_norm": 756.7373046875, "learning_rate": 8.150068569010716e-07, "loss": 36.625, "step": 17239 }, { "epoch": 1.1449824002125257, "grad_norm": 179.35739135742188, "learning_rate": 8.149011623816286e-07, "loss": 15.3594, "step": 17240 }, { "epoch": 1.1450488145048814, "grad_norm": 177.76145935058594, "learning_rate": 8.147954700033017e-07, "loss": 11.8125, "step": 17241 }, { "epoch": 1.1451152287972373, "grad_norm": 353.10101318359375, "learning_rate": 8.146897797673146e-07, "loss": 15.7188, "step": 17242 }, { "epoch": 1.145181643089593, "grad_norm": 221.63720703125, "learning_rate": 8.145840916748891e-07, "loss": 15.0156, "step": 17243 }, { "epoch": 1.1452480573819486, "grad_norm": 203.02529907226562, "learning_rate": 8.14478405727248e-07, "loss": 13.8281, "step": 17244 }, { "epoch": 1.1453144716743042, "grad_norm": 317.93475341796875, "learning_rate": 8.143727219256134e-07, "loss": 27.2188, "step": 17245 }, { "epoch": 1.14538088596666, "grad_norm": 958.6619873046875, "learning_rate": 8.142670402712084e-07, "loss": 14.7422, "step": 17246 }, { "epoch": 1.1454473002590158, "grad_norm": 260.5152282714844, "learning_rate": 8.141613607652551e-07, "loss": 14.0938, "step": 17247 }, { "epoch": 1.1455137145513714, "grad_norm": 112.12728118896484, "learning_rate": 8.140556834089762e-07, "loss": 15.3906, "step": 17248 }, { "epoch": 1.145580128843727, "grad_norm": 181.69503784179688, "learning_rate": 8.139500082035935e-07, "loss": 13.625, "step": 17249 }, { "epoch": 1.145646543136083, "grad_norm": 252.6785430908203, "learning_rate": 8.138443351503302e-07, "loss": 16.4688, "step": 17250 }, { "epoch": 1.1457129574284386, "grad_norm": 171.21499633789062, "learning_rate": 8.13738664250408e-07, "loss": 19.5312, "step": 17251 }, { "epoch": 1.1457793717207942, "grad_norm": 277.7489318847656, "learning_rate": 8.136329955050499e-07, "loss": 22.4375, "step": 17252 }, { "epoch": 1.1458457860131501, "grad_norm": 184.11947631835938, "learning_rate": 8.135273289154774e-07, "loss": 14.375, "step": 17253 }, { "epoch": 1.1459122003055058, "grad_norm": 160.39698791503906, "learning_rate": 8.134216644829137e-07, "loss": 13.2188, "step": 17254 }, { "epoch": 1.1459786145978614, "grad_norm": 283.20416259765625, "learning_rate": 8.133160022085803e-07, "loss": 14.4688, "step": 17255 }, { "epoch": 1.146045028890217, "grad_norm": 195.0352783203125, "learning_rate": 8.132103420937e-07, "loss": 20.1562, "step": 17256 }, { "epoch": 1.146111443182573, "grad_norm": 348.0793151855469, "learning_rate": 8.131046841394942e-07, "loss": 19.3906, "step": 17257 }, { "epoch": 1.1461778574749286, "grad_norm": 195.52801513671875, "learning_rate": 8.129990283471864e-07, "loss": 21.2188, "step": 17258 }, { "epoch": 1.1462442717672843, "grad_norm": 137.85389709472656, "learning_rate": 8.128933747179973e-07, "loss": 15.4688, "step": 17259 }, { "epoch": 1.14631068605964, "grad_norm": 1198.84814453125, "learning_rate": 8.127877232531502e-07, "loss": 13.8359, "step": 17260 }, { "epoch": 1.1463771003519958, "grad_norm": 377.09832763671875, "learning_rate": 8.126820739538667e-07, "loss": 15.1094, "step": 17261 }, { "epoch": 1.1464435146443515, "grad_norm": 107.40306091308594, "learning_rate": 8.12576426821369e-07, "loss": 11.9531, "step": 17262 }, { "epoch": 1.1465099289367071, "grad_norm": 738.2540283203125, "learning_rate": 8.124707818568787e-07, "loss": 21.25, "step": 17263 }, { "epoch": 1.146576343229063, "grad_norm": 187.44305419921875, "learning_rate": 8.123651390616187e-07, "loss": 16.2656, "step": 17264 }, { "epoch": 1.1466427575214186, "grad_norm": 143.3568878173828, "learning_rate": 8.122594984368104e-07, "loss": 17.625, "step": 17265 }, { "epoch": 1.1467091718137743, "grad_norm": 126.0385971069336, "learning_rate": 8.121538599836761e-07, "loss": 10.6328, "step": 17266 }, { "epoch": 1.14677558610613, "grad_norm": 294.1977844238281, "learning_rate": 8.120482237034373e-07, "loss": 17.4844, "step": 17267 }, { "epoch": 1.1468420003984858, "grad_norm": 227.67523193359375, "learning_rate": 8.119425895973166e-07, "loss": 16.3906, "step": 17268 }, { "epoch": 1.1469084146908415, "grad_norm": 199.9048614501953, "learning_rate": 8.118369576665354e-07, "loss": 19.9375, "step": 17269 }, { "epoch": 1.1469748289831971, "grad_norm": 287.91357421875, "learning_rate": 8.117313279123161e-07, "loss": 15.5625, "step": 17270 }, { "epoch": 1.1470412432755528, "grad_norm": 258.8262023925781, "learning_rate": 8.116257003358797e-07, "loss": 18.9062, "step": 17271 }, { "epoch": 1.1471076575679087, "grad_norm": 235.1416015625, "learning_rate": 8.115200749384491e-07, "loss": 14.7656, "step": 17272 }, { "epoch": 1.1471740718602643, "grad_norm": 123.22439575195312, "learning_rate": 8.114144517212453e-07, "loss": 14.7031, "step": 17273 }, { "epoch": 1.14724048615262, "grad_norm": 237.57179260253906, "learning_rate": 8.113088306854904e-07, "loss": 15.4531, "step": 17274 }, { "epoch": 1.1473069004449759, "grad_norm": 179.5478057861328, "learning_rate": 8.11203211832406e-07, "loss": 17.7812, "step": 17275 }, { "epoch": 1.1473733147373315, "grad_norm": 220.97463989257812, "learning_rate": 8.110975951632142e-07, "loss": 22.4531, "step": 17276 }, { "epoch": 1.1474397290296872, "grad_norm": 127.96334838867188, "learning_rate": 8.109919806791366e-07, "loss": 18.7344, "step": 17277 }, { "epoch": 1.1475061433220428, "grad_norm": 145.01629638671875, "learning_rate": 8.108863683813943e-07, "loss": 18.125, "step": 17278 }, { "epoch": 1.1475725576143987, "grad_norm": 230.7346954345703, "learning_rate": 8.1078075827121e-07, "loss": 19.125, "step": 17279 }, { "epoch": 1.1476389719067543, "grad_norm": 225.18527221679688, "learning_rate": 8.106751503498044e-07, "loss": 17.0625, "step": 17280 }, { "epoch": 1.14770538619911, "grad_norm": 808.9893188476562, "learning_rate": 8.105695446183998e-07, "loss": 15.25, "step": 17281 }, { "epoch": 1.1477718004914657, "grad_norm": 169.52279663085938, "learning_rate": 8.104639410782172e-07, "loss": 17.5781, "step": 17282 }, { "epoch": 1.1478382147838215, "grad_norm": 234.88714599609375, "learning_rate": 8.103583397304787e-07, "loss": 14.8125, "step": 17283 }, { "epoch": 1.1479046290761772, "grad_norm": 206.50279235839844, "learning_rate": 8.102527405764053e-07, "loss": 14.9531, "step": 17284 }, { "epoch": 1.1479710433685328, "grad_norm": 119.80953979492188, "learning_rate": 8.10147143617219e-07, "loss": 14.0625, "step": 17285 }, { "epoch": 1.1480374576608887, "grad_norm": 126.27608489990234, "learning_rate": 8.100415488541408e-07, "loss": 14.2344, "step": 17286 }, { "epoch": 1.1481038719532444, "grad_norm": 128.56629943847656, "learning_rate": 8.09935956288393e-07, "loss": 15.7812, "step": 17287 }, { "epoch": 1.1481702862456, "grad_norm": 139.52528381347656, "learning_rate": 8.098303659211957e-07, "loss": 15.9688, "step": 17288 }, { "epoch": 1.1482367005379557, "grad_norm": 238.5509796142578, "learning_rate": 8.097247777537716e-07, "loss": 15.9062, "step": 17289 }, { "epoch": 1.1483031148303116, "grad_norm": 151.83961486816406, "learning_rate": 8.096191917873413e-07, "loss": 14.5, "step": 17290 }, { "epoch": 1.1483695291226672, "grad_norm": 427.81512451171875, "learning_rate": 8.095136080231266e-07, "loss": 19.2344, "step": 17291 }, { "epoch": 1.1484359434150229, "grad_norm": 207.01229858398438, "learning_rate": 8.094080264623483e-07, "loss": 14.75, "step": 17292 }, { "epoch": 1.1485023577073785, "grad_norm": 132.51097106933594, "learning_rate": 8.093024471062284e-07, "loss": 13.0781, "step": 17293 }, { "epoch": 1.1485687719997344, "grad_norm": 116.26470184326172, "learning_rate": 8.091968699559876e-07, "loss": 14.0312, "step": 17294 }, { "epoch": 1.14863518629209, "grad_norm": 133.1043701171875, "learning_rate": 8.090912950128477e-07, "loss": 15.3281, "step": 17295 }, { "epoch": 1.1487016005844457, "grad_norm": 120.22500610351562, "learning_rate": 8.089857222780292e-07, "loss": 13.75, "step": 17296 }, { "epoch": 1.1487680148768016, "grad_norm": 454.1765441894531, "learning_rate": 8.08880151752754e-07, "loss": 12.4609, "step": 17297 }, { "epoch": 1.1488344291691572, "grad_norm": 327.67138671875, "learning_rate": 8.087745834382429e-07, "loss": 17.0469, "step": 17298 }, { "epoch": 1.1489008434615129, "grad_norm": 322.78399658203125, "learning_rate": 8.086690173357173e-07, "loss": 14.4766, "step": 17299 }, { "epoch": 1.1489672577538685, "grad_norm": 200.7310791015625, "learning_rate": 8.085634534463979e-07, "loss": 13.4453, "step": 17300 }, { "epoch": 1.1490336720462244, "grad_norm": 192.3250274658203, "learning_rate": 8.084578917715066e-07, "loss": 13.3594, "step": 17301 }, { "epoch": 1.14910008633858, "grad_norm": 263.8064880371094, "learning_rate": 8.083523323122633e-07, "loss": 14.7578, "step": 17302 }, { "epoch": 1.1491665006309357, "grad_norm": 134.3400421142578, "learning_rate": 8.082467750698902e-07, "loss": 17.6719, "step": 17303 }, { "epoch": 1.1492329149232914, "grad_norm": 480.0444641113281, "learning_rate": 8.081412200456075e-07, "loss": 15.3125, "step": 17304 }, { "epoch": 1.1492993292156473, "grad_norm": 841.5144653320312, "learning_rate": 8.080356672406369e-07, "loss": 15.7969, "step": 17305 }, { "epoch": 1.149365743508003, "grad_norm": 212.32290649414062, "learning_rate": 8.079301166561984e-07, "loss": 17.5, "step": 17306 }, { "epoch": 1.1494321578003586, "grad_norm": 299.98016357421875, "learning_rate": 8.078245682935142e-07, "loss": 20.5, "step": 17307 }, { "epoch": 1.1494985720927144, "grad_norm": 104.23426055908203, "learning_rate": 8.077190221538042e-07, "loss": 12.8281, "step": 17308 }, { "epoch": 1.14956498638507, "grad_norm": 154.9980010986328, "learning_rate": 8.076134782382899e-07, "loss": 15.7188, "step": 17309 }, { "epoch": 1.1496314006774258, "grad_norm": 686.0084838867188, "learning_rate": 8.075079365481916e-07, "loss": 27.25, "step": 17310 }, { "epoch": 1.1496978149697814, "grad_norm": 157.11245727539062, "learning_rate": 8.07402397084731e-07, "loss": 19.0938, "step": 17311 }, { "epoch": 1.1497642292621373, "grad_norm": 165.44134521484375, "learning_rate": 8.072968598491281e-07, "loss": 13.3984, "step": 17312 }, { "epoch": 1.149830643554493, "grad_norm": 1170.4937744140625, "learning_rate": 8.071913248426038e-07, "loss": 13.5938, "step": 17313 }, { "epoch": 1.1498970578468486, "grad_norm": 401.2219543457031, "learning_rate": 8.070857920663794e-07, "loss": 23.3906, "step": 17314 }, { "epoch": 1.1499634721392042, "grad_norm": 137.08958435058594, "learning_rate": 8.069802615216752e-07, "loss": 13.1875, "step": 17315 }, { "epoch": 1.1500298864315601, "grad_norm": 312.0267333984375, "learning_rate": 8.068747332097122e-07, "loss": 12.7188, "step": 17316 }, { "epoch": 1.1500963007239158, "grad_norm": 522.7703247070312, "learning_rate": 8.067692071317106e-07, "loss": 35.3125, "step": 17317 }, { "epoch": 1.1501627150162714, "grad_norm": 416.9428405761719, "learning_rate": 8.066636832888918e-07, "loss": 26.5312, "step": 17318 }, { "epoch": 1.1502291293086273, "grad_norm": 123.14723205566406, "learning_rate": 8.065581616824757e-07, "loss": 17.8594, "step": 17319 }, { "epoch": 1.150295543600983, "grad_norm": 108.80439758300781, "learning_rate": 8.064526423136835e-07, "loss": 11.2031, "step": 17320 }, { "epoch": 1.1503619578933386, "grad_norm": 332.0092468261719, "learning_rate": 8.063471251837352e-07, "loss": 14.5703, "step": 17321 }, { "epoch": 1.1504283721856943, "grad_norm": 156.54917907714844, "learning_rate": 8.062416102938522e-07, "loss": 18.2969, "step": 17322 }, { "epoch": 1.1504947864780501, "grad_norm": 115.21110534667969, "learning_rate": 8.061360976452539e-07, "loss": 15.0703, "step": 17323 }, { "epoch": 1.1505612007704058, "grad_norm": 230.3882293701172, "learning_rate": 8.06030587239162e-07, "loss": 14.7031, "step": 17324 }, { "epoch": 1.1506276150627615, "grad_norm": 259.0826110839844, "learning_rate": 8.059250790767959e-07, "loss": 13.1094, "step": 17325 }, { "epoch": 1.150694029355117, "grad_norm": 186.01766967773438, "learning_rate": 8.058195731593771e-07, "loss": 11.7031, "step": 17326 }, { "epoch": 1.150760443647473, "grad_norm": 154.50958251953125, "learning_rate": 8.057140694881249e-07, "loss": 18.9688, "step": 17327 }, { "epoch": 1.1508268579398286, "grad_norm": 169.96119689941406, "learning_rate": 8.056085680642608e-07, "loss": 17.6562, "step": 17328 }, { "epoch": 1.1508932722321843, "grad_norm": 80.08055877685547, "learning_rate": 8.055030688890046e-07, "loss": 11.125, "step": 17329 }, { "epoch": 1.1509596865245402, "grad_norm": 336.92303466796875, "learning_rate": 8.053975719635769e-07, "loss": 11.6094, "step": 17330 }, { "epoch": 1.1510261008168958, "grad_norm": 224.65135192871094, "learning_rate": 8.052920772891974e-07, "loss": 11.625, "step": 17331 }, { "epoch": 1.1510925151092515, "grad_norm": 318.8664245605469, "learning_rate": 8.051865848670872e-07, "loss": 21.375, "step": 17332 }, { "epoch": 1.1511589294016071, "grad_norm": 203.98153686523438, "learning_rate": 8.050810946984661e-07, "loss": 13.3438, "step": 17333 }, { "epoch": 1.151225343693963, "grad_norm": 124.07745361328125, "learning_rate": 8.049756067845548e-07, "loss": 17.5625, "step": 17334 }, { "epoch": 1.1512917579863187, "grad_norm": 100.76802062988281, "learning_rate": 8.048701211265727e-07, "loss": 13.7188, "step": 17335 }, { "epoch": 1.1513581722786743, "grad_norm": 169.33248901367188, "learning_rate": 8.04764637725741e-07, "loss": 17.375, "step": 17336 }, { "epoch": 1.15142458657103, "grad_norm": 140.50225830078125, "learning_rate": 8.046591565832791e-07, "loss": 12.0, "step": 17337 }, { "epoch": 1.1514910008633858, "grad_norm": 150.27967834472656, "learning_rate": 8.045536777004077e-07, "loss": 16.2812, "step": 17338 }, { "epoch": 1.1515574151557415, "grad_norm": 303.7906799316406, "learning_rate": 8.044482010783463e-07, "loss": 19.75, "step": 17339 }, { "epoch": 1.1516238294480972, "grad_norm": 185.12278747558594, "learning_rate": 8.043427267183158e-07, "loss": 14.4062, "step": 17340 }, { "epoch": 1.151690243740453, "grad_norm": 181.1915283203125, "learning_rate": 8.042372546215355e-07, "loss": 18.9688, "step": 17341 }, { "epoch": 1.1517566580328087, "grad_norm": 254.08750915527344, "learning_rate": 8.04131784789226e-07, "loss": 14.8438, "step": 17342 }, { "epoch": 1.1518230723251643, "grad_norm": 186.5098114013672, "learning_rate": 8.040263172226067e-07, "loss": 15.1875, "step": 17343 }, { "epoch": 1.15188948661752, "grad_norm": 204.5781707763672, "learning_rate": 8.039208519228984e-07, "loss": 20.0781, "step": 17344 }, { "epoch": 1.1519559009098759, "grad_norm": 216.77503967285156, "learning_rate": 8.0381538889132e-07, "loss": 13.9219, "step": 17345 }, { "epoch": 1.1520223152022315, "grad_norm": 221.31922912597656, "learning_rate": 8.037099281290926e-07, "loss": 14.0, "step": 17346 }, { "epoch": 1.1520887294945872, "grad_norm": 339.11456298828125, "learning_rate": 8.036044696374354e-07, "loss": 14.6406, "step": 17347 }, { "epoch": 1.1521551437869428, "grad_norm": 437.27203369140625, "learning_rate": 8.034990134175686e-07, "loss": 17.3594, "step": 17348 }, { "epoch": 1.1522215580792987, "grad_norm": 157.40406799316406, "learning_rate": 8.033935594707115e-07, "loss": 12.9219, "step": 17349 }, { "epoch": 1.1522879723716544, "grad_norm": 194.4645538330078, "learning_rate": 8.032881077980846e-07, "loss": 22.5312, "step": 17350 }, { "epoch": 1.15235438666401, "grad_norm": 432.3446350097656, "learning_rate": 8.031826584009074e-07, "loss": 18.2656, "step": 17351 }, { "epoch": 1.152420800956366, "grad_norm": 307.65411376953125, "learning_rate": 8.030772112803995e-07, "loss": 19.0, "step": 17352 }, { "epoch": 1.1524872152487216, "grad_norm": 898.7809448242188, "learning_rate": 8.029717664377813e-07, "loss": 18.1875, "step": 17353 }, { "epoch": 1.1525536295410772, "grad_norm": 178.6490020751953, "learning_rate": 8.028663238742719e-07, "loss": 15.9688, "step": 17354 }, { "epoch": 1.1526200438334329, "grad_norm": 435.826416015625, "learning_rate": 8.027608835910913e-07, "loss": 18.2344, "step": 17355 }, { "epoch": 1.1526864581257887, "grad_norm": 306.1736755371094, "learning_rate": 8.026554455894586e-07, "loss": 21.5625, "step": 17356 }, { "epoch": 1.1527528724181444, "grad_norm": 374.99774169921875, "learning_rate": 8.025500098705945e-07, "loss": 13.5781, "step": 17357 }, { "epoch": 1.1528192867105, "grad_norm": 173.7279052734375, "learning_rate": 8.024445764357177e-07, "loss": 19.2031, "step": 17358 }, { "epoch": 1.1528857010028557, "grad_norm": 395.9400939941406, "learning_rate": 8.023391452860484e-07, "loss": 18.2812, "step": 17359 }, { "epoch": 1.1529521152952116, "grad_norm": 272.68841552734375, "learning_rate": 8.022337164228054e-07, "loss": 20.0625, "step": 17360 }, { "epoch": 1.1530185295875672, "grad_norm": 217.42173767089844, "learning_rate": 8.021282898472093e-07, "loss": 14.6406, "step": 17361 }, { "epoch": 1.1530849438799229, "grad_norm": 215.9873809814453, "learning_rate": 8.020228655604786e-07, "loss": 18.75, "step": 17362 }, { "epoch": 1.1531513581722788, "grad_norm": 394.3887939453125, "learning_rate": 8.019174435638336e-07, "loss": 24.7188, "step": 17363 }, { "epoch": 1.1532177724646344, "grad_norm": 135.9141845703125, "learning_rate": 8.018120238584929e-07, "loss": 16.1875, "step": 17364 }, { "epoch": 1.15328418675699, "grad_norm": 362.8002014160156, "learning_rate": 8.017066064456771e-07, "loss": 13.6875, "step": 17365 }, { "epoch": 1.1533506010493457, "grad_norm": 1768.9996337890625, "learning_rate": 8.016011913266043e-07, "loss": 23.5312, "step": 17366 }, { "epoch": 1.1534170153417016, "grad_norm": 1449.428955078125, "learning_rate": 8.01495778502495e-07, "loss": 27.6875, "step": 17367 }, { "epoch": 1.1534834296340573, "grad_norm": 242.1046905517578, "learning_rate": 8.013903679745677e-07, "loss": 20.7188, "step": 17368 }, { "epoch": 1.153549843926413, "grad_norm": 150.11053466796875, "learning_rate": 8.012849597440425e-07, "loss": 16.2344, "step": 17369 }, { "epoch": 1.1536162582187686, "grad_norm": 249.73387145996094, "learning_rate": 8.011795538121377e-07, "loss": 14.9531, "step": 17370 }, { "epoch": 1.1536826725111244, "grad_norm": 173.45587158203125, "learning_rate": 8.010741501800738e-07, "loss": 15.3594, "step": 17371 }, { "epoch": 1.15374908680348, "grad_norm": 170.17872619628906, "learning_rate": 8.009687488490691e-07, "loss": 11.4219, "step": 17372 }, { "epoch": 1.1538155010958357, "grad_norm": 441.2323913574219, "learning_rate": 8.008633498203432e-07, "loss": 15.0938, "step": 17373 }, { "epoch": 1.1538819153881916, "grad_norm": 177.52125549316406, "learning_rate": 8.00757953095115e-07, "loss": 17.4688, "step": 17374 }, { "epoch": 1.1539483296805473, "grad_norm": 284.87640380859375, "learning_rate": 8.006525586746042e-07, "loss": 13.8281, "step": 17375 }, { "epoch": 1.154014743972903, "grad_norm": 218.53414916992188, "learning_rate": 8.005471665600296e-07, "loss": 12.625, "step": 17376 }, { "epoch": 1.1540811582652586, "grad_norm": 146.39144897460938, "learning_rate": 8.004417767526106e-07, "loss": 13.5391, "step": 17377 }, { "epoch": 1.1541475725576145, "grad_norm": 354.7091064453125, "learning_rate": 8.003363892535655e-07, "loss": 12.375, "step": 17378 }, { "epoch": 1.1542139868499701, "grad_norm": 213.96554565429688, "learning_rate": 8.002310040641144e-07, "loss": 14.9375, "step": 17379 }, { "epoch": 1.1542804011423258, "grad_norm": 301.81494140625, "learning_rate": 8.001256211854756e-07, "loss": 14.2969, "step": 17380 }, { "epoch": 1.1543468154346814, "grad_norm": 480.3675537109375, "learning_rate": 8.000202406188685e-07, "loss": 17.7969, "step": 17381 }, { "epoch": 1.1544132297270373, "grad_norm": 186.97991943359375, "learning_rate": 7.999148623655116e-07, "loss": 15.7656, "step": 17382 }, { "epoch": 1.154479644019393, "grad_norm": 239.409423828125, "learning_rate": 7.998094864266247e-07, "loss": 15.625, "step": 17383 }, { "epoch": 1.1545460583117486, "grad_norm": 292.0128173828125, "learning_rate": 7.99704112803426e-07, "loss": 16.9062, "step": 17384 }, { "epoch": 1.1546124726041045, "grad_norm": 137.78370666503906, "learning_rate": 7.995987414971348e-07, "loss": 14.5625, "step": 17385 }, { "epoch": 1.1546788868964601, "grad_norm": 362.018310546875, "learning_rate": 7.994933725089695e-07, "loss": 15.4219, "step": 17386 }, { "epoch": 1.1547453011888158, "grad_norm": 337.2125244140625, "learning_rate": 7.993880058401491e-07, "loss": 13.8984, "step": 17387 }, { "epoch": 1.1548117154811715, "grad_norm": 154.82261657714844, "learning_rate": 7.99282641491893e-07, "loss": 15.5, "step": 17388 }, { "epoch": 1.1548781297735273, "grad_norm": 184.08604431152344, "learning_rate": 7.991772794654194e-07, "loss": 15.0156, "step": 17389 }, { "epoch": 1.154944544065883, "grad_norm": 189.07357788085938, "learning_rate": 7.990719197619473e-07, "loss": 16.7812, "step": 17390 }, { "epoch": 1.1550109583582386, "grad_norm": 185.12197875976562, "learning_rate": 7.989665623826951e-07, "loss": 15.0156, "step": 17391 }, { "epoch": 1.1550773726505943, "grad_norm": 217.52159118652344, "learning_rate": 7.988612073288822e-07, "loss": 15.7656, "step": 17392 }, { "epoch": 1.1551437869429502, "grad_norm": 259.52496337890625, "learning_rate": 7.987558546017265e-07, "loss": 14.9531, "step": 17393 }, { "epoch": 1.1552102012353058, "grad_norm": 287.6141052246094, "learning_rate": 7.986505042024474e-07, "loss": 20.875, "step": 17394 }, { "epoch": 1.1552766155276615, "grad_norm": 220.09942626953125, "learning_rate": 7.985451561322627e-07, "loss": 16.1094, "step": 17395 }, { "epoch": 1.1553430298200174, "grad_norm": 147.25888061523438, "learning_rate": 7.984398103923919e-07, "loss": 13.0156, "step": 17396 }, { "epoch": 1.155409444112373, "grad_norm": 229.46388244628906, "learning_rate": 7.983344669840528e-07, "loss": 14.0547, "step": 17397 }, { "epoch": 1.1554758584047287, "grad_norm": 146.95303344726562, "learning_rate": 7.982291259084647e-07, "loss": 17.0938, "step": 17398 }, { "epoch": 1.1555422726970843, "grad_norm": 177.78208923339844, "learning_rate": 7.981237871668453e-07, "loss": 15.7031, "step": 17399 }, { "epoch": 1.1556086869894402, "grad_norm": 340.90277099609375, "learning_rate": 7.980184507604137e-07, "loss": 21.0781, "step": 17400 }, { "epoch": 1.1556751012817958, "grad_norm": 771.314208984375, "learning_rate": 7.979131166903883e-07, "loss": 24.7188, "step": 17401 }, { "epoch": 1.1557415155741515, "grad_norm": 245.46609497070312, "learning_rate": 7.978077849579874e-07, "loss": 17.25, "step": 17402 }, { "epoch": 1.1558079298665072, "grad_norm": 267.9134521484375, "learning_rate": 7.977024555644292e-07, "loss": 17.0156, "step": 17403 }, { "epoch": 1.155874344158863, "grad_norm": 217.55877685546875, "learning_rate": 7.975971285109327e-07, "loss": 15.3594, "step": 17404 }, { "epoch": 1.1559407584512187, "grad_norm": 179.4291229248047, "learning_rate": 7.974918037987158e-07, "loss": 13.75, "step": 17405 }, { "epoch": 1.1560071727435743, "grad_norm": 111.39729309082031, "learning_rate": 7.973864814289968e-07, "loss": 13.1562, "step": 17406 }, { "epoch": 1.1560735870359302, "grad_norm": 324.9994201660156, "learning_rate": 7.972811614029943e-07, "loss": 19.6875, "step": 17407 }, { "epoch": 1.1561400013282859, "grad_norm": 373.5892639160156, "learning_rate": 7.971758437219265e-07, "loss": 16.5469, "step": 17408 }, { "epoch": 1.1562064156206415, "grad_norm": 290.37945556640625, "learning_rate": 7.970705283870111e-07, "loss": 15.4688, "step": 17409 }, { "epoch": 1.1562728299129974, "grad_norm": 271.0221252441406, "learning_rate": 7.969652153994674e-07, "loss": 18.375, "step": 17410 }, { "epoch": 1.156339244205353, "grad_norm": 126.59869384765625, "learning_rate": 7.968599047605126e-07, "loss": 16.0156, "step": 17411 }, { "epoch": 1.1564056584977087, "grad_norm": 191.30589294433594, "learning_rate": 7.967545964713658e-07, "loss": 13.6094, "step": 17412 }, { "epoch": 1.1564720727900644, "grad_norm": 200.8471221923828, "learning_rate": 7.96649290533244e-07, "loss": 16.75, "step": 17413 }, { "epoch": 1.15653848708242, "grad_norm": 366.2657470703125, "learning_rate": 7.965439869473663e-07, "loss": 24.0625, "step": 17414 }, { "epoch": 1.156604901374776, "grad_norm": 272.56439208984375, "learning_rate": 7.964386857149503e-07, "loss": 16.7812, "step": 17415 }, { "epoch": 1.1566713156671315, "grad_norm": 212.13882446289062, "learning_rate": 7.963333868372146e-07, "loss": 13.4844, "step": 17416 }, { "epoch": 1.1567377299594872, "grad_norm": 123.68094635009766, "learning_rate": 7.962280903153762e-07, "loss": 17.7031, "step": 17417 }, { "epoch": 1.156804144251843, "grad_norm": 124.5195083618164, "learning_rate": 7.961227961506541e-07, "loss": 14.1719, "step": 17418 }, { "epoch": 1.1568705585441987, "grad_norm": 305.86517333984375, "learning_rate": 7.960175043442658e-07, "loss": 15.2656, "step": 17419 }, { "epoch": 1.1569369728365544, "grad_norm": 235.56146240234375, "learning_rate": 7.959122148974297e-07, "loss": 23.1094, "step": 17420 }, { "epoch": 1.1570033871289103, "grad_norm": 154.40106201171875, "learning_rate": 7.958069278113629e-07, "loss": 24.1406, "step": 17421 }, { "epoch": 1.157069801421266, "grad_norm": 153.07965087890625, "learning_rate": 7.957016430872842e-07, "loss": 18.6562, "step": 17422 }, { "epoch": 1.1571362157136216, "grad_norm": 263.0492858886719, "learning_rate": 7.955963607264111e-07, "loss": 20.0469, "step": 17423 }, { "epoch": 1.1572026300059772, "grad_norm": 227.17283630371094, "learning_rate": 7.954910807299611e-07, "loss": 15.6094, "step": 17424 }, { "epoch": 1.1572690442983329, "grad_norm": 174.6215057373047, "learning_rate": 7.953858030991527e-07, "loss": 13.0938, "step": 17425 }, { "epoch": 1.1573354585906888, "grad_norm": 119.290283203125, "learning_rate": 7.952805278352032e-07, "loss": 16.2031, "step": 17426 }, { "epoch": 1.1574018728830444, "grad_norm": 330.983154296875, "learning_rate": 7.951752549393307e-07, "loss": 19.4688, "step": 17427 }, { "epoch": 1.1574682871754, "grad_norm": 145.8082275390625, "learning_rate": 7.950699844127521e-07, "loss": 17.0781, "step": 17428 }, { "epoch": 1.157534701467756, "grad_norm": 207.5313262939453, "learning_rate": 7.949647162566868e-07, "loss": 18.25, "step": 17429 }, { "epoch": 1.1576011157601116, "grad_norm": 220.53956604003906, "learning_rate": 7.948594504723504e-07, "loss": 15.4219, "step": 17430 }, { "epoch": 1.1576675300524673, "grad_norm": 155.9608917236328, "learning_rate": 7.947541870609624e-07, "loss": 14.7656, "step": 17431 }, { "epoch": 1.1577339443448231, "grad_norm": 712.3617553710938, "learning_rate": 7.946489260237392e-07, "loss": 21.1094, "step": 17432 }, { "epoch": 1.1578003586371788, "grad_norm": 617.5704345703125, "learning_rate": 7.94543667361899e-07, "loss": 18.3906, "step": 17433 }, { "epoch": 1.1578667729295344, "grad_norm": 236.80429077148438, "learning_rate": 7.944384110766588e-07, "loss": 17.8906, "step": 17434 }, { "epoch": 1.15793318722189, "grad_norm": 238.7445831298828, "learning_rate": 7.943331571692369e-07, "loss": 21.8125, "step": 17435 }, { "epoch": 1.1579996015142457, "grad_norm": 220.8023223876953, "learning_rate": 7.942279056408503e-07, "loss": 14.0938, "step": 17436 }, { "epoch": 1.1580660158066016, "grad_norm": 247.0408172607422, "learning_rate": 7.941226564927168e-07, "loss": 17.0312, "step": 17437 }, { "epoch": 1.1581324300989573, "grad_norm": 378.7170715332031, "learning_rate": 7.940174097260532e-07, "loss": 15.75, "step": 17438 }, { "epoch": 1.158198844391313, "grad_norm": 173.2033233642578, "learning_rate": 7.93912165342078e-07, "loss": 15.75, "step": 17439 }, { "epoch": 1.1582652586836688, "grad_norm": 218.23228454589844, "learning_rate": 7.938069233420077e-07, "loss": 19.5312, "step": 17440 }, { "epoch": 1.1583316729760245, "grad_norm": 443.9459533691406, "learning_rate": 7.937016837270603e-07, "loss": 13.875, "step": 17441 }, { "epoch": 1.1583980872683801, "grad_norm": 123.3098373413086, "learning_rate": 7.935964464984525e-07, "loss": 15.7031, "step": 17442 }, { "epoch": 1.158464501560736, "grad_norm": 221.1521453857422, "learning_rate": 7.934912116574023e-07, "loss": 23.625, "step": 17443 }, { "epoch": 1.1585309158530916, "grad_norm": 182.5771484375, "learning_rate": 7.933859792051266e-07, "loss": 14.8125, "step": 17444 }, { "epoch": 1.1585973301454473, "grad_norm": 167.12887573242188, "learning_rate": 7.932807491428428e-07, "loss": 15.7969, "step": 17445 }, { "epoch": 1.158663744437803, "grad_norm": 265.8616027832031, "learning_rate": 7.931755214717677e-07, "loss": 18.25, "step": 17446 }, { "epoch": 1.1587301587301586, "grad_norm": 146.59136962890625, "learning_rate": 7.930702961931196e-07, "loss": 15.5156, "step": 17447 }, { "epoch": 1.1587965730225145, "grad_norm": 304.3560791015625, "learning_rate": 7.929650733081144e-07, "loss": 16.8125, "step": 17448 }, { "epoch": 1.1588629873148701, "grad_norm": 144.8486328125, "learning_rate": 7.928598528179702e-07, "loss": 15.875, "step": 17449 }, { "epoch": 1.1589294016072258, "grad_norm": 587.0965576171875, "learning_rate": 7.927546347239035e-07, "loss": 18.6562, "step": 17450 }, { "epoch": 1.1589958158995817, "grad_norm": 315.8340759277344, "learning_rate": 7.92649419027132e-07, "loss": 12.5156, "step": 17451 }, { "epoch": 1.1590622301919373, "grad_norm": 186.13633728027344, "learning_rate": 7.92544205728872e-07, "loss": 15.875, "step": 17452 }, { "epoch": 1.159128644484293, "grad_norm": 542.7404174804688, "learning_rate": 7.924389948303413e-07, "loss": 24.4062, "step": 17453 }, { "epoch": 1.1591950587766489, "grad_norm": 272.7769470214844, "learning_rate": 7.923337863327567e-07, "loss": 11.8125, "step": 17454 }, { "epoch": 1.1592614730690045, "grad_norm": 164.50637817382812, "learning_rate": 7.922285802373351e-07, "loss": 14.6406, "step": 17455 }, { "epoch": 1.1593278873613602, "grad_norm": 396.5760803222656, "learning_rate": 7.92123376545293e-07, "loss": 21.1406, "step": 17456 }, { "epoch": 1.1593943016537158, "grad_norm": 138.75054931640625, "learning_rate": 7.920181752578484e-07, "loss": 15.1406, "step": 17457 }, { "epoch": 1.1594607159460715, "grad_norm": 486.4584045410156, "learning_rate": 7.919129763762174e-07, "loss": 15.0, "step": 17458 }, { "epoch": 1.1595271302384274, "grad_norm": 140.87477111816406, "learning_rate": 7.918077799016168e-07, "loss": 12.875, "step": 17459 }, { "epoch": 1.159593544530783, "grad_norm": 190.25048828125, "learning_rate": 7.917025858352643e-07, "loss": 16.3125, "step": 17460 }, { "epoch": 1.1596599588231387, "grad_norm": 187.51220703125, "learning_rate": 7.915973941783758e-07, "loss": 18.5156, "step": 17461 }, { "epoch": 1.1597263731154945, "grad_norm": 231.99403381347656, "learning_rate": 7.914922049321688e-07, "loss": 14.9844, "step": 17462 }, { "epoch": 1.1597927874078502, "grad_norm": 308.31317138671875, "learning_rate": 7.913870180978593e-07, "loss": 15.3594, "step": 17463 }, { "epoch": 1.1598592017002058, "grad_norm": 475.6488037109375, "learning_rate": 7.912818336766649e-07, "loss": 26.375, "step": 17464 }, { "epoch": 1.1599256159925617, "grad_norm": 322.5304260253906, "learning_rate": 7.911766516698016e-07, "loss": 16.0938, "step": 17465 }, { "epoch": 1.1599920302849174, "grad_norm": 193.94407653808594, "learning_rate": 7.910714720784867e-07, "loss": 15.1562, "step": 17466 }, { "epoch": 1.160058444577273, "grad_norm": 214.27598571777344, "learning_rate": 7.90966294903936e-07, "loss": 16.8281, "step": 17467 }, { "epoch": 1.1601248588696287, "grad_norm": 165.25233459472656, "learning_rate": 7.908611201473672e-07, "loss": 17.3281, "step": 17468 }, { "epoch": 1.1601912731619843, "grad_norm": 146.8489532470703, "learning_rate": 7.907559478099961e-07, "loss": 12.9375, "step": 17469 }, { "epoch": 1.1602576874543402, "grad_norm": 160.72409057617188, "learning_rate": 7.906507778930398e-07, "loss": 12.8125, "step": 17470 }, { "epoch": 1.1603241017466959, "grad_norm": 227.7079620361328, "learning_rate": 7.905456103977144e-07, "loss": 17.7969, "step": 17471 }, { "epoch": 1.1603905160390515, "grad_norm": 248.68357849121094, "learning_rate": 7.904404453252367e-07, "loss": 13.9219, "step": 17472 }, { "epoch": 1.1604569303314074, "grad_norm": 300.83734130859375, "learning_rate": 7.903352826768227e-07, "loss": 18.5625, "step": 17473 }, { "epoch": 1.160523344623763, "grad_norm": 118.63284301757812, "learning_rate": 7.902301224536897e-07, "loss": 14.7656, "step": 17474 }, { "epoch": 1.1605897589161187, "grad_norm": 605.9520263671875, "learning_rate": 7.901249646570535e-07, "loss": 19.0312, "step": 17475 }, { "epoch": 1.1606561732084746, "grad_norm": 118.08639526367188, "learning_rate": 7.900198092881309e-07, "loss": 13.4062, "step": 17476 }, { "epoch": 1.1607225875008302, "grad_norm": 279.50616455078125, "learning_rate": 7.899146563481376e-07, "loss": 16.8125, "step": 17477 }, { "epoch": 1.160789001793186, "grad_norm": 116.44849395751953, "learning_rate": 7.898095058382907e-07, "loss": 12.4375, "step": 17478 }, { "epoch": 1.1608554160855415, "grad_norm": 149.8636474609375, "learning_rate": 7.897043577598063e-07, "loss": 15.0625, "step": 17479 }, { "epoch": 1.1609218303778972, "grad_norm": 181.0640106201172, "learning_rate": 7.895992121139007e-07, "loss": 16.0, "step": 17480 }, { "epoch": 1.160988244670253, "grad_norm": 489.18994140625, "learning_rate": 7.894940689017896e-07, "loss": 21.25, "step": 17481 }, { "epoch": 1.1610546589626087, "grad_norm": 117.01235961914062, "learning_rate": 7.893889281246902e-07, "loss": 14.2344, "step": 17482 }, { "epoch": 1.1611210732549644, "grad_norm": 559.2738647460938, "learning_rate": 7.89283789783818e-07, "loss": 21.1406, "step": 17483 }, { "epoch": 1.1611874875473203, "grad_norm": 442.2216491699219, "learning_rate": 7.891786538803897e-07, "loss": 15.5469, "step": 17484 }, { "epoch": 1.161253901839676, "grad_norm": 147.26417541503906, "learning_rate": 7.890735204156207e-07, "loss": 18.4375, "step": 17485 }, { "epoch": 1.1613203161320316, "grad_norm": 218.0462646484375, "learning_rate": 7.88968389390728e-07, "loss": 18.875, "step": 17486 }, { "epoch": 1.1613867304243874, "grad_norm": 180.99118041992188, "learning_rate": 7.888632608069271e-07, "loss": 16.9844, "step": 17487 }, { "epoch": 1.161453144716743, "grad_norm": 119.05924224853516, "learning_rate": 7.887581346654345e-07, "loss": 12.875, "step": 17488 }, { "epoch": 1.1615195590090988, "grad_norm": 169.8704833984375, "learning_rate": 7.886530109674655e-07, "loss": 17.9531, "step": 17489 }, { "epoch": 1.1615859733014544, "grad_norm": 142.71815490722656, "learning_rate": 7.885478897142373e-07, "loss": 14.3125, "step": 17490 }, { "epoch": 1.16165238759381, "grad_norm": 169.3375244140625, "learning_rate": 7.884427709069645e-07, "loss": 20.0312, "step": 17491 }, { "epoch": 1.161718801886166, "grad_norm": 220.0456085205078, "learning_rate": 7.883376545468641e-07, "loss": 19.5312, "step": 17492 }, { "epoch": 1.1617852161785216, "grad_norm": 213.80596923828125, "learning_rate": 7.882325406351516e-07, "loss": 12.625, "step": 17493 }, { "epoch": 1.1618516304708773, "grad_norm": 363.7996826171875, "learning_rate": 7.88127429173043e-07, "loss": 19.1562, "step": 17494 }, { "epoch": 1.1619180447632331, "grad_norm": 128.24717712402344, "learning_rate": 7.880223201617537e-07, "loss": 13.0469, "step": 17495 }, { "epoch": 1.1619844590555888, "grad_norm": 226.9009552001953, "learning_rate": 7.879172136025003e-07, "loss": 15.1719, "step": 17496 }, { "epoch": 1.1620508733479444, "grad_norm": 125.3795166015625, "learning_rate": 7.878121094964984e-07, "loss": 18.375, "step": 17497 }, { "epoch": 1.1621172876403003, "grad_norm": 531.1295166015625, "learning_rate": 7.877070078449633e-07, "loss": 24.6094, "step": 17498 }, { "epoch": 1.162183701932656, "grad_norm": 174.71261596679688, "learning_rate": 7.876019086491115e-07, "loss": 16.6094, "step": 17499 }, { "epoch": 1.1622501162250116, "grad_norm": 180.57154846191406, "learning_rate": 7.874968119101582e-07, "loss": 19.3438, "step": 17500 }, { "epoch": 1.1623165305173673, "grad_norm": 152.6283416748047, "learning_rate": 7.873917176293194e-07, "loss": 14.3438, "step": 17501 }, { "epoch": 1.162382944809723, "grad_norm": 174.87460327148438, "learning_rate": 7.872866258078104e-07, "loss": 14.5469, "step": 17502 }, { "epoch": 1.1624493591020788, "grad_norm": 194.2670135498047, "learning_rate": 7.871815364468471e-07, "loss": 16.1875, "step": 17503 }, { "epoch": 1.1625157733944345, "grad_norm": 238.41421508789062, "learning_rate": 7.870764495476452e-07, "loss": 15.5469, "step": 17504 }, { "epoch": 1.1625821876867901, "grad_norm": 184.95144653320312, "learning_rate": 7.869713651114201e-07, "loss": 19.875, "step": 17505 }, { "epoch": 1.162648601979146, "grad_norm": 114.85159301757812, "learning_rate": 7.868662831393871e-07, "loss": 13.2188, "step": 17506 }, { "epoch": 1.1627150162715016, "grad_norm": 232.60955810546875, "learning_rate": 7.867612036327624e-07, "loss": 17.7656, "step": 17507 }, { "epoch": 1.1627814305638573, "grad_norm": 121.9308090209961, "learning_rate": 7.866561265927611e-07, "loss": 13.4219, "step": 17508 }, { "epoch": 1.1628478448562132, "grad_norm": 1203.8126220703125, "learning_rate": 7.865510520205988e-07, "loss": 26.7188, "step": 17509 }, { "epoch": 1.1629142591485688, "grad_norm": 322.31317138671875, "learning_rate": 7.864459799174904e-07, "loss": 18.3281, "step": 17510 }, { "epoch": 1.1629806734409245, "grad_norm": 180.00062561035156, "learning_rate": 7.863409102846525e-07, "loss": 16.875, "step": 17511 }, { "epoch": 1.1630470877332801, "grad_norm": 116.29165649414062, "learning_rate": 7.86235843123299e-07, "loss": 14.2031, "step": 17512 }, { "epoch": 1.1631135020256358, "grad_norm": 152.94107055664062, "learning_rate": 7.861307784346464e-07, "loss": 16.2344, "step": 17513 }, { "epoch": 1.1631799163179917, "grad_norm": 162.93310546875, "learning_rate": 7.860257162199094e-07, "loss": 16.9062, "step": 17514 }, { "epoch": 1.1632463306103473, "grad_norm": 285.8455505371094, "learning_rate": 7.859206564803038e-07, "loss": 14.7812, "step": 17515 }, { "epoch": 1.163312744902703, "grad_norm": 146.69998168945312, "learning_rate": 7.858155992170444e-07, "loss": 16.875, "step": 17516 }, { "epoch": 1.1633791591950589, "grad_norm": 200.60935974121094, "learning_rate": 7.857105444313467e-07, "loss": 11.0781, "step": 17517 }, { "epoch": 1.1634455734874145, "grad_norm": 273.37738037109375, "learning_rate": 7.856054921244258e-07, "loss": 13.4219, "step": 17518 }, { "epoch": 1.1635119877797702, "grad_norm": 297.83514404296875, "learning_rate": 7.855004422974971e-07, "loss": 22.7812, "step": 17519 }, { "epoch": 1.163578402072126, "grad_norm": 158.43197631835938, "learning_rate": 7.853953949517753e-07, "loss": 17.5625, "step": 17520 }, { "epoch": 1.1636448163644817, "grad_norm": 114.7208251953125, "learning_rate": 7.852903500884763e-07, "loss": 13.9531, "step": 17521 }, { "epoch": 1.1637112306568373, "grad_norm": 140.33872985839844, "learning_rate": 7.851853077088143e-07, "loss": 11.9688, "step": 17522 }, { "epoch": 1.163777644949193, "grad_norm": 502.7472839355469, "learning_rate": 7.85080267814005e-07, "loss": 13.9375, "step": 17523 }, { "epoch": 1.1638440592415489, "grad_norm": 253.66751098632812, "learning_rate": 7.84975230405263e-07, "loss": 24.3438, "step": 17524 }, { "epoch": 1.1639104735339045, "grad_norm": 332.1309814453125, "learning_rate": 7.848701954838038e-07, "loss": 16.0, "step": 17525 }, { "epoch": 1.1639768878262602, "grad_norm": 174.80020141601562, "learning_rate": 7.84765163050842e-07, "loss": 14.4219, "step": 17526 }, { "epoch": 1.1640433021186158, "grad_norm": 299.1360168457031, "learning_rate": 7.846601331075929e-07, "loss": 26.1875, "step": 17527 }, { "epoch": 1.1641097164109717, "grad_norm": 212.1162109375, "learning_rate": 7.845551056552707e-07, "loss": 14.8906, "step": 17528 }, { "epoch": 1.1641761307033274, "grad_norm": 200.5056610107422, "learning_rate": 7.844500806950914e-07, "loss": 16.3281, "step": 17529 }, { "epoch": 1.164242544995683, "grad_norm": 202.6641845703125, "learning_rate": 7.843450582282689e-07, "loss": 15.7344, "step": 17530 }, { "epoch": 1.164308959288039, "grad_norm": 185.67495727539062, "learning_rate": 7.842400382560186e-07, "loss": 22.0625, "step": 17531 }, { "epoch": 1.1643753735803946, "grad_norm": 191.28997802734375, "learning_rate": 7.841350207795548e-07, "loss": 21.5312, "step": 17532 }, { "epoch": 1.1644417878727502, "grad_norm": 196.34866333007812, "learning_rate": 7.840300058000928e-07, "loss": 18.25, "step": 17533 }, { "epoch": 1.1645082021651059, "grad_norm": 181.0345458984375, "learning_rate": 7.839249933188474e-07, "loss": 14.6875, "step": 17534 }, { "epoch": 1.1645746164574617, "grad_norm": 177.39105224609375, "learning_rate": 7.838199833370328e-07, "loss": 13.6562, "step": 17535 }, { "epoch": 1.1646410307498174, "grad_norm": 105.66646575927734, "learning_rate": 7.837149758558642e-07, "loss": 11.6406, "step": 17536 }, { "epoch": 1.164707445042173, "grad_norm": 371.6313781738281, "learning_rate": 7.836099708765557e-07, "loss": 19.9375, "step": 17537 }, { "epoch": 1.1647738593345287, "grad_norm": 164.6636505126953, "learning_rate": 7.835049684003225e-07, "loss": 16.8281, "step": 17538 }, { "epoch": 1.1648402736268846, "grad_norm": 172.31935119628906, "learning_rate": 7.83399968428379e-07, "loss": 12.9219, "step": 17539 }, { "epoch": 1.1649066879192402, "grad_norm": 235.9416046142578, "learning_rate": 7.832949709619399e-07, "loss": 12.9219, "step": 17540 }, { "epoch": 1.164973102211596, "grad_norm": 183.65333557128906, "learning_rate": 7.831899760022192e-07, "loss": 17.3906, "step": 17541 }, { "epoch": 1.1650395165039518, "grad_norm": 265.05841064453125, "learning_rate": 7.830849835504323e-07, "loss": 19.1406, "step": 17542 }, { "epoch": 1.1651059307963074, "grad_norm": 313.2357482910156, "learning_rate": 7.829799936077928e-07, "loss": 17.625, "step": 17543 }, { "epoch": 1.165172345088663, "grad_norm": 415.2743835449219, "learning_rate": 7.82875006175516e-07, "loss": 21.7188, "step": 17544 }, { "epoch": 1.1652387593810187, "grad_norm": 195.2158203125, "learning_rate": 7.827700212548155e-07, "loss": 16.1094, "step": 17545 }, { "epoch": 1.1653051736733746, "grad_norm": 309.4183044433594, "learning_rate": 7.826650388469065e-07, "loss": 13.4062, "step": 17546 }, { "epoch": 1.1653715879657303, "grad_norm": 137.83486938476562, "learning_rate": 7.825600589530028e-07, "loss": 15.2344, "step": 17547 }, { "epoch": 1.165438002258086, "grad_norm": 145.22276306152344, "learning_rate": 7.824550815743193e-07, "loss": 14.3281, "step": 17548 }, { "epoch": 1.1655044165504416, "grad_norm": 268.00433349609375, "learning_rate": 7.823501067120694e-07, "loss": 16.6719, "step": 17549 }, { "epoch": 1.1655708308427974, "grad_norm": 134.51171875, "learning_rate": 7.822451343674684e-07, "loss": 10.2031, "step": 17550 }, { "epoch": 1.165637245135153, "grad_norm": 121.15927124023438, "learning_rate": 7.8214016454173e-07, "loss": 14.0938, "step": 17551 }, { "epoch": 1.1657036594275088, "grad_norm": 212.88421630859375, "learning_rate": 7.820351972360687e-07, "loss": 24.2578, "step": 17552 }, { "epoch": 1.1657700737198646, "grad_norm": 263.3363952636719, "learning_rate": 7.819302324516981e-07, "loss": 16.8125, "step": 17553 }, { "epoch": 1.1658364880122203, "grad_norm": 169.70550537109375, "learning_rate": 7.818252701898335e-07, "loss": 11.7188, "step": 17554 }, { "epoch": 1.165902902304576, "grad_norm": 221.8389434814453, "learning_rate": 7.817203104516878e-07, "loss": 14.5938, "step": 17555 }, { "epoch": 1.1659693165969316, "grad_norm": 221.61839294433594, "learning_rate": 7.816153532384759e-07, "loss": 19.375, "step": 17556 }, { "epoch": 1.1660357308892875, "grad_norm": 228.7497100830078, "learning_rate": 7.815103985514115e-07, "loss": 12.625, "step": 17557 }, { "epoch": 1.1661021451816431, "grad_norm": 139.11154174804688, "learning_rate": 7.814054463917092e-07, "loss": 14.1875, "step": 17558 }, { "epoch": 1.1661685594739988, "grad_norm": 218.87669372558594, "learning_rate": 7.813004967605822e-07, "loss": 12.9688, "step": 17559 }, { "epoch": 1.1662349737663544, "grad_norm": 1083.8507080078125, "learning_rate": 7.811955496592453e-07, "loss": 16.1094, "step": 17560 }, { "epoch": 1.1663013880587103, "grad_norm": 125.99971008300781, "learning_rate": 7.810906050889119e-07, "loss": 15.0469, "step": 17561 }, { "epoch": 1.166367802351066, "grad_norm": 146.36984252929688, "learning_rate": 7.809856630507962e-07, "loss": 14.875, "step": 17562 }, { "epoch": 1.1664342166434216, "grad_norm": 232.15162658691406, "learning_rate": 7.808807235461119e-07, "loss": 20.3125, "step": 17563 }, { "epoch": 1.1665006309357775, "grad_norm": 149.32493591308594, "learning_rate": 7.807757865760733e-07, "loss": 15.5781, "step": 17564 }, { "epoch": 1.1665670452281331, "grad_norm": 489.23187255859375, "learning_rate": 7.806708521418938e-07, "loss": 15.9844, "step": 17565 }, { "epoch": 1.1666334595204888, "grad_norm": 328.9189453125, "learning_rate": 7.805659202447879e-07, "loss": 16.1562, "step": 17566 }, { "epoch": 1.1666998738128445, "grad_norm": 112.45072174072266, "learning_rate": 7.80460990885968e-07, "loss": 15.6094, "step": 17567 }, { "epoch": 1.1667662881052003, "grad_norm": 236.72039794921875, "learning_rate": 7.803560640666496e-07, "loss": 14.9062, "step": 17568 }, { "epoch": 1.166832702397556, "grad_norm": 112.4100570678711, "learning_rate": 7.802511397880451e-07, "loss": 15.0469, "step": 17569 }, { "epoch": 1.1668991166899116, "grad_norm": 160.06680297851562, "learning_rate": 7.801462180513687e-07, "loss": 13.4844, "step": 17570 }, { "epoch": 1.1669655309822673, "grad_norm": 215.12579345703125, "learning_rate": 7.800412988578344e-07, "loss": 17.7188, "step": 17571 }, { "epoch": 1.1670319452746232, "grad_norm": 161.6468505859375, "learning_rate": 7.799363822086553e-07, "loss": 19.6406, "step": 17572 }, { "epoch": 1.1670983595669788, "grad_norm": 327.7123718261719, "learning_rate": 7.798314681050454e-07, "loss": 21.4141, "step": 17573 }, { "epoch": 1.1671647738593345, "grad_norm": 120.58265686035156, "learning_rate": 7.797265565482177e-07, "loss": 12.3906, "step": 17574 }, { "epoch": 1.1672311881516904, "grad_norm": 206.17823791503906, "learning_rate": 7.79621647539387e-07, "loss": 15.875, "step": 17575 }, { "epoch": 1.167297602444046, "grad_norm": 232.80050659179688, "learning_rate": 7.795167410797652e-07, "loss": 15.9062, "step": 17576 }, { "epoch": 1.1673640167364017, "grad_norm": 159.2921905517578, "learning_rate": 7.79411837170567e-07, "loss": 22.5156, "step": 17577 }, { "epoch": 1.1674304310287573, "grad_norm": 276.05328369140625, "learning_rate": 7.793069358130054e-07, "loss": 13.3438, "step": 17578 }, { "epoch": 1.1674968453211132, "grad_norm": 231.457275390625, "learning_rate": 7.792020370082939e-07, "loss": 13.4844, "step": 17579 }, { "epoch": 1.1675632596134689, "grad_norm": 200.36474609375, "learning_rate": 7.790971407576456e-07, "loss": 15.5781, "step": 17580 }, { "epoch": 1.1676296739058245, "grad_norm": 325.17022705078125, "learning_rate": 7.789922470622747e-07, "loss": 15.75, "step": 17581 }, { "epoch": 1.1676960881981802, "grad_norm": 177.27926635742188, "learning_rate": 7.788873559233939e-07, "loss": 12.2266, "step": 17582 }, { "epoch": 1.167762502490536, "grad_norm": 190.56101989746094, "learning_rate": 7.787824673422167e-07, "loss": 16.7031, "step": 17583 }, { "epoch": 1.1678289167828917, "grad_norm": 155.7642059326172, "learning_rate": 7.786775813199559e-07, "loss": 15.4844, "step": 17584 }, { "epoch": 1.1678953310752473, "grad_norm": 204.49224853515625, "learning_rate": 7.785726978578257e-07, "loss": 20.6094, "step": 17585 }, { "epoch": 1.1679617453676032, "grad_norm": 152.17430114746094, "learning_rate": 7.784678169570388e-07, "loss": 15.875, "step": 17586 }, { "epoch": 1.1680281596599589, "grad_norm": 216.16632080078125, "learning_rate": 7.783629386188084e-07, "loss": 12.8594, "step": 17587 }, { "epoch": 1.1680945739523145, "grad_norm": 375.8567810058594, "learning_rate": 7.782580628443475e-07, "loss": 15.4688, "step": 17588 }, { "epoch": 1.1681609882446702, "grad_norm": 406.4764404296875, "learning_rate": 7.781531896348699e-07, "loss": 13.9531, "step": 17589 }, { "epoch": 1.168227402537026, "grad_norm": 586.8589477539062, "learning_rate": 7.780483189915879e-07, "loss": 14.7344, "step": 17590 }, { "epoch": 1.1682938168293817, "grad_norm": 230.0780792236328, "learning_rate": 7.779434509157152e-07, "loss": 17.5469, "step": 17591 }, { "epoch": 1.1683602311217374, "grad_norm": 175.38111877441406, "learning_rate": 7.778385854084644e-07, "loss": 16.25, "step": 17592 }, { "epoch": 1.168426645414093, "grad_norm": 367.6282653808594, "learning_rate": 7.777337224710489e-07, "loss": 16.8594, "step": 17593 }, { "epoch": 1.168493059706449, "grad_norm": 146.67877197265625, "learning_rate": 7.776288621046815e-07, "loss": 15.2031, "step": 17594 }, { "epoch": 1.1685594739988046, "grad_norm": 257.96759033203125, "learning_rate": 7.775240043105753e-07, "loss": 12.6406, "step": 17595 }, { "epoch": 1.1686258882911602, "grad_norm": 171.9000244140625, "learning_rate": 7.774191490899429e-07, "loss": 16.6875, "step": 17596 }, { "epoch": 1.168692302583516, "grad_norm": 406.2650451660156, "learning_rate": 7.773142964439978e-07, "loss": 15.5469, "step": 17597 }, { "epoch": 1.1687587168758717, "grad_norm": 614.8400268554688, "learning_rate": 7.772094463739519e-07, "loss": 27.4375, "step": 17598 }, { "epoch": 1.1688251311682274, "grad_norm": 391.0210876464844, "learning_rate": 7.771045988810192e-07, "loss": 27.4375, "step": 17599 }, { "epoch": 1.168891545460583, "grad_norm": 165.62608337402344, "learning_rate": 7.769997539664117e-07, "loss": 16.0938, "step": 17600 }, { "epoch": 1.168957959752939, "grad_norm": 256.2532653808594, "learning_rate": 7.768949116313428e-07, "loss": 15.6406, "step": 17601 }, { "epoch": 1.1690243740452946, "grad_norm": 134.48471069335938, "learning_rate": 7.767900718770244e-07, "loss": 17.3438, "step": 17602 }, { "epoch": 1.1690907883376502, "grad_norm": 200.53839111328125, "learning_rate": 7.7668523470467e-07, "loss": 26.7188, "step": 17603 }, { "epoch": 1.1691572026300059, "grad_norm": 459.4993896484375, "learning_rate": 7.76580400115492e-07, "loss": 22.0, "step": 17604 }, { "epoch": 1.1692236169223618, "grad_norm": 321.47412109375, "learning_rate": 7.764755681107034e-07, "loss": 16.1875, "step": 17605 }, { "epoch": 1.1692900312147174, "grad_norm": 288.0174560546875, "learning_rate": 7.763707386915159e-07, "loss": 14.4531, "step": 17606 }, { "epoch": 1.169356445507073, "grad_norm": 491.6966857910156, "learning_rate": 7.762659118591431e-07, "loss": 12.8125, "step": 17607 }, { "epoch": 1.169422859799429, "grad_norm": 592.3446044921875, "learning_rate": 7.761610876147973e-07, "loss": 13.2188, "step": 17608 }, { "epoch": 1.1694892740917846, "grad_norm": 308.4627380371094, "learning_rate": 7.760562659596908e-07, "loss": 14.8125, "step": 17609 }, { "epoch": 1.1695556883841403, "grad_norm": 189.02037048339844, "learning_rate": 7.759514468950364e-07, "loss": 14.0078, "step": 17610 }, { "epoch": 1.169622102676496, "grad_norm": 112.40382385253906, "learning_rate": 7.758466304220465e-07, "loss": 14.0625, "step": 17611 }, { "epoch": 1.1696885169688518, "grad_norm": 132.79949951171875, "learning_rate": 7.757418165419336e-07, "loss": 11.8125, "step": 17612 }, { "epoch": 1.1697549312612074, "grad_norm": 121.86669158935547, "learning_rate": 7.756370052559096e-07, "loss": 13.5469, "step": 17613 }, { "epoch": 1.169821345553563, "grad_norm": 261.1663513183594, "learning_rate": 7.755321965651879e-07, "loss": 14.0156, "step": 17614 }, { "epoch": 1.1698877598459188, "grad_norm": 163.2501678466797, "learning_rate": 7.754273904709799e-07, "loss": 16.1094, "step": 17615 }, { "epoch": 1.1699541741382746, "grad_norm": 199.75599670410156, "learning_rate": 7.753225869744988e-07, "loss": 15.2188, "step": 17616 }, { "epoch": 1.1700205884306303, "grad_norm": 161.91043090820312, "learning_rate": 7.752177860769558e-07, "loss": 15.5469, "step": 17617 }, { "epoch": 1.170087002722986, "grad_norm": 306.150146484375, "learning_rate": 7.751129877795646e-07, "loss": 21.7656, "step": 17618 }, { "epoch": 1.1701534170153418, "grad_norm": 413.77313232421875, "learning_rate": 7.75008192083536e-07, "loss": 25.875, "step": 17619 }, { "epoch": 1.1702198313076975, "grad_norm": 120.72925567626953, "learning_rate": 7.749033989900835e-07, "loss": 11.9844, "step": 17620 }, { "epoch": 1.1702862456000531, "grad_norm": 270.65557861328125, "learning_rate": 7.747986085004183e-07, "loss": 14.4688, "step": 17621 }, { "epoch": 1.1703526598924088, "grad_norm": 156.0135498046875, "learning_rate": 7.746938206157531e-07, "loss": 14.1719, "step": 17622 }, { "epoch": 1.1704190741847647, "grad_norm": 378.08740234375, "learning_rate": 7.745890353372995e-07, "loss": 18.0938, "step": 17623 }, { "epoch": 1.1704854884771203, "grad_norm": 168.88235473632812, "learning_rate": 7.744842526662705e-07, "loss": 13.1562, "step": 17624 }, { "epoch": 1.170551902769476, "grad_norm": 205.49754333496094, "learning_rate": 7.743794726038772e-07, "loss": 14.0859, "step": 17625 }, { "epoch": 1.1706183170618316, "grad_norm": 124.06128692626953, "learning_rate": 7.742746951513324e-07, "loss": 13.9219, "step": 17626 }, { "epoch": 1.1706847313541875, "grad_norm": 469.3865661621094, "learning_rate": 7.741699203098474e-07, "loss": 20.1875, "step": 17627 }, { "epoch": 1.1707511456465431, "grad_norm": 282.68280029296875, "learning_rate": 7.740651480806349e-07, "loss": 18.9062, "step": 17628 }, { "epoch": 1.1708175599388988, "grad_norm": 86.96739196777344, "learning_rate": 7.739603784649063e-07, "loss": 12.8906, "step": 17629 }, { "epoch": 1.1708839742312547, "grad_norm": 401.01336669921875, "learning_rate": 7.738556114638739e-07, "loss": 18.2656, "step": 17630 }, { "epoch": 1.1709503885236103, "grad_norm": 127.38609313964844, "learning_rate": 7.73750847078749e-07, "loss": 17.5625, "step": 17631 }, { "epoch": 1.171016802815966, "grad_norm": 378.5154724121094, "learning_rate": 7.736460853107442e-07, "loss": 15.5, "step": 17632 }, { "epoch": 1.1710832171083216, "grad_norm": 167.82786560058594, "learning_rate": 7.735413261610707e-07, "loss": 17.5625, "step": 17633 }, { "epoch": 1.1711496314006775, "grad_norm": 269.83856201171875, "learning_rate": 7.734365696309408e-07, "loss": 15.3906, "step": 17634 }, { "epoch": 1.1712160456930332, "grad_norm": 294.719482421875, "learning_rate": 7.733318157215656e-07, "loss": 19.1562, "step": 17635 }, { "epoch": 1.1712824599853888, "grad_norm": 156.0894775390625, "learning_rate": 7.732270644341576e-07, "loss": 25.7812, "step": 17636 }, { "epoch": 1.1713488742777445, "grad_norm": 144.9691162109375, "learning_rate": 7.731223157699282e-07, "loss": 13.5625, "step": 17637 }, { "epoch": 1.1714152885701004, "grad_norm": 315.2096252441406, "learning_rate": 7.73017569730089e-07, "loss": 13.6562, "step": 17638 }, { "epoch": 1.171481702862456, "grad_norm": 217.3922882080078, "learning_rate": 7.729128263158515e-07, "loss": 18.5312, "step": 17639 }, { "epoch": 1.1715481171548117, "grad_norm": 318.5798645019531, "learning_rate": 7.728080855284278e-07, "loss": 20.0938, "step": 17640 }, { "epoch": 1.1716145314471675, "grad_norm": 199.1121826171875, "learning_rate": 7.727033473690285e-07, "loss": 14.5156, "step": 17641 }, { "epoch": 1.1716809457395232, "grad_norm": 230.7648162841797, "learning_rate": 7.725986118388666e-07, "loss": 21.1875, "step": 17642 }, { "epoch": 1.1717473600318788, "grad_norm": 127.08417510986328, "learning_rate": 7.724938789391523e-07, "loss": 15.5625, "step": 17643 }, { "epoch": 1.1718137743242345, "grad_norm": 317.8690490722656, "learning_rate": 7.723891486710975e-07, "loss": 19.5469, "step": 17644 }, { "epoch": 1.1718801886165904, "grad_norm": 95.06637573242188, "learning_rate": 7.722844210359143e-07, "loss": 14.75, "step": 17645 }, { "epoch": 1.171946602908946, "grad_norm": 137.7960205078125, "learning_rate": 7.721796960348133e-07, "loss": 13.8125, "step": 17646 }, { "epoch": 1.1720130172013017, "grad_norm": 311.729248046875, "learning_rate": 7.720749736690065e-07, "loss": 17.7812, "step": 17647 }, { "epoch": 1.1720794314936573, "grad_norm": 241.95648193359375, "learning_rate": 7.719702539397043e-07, "loss": 13.125, "step": 17648 }, { "epoch": 1.1721458457860132, "grad_norm": 154.2388153076172, "learning_rate": 7.718655368481194e-07, "loss": 11.375, "step": 17649 }, { "epoch": 1.1722122600783689, "grad_norm": 202.59117126464844, "learning_rate": 7.717608223954621e-07, "loss": 16.5469, "step": 17650 }, { "epoch": 1.1722786743707245, "grad_norm": 230.63743591308594, "learning_rate": 7.716561105829443e-07, "loss": 18.0625, "step": 17651 }, { "epoch": 1.1723450886630804, "grad_norm": 162.51722717285156, "learning_rate": 7.715514014117764e-07, "loss": 12.4766, "step": 17652 }, { "epoch": 1.172411502955436, "grad_norm": 142.1806182861328, "learning_rate": 7.714466948831707e-07, "loss": 14.1094, "step": 17653 }, { "epoch": 1.1724779172477917, "grad_norm": 145.32777404785156, "learning_rate": 7.713419909983375e-07, "loss": 14.5938, "step": 17654 }, { "epoch": 1.1725443315401474, "grad_norm": 377.0090637207031, "learning_rate": 7.712372897584887e-07, "loss": 17.0312, "step": 17655 }, { "epoch": 1.1726107458325032, "grad_norm": 137.1147003173828, "learning_rate": 7.711325911648344e-07, "loss": 16.6406, "step": 17656 }, { "epoch": 1.172677160124859, "grad_norm": 244.98110961914062, "learning_rate": 7.710278952185868e-07, "loss": 19.3125, "step": 17657 }, { "epoch": 1.1727435744172146, "grad_norm": 419.81396484375, "learning_rate": 7.709232019209563e-07, "loss": 14.1094, "step": 17658 }, { "epoch": 1.1728099887095702, "grad_norm": 641.5509033203125, "learning_rate": 7.708185112731544e-07, "loss": 15.6094, "step": 17659 }, { "epoch": 1.172876403001926, "grad_norm": 236.45872497558594, "learning_rate": 7.707138232763915e-07, "loss": 18.3906, "step": 17660 }, { "epoch": 1.1729428172942817, "grad_norm": 447.65277099609375, "learning_rate": 7.70609137931879e-07, "loss": 17.4844, "step": 17661 }, { "epoch": 1.1730092315866374, "grad_norm": 139.16522216796875, "learning_rate": 7.705044552408273e-07, "loss": 12.9141, "step": 17662 }, { "epoch": 1.1730756458789933, "grad_norm": 323.9077453613281, "learning_rate": 7.703997752044482e-07, "loss": 15.2188, "step": 17663 }, { "epoch": 1.173142060171349, "grad_norm": 224.6107177734375, "learning_rate": 7.70295097823952e-07, "loss": 12.9531, "step": 17664 }, { "epoch": 1.1732084744637046, "grad_norm": 324.9121398925781, "learning_rate": 7.701904231005498e-07, "loss": 15.2812, "step": 17665 }, { "epoch": 1.1732748887560602, "grad_norm": 206.50074768066406, "learning_rate": 7.700857510354519e-07, "loss": 18.2188, "step": 17666 }, { "epoch": 1.173341303048416, "grad_norm": 178.17959594726562, "learning_rate": 7.699810816298697e-07, "loss": 12.1328, "step": 17667 }, { "epoch": 1.1734077173407718, "grad_norm": 121.91891479492188, "learning_rate": 7.698764148850136e-07, "loss": 11.6719, "step": 17668 }, { "epoch": 1.1734741316331274, "grad_norm": 171.323974609375, "learning_rate": 7.697717508020947e-07, "loss": 17.7969, "step": 17669 }, { "epoch": 1.173540545925483, "grad_norm": 124.04303741455078, "learning_rate": 7.696670893823229e-07, "loss": 12.6328, "step": 17670 }, { "epoch": 1.173606960217839, "grad_norm": 246.12515258789062, "learning_rate": 7.695624306269099e-07, "loss": 17.8125, "step": 17671 }, { "epoch": 1.1736733745101946, "grad_norm": 368.3274841308594, "learning_rate": 7.694577745370656e-07, "loss": 21.1562, "step": 17672 }, { "epoch": 1.1737397888025503, "grad_norm": 262.7695617675781, "learning_rate": 7.693531211140011e-07, "loss": 21.375, "step": 17673 }, { "epoch": 1.1738062030949061, "grad_norm": 509.00146484375, "learning_rate": 7.692484703589261e-07, "loss": 15.2969, "step": 17674 }, { "epoch": 1.1738726173872618, "grad_norm": 168.45033264160156, "learning_rate": 7.691438222730523e-07, "loss": 14.2188, "step": 17675 }, { "epoch": 1.1739390316796174, "grad_norm": 285.5798034667969, "learning_rate": 7.690391768575895e-07, "loss": 19.2969, "step": 17676 }, { "epoch": 1.174005445971973, "grad_norm": 178.7461395263672, "learning_rate": 7.689345341137483e-07, "loss": 19.8438, "step": 17677 }, { "epoch": 1.174071860264329, "grad_norm": 177.2509765625, "learning_rate": 7.688298940427389e-07, "loss": 16.75, "step": 17678 }, { "epoch": 1.1741382745566846, "grad_norm": 157.1539306640625, "learning_rate": 7.687252566457726e-07, "loss": 15.0938, "step": 17679 }, { "epoch": 1.1742046888490403, "grad_norm": 299.3498840332031, "learning_rate": 7.686206219240585e-07, "loss": 14.5547, "step": 17680 }, { "epoch": 1.174271103141396, "grad_norm": 143.21397399902344, "learning_rate": 7.685159898788077e-07, "loss": 16.0781, "step": 17681 }, { "epoch": 1.1743375174337518, "grad_norm": 367.38128662109375, "learning_rate": 7.68411360511231e-07, "loss": 13.2812, "step": 17682 }, { "epoch": 1.1744039317261075, "grad_norm": 320.7132263183594, "learning_rate": 7.683067338225375e-07, "loss": 20.8281, "step": 17683 }, { "epoch": 1.1744703460184631, "grad_norm": 145.3070831298828, "learning_rate": 7.682021098139387e-07, "loss": 14.0547, "step": 17684 }, { "epoch": 1.174536760310819, "grad_norm": 159.32188415527344, "learning_rate": 7.680974884866438e-07, "loss": 14.3906, "step": 17685 }, { "epoch": 1.1746031746031746, "grad_norm": 170.69117736816406, "learning_rate": 7.679928698418638e-07, "loss": 16.625, "step": 17686 }, { "epoch": 1.1746695888955303, "grad_norm": 164.21609497070312, "learning_rate": 7.67888253880808e-07, "loss": 14.2031, "step": 17687 }, { "epoch": 1.174736003187886, "grad_norm": 217.71792602539062, "learning_rate": 7.677836406046873e-07, "loss": 19.4062, "step": 17688 }, { "epoch": 1.1748024174802418, "grad_norm": 156.77699279785156, "learning_rate": 7.676790300147116e-07, "loss": 16.2188, "step": 17689 }, { "epoch": 1.1748688317725975, "grad_norm": 274.80487060546875, "learning_rate": 7.67574422112091e-07, "loss": 19.1562, "step": 17690 }, { "epoch": 1.1749352460649531, "grad_norm": 141.3939971923828, "learning_rate": 7.674698168980352e-07, "loss": 16.9375, "step": 17691 }, { "epoch": 1.1750016603573088, "grad_norm": 130.1848907470703, "learning_rate": 7.673652143737546e-07, "loss": 13.1172, "step": 17692 }, { "epoch": 1.1750680746496647, "grad_norm": 305.5452575683594, "learning_rate": 7.672606145404591e-07, "loss": 15.4453, "step": 17693 }, { "epoch": 1.1751344889420203, "grad_norm": 782.112548828125, "learning_rate": 7.671560173993586e-07, "loss": 18.625, "step": 17694 }, { "epoch": 1.175200903234376, "grad_norm": 147.15756225585938, "learning_rate": 7.670514229516628e-07, "loss": 17.3438, "step": 17695 }, { "epoch": 1.1752673175267319, "grad_norm": 149.83522033691406, "learning_rate": 7.669468311985822e-07, "loss": 15.625, "step": 17696 }, { "epoch": 1.1753337318190875, "grad_norm": 110.26860046386719, "learning_rate": 7.66842242141326e-07, "loss": 12.0156, "step": 17697 }, { "epoch": 1.1754001461114432, "grad_norm": 176.2427215576172, "learning_rate": 7.667376557811045e-07, "loss": 16.5781, "step": 17698 }, { "epoch": 1.1754665604037988, "grad_norm": 227.57647705078125, "learning_rate": 7.666330721191268e-07, "loss": 15.25, "step": 17699 }, { "epoch": 1.1755329746961547, "grad_norm": 380.24627685546875, "learning_rate": 7.66528491156604e-07, "loss": 14.0, "step": 17700 }, { "epoch": 1.1755993889885104, "grad_norm": 128.5869140625, "learning_rate": 7.664239128947442e-07, "loss": 13.4062, "step": 17701 }, { "epoch": 1.175665803280866, "grad_norm": 185.16256713867188, "learning_rate": 7.663193373347582e-07, "loss": 23.0938, "step": 17702 }, { "epoch": 1.1757322175732217, "grad_norm": 427.9493103027344, "learning_rate": 7.662147644778554e-07, "loss": 19.0, "step": 17703 }, { "epoch": 1.1757986318655775, "grad_norm": 133.49362182617188, "learning_rate": 7.661101943252455e-07, "loss": 18.3906, "step": 17704 }, { "epoch": 1.1758650461579332, "grad_norm": 147.09339904785156, "learning_rate": 7.660056268781376e-07, "loss": 17.3281, "step": 17705 }, { "epoch": 1.1759314604502888, "grad_norm": 279.7727966308594, "learning_rate": 7.65901062137742e-07, "loss": 23.2812, "step": 17706 }, { "epoch": 1.1759978747426447, "grad_norm": 553.9464111328125, "learning_rate": 7.657965001052678e-07, "loss": 26.1562, "step": 17707 }, { "epoch": 1.1760642890350004, "grad_norm": 338.8232421875, "learning_rate": 7.656919407819249e-07, "loss": 22.7188, "step": 17708 }, { "epoch": 1.176130703327356, "grad_norm": 208.0997772216797, "learning_rate": 7.655873841689221e-07, "loss": 19.5938, "step": 17709 }, { "epoch": 1.1761971176197117, "grad_norm": 314.91168212890625, "learning_rate": 7.654828302674696e-07, "loss": 16.8438, "step": 17710 }, { "epoch": 1.1762635319120676, "grad_norm": 144.2444610595703, "learning_rate": 7.653782790787763e-07, "loss": 16.5312, "step": 17711 }, { "epoch": 1.1763299462044232, "grad_norm": 353.4610290527344, "learning_rate": 7.65273730604052e-07, "loss": 16.9531, "step": 17712 }, { "epoch": 1.1763963604967789, "grad_norm": 205.4808807373047, "learning_rate": 7.651691848445054e-07, "loss": 18.5312, "step": 17713 }, { "epoch": 1.1764627747891345, "grad_norm": 500.0388488769531, "learning_rate": 7.650646418013467e-07, "loss": 17.2656, "step": 17714 }, { "epoch": 1.1765291890814904, "grad_norm": 211.7627716064453, "learning_rate": 7.649601014757845e-07, "loss": 12.7812, "step": 17715 }, { "epoch": 1.176595603373846, "grad_norm": 225.31431579589844, "learning_rate": 7.648555638690286e-07, "loss": 16.7344, "step": 17716 }, { "epoch": 1.1766620176662017, "grad_norm": 153.68597412109375, "learning_rate": 7.647510289822875e-07, "loss": 14.5938, "step": 17717 }, { "epoch": 1.1767284319585576, "grad_norm": 730.1641235351562, "learning_rate": 7.64646496816771e-07, "loss": 20.9219, "step": 17718 }, { "epoch": 1.1767948462509132, "grad_norm": 157.8502197265625, "learning_rate": 7.645419673736884e-07, "loss": 14.4844, "step": 17719 }, { "epoch": 1.176861260543269, "grad_norm": 404.0727233886719, "learning_rate": 7.644374406542479e-07, "loss": 19.0469, "step": 17720 }, { "epoch": 1.1769276748356245, "grad_norm": 249.3336639404297, "learning_rate": 7.643329166596601e-07, "loss": 15.1094, "step": 17721 }, { "epoch": 1.1769940891279804, "grad_norm": 150.9966278076172, "learning_rate": 7.642283953911327e-07, "loss": 18.9219, "step": 17722 }, { "epoch": 1.177060503420336, "grad_norm": 164.73162841796875, "learning_rate": 7.641238768498756e-07, "loss": 16.1094, "step": 17723 }, { "epoch": 1.1771269177126917, "grad_norm": 286.6077880859375, "learning_rate": 7.640193610370972e-07, "loss": 14.7656, "step": 17724 }, { "epoch": 1.1771933320050474, "grad_norm": 228.9165496826172, "learning_rate": 7.639148479540069e-07, "loss": 17.5156, "step": 17725 }, { "epoch": 1.1772597462974033, "grad_norm": 166.4838104248047, "learning_rate": 7.638103376018132e-07, "loss": 15.4219, "step": 17726 }, { "epoch": 1.177326160589759, "grad_norm": 305.6656494140625, "learning_rate": 7.637058299817258e-07, "loss": 18.3438, "step": 17727 }, { "epoch": 1.1773925748821146, "grad_norm": 230.6614227294922, "learning_rate": 7.636013250949526e-07, "loss": 16.625, "step": 17728 }, { "epoch": 1.1774589891744704, "grad_norm": 306.11456298828125, "learning_rate": 7.634968229427035e-07, "loss": 22.9062, "step": 17729 }, { "epoch": 1.177525403466826, "grad_norm": 344.8258056640625, "learning_rate": 7.633923235261863e-07, "loss": 18.2031, "step": 17730 }, { "epoch": 1.1775918177591818, "grad_norm": 213.00828552246094, "learning_rate": 7.632878268466105e-07, "loss": 15.375, "step": 17731 }, { "epoch": 1.1776582320515374, "grad_norm": 305.6706237792969, "learning_rate": 7.631833329051846e-07, "loss": 19.8438, "step": 17732 }, { "epoch": 1.1777246463438933, "grad_norm": 198.2183837890625, "learning_rate": 7.630788417031176e-07, "loss": 18.5625, "step": 17733 }, { "epoch": 1.177791060636249, "grad_norm": 160.4662628173828, "learning_rate": 7.629743532416173e-07, "loss": 13.4062, "step": 17734 }, { "epoch": 1.1778574749286046, "grad_norm": 153.9591827392578, "learning_rate": 7.628698675218935e-07, "loss": 13.1562, "step": 17735 }, { "epoch": 1.1779238892209603, "grad_norm": 163.35186767578125, "learning_rate": 7.627653845451541e-07, "loss": 13.2656, "step": 17736 }, { "epoch": 1.1779903035133161, "grad_norm": 165.85609436035156, "learning_rate": 7.626609043126083e-07, "loss": 14.5, "step": 17737 }, { "epoch": 1.1780567178056718, "grad_norm": 676.7308349609375, "learning_rate": 7.625564268254637e-07, "loss": 18.6562, "step": 17738 }, { "epoch": 1.1781231320980274, "grad_norm": 147.2596435546875, "learning_rate": 7.6245195208493e-07, "loss": 17.4375, "step": 17739 }, { "epoch": 1.1781895463903833, "grad_norm": 127.781005859375, "learning_rate": 7.62347480092215e-07, "loss": 13.9844, "step": 17740 }, { "epoch": 1.178255960682739, "grad_norm": 229.40359497070312, "learning_rate": 7.622430108485273e-07, "loss": 11.1719, "step": 17741 }, { "epoch": 1.1783223749750946, "grad_norm": 155.7412109375, "learning_rate": 7.62138544355075e-07, "loss": 15.0, "step": 17742 }, { "epoch": 1.1783887892674503, "grad_norm": 129.451904296875, "learning_rate": 7.620340806130675e-07, "loss": 12.2344, "step": 17743 }, { "epoch": 1.1784552035598062, "grad_norm": 181.09349060058594, "learning_rate": 7.619296196237119e-07, "loss": 12.7109, "step": 17744 }, { "epoch": 1.1785216178521618, "grad_norm": 409.0323181152344, "learning_rate": 7.618251613882176e-07, "loss": 22.8438, "step": 17745 }, { "epoch": 1.1785880321445175, "grad_norm": 751.747314453125, "learning_rate": 7.617207059077924e-07, "loss": 16.8125, "step": 17746 }, { "epoch": 1.1786544464368731, "grad_norm": 334.43487548828125, "learning_rate": 7.616162531836447e-07, "loss": 17.75, "step": 17747 }, { "epoch": 1.178720860729229, "grad_norm": 275.314208984375, "learning_rate": 7.615118032169825e-07, "loss": 16.7656, "step": 17748 }, { "epoch": 1.1787872750215846, "grad_norm": 115.40287017822266, "learning_rate": 7.614073560090145e-07, "loss": 9.9531, "step": 17749 }, { "epoch": 1.1788536893139403, "grad_norm": 161.60501098632812, "learning_rate": 7.613029115609485e-07, "loss": 14.0156, "step": 17750 }, { "epoch": 1.1789201036062962, "grad_norm": 212.1392364501953, "learning_rate": 7.611984698739929e-07, "loss": 16.0781, "step": 17751 }, { "epoch": 1.1789865178986518, "grad_norm": 272.7602844238281, "learning_rate": 7.610940309493554e-07, "loss": 13.9375, "step": 17752 }, { "epoch": 1.1790529321910075, "grad_norm": 104.90901184082031, "learning_rate": 7.609895947882448e-07, "loss": 13.0234, "step": 17753 }, { "epoch": 1.1791193464833631, "grad_norm": 155.21258544921875, "learning_rate": 7.608851613918684e-07, "loss": 15.3906, "step": 17754 }, { "epoch": 1.179185760775719, "grad_norm": 190.33978271484375, "learning_rate": 7.607807307614344e-07, "loss": 16.2422, "step": 17755 }, { "epoch": 1.1792521750680747, "grad_norm": 187.7404022216797, "learning_rate": 7.606763028981516e-07, "loss": 14.3281, "step": 17756 }, { "epoch": 1.1793185893604303, "grad_norm": 290.4053955078125, "learning_rate": 7.605718778032269e-07, "loss": 18.0312, "step": 17757 }, { "epoch": 1.179385003652786, "grad_norm": 260.2407531738281, "learning_rate": 7.604674554778688e-07, "loss": 28.1875, "step": 17758 }, { "epoch": 1.1794514179451419, "grad_norm": 164.88296508789062, "learning_rate": 7.603630359232848e-07, "loss": 18.2344, "step": 17759 }, { "epoch": 1.1795178322374975, "grad_norm": 3313.362548828125, "learning_rate": 7.602586191406834e-07, "loss": 15.1094, "step": 17760 }, { "epoch": 1.1795842465298532, "grad_norm": 123.32572174072266, "learning_rate": 7.601542051312718e-07, "loss": 16.1562, "step": 17761 }, { "epoch": 1.179650660822209, "grad_norm": 160.5934295654297, "learning_rate": 7.600497938962582e-07, "loss": 13.5938, "step": 17762 }, { "epoch": 1.1797170751145647, "grad_norm": 137.662841796875, "learning_rate": 7.5994538543685e-07, "loss": 16.25, "step": 17763 }, { "epoch": 1.1797834894069203, "grad_norm": 201.55667114257812, "learning_rate": 7.598409797542556e-07, "loss": 17.0938, "step": 17764 }, { "epoch": 1.179849903699276, "grad_norm": 327.1702880859375, "learning_rate": 7.597365768496817e-07, "loss": 20.5469, "step": 17765 }, { "epoch": 1.1799163179916319, "grad_norm": 261.6137390136719, "learning_rate": 7.59632176724337e-07, "loss": 15.5625, "step": 17766 }, { "epoch": 1.1799827322839875, "grad_norm": 153.1864776611328, "learning_rate": 7.595277793794285e-07, "loss": 13.625, "step": 17767 }, { "epoch": 1.1800491465763432, "grad_norm": 179.5815887451172, "learning_rate": 7.594233848161641e-07, "loss": 12.5625, "step": 17768 }, { "epoch": 1.1801155608686988, "grad_norm": 109.07597351074219, "learning_rate": 7.593189930357509e-07, "loss": 13.1094, "step": 17769 }, { "epoch": 1.1801819751610547, "grad_norm": 158.99447631835938, "learning_rate": 7.592146040393971e-07, "loss": 14.2969, "step": 17770 }, { "epoch": 1.1802483894534104, "grad_norm": 270.0950622558594, "learning_rate": 7.591102178283098e-07, "loss": 15.0, "step": 17771 }, { "epoch": 1.180314803745766, "grad_norm": 484.1032409667969, "learning_rate": 7.590058344036968e-07, "loss": 29.1562, "step": 17772 }, { "epoch": 1.180381218038122, "grad_norm": 141.5408172607422, "learning_rate": 7.58901453766765e-07, "loss": 13.0469, "step": 17773 }, { "epoch": 1.1804476323304776, "grad_norm": 168.4581298828125, "learning_rate": 7.587970759187225e-07, "loss": 14.2969, "step": 17774 }, { "epoch": 1.1805140466228332, "grad_norm": 185.39794921875, "learning_rate": 7.586927008607761e-07, "loss": 21.1875, "step": 17775 }, { "epoch": 1.1805804609151889, "grad_norm": 180.32945251464844, "learning_rate": 7.585883285941336e-07, "loss": 15.8281, "step": 17776 }, { "epoch": 1.1806468752075447, "grad_norm": 147.0623016357422, "learning_rate": 7.584839591200017e-07, "loss": 13.0625, "step": 17777 }, { "epoch": 1.1807132894999004, "grad_norm": 108.23426055908203, "learning_rate": 7.583795924395885e-07, "loss": 16.6406, "step": 17778 }, { "epoch": 1.180779703792256, "grad_norm": 183.93960571289062, "learning_rate": 7.582752285541006e-07, "loss": 20.4531, "step": 17779 }, { "epoch": 1.1808461180846117, "grad_norm": 160.5325927734375, "learning_rate": 7.581708674647458e-07, "loss": 14.5469, "step": 17780 }, { "epoch": 1.1809125323769676, "grad_norm": 235.2970733642578, "learning_rate": 7.580665091727305e-07, "loss": 15.0625, "step": 17781 }, { "epoch": 1.1809789466693232, "grad_norm": 653.1466674804688, "learning_rate": 7.579621536792627e-07, "loss": 20.7812, "step": 17782 }, { "epoch": 1.181045360961679, "grad_norm": 135.60890197753906, "learning_rate": 7.57857800985549e-07, "loss": 14.8594, "step": 17783 }, { "epoch": 1.1811117752540348, "grad_norm": 180.30215454101562, "learning_rate": 7.577534510927968e-07, "loss": 11.125, "step": 17784 }, { "epoch": 1.1811781895463904, "grad_norm": 193.3443603515625, "learning_rate": 7.576491040022127e-07, "loss": 12.7734, "step": 17785 }, { "epoch": 1.181244603838746, "grad_norm": 158.83673095703125, "learning_rate": 7.575447597150044e-07, "loss": 16.0156, "step": 17786 }, { "epoch": 1.1813110181311017, "grad_norm": 577.3397827148438, "learning_rate": 7.574404182323779e-07, "loss": 21.1875, "step": 17787 }, { "epoch": 1.1813774324234576, "grad_norm": 197.2484130859375, "learning_rate": 7.573360795555413e-07, "loss": 16.7188, "step": 17788 }, { "epoch": 1.1814438467158133, "grad_norm": 272.82958984375, "learning_rate": 7.572317436857008e-07, "loss": 15.4062, "step": 17789 }, { "epoch": 1.181510261008169, "grad_norm": 127.16973876953125, "learning_rate": 7.571274106240633e-07, "loss": 15.9062, "step": 17790 }, { "epoch": 1.1815766753005246, "grad_norm": 621.6401977539062, "learning_rate": 7.570230803718362e-07, "loss": 33.25, "step": 17791 }, { "epoch": 1.1816430895928804, "grad_norm": 257.23785400390625, "learning_rate": 7.569187529302259e-07, "loss": 12.2109, "step": 17792 }, { "epoch": 1.181709503885236, "grad_norm": 783.9899291992188, "learning_rate": 7.568144283004394e-07, "loss": 24.2969, "step": 17793 }, { "epoch": 1.1817759181775918, "grad_norm": 150.0982208251953, "learning_rate": 7.56710106483683e-07, "loss": 16.1719, "step": 17794 }, { "epoch": 1.1818423324699476, "grad_norm": 155.7386474609375, "learning_rate": 7.566057874811642e-07, "loss": 14.8125, "step": 17795 }, { "epoch": 1.1819087467623033, "grad_norm": 883.1956176757812, "learning_rate": 7.565014712940891e-07, "loss": 27.8594, "step": 17796 }, { "epoch": 1.181975161054659, "grad_norm": 158.56942749023438, "learning_rate": 7.563971579236649e-07, "loss": 16.6406, "step": 17797 }, { "epoch": 1.1820415753470146, "grad_norm": 371.77716064453125, "learning_rate": 7.562928473710974e-07, "loss": 15.25, "step": 17798 }, { "epoch": 1.1821079896393705, "grad_norm": 166.23373413085938, "learning_rate": 7.56188539637594e-07, "loss": 14.6094, "step": 17799 }, { "epoch": 1.1821744039317261, "grad_norm": 173.92483520507812, "learning_rate": 7.560842347243609e-07, "loss": 15.8906, "step": 17800 }, { "epoch": 1.1822408182240818, "grad_norm": 208.15878295898438, "learning_rate": 7.559799326326049e-07, "loss": 22.7812, "step": 17801 }, { "epoch": 1.1823072325164374, "grad_norm": 250.52716064453125, "learning_rate": 7.558756333635319e-07, "loss": 16.8281, "step": 17802 }, { "epoch": 1.1823736468087933, "grad_norm": 203.68899536132812, "learning_rate": 7.557713369183492e-07, "loss": 20.625, "step": 17803 }, { "epoch": 1.182440061101149, "grad_norm": 214.2058868408203, "learning_rate": 7.556670432982628e-07, "loss": 16.0469, "step": 17804 }, { "epoch": 1.1825064753935046, "grad_norm": 217.5001220703125, "learning_rate": 7.555627525044792e-07, "loss": 16.1094, "step": 17805 }, { "epoch": 1.1825728896858605, "grad_norm": 171.30038452148438, "learning_rate": 7.554584645382043e-07, "loss": 16.4844, "step": 17806 }, { "epoch": 1.1826393039782161, "grad_norm": 141.2515106201172, "learning_rate": 7.553541794006456e-07, "loss": 14.3906, "step": 17807 }, { "epoch": 1.1827057182705718, "grad_norm": 174.8175506591797, "learning_rate": 7.55249897093008e-07, "loss": 15.1562, "step": 17808 }, { "epoch": 1.1827721325629275, "grad_norm": 286.8881530761719, "learning_rate": 7.551456176164988e-07, "loss": 18.6875, "step": 17809 }, { "epoch": 1.1828385468552833, "grad_norm": 139.18328857421875, "learning_rate": 7.550413409723238e-07, "loss": 25.0312, "step": 17810 }, { "epoch": 1.182904961147639, "grad_norm": 163.8253631591797, "learning_rate": 7.549370671616895e-07, "loss": 13.7031, "step": 17811 }, { "epoch": 1.1829713754399946, "grad_norm": 183.24935913085938, "learning_rate": 7.548327961858016e-07, "loss": 14.3906, "step": 17812 }, { "epoch": 1.1830377897323503, "grad_norm": 219.74290466308594, "learning_rate": 7.547285280458668e-07, "loss": 16.0938, "step": 17813 }, { "epoch": 1.1831042040247062, "grad_norm": 131.52659606933594, "learning_rate": 7.546242627430907e-07, "loss": 13.4922, "step": 17814 }, { "epoch": 1.1831706183170618, "grad_norm": 187.24081420898438, "learning_rate": 7.5452000027868e-07, "loss": 14.2188, "step": 17815 }, { "epoch": 1.1832370326094175, "grad_norm": 229.8904571533203, "learning_rate": 7.544157406538399e-07, "loss": 13.9062, "step": 17816 }, { "epoch": 1.1833034469017734, "grad_norm": 366.9916687011719, "learning_rate": 7.543114838697772e-07, "loss": 23.1406, "step": 17817 }, { "epoch": 1.183369861194129, "grad_norm": 245.57672119140625, "learning_rate": 7.542072299276976e-07, "loss": 13.5781, "step": 17818 }, { "epoch": 1.1834362754864847, "grad_norm": 279.65545654296875, "learning_rate": 7.541029788288071e-07, "loss": 12.5, "step": 17819 }, { "epoch": 1.1835026897788403, "grad_norm": 123.22383880615234, "learning_rate": 7.539987305743112e-07, "loss": 13.0625, "step": 17820 }, { "epoch": 1.1835691040711962, "grad_norm": 602.912109375, "learning_rate": 7.538944851654165e-07, "loss": 14.25, "step": 17821 }, { "epoch": 1.1836355183635519, "grad_norm": 657.8953247070312, "learning_rate": 7.537902426033284e-07, "loss": 26.2188, "step": 17822 }, { "epoch": 1.1837019326559075, "grad_norm": 172.12132263183594, "learning_rate": 7.536860028892529e-07, "loss": 16.7656, "step": 17823 }, { "epoch": 1.1837683469482632, "grad_norm": 176.81089782714844, "learning_rate": 7.535817660243952e-07, "loss": 12.4219, "step": 17824 }, { "epoch": 1.183834761240619, "grad_norm": 261.7245788574219, "learning_rate": 7.53477532009962e-07, "loss": 23.4688, "step": 17825 }, { "epoch": 1.1839011755329747, "grad_norm": 260.4737548828125, "learning_rate": 7.533733008471583e-07, "loss": 18.4688, "step": 17826 }, { "epoch": 1.1839675898253303, "grad_norm": 252.14830017089844, "learning_rate": 7.5326907253719e-07, "loss": 21.5312, "step": 17827 }, { "epoch": 1.1840340041176862, "grad_norm": 278.097412109375, "learning_rate": 7.531648470812633e-07, "loss": 17.7812, "step": 17828 }, { "epoch": 1.1841004184100419, "grad_norm": 389.50567626953125, "learning_rate": 7.530606244805827e-07, "loss": 26.7188, "step": 17829 }, { "epoch": 1.1841668327023975, "grad_norm": 251.39703369140625, "learning_rate": 7.529564047363548e-07, "loss": 26.625, "step": 17830 }, { "epoch": 1.1842332469947532, "grad_norm": 383.9652404785156, "learning_rate": 7.528521878497847e-07, "loss": 13.7344, "step": 17831 }, { "epoch": 1.184299661287109, "grad_norm": 234.17823791503906, "learning_rate": 7.527479738220779e-07, "loss": 15.2969, "step": 17832 }, { "epoch": 1.1843660755794647, "grad_norm": 141.50985717773438, "learning_rate": 7.526437626544398e-07, "loss": 15.0469, "step": 17833 }, { "epoch": 1.1844324898718204, "grad_norm": 218.21994018554688, "learning_rate": 7.525395543480763e-07, "loss": 16.9531, "step": 17834 }, { "epoch": 1.184498904164176, "grad_norm": 225.6494903564453, "learning_rate": 7.524353489041924e-07, "loss": 18.4688, "step": 17835 }, { "epoch": 1.184565318456532, "grad_norm": 170.35650634765625, "learning_rate": 7.523311463239939e-07, "loss": 16.8281, "step": 17836 }, { "epoch": 1.1846317327488876, "grad_norm": 169.94149780273438, "learning_rate": 7.522269466086854e-07, "loss": 14.4844, "step": 17837 }, { "epoch": 1.1846981470412432, "grad_norm": 451.73260498046875, "learning_rate": 7.521227497594731e-07, "loss": 21.3438, "step": 17838 }, { "epoch": 1.184764561333599, "grad_norm": 166.50509643554688, "learning_rate": 7.520185557775617e-07, "loss": 12.8281, "step": 17839 }, { "epoch": 1.1848309756259547, "grad_norm": 295.9305419921875, "learning_rate": 7.519143646641568e-07, "loss": 24.7969, "step": 17840 }, { "epoch": 1.1848973899183104, "grad_norm": 283.44525146484375, "learning_rate": 7.518101764204631e-07, "loss": 17.8594, "step": 17841 }, { "epoch": 1.184963804210666, "grad_norm": 236.1573486328125, "learning_rate": 7.517059910476868e-07, "loss": 18.7188, "step": 17842 }, { "epoch": 1.185030218503022, "grad_norm": 745.6867065429688, "learning_rate": 7.516018085470319e-07, "loss": 14.3125, "step": 17843 }, { "epoch": 1.1850966327953776, "grad_norm": 86.50686645507812, "learning_rate": 7.514976289197043e-07, "loss": 13.3438, "step": 17844 }, { "epoch": 1.1851630470877332, "grad_norm": 227.35194396972656, "learning_rate": 7.513934521669086e-07, "loss": 11.9219, "step": 17845 }, { "epoch": 1.1852294613800889, "grad_norm": 135.72608947753906, "learning_rate": 7.512892782898503e-07, "loss": 13.75, "step": 17846 }, { "epoch": 1.1852958756724448, "grad_norm": 335.654541015625, "learning_rate": 7.511851072897342e-07, "loss": 15.75, "step": 17847 }, { "epoch": 1.1853622899648004, "grad_norm": 129.78765869140625, "learning_rate": 7.510809391677653e-07, "loss": 11.2969, "step": 17848 }, { "epoch": 1.185428704257156, "grad_norm": 208.40341186523438, "learning_rate": 7.509767739251486e-07, "loss": 19.0938, "step": 17849 }, { "epoch": 1.185495118549512, "grad_norm": 420.0544128417969, "learning_rate": 7.50872611563089e-07, "loss": 17.9219, "step": 17850 }, { "epoch": 1.1855615328418676, "grad_norm": 321.5174255371094, "learning_rate": 7.50768452082791e-07, "loss": 14.2344, "step": 17851 }, { "epoch": 1.1856279471342233, "grad_norm": 244.03672790527344, "learning_rate": 7.506642954854602e-07, "loss": 18.5156, "step": 17852 }, { "epoch": 1.185694361426579, "grad_norm": 79.57307434082031, "learning_rate": 7.50560141772301e-07, "loss": 16.0, "step": 17853 }, { "epoch": 1.1857607757189348, "grad_norm": 129.0429229736328, "learning_rate": 7.504559909445184e-07, "loss": 16.1562, "step": 17854 }, { "epoch": 1.1858271900112904, "grad_norm": 125.40264129638672, "learning_rate": 7.503518430033165e-07, "loss": 11.7812, "step": 17855 }, { "epoch": 1.185893604303646, "grad_norm": 144.23548889160156, "learning_rate": 7.50247697949901e-07, "loss": 17.8281, "step": 17856 }, { "epoch": 1.1859600185960018, "grad_norm": 130.56272888183594, "learning_rate": 7.501435557854759e-07, "loss": 17.0, "step": 17857 }, { "epoch": 1.1860264328883576, "grad_norm": 90.09568786621094, "learning_rate": 7.500394165112463e-07, "loss": 15.9531, "step": 17858 }, { "epoch": 1.1860928471807133, "grad_norm": 169.81097412109375, "learning_rate": 7.499352801284161e-07, "loss": 12.7812, "step": 17859 }, { "epoch": 1.186159261473069, "grad_norm": 143.1747589111328, "learning_rate": 7.49831146638191e-07, "loss": 13.6484, "step": 17860 }, { "epoch": 1.1862256757654248, "grad_norm": 196.60682678222656, "learning_rate": 7.497270160417745e-07, "loss": 18.2188, "step": 17861 }, { "epoch": 1.1862920900577805, "grad_norm": 256.13116455078125, "learning_rate": 7.496228883403719e-07, "loss": 25.6875, "step": 17862 }, { "epoch": 1.1863585043501361, "grad_norm": 129.37472534179688, "learning_rate": 7.495187635351868e-07, "loss": 13.4219, "step": 17863 }, { "epoch": 1.1864249186424918, "grad_norm": 144.676513671875, "learning_rate": 7.494146416274246e-07, "loss": 14.3594, "step": 17864 }, { "epoch": 1.1864913329348477, "grad_norm": 175.1190948486328, "learning_rate": 7.493105226182894e-07, "loss": 14.4375, "step": 17865 }, { "epoch": 1.1865577472272033, "grad_norm": 151.75341796875, "learning_rate": 7.492064065089852e-07, "loss": 17.0781, "step": 17866 }, { "epoch": 1.186624161519559, "grad_norm": 239.70791625976562, "learning_rate": 7.491022933007169e-07, "loss": 18.375, "step": 17867 }, { "epoch": 1.1866905758119146, "grad_norm": 177.0485382080078, "learning_rate": 7.489981829946885e-07, "loss": 14.8281, "step": 17868 }, { "epoch": 1.1867569901042705, "grad_norm": 121.39627075195312, "learning_rate": 7.488940755921047e-07, "loss": 16.2969, "step": 17869 }, { "epoch": 1.1868234043966261, "grad_norm": 308.61065673828125, "learning_rate": 7.48789971094169e-07, "loss": 21.3438, "step": 17870 }, { "epoch": 1.1868898186889818, "grad_norm": 209.95640563964844, "learning_rate": 7.486858695020865e-07, "loss": 15.875, "step": 17871 }, { "epoch": 1.1869562329813377, "grad_norm": 105.53950500488281, "learning_rate": 7.485817708170605e-07, "loss": 15.8906, "step": 17872 }, { "epoch": 1.1870226472736933, "grad_norm": 1583.455322265625, "learning_rate": 7.484776750402959e-07, "loss": 16.6094, "step": 17873 }, { "epoch": 1.187089061566049, "grad_norm": 3615.3603515625, "learning_rate": 7.483735821729964e-07, "loss": 14.5625, "step": 17874 }, { "epoch": 1.1871554758584046, "grad_norm": 234.2282257080078, "learning_rate": 7.482694922163664e-07, "loss": 13.4531, "step": 17875 }, { "epoch": 1.1872218901507605, "grad_norm": 112.96623229980469, "learning_rate": 7.481654051716093e-07, "loss": 16.4375, "step": 17876 }, { "epoch": 1.1872883044431162, "grad_norm": 287.2563171386719, "learning_rate": 7.4806132103993e-07, "loss": 13.5078, "step": 17877 }, { "epoch": 1.1873547187354718, "grad_norm": 278.0723571777344, "learning_rate": 7.47957239822532e-07, "loss": 16.4688, "step": 17878 }, { "epoch": 1.1874211330278275, "grad_norm": 218.97193908691406, "learning_rate": 7.478531615206194e-07, "loss": 25.6562, "step": 17879 }, { "epoch": 1.1874875473201834, "grad_norm": 208.8278045654297, "learning_rate": 7.477490861353955e-07, "loss": 19.2188, "step": 17880 }, { "epoch": 1.187553961612539, "grad_norm": 233.19602966308594, "learning_rate": 7.476450136680652e-07, "loss": 13.3281, "step": 17881 }, { "epoch": 1.1876203759048947, "grad_norm": 140.43382263183594, "learning_rate": 7.475409441198317e-07, "loss": 15.5156, "step": 17882 }, { "epoch": 1.1876867901972505, "grad_norm": 303.5624694824219, "learning_rate": 7.474368774918993e-07, "loss": 16.0312, "step": 17883 }, { "epoch": 1.1877532044896062, "grad_norm": 117.89801025390625, "learning_rate": 7.473328137854709e-07, "loss": 15.6719, "step": 17884 }, { "epoch": 1.1878196187819619, "grad_norm": 218.89891052246094, "learning_rate": 7.472287530017513e-07, "loss": 17.2188, "step": 17885 }, { "epoch": 1.1878860330743175, "grad_norm": 208.15440368652344, "learning_rate": 7.471246951419435e-07, "loss": 13.4375, "step": 17886 }, { "epoch": 1.1879524473666734, "grad_norm": 178.81365966796875, "learning_rate": 7.470206402072515e-07, "loss": 18.0312, "step": 17887 }, { "epoch": 1.188018861659029, "grad_norm": 180.99923706054688, "learning_rate": 7.469165881988786e-07, "loss": 14.3125, "step": 17888 }, { "epoch": 1.1880852759513847, "grad_norm": 374.70672607421875, "learning_rate": 7.46812539118029e-07, "loss": 17.1719, "step": 17889 }, { "epoch": 1.1881516902437403, "grad_norm": 158.09434509277344, "learning_rate": 7.467084929659057e-07, "loss": 13.6875, "step": 17890 }, { "epoch": 1.1882181045360962, "grad_norm": 290.2857666015625, "learning_rate": 7.46604449743713e-07, "loss": 20.7656, "step": 17891 }, { "epoch": 1.1882845188284519, "grad_norm": 4850.60498046875, "learning_rate": 7.465004094526533e-07, "loss": 13.4844, "step": 17892 }, { "epoch": 1.1883509331208075, "grad_norm": 188.76174926757812, "learning_rate": 7.463963720939311e-07, "loss": 16.6406, "step": 17893 }, { "epoch": 1.1884173474131634, "grad_norm": 417.9599914550781, "learning_rate": 7.462923376687491e-07, "loss": 15.9375, "step": 17894 }, { "epoch": 1.188483761705519, "grad_norm": 361.1965637207031, "learning_rate": 7.461883061783113e-07, "loss": 11.8281, "step": 17895 }, { "epoch": 1.1885501759978747, "grad_norm": 210.0699005126953, "learning_rate": 7.460842776238207e-07, "loss": 16.4688, "step": 17896 }, { "epoch": 1.1886165902902304, "grad_norm": 221.52833557128906, "learning_rate": 7.459802520064808e-07, "loss": 17.625, "step": 17897 }, { "epoch": 1.1886830045825862, "grad_norm": 232.47401428222656, "learning_rate": 7.458762293274947e-07, "loss": 16.8281, "step": 17898 }, { "epoch": 1.188749418874942, "grad_norm": 161.61196899414062, "learning_rate": 7.457722095880661e-07, "loss": 17.1406, "step": 17899 }, { "epoch": 1.1888158331672976, "grad_norm": 218.54640197753906, "learning_rate": 7.456681927893978e-07, "loss": 20.8438, "step": 17900 }, { "epoch": 1.1888822474596532, "grad_norm": 143.47781372070312, "learning_rate": 7.455641789326929e-07, "loss": 13.5469, "step": 17901 }, { "epoch": 1.188948661752009, "grad_norm": 120.75272369384766, "learning_rate": 7.454601680191555e-07, "loss": 13.625, "step": 17902 }, { "epoch": 1.1890150760443647, "grad_norm": 133.22885131835938, "learning_rate": 7.453561600499879e-07, "loss": 13.6875, "step": 17903 }, { "epoch": 1.1890814903367204, "grad_norm": 713.19970703125, "learning_rate": 7.452521550263933e-07, "loss": 18.9062, "step": 17904 }, { "epoch": 1.1891479046290763, "grad_norm": 182.87921142578125, "learning_rate": 7.451481529495747e-07, "loss": 17.8906, "step": 17905 }, { "epoch": 1.189214318921432, "grad_norm": 428.9552001953125, "learning_rate": 7.450441538207357e-07, "loss": 22.7344, "step": 17906 }, { "epoch": 1.1892807332137876, "grad_norm": 216.73658752441406, "learning_rate": 7.449401576410785e-07, "loss": 10.8125, "step": 17907 }, { "epoch": 1.1893471475061432, "grad_norm": 236.028564453125, "learning_rate": 7.448361644118069e-07, "loss": 14.6562, "step": 17908 }, { "epoch": 1.189413561798499, "grad_norm": 377.85919189453125, "learning_rate": 7.447321741341231e-07, "loss": 18.4844, "step": 17909 }, { "epoch": 1.1894799760908548, "grad_norm": 224.83328247070312, "learning_rate": 7.446281868092306e-07, "loss": 15.25, "step": 17910 }, { "epoch": 1.1895463903832104, "grad_norm": 248.165771484375, "learning_rate": 7.445242024383317e-07, "loss": 19.3594, "step": 17911 }, { "epoch": 1.189612804675566, "grad_norm": 1908.968017578125, "learning_rate": 7.444202210226298e-07, "loss": 19.1875, "step": 17912 }, { "epoch": 1.189679218967922, "grad_norm": 131.69036865234375, "learning_rate": 7.443162425633272e-07, "loss": 12.875, "step": 17913 }, { "epoch": 1.1897456332602776, "grad_norm": 238.96563720703125, "learning_rate": 7.44212267061627e-07, "loss": 11.9062, "step": 17914 }, { "epoch": 1.1898120475526333, "grad_norm": 141.0591278076172, "learning_rate": 7.441082945187314e-07, "loss": 16.9844, "step": 17915 }, { "epoch": 1.1898784618449891, "grad_norm": 162.8836669921875, "learning_rate": 7.440043249358439e-07, "loss": 14.5625, "step": 17916 }, { "epoch": 1.1899448761373448, "grad_norm": 583.8858642578125, "learning_rate": 7.439003583141667e-07, "loss": 20.3125, "step": 17917 }, { "epoch": 1.1900112904297004, "grad_norm": 188.70506286621094, "learning_rate": 7.437963946549026e-07, "loss": 18.7188, "step": 17918 }, { "epoch": 1.190077704722056, "grad_norm": 212.70960998535156, "learning_rate": 7.436924339592536e-07, "loss": 12.7969, "step": 17919 }, { "epoch": 1.190144119014412, "grad_norm": 185.72474670410156, "learning_rate": 7.435884762284232e-07, "loss": 13.6562, "step": 17920 }, { "epoch": 1.1902105333067676, "grad_norm": 254.4515838623047, "learning_rate": 7.434845214636131e-07, "loss": 18.2344, "step": 17921 }, { "epoch": 1.1902769475991233, "grad_norm": 141.7631378173828, "learning_rate": 7.433805696660265e-07, "loss": 16.75, "step": 17922 }, { "epoch": 1.190343361891479, "grad_norm": 260.34759521484375, "learning_rate": 7.43276620836865e-07, "loss": 17.4531, "step": 17923 }, { "epoch": 1.1904097761838348, "grad_norm": 184.68612670898438, "learning_rate": 7.431726749773321e-07, "loss": 15.875, "step": 17924 }, { "epoch": 1.1904761904761905, "grad_norm": 271.88470458984375, "learning_rate": 7.430687320886293e-07, "loss": 16.7188, "step": 17925 }, { "epoch": 1.1905426047685461, "grad_norm": 211.61532592773438, "learning_rate": 7.429647921719594e-07, "loss": 17.6406, "step": 17926 }, { "epoch": 1.190609019060902, "grad_norm": 495.8840637207031, "learning_rate": 7.428608552285242e-07, "loss": 23.6562, "step": 17927 }, { "epoch": 1.1906754333532577, "grad_norm": 156.69876098632812, "learning_rate": 7.427569212595267e-07, "loss": 18.0312, "step": 17928 }, { "epoch": 1.1907418476456133, "grad_norm": 390.0904235839844, "learning_rate": 7.426529902661687e-07, "loss": 20.0156, "step": 17929 }, { "epoch": 1.190808261937969, "grad_norm": 969.561767578125, "learning_rate": 7.425490622496527e-07, "loss": 15.6094, "step": 17930 }, { "epoch": 1.1908746762303248, "grad_norm": 151.70338439941406, "learning_rate": 7.424451372111802e-07, "loss": 13.3438, "step": 17931 }, { "epoch": 1.1909410905226805, "grad_norm": 182.56044006347656, "learning_rate": 7.423412151519546e-07, "loss": 17.375, "step": 17932 }, { "epoch": 1.1910075048150361, "grad_norm": 202.62451171875, "learning_rate": 7.422372960731764e-07, "loss": 18.7188, "step": 17933 }, { "epoch": 1.1910739191073918, "grad_norm": 320.1753845214844, "learning_rate": 7.421333799760492e-07, "loss": 19.5, "step": 17934 }, { "epoch": 1.1911403333997477, "grad_norm": 237.29913330078125, "learning_rate": 7.42029466861774e-07, "loss": 17.375, "step": 17935 }, { "epoch": 1.1912067476921033, "grad_norm": 249.96697998046875, "learning_rate": 7.419255567315535e-07, "loss": 17.8438, "step": 17936 }, { "epoch": 1.191273161984459, "grad_norm": 139.06210327148438, "learning_rate": 7.418216495865887e-07, "loss": 14.2969, "step": 17937 }, { "epoch": 1.1913395762768149, "grad_norm": 222.8668975830078, "learning_rate": 7.417177454280825e-07, "loss": 14.4531, "step": 17938 }, { "epoch": 1.1914059905691705, "grad_norm": 140.14907836914062, "learning_rate": 7.416138442572369e-07, "loss": 10.8438, "step": 17939 }, { "epoch": 1.1914724048615262, "grad_norm": 266.0635681152344, "learning_rate": 7.415099460752526e-07, "loss": 16.8438, "step": 17940 }, { "epoch": 1.1915388191538818, "grad_norm": 241.65750122070312, "learning_rate": 7.414060508833329e-07, "loss": 16.0469, "step": 17941 }, { "epoch": 1.1916052334462377, "grad_norm": 456.8829040527344, "learning_rate": 7.413021586826784e-07, "loss": 28.3281, "step": 17942 }, { "epoch": 1.1916716477385934, "grad_norm": 195.5966339111328, "learning_rate": 7.411982694744918e-07, "loss": 20.9844, "step": 17943 }, { "epoch": 1.191738062030949, "grad_norm": 587.7904052734375, "learning_rate": 7.410943832599738e-07, "loss": 18.25, "step": 17944 }, { "epoch": 1.1918044763233047, "grad_norm": 275.6663513183594, "learning_rate": 7.409905000403273e-07, "loss": 19.375, "step": 17945 }, { "epoch": 1.1918708906156605, "grad_norm": 126.67346954345703, "learning_rate": 7.408866198167529e-07, "loss": 16.6406, "step": 17946 }, { "epoch": 1.1919373049080162, "grad_norm": 480.77728271484375, "learning_rate": 7.40782742590453e-07, "loss": 19.75, "step": 17947 }, { "epoch": 1.1920037192003718, "grad_norm": 367.65191650390625, "learning_rate": 7.406788683626285e-07, "loss": 23.0938, "step": 17948 }, { "epoch": 1.1920701334927277, "grad_norm": 335.4351501464844, "learning_rate": 7.405749971344817e-07, "loss": 21.7812, "step": 17949 }, { "epoch": 1.1921365477850834, "grad_norm": 134.23529052734375, "learning_rate": 7.404711289072134e-07, "loss": 10.9688, "step": 17950 }, { "epoch": 1.192202962077439, "grad_norm": 212.69093322753906, "learning_rate": 7.403672636820258e-07, "loss": 22.3594, "step": 17951 }, { "epoch": 1.1922693763697947, "grad_norm": 170.4894256591797, "learning_rate": 7.402634014601195e-07, "loss": 19.1406, "step": 17952 }, { "epoch": 1.1923357906621506, "grad_norm": 201.0638427734375, "learning_rate": 7.401595422426969e-07, "loss": 13.8906, "step": 17953 }, { "epoch": 1.1924022049545062, "grad_norm": 302.4914855957031, "learning_rate": 7.400556860309586e-07, "loss": 21.0, "step": 17954 }, { "epoch": 1.1924686192468619, "grad_norm": 143.33563232421875, "learning_rate": 7.399518328261063e-07, "loss": 14.7656, "step": 17955 }, { "epoch": 1.1925350335392175, "grad_norm": 166.23519897460938, "learning_rate": 7.398479826293412e-07, "loss": 13.5156, "step": 17956 }, { "epoch": 1.1926014478315734, "grad_norm": 928.48095703125, "learning_rate": 7.397441354418649e-07, "loss": 15.7188, "step": 17957 }, { "epoch": 1.192667862123929, "grad_norm": 451.88116455078125, "learning_rate": 7.396402912648778e-07, "loss": 23.5938, "step": 17958 }, { "epoch": 1.1927342764162847, "grad_norm": 310.9792785644531, "learning_rate": 7.395364500995822e-07, "loss": 14.4219, "step": 17959 }, { "epoch": 1.1928006907086406, "grad_norm": 173.03395080566406, "learning_rate": 7.394326119471785e-07, "loss": 19.4062, "step": 17960 }, { "epoch": 1.1928671050009962, "grad_norm": 68.07583618164062, "learning_rate": 7.393287768088683e-07, "loss": 11.7969, "step": 17961 }, { "epoch": 1.192933519293352, "grad_norm": 325.1595764160156, "learning_rate": 7.39224944685852e-07, "loss": 13.3906, "step": 17962 }, { "epoch": 1.1929999335857076, "grad_norm": 149.51138305664062, "learning_rate": 7.391211155793318e-07, "loss": 12.4844, "step": 17963 }, { "epoch": 1.1930663478780634, "grad_norm": 191.71029663085938, "learning_rate": 7.390172894905076e-07, "loss": 16.4453, "step": 17964 }, { "epoch": 1.193132762170419, "grad_norm": 112.57662963867188, "learning_rate": 7.389134664205814e-07, "loss": 17.375, "step": 17965 }, { "epoch": 1.1931991764627747, "grad_norm": 364.20196533203125, "learning_rate": 7.388096463707531e-07, "loss": 21.9062, "step": 17966 }, { "epoch": 1.1932655907551304, "grad_norm": 103.81877136230469, "learning_rate": 7.387058293422246e-07, "loss": 16.2031, "step": 17967 }, { "epoch": 1.1933320050474863, "grad_norm": 145.2547149658203, "learning_rate": 7.386020153361963e-07, "loss": 12.5312, "step": 17968 }, { "epoch": 1.193398419339842, "grad_norm": 249.1996612548828, "learning_rate": 7.384982043538692e-07, "loss": 13.8281, "step": 17969 }, { "epoch": 1.1934648336321976, "grad_norm": 239.22174072265625, "learning_rate": 7.383943963964438e-07, "loss": 20.2812, "step": 17970 }, { "epoch": 1.1935312479245535, "grad_norm": 171.44561767578125, "learning_rate": 7.382905914651216e-07, "loss": 16.0625, "step": 17971 }, { "epoch": 1.193597662216909, "grad_norm": 229.09791564941406, "learning_rate": 7.381867895611026e-07, "loss": 19.6406, "step": 17972 }, { "epoch": 1.1936640765092648, "grad_norm": 166.25491333007812, "learning_rate": 7.380829906855881e-07, "loss": 13.7656, "step": 17973 }, { "epoch": 1.1937304908016204, "grad_norm": 109.7330093383789, "learning_rate": 7.379791948397782e-07, "loss": 12.2031, "step": 17974 }, { "epoch": 1.1937969050939763, "grad_norm": 312.37188720703125, "learning_rate": 7.378754020248741e-07, "loss": 16.2188, "step": 17975 }, { "epoch": 1.193863319386332, "grad_norm": 346.79412841796875, "learning_rate": 7.377716122420763e-07, "loss": 17.6719, "step": 17976 }, { "epoch": 1.1939297336786876, "grad_norm": 312.11517333984375, "learning_rate": 7.37667825492585e-07, "loss": 24.0469, "step": 17977 }, { "epoch": 1.1939961479710433, "grad_norm": 315.3387451171875, "learning_rate": 7.375640417776015e-07, "loss": 12.125, "step": 17978 }, { "epoch": 1.1940625622633991, "grad_norm": 247.1280059814453, "learning_rate": 7.374602610983252e-07, "loss": 16.7812, "step": 17979 }, { "epoch": 1.1941289765557548, "grad_norm": 170.23062133789062, "learning_rate": 7.373564834559577e-07, "loss": 16.7344, "step": 17980 }, { "epoch": 1.1941953908481104, "grad_norm": 349.32781982421875, "learning_rate": 7.372527088516986e-07, "loss": 16.7344, "step": 17981 }, { "epoch": 1.1942618051404663, "grad_norm": 345.4637145996094, "learning_rate": 7.371489372867491e-07, "loss": 17.7812, "step": 17982 }, { "epoch": 1.194328219432822, "grad_norm": 154.7828369140625, "learning_rate": 7.370451687623086e-07, "loss": 11.7969, "step": 17983 }, { "epoch": 1.1943946337251776, "grad_norm": 269.0357360839844, "learning_rate": 7.369414032795784e-07, "loss": 13.2578, "step": 17984 }, { "epoch": 1.1944610480175333, "grad_norm": 192.16757202148438, "learning_rate": 7.36837640839758e-07, "loss": 23.25, "step": 17985 }, { "epoch": 1.1945274623098892, "grad_norm": 294.0013732910156, "learning_rate": 7.367338814440485e-07, "loss": 17.5469, "step": 17986 }, { "epoch": 1.1945938766022448, "grad_norm": 247.2404022216797, "learning_rate": 7.366301250936491e-07, "loss": 16.6094, "step": 17987 }, { "epoch": 1.1946602908946005, "grad_norm": 372.3330078125, "learning_rate": 7.365263717897609e-07, "loss": 17.3906, "step": 17988 }, { "epoch": 1.1947267051869561, "grad_norm": 151.13168334960938, "learning_rate": 7.364226215335836e-07, "loss": 18.9688, "step": 17989 }, { "epoch": 1.194793119479312, "grad_norm": 153.2083282470703, "learning_rate": 7.363188743263175e-07, "loss": 15.7031, "step": 17990 }, { "epoch": 1.1948595337716676, "grad_norm": 249.41143798828125, "learning_rate": 7.362151301691624e-07, "loss": 15.0469, "step": 17991 }, { "epoch": 1.1949259480640233, "grad_norm": 385.910400390625, "learning_rate": 7.361113890633188e-07, "loss": 22.4531, "step": 17992 }, { "epoch": 1.1949923623563792, "grad_norm": 252.75445556640625, "learning_rate": 7.360076510099865e-07, "loss": 23.0781, "step": 17993 }, { "epoch": 1.1950587766487348, "grad_norm": 196.27406311035156, "learning_rate": 7.359039160103655e-07, "loss": 18.2656, "step": 17994 }, { "epoch": 1.1951251909410905, "grad_norm": 183.4938201904297, "learning_rate": 7.358001840656553e-07, "loss": 16.2656, "step": 17995 }, { "epoch": 1.1951916052334461, "grad_norm": 472.83538818359375, "learning_rate": 7.356964551770568e-07, "loss": 24.0, "step": 17996 }, { "epoch": 1.195258019525802, "grad_norm": 248.26722717285156, "learning_rate": 7.355927293457688e-07, "loss": 15.6719, "step": 17997 }, { "epoch": 1.1953244338181577, "grad_norm": 186.87510681152344, "learning_rate": 7.35489006572992e-07, "loss": 18.375, "step": 17998 }, { "epoch": 1.1953908481105133, "grad_norm": 116.18437957763672, "learning_rate": 7.353852868599255e-07, "loss": 15.0781, "step": 17999 }, { "epoch": 1.195457262402869, "grad_norm": 228.101318359375, "learning_rate": 7.352815702077699e-07, "loss": 13.4766, "step": 18000 }, { "epoch": 1.1955236766952249, "grad_norm": 150.09373474121094, "learning_rate": 7.351778566177238e-07, "loss": 18.2812, "step": 18001 }, { "epoch": 1.1955900909875805, "grad_norm": 348.75299072265625, "learning_rate": 7.350741460909882e-07, "loss": 12.3516, "step": 18002 }, { "epoch": 1.1956565052799362, "grad_norm": 462.4609069824219, "learning_rate": 7.349704386287616e-07, "loss": 13.2188, "step": 18003 }, { "epoch": 1.195722919572292, "grad_norm": 163.53494262695312, "learning_rate": 7.348667342322445e-07, "loss": 10.1172, "step": 18004 }, { "epoch": 1.1957893338646477, "grad_norm": 153.70223999023438, "learning_rate": 7.347630329026359e-07, "loss": 15.875, "step": 18005 }, { "epoch": 1.1958557481570034, "grad_norm": 162.8670196533203, "learning_rate": 7.346593346411359e-07, "loss": 19.6562, "step": 18006 }, { "epoch": 1.195922162449359, "grad_norm": 184.1095733642578, "learning_rate": 7.345556394489434e-07, "loss": 18.7344, "step": 18007 }, { "epoch": 1.1959885767417149, "grad_norm": 169.1959991455078, "learning_rate": 7.344519473272584e-07, "loss": 19.4375, "step": 18008 }, { "epoch": 1.1960549910340705, "grad_norm": 150.03688049316406, "learning_rate": 7.343482582772798e-07, "loss": 13.2031, "step": 18009 }, { "epoch": 1.1961214053264262, "grad_norm": 285.28167724609375, "learning_rate": 7.342445723002078e-07, "loss": 14.5625, "step": 18010 }, { "epoch": 1.1961878196187818, "grad_norm": 474.087646484375, "learning_rate": 7.341408893972411e-07, "loss": 14.8906, "step": 18011 }, { "epoch": 1.1962542339111377, "grad_norm": 255.51629638671875, "learning_rate": 7.34037209569579e-07, "loss": 16.8438, "step": 18012 }, { "epoch": 1.1963206482034934, "grad_norm": 681.76025390625, "learning_rate": 7.339335328184216e-07, "loss": 19.4844, "step": 18013 }, { "epoch": 1.196387062495849, "grad_norm": 222.40773010253906, "learning_rate": 7.338298591449674e-07, "loss": 16.2812, "step": 18014 }, { "epoch": 1.196453476788205, "grad_norm": 174.2684326171875, "learning_rate": 7.337261885504163e-07, "loss": 18.5781, "step": 18015 }, { "epoch": 1.1965198910805606, "grad_norm": 214.28184509277344, "learning_rate": 7.336225210359665e-07, "loss": 17.4844, "step": 18016 }, { "epoch": 1.1965863053729162, "grad_norm": 199.4649200439453, "learning_rate": 7.335188566028186e-07, "loss": 17.2656, "step": 18017 }, { "epoch": 1.196652719665272, "grad_norm": 139.289306640625, "learning_rate": 7.334151952521701e-07, "loss": 12.9688, "step": 18018 }, { "epoch": 1.1967191339576277, "grad_norm": 119.63074493408203, "learning_rate": 7.333115369852213e-07, "loss": 9.5703, "step": 18019 }, { "epoch": 1.1967855482499834, "grad_norm": 215.8094024658203, "learning_rate": 7.332078818031708e-07, "loss": 14.5781, "step": 18020 }, { "epoch": 1.196851962542339, "grad_norm": 157.21575927734375, "learning_rate": 7.331042297072177e-07, "loss": 14.5625, "step": 18021 }, { "epoch": 1.1969183768346947, "grad_norm": 233.6062774658203, "learning_rate": 7.330005806985607e-07, "loss": 20.875, "step": 18022 }, { "epoch": 1.1969847911270506, "grad_norm": 125.59394836425781, "learning_rate": 7.328969347783994e-07, "loss": 14.125, "step": 18023 }, { "epoch": 1.1970512054194062, "grad_norm": 192.11351013183594, "learning_rate": 7.327932919479321e-07, "loss": 13.5, "step": 18024 }, { "epoch": 1.197117619711762, "grad_norm": 235.2083282470703, "learning_rate": 7.326896522083581e-07, "loss": 18.6875, "step": 18025 }, { "epoch": 1.1971840340041178, "grad_norm": 441.0107727050781, "learning_rate": 7.325860155608758e-07, "loss": 27.75, "step": 18026 }, { "epoch": 1.1972504482964734, "grad_norm": 432.23577880859375, "learning_rate": 7.324823820066846e-07, "loss": 21.5625, "step": 18027 }, { "epoch": 1.197316862588829, "grad_norm": 145.28392028808594, "learning_rate": 7.323787515469826e-07, "loss": 11.9062, "step": 18028 }, { "epoch": 1.197383276881185, "grad_norm": 170.1995849609375, "learning_rate": 7.322751241829692e-07, "loss": 13.9531, "step": 18029 }, { "epoch": 1.1974496911735406, "grad_norm": 140.70289611816406, "learning_rate": 7.321714999158424e-07, "loss": 16.2344, "step": 18030 }, { "epoch": 1.1975161054658963, "grad_norm": 131.03631591796875, "learning_rate": 7.320678787468017e-07, "loss": 15.0156, "step": 18031 }, { "epoch": 1.197582519758252, "grad_norm": 637.61376953125, "learning_rate": 7.319642606770451e-07, "loss": 14.125, "step": 18032 }, { "epoch": 1.1976489340506076, "grad_norm": 165.69883728027344, "learning_rate": 7.318606457077714e-07, "loss": 18.1875, "step": 18033 }, { "epoch": 1.1977153483429634, "grad_norm": 471.0079040527344, "learning_rate": 7.317570338401789e-07, "loss": 13.5469, "step": 18034 }, { "epoch": 1.197781762635319, "grad_norm": 359.61663818359375, "learning_rate": 7.316534250754667e-07, "loss": 25.4375, "step": 18035 }, { "epoch": 1.1978481769276748, "grad_norm": 216.48875427246094, "learning_rate": 7.315498194148326e-07, "loss": 14.5312, "step": 18036 }, { "epoch": 1.1979145912200306, "grad_norm": 183.42239379882812, "learning_rate": 7.314462168594757e-07, "loss": 20.2188, "step": 18037 }, { "epoch": 1.1979810055123863, "grad_norm": 527.5914916992188, "learning_rate": 7.313426174105938e-07, "loss": 23.5156, "step": 18038 }, { "epoch": 1.198047419804742, "grad_norm": 1414.465576171875, "learning_rate": 7.312390210693862e-07, "loss": 18.2188, "step": 18039 }, { "epoch": 1.1981138340970978, "grad_norm": 326.3335876464844, "learning_rate": 7.311354278370499e-07, "loss": 16.7656, "step": 18040 }, { "epoch": 1.1981802483894535, "grad_norm": 125.25265502929688, "learning_rate": 7.310318377147844e-07, "loss": 16.8594, "step": 18041 }, { "epoch": 1.1982466626818091, "grad_norm": 418.1712646484375, "learning_rate": 7.309282507037872e-07, "loss": 19.5156, "step": 18042 }, { "epoch": 1.1983130769741648, "grad_norm": 98.02146911621094, "learning_rate": 7.308246668052571e-07, "loss": 11.8906, "step": 18043 }, { "epoch": 1.1983794912665204, "grad_norm": 120.75273895263672, "learning_rate": 7.307210860203916e-07, "loss": 14.0, "step": 18044 }, { "epoch": 1.1984459055588763, "grad_norm": 273.93017578125, "learning_rate": 7.306175083503898e-07, "loss": 14.5156, "step": 18045 }, { "epoch": 1.198512319851232, "grad_norm": 212.54806518554688, "learning_rate": 7.305139337964491e-07, "loss": 17.9453, "step": 18046 }, { "epoch": 1.1985787341435876, "grad_norm": 209.0491180419922, "learning_rate": 7.304103623597681e-07, "loss": 19.9844, "step": 18047 }, { "epoch": 1.1986451484359435, "grad_norm": 142.96144104003906, "learning_rate": 7.303067940415439e-07, "loss": 19.5469, "step": 18048 }, { "epoch": 1.1987115627282992, "grad_norm": 403.571044921875, "learning_rate": 7.302032288429756e-07, "loss": 18.2031, "step": 18049 }, { "epoch": 1.1987779770206548, "grad_norm": 685.4907836914062, "learning_rate": 7.30099666765261e-07, "loss": 16.8281, "step": 18050 }, { "epoch": 1.1988443913130107, "grad_norm": 443.94744873046875, "learning_rate": 7.299961078095973e-07, "loss": 20.4531, "step": 18051 }, { "epoch": 1.1989108056053663, "grad_norm": 143.42677307128906, "learning_rate": 7.298925519771834e-07, "loss": 18.0469, "step": 18052 }, { "epoch": 1.198977219897722, "grad_norm": 110.65176391601562, "learning_rate": 7.297889992692166e-07, "loss": 13.2969, "step": 18053 }, { "epoch": 1.1990436341900776, "grad_norm": 249.44915771484375, "learning_rate": 7.296854496868949e-07, "loss": 18.1562, "step": 18054 }, { "epoch": 1.1991100484824333, "grad_norm": 365.2225646972656, "learning_rate": 7.295819032314156e-07, "loss": 13.7031, "step": 18055 }, { "epoch": 1.1991764627747892, "grad_norm": 259.2659912109375, "learning_rate": 7.294783599039775e-07, "loss": 13.6562, "step": 18056 }, { "epoch": 1.1992428770671448, "grad_norm": 458.72796630859375, "learning_rate": 7.293748197057774e-07, "loss": 19.0938, "step": 18057 }, { "epoch": 1.1993092913595005, "grad_norm": 522.2647094726562, "learning_rate": 7.292712826380137e-07, "loss": 15.9844, "step": 18058 }, { "epoch": 1.1993757056518564, "grad_norm": 117.97760009765625, "learning_rate": 7.291677487018831e-07, "loss": 14.7969, "step": 18059 }, { "epoch": 1.199442119944212, "grad_norm": 213.12388610839844, "learning_rate": 7.290642178985845e-07, "loss": 20.4062, "step": 18060 }, { "epoch": 1.1995085342365677, "grad_norm": 182.36215209960938, "learning_rate": 7.289606902293142e-07, "loss": 13.3906, "step": 18061 }, { "epoch": 1.1995749485289235, "grad_norm": 152.02781677246094, "learning_rate": 7.288571656952707e-07, "loss": 13.7656, "step": 18062 }, { "epoch": 1.1996413628212792, "grad_norm": 219.8287353515625, "learning_rate": 7.28753644297651e-07, "loss": 18.2031, "step": 18063 }, { "epoch": 1.1997077771136349, "grad_norm": 154.36085510253906, "learning_rate": 7.28650126037653e-07, "loss": 12.4688, "step": 18064 }, { "epoch": 1.1997741914059905, "grad_norm": 307.0432434082031, "learning_rate": 7.285466109164733e-07, "loss": 22.6094, "step": 18065 }, { "epoch": 1.1998406056983462, "grad_norm": 111.47766876220703, "learning_rate": 7.284430989353104e-07, "loss": 12.8438, "step": 18066 }, { "epoch": 1.199907019990702, "grad_norm": 146.02871704101562, "learning_rate": 7.283395900953609e-07, "loss": 15.0312, "step": 18067 }, { "epoch": 1.1999734342830577, "grad_norm": 386.002197265625, "learning_rate": 7.282360843978226e-07, "loss": 14.8906, "step": 18068 }, { "epoch": 1.2000398485754133, "grad_norm": 201.04054260253906, "learning_rate": 7.281325818438921e-07, "loss": 16.6562, "step": 18069 }, { "epoch": 1.2001062628677692, "grad_norm": 208.29356384277344, "learning_rate": 7.280290824347677e-07, "loss": 15.0, "step": 18070 }, { "epoch": 1.2001726771601249, "grad_norm": 180.98492431640625, "learning_rate": 7.279255861716457e-07, "loss": 19.9219, "step": 18071 }, { "epoch": 1.2002390914524805, "grad_norm": 217.85975646972656, "learning_rate": 7.278220930557238e-07, "loss": 16.3281, "step": 18072 }, { "epoch": 1.2003055057448364, "grad_norm": 133.74920654296875, "learning_rate": 7.277186030881987e-07, "loss": 16.5781, "step": 18073 }, { "epoch": 1.200371920037192, "grad_norm": 256.2759094238281, "learning_rate": 7.27615116270268e-07, "loss": 19.0, "step": 18074 }, { "epoch": 1.2004383343295477, "grad_norm": 237.65846252441406, "learning_rate": 7.275116326031285e-07, "loss": 13.2969, "step": 18075 }, { "epoch": 1.2005047486219034, "grad_norm": 167.7753143310547, "learning_rate": 7.274081520879775e-07, "loss": 15.9219, "step": 18076 }, { "epoch": 1.200571162914259, "grad_norm": 192.60467529296875, "learning_rate": 7.273046747260114e-07, "loss": 21.1562, "step": 18077 }, { "epoch": 1.200637577206615, "grad_norm": 248.5672149658203, "learning_rate": 7.27201200518428e-07, "loss": 15.6094, "step": 18078 }, { "epoch": 1.2007039914989706, "grad_norm": 313.5723571777344, "learning_rate": 7.270977294664237e-07, "loss": 16.6094, "step": 18079 }, { "epoch": 1.2007704057913262, "grad_norm": 159.1045379638672, "learning_rate": 7.269942615711956e-07, "loss": 15.875, "step": 18080 }, { "epoch": 1.200836820083682, "grad_norm": 363.3292236328125, "learning_rate": 7.268907968339403e-07, "loss": 18.375, "step": 18081 }, { "epoch": 1.2009032343760377, "grad_norm": 153.97250366210938, "learning_rate": 7.267873352558549e-07, "loss": 14.1406, "step": 18082 }, { "epoch": 1.2009696486683934, "grad_norm": 290.8710632324219, "learning_rate": 7.266838768381357e-07, "loss": 15.1406, "step": 18083 }, { "epoch": 1.2010360629607493, "grad_norm": 165.46412658691406, "learning_rate": 7.265804215819802e-07, "loss": 16.125, "step": 18084 }, { "epoch": 1.201102477253105, "grad_norm": 129.7757568359375, "learning_rate": 7.264769694885846e-07, "loss": 16.7812, "step": 18085 }, { "epoch": 1.2011688915454606, "grad_norm": 208.26727294921875, "learning_rate": 7.263735205591453e-07, "loss": 18.6406, "step": 18086 }, { "epoch": 1.2012353058378162, "grad_norm": 172.42831420898438, "learning_rate": 7.2627007479486e-07, "loss": 16.0156, "step": 18087 }, { "epoch": 1.201301720130172, "grad_norm": 168.23828125, "learning_rate": 7.261666321969242e-07, "loss": 13.0938, "step": 18088 }, { "epoch": 1.2013681344225278, "grad_norm": 162.6629180908203, "learning_rate": 7.260631927665353e-07, "loss": 15.0312, "step": 18089 }, { "epoch": 1.2014345487148834, "grad_norm": 269.2633361816406, "learning_rate": 7.259597565048888e-07, "loss": 19.5, "step": 18090 }, { "epoch": 1.201500963007239, "grad_norm": 172.52296447753906, "learning_rate": 7.258563234131823e-07, "loss": 19.0625, "step": 18091 }, { "epoch": 1.201567377299595, "grad_norm": 89.23053741455078, "learning_rate": 7.257528934926116e-07, "loss": 16.0156, "step": 18092 }, { "epoch": 1.2016337915919506, "grad_norm": 877.6488647460938, "learning_rate": 7.256494667443736e-07, "loss": 14.3906, "step": 18093 }, { "epoch": 1.2017002058843063, "grad_norm": 153.3217315673828, "learning_rate": 7.255460431696638e-07, "loss": 17.1406, "step": 18094 }, { "epoch": 1.2017666201766621, "grad_norm": 596.3847045898438, "learning_rate": 7.254426227696796e-07, "loss": 21.3594, "step": 18095 }, { "epoch": 1.2018330344690178, "grad_norm": 361.91412353515625, "learning_rate": 7.253392055456166e-07, "loss": 17.6094, "step": 18096 }, { "epoch": 1.2018994487613734, "grad_norm": 859.7628784179688, "learning_rate": 7.252357914986716e-07, "loss": 25.4531, "step": 18097 }, { "epoch": 1.201965863053729, "grad_norm": 307.0325622558594, "learning_rate": 7.251323806300398e-07, "loss": 17.7812, "step": 18098 }, { "epoch": 1.2020322773460848, "grad_norm": 195.18368530273438, "learning_rate": 7.250289729409188e-07, "loss": 15.9375, "step": 18099 }, { "epoch": 1.2020986916384406, "grad_norm": 152.39341735839844, "learning_rate": 7.249255684325037e-07, "loss": 13.3906, "step": 18100 }, { "epoch": 1.2021651059307963, "grad_norm": 384.8532409667969, "learning_rate": 7.248221671059914e-07, "loss": 21.125, "step": 18101 }, { "epoch": 1.202231520223152, "grad_norm": 382.2464904785156, "learning_rate": 7.247187689625774e-07, "loss": 16.2031, "step": 18102 }, { "epoch": 1.2022979345155078, "grad_norm": 171.69166564941406, "learning_rate": 7.24615374003458e-07, "loss": 16.9375, "step": 18103 }, { "epoch": 1.2023643488078635, "grad_norm": 550.6312255859375, "learning_rate": 7.245119822298289e-07, "loss": 19.2812, "step": 18104 }, { "epoch": 1.2024307631002191, "grad_norm": 139.9482879638672, "learning_rate": 7.244085936428865e-07, "loss": 19.8125, "step": 18105 }, { "epoch": 1.202497177392575, "grad_norm": 304.2582092285156, "learning_rate": 7.243052082438265e-07, "loss": 20.625, "step": 18106 }, { "epoch": 1.2025635916849307, "grad_norm": 224.04273986816406, "learning_rate": 7.242018260338452e-07, "loss": 15.2031, "step": 18107 }, { "epoch": 1.2026300059772863, "grad_norm": 209.7005157470703, "learning_rate": 7.240984470141377e-07, "loss": 19.2812, "step": 18108 }, { "epoch": 1.202696420269642, "grad_norm": 239.12899780273438, "learning_rate": 7.239950711859006e-07, "loss": 12.375, "step": 18109 }, { "epoch": 1.2027628345619976, "grad_norm": 142.67919921875, "learning_rate": 7.238916985503292e-07, "loss": 17.4219, "step": 18110 }, { "epoch": 1.2028292488543535, "grad_norm": 215.92713928222656, "learning_rate": 7.237883291086196e-07, "loss": 16.5625, "step": 18111 }, { "epoch": 1.2028956631467091, "grad_norm": 186.4083709716797, "learning_rate": 7.236849628619669e-07, "loss": 14.5312, "step": 18112 }, { "epoch": 1.2029620774390648, "grad_norm": 244.2615203857422, "learning_rate": 7.235815998115678e-07, "loss": 16.125, "step": 18113 }, { "epoch": 1.2030284917314207, "grad_norm": 187.21156311035156, "learning_rate": 7.234782399586169e-07, "loss": 17.9219, "step": 18114 }, { "epoch": 1.2030949060237763, "grad_norm": 308.8018798828125, "learning_rate": 7.233748833043105e-07, "loss": 17.8594, "step": 18115 }, { "epoch": 1.203161320316132, "grad_norm": 260.6674499511719, "learning_rate": 7.232715298498437e-07, "loss": 15.3125, "step": 18116 }, { "epoch": 1.2032277346084879, "grad_norm": 380.80487060546875, "learning_rate": 7.231681795964126e-07, "loss": 13.1797, "step": 18117 }, { "epoch": 1.2032941489008435, "grad_norm": 114.16128540039062, "learning_rate": 7.230648325452122e-07, "loss": 16.0, "step": 18118 }, { "epoch": 1.2033605631931992, "grad_norm": 217.31085205078125, "learning_rate": 7.229614886974382e-07, "loss": 18.2656, "step": 18119 }, { "epoch": 1.2034269774855548, "grad_norm": 155.8886260986328, "learning_rate": 7.228581480542857e-07, "loss": 20.4062, "step": 18120 }, { "epoch": 1.2034933917779105, "grad_norm": 101.41374969482422, "learning_rate": 7.227548106169504e-07, "loss": 13.625, "step": 18121 }, { "epoch": 1.2035598060702664, "grad_norm": 346.2398681640625, "learning_rate": 7.226514763866278e-07, "loss": 19.7188, "step": 18122 }, { "epoch": 1.203626220362622, "grad_norm": 120.75347137451172, "learning_rate": 7.225481453645127e-07, "loss": 11.2031, "step": 18123 }, { "epoch": 1.2036926346549777, "grad_norm": 605.6893920898438, "learning_rate": 7.224448175518012e-07, "loss": 20.5156, "step": 18124 }, { "epoch": 1.2037590489473335, "grad_norm": 211.2713165283203, "learning_rate": 7.223414929496874e-07, "loss": 12.375, "step": 18125 }, { "epoch": 1.2038254632396892, "grad_norm": 156.7390594482422, "learning_rate": 7.222381715593675e-07, "loss": 14.4375, "step": 18126 }, { "epoch": 1.2038918775320449, "grad_norm": 374.44140625, "learning_rate": 7.221348533820362e-07, "loss": 15.4844, "step": 18127 }, { "epoch": 1.2039582918244007, "grad_norm": 185.7707977294922, "learning_rate": 7.220315384188888e-07, "loss": 14.75, "step": 18128 }, { "epoch": 1.2040247061167564, "grad_norm": 320.7282409667969, "learning_rate": 7.219282266711197e-07, "loss": 19.0781, "step": 18129 }, { "epoch": 1.204091120409112, "grad_norm": 233.5338134765625, "learning_rate": 7.218249181399252e-07, "loss": 14.5156, "step": 18130 }, { "epoch": 1.2041575347014677, "grad_norm": 176.96939086914062, "learning_rate": 7.217216128264993e-07, "loss": 11.0547, "step": 18131 }, { "epoch": 1.2042239489938236, "grad_norm": 446.43426513671875, "learning_rate": 7.216183107320375e-07, "loss": 20.1406, "step": 18132 }, { "epoch": 1.2042903632861792, "grad_norm": 1576.3433837890625, "learning_rate": 7.215150118577342e-07, "loss": 11.6875, "step": 18133 }, { "epoch": 1.2043567775785349, "grad_norm": 149.71910095214844, "learning_rate": 7.214117162047851e-07, "loss": 13.1562, "step": 18134 }, { "epoch": 1.2044231918708905, "grad_norm": 168.44882202148438, "learning_rate": 7.213084237743842e-07, "loss": 20.0312, "step": 18135 }, { "epoch": 1.2044896061632464, "grad_norm": 341.10797119140625, "learning_rate": 7.212051345677273e-07, "loss": 27.0469, "step": 18136 }, { "epoch": 1.204556020455602, "grad_norm": 263.6160583496094, "learning_rate": 7.211018485860081e-07, "loss": 18.5781, "step": 18137 }, { "epoch": 1.2046224347479577, "grad_norm": 386.90380859375, "learning_rate": 7.209985658304223e-07, "loss": 21.9219, "step": 18138 }, { "epoch": 1.2046888490403136, "grad_norm": 137.3497314453125, "learning_rate": 7.208952863021639e-07, "loss": 18.2031, "step": 18139 }, { "epoch": 1.2047552633326692, "grad_norm": 103.69804382324219, "learning_rate": 7.207920100024282e-07, "loss": 12.5781, "step": 18140 }, { "epoch": 1.204821677625025, "grad_norm": 225.2301025390625, "learning_rate": 7.20688736932409e-07, "loss": 14.5156, "step": 18141 }, { "epoch": 1.2048880919173806, "grad_norm": 379.8690490722656, "learning_rate": 7.20585467093302e-07, "loss": 17.9062, "step": 18142 }, { "epoch": 1.2049545062097364, "grad_norm": 237.1198272705078, "learning_rate": 7.204822004863009e-07, "loss": 19.9375, "step": 18143 }, { "epoch": 1.205020920502092, "grad_norm": 225.79904174804688, "learning_rate": 7.203789371126008e-07, "loss": 15.5312, "step": 18144 }, { "epoch": 1.2050873347944477, "grad_norm": 223.4560089111328, "learning_rate": 7.202756769733955e-07, "loss": 15.5469, "step": 18145 }, { "epoch": 1.2051537490868034, "grad_norm": 166.63136291503906, "learning_rate": 7.201724200698803e-07, "loss": 18.0625, "step": 18146 }, { "epoch": 1.2052201633791593, "grad_norm": 180.5031280517578, "learning_rate": 7.200691664032487e-07, "loss": 15.1406, "step": 18147 }, { "epoch": 1.205286577671515, "grad_norm": 685.981689453125, "learning_rate": 7.199659159746961e-07, "loss": 25.1406, "step": 18148 }, { "epoch": 1.2053529919638706, "grad_norm": 307.26385498046875, "learning_rate": 7.198626687854159e-07, "loss": 13.4219, "step": 18149 }, { "epoch": 1.2054194062562265, "grad_norm": 1557.4521484375, "learning_rate": 7.197594248366032e-07, "loss": 14.2266, "step": 18150 }, { "epoch": 1.205485820548582, "grad_norm": 635.9171752929688, "learning_rate": 7.196561841294513e-07, "loss": 13.0938, "step": 18151 }, { "epoch": 1.2055522348409378, "grad_norm": 153.92236328125, "learning_rate": 7.195529466651555e-07, "loss": 17.4844, "step": 18152 }, { "epoch": 1.2056186491332934, "grad_norm": 196.38775634765625, "learning_rate": 7.194497124449093e-07, "loss": 17.4688, "step": 18153 }, { "epoch": 1.2056850634256493, "grad_norm": 145.31239318847656, "learning_rate": 7.193464814699072e-07, "loss": 11.25, "step": 18154 }, { "epoch": 1.205751477718005, "grad_norm": 155.7511749267578, "learning_rate": 7.192432537413427e-07, "loss": 18.8906, "step": 18155 }, { "epoch": 1.2058178920103606, "grad_norm": 254.09715270996094, "learning_rate": 7.191400292604109e-07, "loss": 24.9531, "step": 18156 }, { "epoch": 1.2058843063027163, "grad_norm": 165.2103729248047, "learning_rate": 7.190368080283051e-07, "loss": 13.1719, "step": 18157 }, { "epoch": 1.2059507205950721, "grad_norm": 141.17034912109375, "learning_rate": 7.189335900462192e-07, "loss": 14.5, "step": 18158 }, { "epoch": 1.2060171348874278, "grad_norm": 286.8425598144531, "learning_rate": 7.18830375315348e-07, "loss": 14.0625, "step": 18159 }, { "epoch": 1.2060835491797834, "grad_norm": 198.7587432861328, "learning_rate": 7.187271638368845e-07, "loss": 21.2344, "step": 18160 }, { "epoch": 1.2061499634721393, "grad_norm": 205.5132293701172, "learning_rate": 7.186239556120233e-07, "loss": 14.4219, "step": 18161 }, { "epoch": 1.206216377764495, "grad_norm": 114.99022674560547, "learning_rate": 7.185207506419573e-07, "loss": 15.7188, "step": 18162 }, { "epoch": 1.2062827920568506, "grad_norm": 141.0342559814453, "learning_rate": 7.184175489278816e-07, "loss": 14.9688, "step": 18163 }, { "epoch": 1.2063492063492063, "grad_norm": 113.33275604248047, "learning_rate": 7.183143504709891e-07, "loss": 11.8359, "step": 18164 }, { "epoch": 1.2064156206415622, "grad_norm": 216.0926055908203, "learning_rate": 7.182111552724739e-07, "loss": 23.0938, "step": 18165 }, { "epoch": 1.2064820349339178, "grad_norm": 364.6902160644531, "learning_rate": 7.181079633335294e-07, "loss": 19.0469, "step": 18166 }, { "epoch": 1.2065484492262735, "grad_norm": 121.52067565917969, "learning_rate": 7.180047746553496e-07, "loss": 14.375, "step": 18167 }, { "epoch": 1.2066148635186291, "grad_norm": 602.2765502929688, "learning_rate": 7.179015892391275e-07, "loss": 14.2109, "step": 18168 }, { "epoch": 1.206681277810985, "grad_norm": 230.7069854736328, "learning_rate": 7.177984070860577e-07, "loss": 11.4609, "step": 18169 }, { "epoch": 1.2067476921033407, "grad_norm": 180.6962890625, "learning_rate": 7.176952281973329e-07, "loss": 17.75, "step": 18170 }, { "epoch": 1.2068141063956963, "grad_norm": 263.1969909667969, "learning_rate": 7.175920525741472e-07, "loss": 13.875, "step": 18171 }, { "epoch": 1.2068805206880522, "grad_norm": 113.56190490722656, "learning_rate": 7.174888802176933e-07, "loss": 11.8906, "step": 18172 }, { "epoch": 1.2069469349804078, "grad_norm": 217.60513305664062, "learning_rate": 7.173857111291656e-07, "loss": 18.375, "step": 18173 }, { "epoch": 1.2070133492727635, "grad_norm": 146.1751708984375, "learning_rate": 7.172825453097568e-07, "loss": 14.125, "step": 18174 }, { "epoch": 1.2070797635651191, "grad_norm": 182.5890350341797, "learning_rate": 7.171793827606607e-07, "loss": 14.0, "step": 18175 }, { "epoch": 1.207146177857475, "grad_norm": 240.60682678222656, "learning_rate": 7.170762234830698e-07, "loss": 18.4219, "step": 18176 }, { "epoch": 1.2072125921498307, "grad_norm": 190.92787170410156, "learning_rate": 7.169730674781787e-07, "loss": 16.9688, "step": 18177 }, { "epoch": 1.2072790064421863, "grad_norm": 130.54110717773438, "learning_rate": 7.168699147471797e-07, "loss": 12.625, "step": 18178 }, { "epoch": 1.207345420734542, "grad_norm": 172.4846649169922, "learning_rate": 7.167667652912664e-07, "loss": 17.7344, "step": 18179 }, { "epoch": 1.2074118350268979, "grad_norm": 372.55767822265625, "learning_rate": 7.166636191116314e-07, "loss": 15.75, "step": 18180 }, { "epoch": 1.2074782493192535, "grad_norm": 279.6119384765625, "learning_rate": 7.165604762094689e-07, "loss": 18.4219, "step": 18181 }, { "epoch": 1.2075446636116092, "grad_norm": 138.42739868164062, "learning_rate": 7.164573365859709e-07, "loss": 15.5312, "step": 18182 }, { "epoch": 1.207611077903965, "grad_norm": 173.71900939941406, "learning_rate": 7.163542002423312e-07, "loss": 13.2031, "step": 18183 }, { "epoch": 1.2076774921963207, "grad_norm": 166.0388641357422, "learning_rate": 7.162510671797422e-07, "loss": 17.0781, "step": 18184 }, { "epoch": 1.2077439064886764, "grad_norm": 203.50221252441406, "learning_rate": 7.161479373993977e-07, "loss": 18.4219, "step": 18185 }, { "epoch": 1.207810320781032, "grad_norm": 340.660888671875, "learning_rate": 7.160448109024897e-07, "loss": 16.5156, "step": 18186 }, { "epoch": 1.2078767350733879, "grad_norm": 150.6143341064453, "learning_rate": 7.15941687690212e-07, "loss": 15.0625, "step": 18187 }, { "epoch": 1.2079431493657435, "grad_norm": 183.9560546875, "learning_rate": 7.158385677637568e-07, "loss": 14.5781, "step": 18188 }, { "epoch": 1.2080095636580992, "grad_norm": 226.951904296875, "learning_rate": 7.157354511243176e-07, "loss": 12.5469, "step": 18189 }, { "epoch": 1.2080759779504548, "grad_norm": 144.77015686035156, "learning_rate": 7.156323377730862e-07, "loss": 16.7812, "step": 18190 }, { "epoch": 1.2081423922428107, "grad_norm": 223.2513427734375, "learning_rate": 7.155292277112563e-07, "loss": 14.0781, "step": 18191 }, { "epoch": 1.2082088065351664, "grad_norm": 170.83514404296875, "learning_rate": 7.154261209400201e-07, "loss": 15.125, "step": 18192 }, { "epoch": 1.208275220827522, "grad_norm": 136.1679229736328, "learning_rate": 7.153230174605707e-07, "loss": 16.125, "step": 18193 }, { "epoch": 1.208341635119878, "grad_norm": 392.1712646484375, "learning_rate": 7.152199172741001e-07, "loss": 18.4688, "step": 18194 }, { "epoch": 1.2084080494122336, "grad_norm": 893.5430297851562, "learning_rate": 7.151168203818013e-07, "loss": 22.5625, "step": 18195 }, { "epoch": 1.2084744637045892, "grad_norm": 140.41000366210938, "learning_rate": 7.150137267848673e-07, "loss": 16.125, "step": 18196 }, { "epoch": 1.2085408779969449, "grad_norm": 131.22750854492188, "learning_rate": 7.149106364844895e-07, "loss": 14.5156, "step": 18197 }, { "epoch": 1.2086072922893007, "grad_norm": 238.59243774414062, "learning_rate": 7.148075494818618e-07, "loss": 12.9844, "step": 18198 }, { "epoch": 1.2086737065816564, "grad_norm": 226.7390899658203, "learning_rate": 7.147044657781753e-07, "loss": 21.2656, "step": 18199 }, { "epoch": 1.208740120874012, "grad_norm": 209.81712341308594, "learning_rate": 7.146013853746236e-07, "loss": 15.1562, "step": 18200 }, { "epoch": 1.2088065351663677, "grad_norm": 238.74270629882812, "learning_rate": 7.144983082723979e-07, "loss": 21.0312, "step": 18201 }, { "epoch": 1.2088729494587236, "grad_norm": 197.08741760253906, "learning_rate": 7.143952344726916e-07, "loss": 16.5938, "step": 18202 }, { "epoch": 1.2089393637510792, "grad_norm": 413.6668701171875, "learning_rate": 7.142921639766964e-07, "loss": 22.5312, "step": 18203 }, { "epoch": 1.209005778043435, "grad_norm": 381.57403564453125, "learning_rate": 7.141890967856048e-07, "loss": 20.9688, "step": 18204 }, { "epoch": 1.2090721923357908, "grad_norm": 203.2981719970703, "learning_rate": 7.140860329006085e-07, "loss": 13.7812, "step": 18205 }, { "epoch": 1.2091386066281464, "grad_norm": 145.57611083984375, "learning_rate": 7.139829723229006e-07, "loss": 14.1875, "step": 18206 }, { "epoch": 1.209205020920502, "grad_norm": 301.0003662109375, "learning_rate": 7.138799150536725e-07, "loss": 15.0, "step": 18207 }, { "epoch": 1.2092714352128577, "grad_norm": 380.63262939453125, "learning_rate": 7.137768610941169e-07, "loss": 14.1641, "step": 18208 }, { "epoch": 1.2093378495052136, "grad_norm": 143.83851623535156, "learning_rate": 7.136738104454251e-07, "loss": 12.4062, "step": 18209 }, { "epoch": 1.2094042637975693, "grad_norm": 218.62051391601562, "learning_rate": 7.135707631087898e-07, "loss": 23.5781, "step": 18210 }, { "epoch": 1.209470678089925, "grad_norm": 194.61766052246094, "learning_rate": 7.134677190854023e-07, "loss": 33.5938, "step": 18211 }, { "epoch": 1.2095370923822806, "grad_norm": 165.59654235839844, "learning_rate": 7.133646783764556e-07, "loss": 13.5625, "step": 18212 }, { "epoch": 1.2096035066746365, "grad_norm": 119.62936401367188, "learning_rate": 7.132616409831406e-07, "loss": 12.4219, "step": 18213 }, { "epoch": 1.209669920966992, "grad_norm": 408.51702880859375, "learning_rate": 7.1315860690665e-07, "loss": 22.9844, "step": 18214 }, { "epoch": 1.2097363352593478, "grad_norm": 530.5352172851562, "learning_rate": 7.130555761481748e-07, "loss": 17.3125, "step": 18215 }, { "epoch": 1.2098027495517036, "grad_norm": 136.54898071289062, "learning_rate": 7.129525487089076e-07, "loss": 18.5, "step": 18216 }, { "epoch": 1.2098691638440593, "grad_norm": 189.9576873779297, "learning_rate": 7.128495245900396e-07, "loss": 18.0312, "step": 18217 }, { "epoch": 1.209935578136415, "grad_norm": 176.85281372070312, "learning_rate": 7.127465037927629e-07, "loss": 13.8438, "step": 18218 }, { "epoch": 1.2100019924287706, "grad_norm": 283.8900146484375, "learning_rate": 7.126434863182684e-07, "loss": 21.4688, "step": 18219 }, { "epoch": 1.2100684067211265, "grad_norm": 308.0807800292969, "learning_rate": 7.125404721677491e-07, "loss": 23.1875, "step": 18220 }, { "epoch": 1.2101348210134821, "grad_norm": 195.1781768798828, "learning_rate": 7.124374613423955e-07, "loss": 16.3281, "step": 18221 }, { "epoch": 1.2102012353058378, "grad_norm": 290.548828125, "learning_rate": 7.123344538433998e-07, "loss": 14.4688, "step": 18222 }, { "epoch": 1.2102676495981934, "grad_norm": 289.67059326171875, "learning_rate": 7.122314496719527e-07, "loss": 14.0781, "step": 18223 }, { "epoch": 1.2103340638905493, "grad_norm": 111.39934539794922, "learning_rate": 7.121284488292467e-07, "loss": 16.2188, "step": 18224 }, { "epoch": 1.210400478182905, "grad_norm": 3716.21337890625, "learning_rate": 7.120254513164727e-07, "loss": 25.7188, "step": 18225 }, { "epoch": 1.2104668924752606, "grad_norm": 172.33441162109375, "learning_rate": 7.119224571348224e-07, "loss": 16.9844, "step": 18226 }, { "epoch": 1.2105333067676165, "grad_norm": 314.8550109863281, "learning_rate": 7.118194662854866e-07, "loss": 9.9688, "step": 18227 }, { "epoch": 1.2105997210599722, "grad_norm": 134.15113830566406, "learning_rate": 7.117164787696576e-07, "loss": 13.2969, "step": 18228 }, { "epoch": 1.2106661353523278, "grad_norm": 269.7289733886719, "learning_rate": 7.116134945885255e-07, "loss": 12.0156, "step": 18229 }, { "epoch": 1.2107325496446835, "grad_norm": 709.9984741210938, "learning_rate": 7.115105137432826e-07, "loss": 19.5312, "step": 18230 }, { "epoch": 1.2107989639370393, "grad_norm": 229.20880126953125, "learning_rate": 7.114075362351197e-07, "loss": 16.0, "step": 18231 }, { "epoch": 1.210865378229395, "grad_norm": 1150.269775390625, "learning_rate": 7.113045620652276e-07, "loss": 12.9062, "step": 18232 }, { "epoch": 1.2109317925217506, "grad_norm": 193.58065795898438, "learning_rate": 7.112015912347984e-07, "loss": 16.5781, "step": 18233 }, { "epoch": 1.2109982068141063, "grad_norm": 249.0192413330078, "learning_rate": 7.110986237450225e-07, "loss": 19.9531, "step": 18234 }, { "epoch": 1.2110646211064622, "grad_norm": 168.31350708007812, "learning_rate": 7.109956595970912e-07, "loss": 21.7812, "step": 18235 }, { "epoch": 1.2111310353988178, "grad_norm": 194.5171661376953, "learning_rate": 7.10892698792195e-07, "loss": 16.3125, "step": 18236 }, { "epoch": 1.2111974496911735, "grad_norm": 150.1419677734375, "learning_rate": 7.107897413315259e-07, "loss": 11.8359, "step": 18237 }, { "epoch": 1.2112638639835294, "grad_norm": 218.37408447265625, "learning_rate": 7.10686787216274e-07, "loss": 16.1797, "step": 18238 }, { "epoch": 1.211330278275885, "grad_norm": 342.24517822265625, "learning_rate": 7.105838364476309e-07, "loss": 14.9531, "step": 18239 }, { "epoch": 1.2113966925682407, "grad_norm": 140.9661102294922, "learning_rate": 7.104808890267866e-07, "loss": 17.0156, "step": 18240 }, { "epoch": 1.2114631068605963, "grad_norm": 294.839599609375, "learning_rate": 7.103779449549327e-07, "loss": 20.2891, "step": 18241 }, { "epoch": 1.2115295211529522, "grad_norm": 411.7449035644531, "learning_rate": 7.102750042332597e-07, "loss": 14.3281, "step": 18242 }, { "epoch": 1.2115959354453079, "grad_norm": 308.4681396484375, "learning_rate": 7.101720668629585e-07, "loss": 18.8125, "step": 18243 }, { "epoch": 1.2116623497376635, "grad_norm": 239.84039306640625, "learning_rate": 7.100691328452193e-07, "loss": 12.0156, "step": 18244 }, { "epoch": 1.2117287640300192, "grad_norm": 219.70437622070312, "learning_rate": 7.099662021812336e-07, "loss": 13.3281, "step": 18245 }, { "epoch": 1.211795178322375, "grad_norm": 255.8448028564453, "learning_rate": 7.098632748721914e-07, "loss": 29.2969, "step": 18246 }, { "epoch": 1.2118615926147307, "grad_norm": 479.6396484375, "learning_rate": 7.097603509192837e-07, "loss": 24.2188, "step": 18247 }, { "epoch": 1.2119280069070864, "grad_norm": 180.66555786132812, "learning_rate": 7.096574303237006e-07, "loss": 17.1562, "step": 18248 }, { "epoch": 1.2119944211994422, "grad_norm": 109.18968963623047, "learning_rate": 7.095545130866336e-07, "loss": 14.5625, "step": 18249 }, { "epoch": 1.2120608354917979, "grad_norm": 249.98681640625, "learning_rate": 7.094515992092718e-07, "loss": 17.7344, "step": 18250 }, { "epoch": 1.2121272497841535, "grad_norm": 155.83078002929688, "learning_rate": 7.093486886928067e-07, "loss": 18.1875, "step": 18251 }, { "epoch": 1.2121936640765092, "grad_norm": 187.2969970703125, "learning_rate": 7.092457815384283e-07, "loss": 16.1562, "step": 18252 }, { "epoch": 1.212260078368865, "grad_norm": 262.5191650390625, "learning_rate": 7.091428777473271e-07, "loss": 15.7344, "step": 18253 }, { "epoch": 1.2123264926612207, "grad_norm": 95.60713958740234, "learning_rate": 7.09039977320693e-07, "loss": 12.5156, "step": 18254 }, { "epoch": 1.2123929069535764, "grad_norm": 237.17601013183594, "learning_rate": 7.089370802597172e-07, "loss": 17.4219, "step": 18255 }, { "epoch": 1.212459321245932, "grad_norm": 503.4901428222656, "learning_rate": 7.088341865655891e-07, "loss": 20.2344, "step": 18256 }, { "epoch": 1.212525735538288, "grad_norm": 197.35780334472656, "learning_rate": 7.087312962394995e-07, "loss": 13.7031, "step": 18257 }, { "epoch": 1.2125921498306436, "grad_norm": 177.7439727783203, "learning_rate": 7.086284092826378e-07, "loss": 16.6094, "step": 18258 }, { "epoch": 1.2126585641229992, "grad_norm": 1321.3604736328125, "learning_rate": 7.085255256961951e-07, "loss": 15.9219, "step": 18259 }, { "epoch": 1.212724978415355, "grad_norm": 153.5955047607422, "learning_rate": 7.084226454813608e-07, "loss": 16.1406, "step": 18260 }, { "epoch": 1.2127913927077107, "grad_norm": 191.82298278808594, "learning_rate": 7.083197686393256e-07, "loss": 15.6562, "step": 18261 }, { "epoch": 1.2128578070000664, "grad_norm": 168.38243103027344, "learning_rate": 7.082168951712786e-07, "loss": 18.9062, "step": 18262 }, { "epoch": 1.212924221292422, "grad_norm": 285.82513427734375, "learning_rate": 7.081140250784108e-07, "loss": 21.6562, "step": 18263 }, { "epoch": 1.212990635584778, "grad_norm": 236.0021514892578, "learning_rate": 7.080111583619113e-07, "loss": 15.2188, "step": 18264 }, { "epoch": 1.2130570498771336, "grad_norm": 200.88629150390625, "learning_rate": 7.079082950229705e-07, "loss": 19.9609, "step": 18265 }, { "epoch": 1.2131234641694892, "grad_norm": 182.19869995117188, "learning_rate": 7.078054350627778e-07, "loss": 13.4844, "step": 18266 }, { "epoch": 1.213189878461845, "grad_norm": 105.77255249023438, "learning_rate": 7.077025784825238e-07, "loss": 13.3516, "step": 18267 }, { "epoch": 1.2132562927542008, "grad_norm": 369.54608154296875, "learning_rate": 7.075997252833976e-07, "loss": 18.8906, "step": 18268 }, { "epoch": 1.2133227070465564, "grad_norm": 175.40740966796875, "learning_rate": 7.074968754665888e-07, "loss": 20.0938, "step": 18269 }, { "epoch": 1.213389121338912, "grad_norm": 326.2547912597656, "learning_rate": 7.073940290332883e-07, "loss": 20.0312, "step": 18270 }, { "epoch": 1.213455535631268, "grad_norm": 219.0745391845703, "learning_rate": 7.072911859846843e-07, "loss": 18.6406, "step": 18271 }, { "epoch": 1.2135219499236236, "grad_norm": 181.62767028808594, "learning_rate": 7.071883463219674e-07, "loss": 11.4062, "step": 18272 }, { "epoch": 1.2135883642159793, "grad_norm": 191.54290771484375, "learning_rate": 7.070855100463268e-07, "loss": 16.4375, "step": 18273 }, { "epoch": 1.213654778508335, "grad_norm": 151.11122131347656, "learning_rate": 7.069826771589522e-07, "loss": 12.625, "step": 18274 }, { "epoch": 1.2137211928006908, "grad_norm": 636.4026489257812, "learning_rate": 7.068798476610327e-07, "loss": 16.5469, "step": 18275 }, { "epoch": 1.2137876070930465, "grad_norm": 210.982177734375, "learning_rate": 7.067770215537584e-07, "loss": 25.9844, "step": 18276 }, { "epoch": 1.213854021385402, "grad_norm": 335.7365417480469, "learning_rate": 7.066741988383183e-07, "loss": 15.7344, "step": 18277 }, { "epoch": 1.2139204356777578, "grad_norm": 167.972412109375, "learning_rate": 7.06571379515902e-07, "loss": 16.1094, "step": 18278 }, { "epoch": 1.2139868499701136, "grad_norm": 381.5212097167969, "learning_rate": 7.064685635876986e-07, "loss": 18.0938, "step": 18279 }, { "epoch": 1.2140532642624693, "grad_norm": 464.87811279296875, "learning_rate": 7.063657510548978e-07, "loss": 14.5781, "step": 18280 }, { "epoch": 1.214119678554825, "grad_norm": 556.68408203125, "learning_rate": 7.062629419186885e-07, "loss": 16.3438, "step": 18281 }, { "epoch": 1.2141860928471808, "grad_norm": 193.04542541503906, "learning_rate": 7.061601361802605e-07, "loss": 11.9219, "step": 18282 }, { "epoch": 1.2142525071395365, "grad_norm": 238.83970642089844, "learning_rate": 7.06057333840802e-07, "loss": 16.8125, "step": 18283 }, { "epoch": 1.2143189214318921, "grad_norm": 130.28660583496094, "learning_rate": 7.059545349015032e-07, "loss": 15.125, "step": 18284 }, { "epoch": 1.2143853357242478, "grad_norm": 100.0658950805664, "learning_rate": 7.058517393635527e-07, "loss": 11.0938, "step": 18285 }, { "epoch": 1.2144517500166037, "grad_norm": 200.8166961669922, "learning_rate": 7.057489472281397e-07, "loss": 25.9219, "step": 18286 }, { "epoch": 1.2145181643089593, "grad_norm": 650.5463256835938, "learning_rate": 7.05646158496453e-07, "loss": 10.5938, "step": 18287 }, { "epoch": 1.214584578601315, "grad_norm": 178.45358276367188, "learning_rate": 7.05543373169682e-07, "loss": 20.5469, "step": 18288 }, { "epoch": 1.2146509928936706, "grad_norm": 178.28549194335938, "learning_rate": 7.054405912490153e-07, "loss": 16.7031, "step": 18289 }, { "epoch": 1.2147174071860265, "grad_norm": 159.1908416748047, "learning_rate": 7.053378127356424e-07, "loss": 18.7188, "step": 18290 }, { "epoch": 1.2147838214783822, "grad_norm": 220.8880157470703, "learning_rate": 7.052350376307512e-07, "loss": 17.4375, "step": 18291 }, { "epoch": 1.2148502357707378, "grad_norm": 169.39910888671875, "learning_rate": 7.051322659355317e-07, "loss": 21.2969, "step": 18292 }, { "epoch": 1.2149166500630937, "grad_norm": 337.8200988769531, "learning_rate": 7.050294976511716e-07, "loss": 19.5312, "step": 18293 }, { "epoch": 1.2149830643554493, "grad_norm": 195.87754821777344, "learning_rate": 7.049267327788605e-07, "loss": 17.4219, "step": 18294 }, { "epoch": 1.215049478647805, "grad_norm": 361.8920593261719, "learning_rate": 7.048239713197866e-07, "loss": 21.8125, "step": 18295 }, { "epoch": 1.2151158929401606, "grad_norm": 179.88536071777344, "learning_rate": 7.04721213275139e-07, "loss": 18.0625, "step": 18296 }, { "epoch": 1.2151823072325165, "grad_norm": 162.97080993652344, "learning_rate": 7.046184586461058e-07, "loss": 18.0312, "step": 18297 }, { "epoch": 1.2152487215248722, "grad_norm": 153.74803161621094, "learning_rate": 7.045157074338763e-07, "loss": 14.5938, "step": 18298 }, { "epoch": 1.2153151358172278, "grad_norm": 176.66943359375, "learning_rate": 7.044129596396385e-07, "loss": 17.8906, "step": 18299 }, { "epoch": 1.2153815501095835, "grad_norm": 380.9910888671875, "learning_rate": 7.043102152645814e-07, "loss": 18.2188, "step": 18300 }, { "epoch": 1.2154479644019394, "grad_norm": 178.7952117919922, "learning_rate": 7.042074743098928e-07, "loss": 18.5, "step": 18301 }, { "epoch": 1.215514378694295, "grad_norm": 154.87489318847656, "learning_rate": 7.041047367767619e-07, "loss": 14.5156, "step": 18302 }, { "epoch": 1.2155807929866507, "grad_norm": 236.466796875, "learning_rate": 7.040020026663766e-07, "loss": 18.0781, "step": 18303 }, { "epoch": 1.2156472072790065, "grad_norm": 128.79290771484375, "learning_rate": 7.038992719799257e-07, "loss": 16.8203, "step": 18304 }, { "epoch": 1.2157136215713622, "grad_norm": 207.4348907470703, "learning_rate": 7.037965447185969e-07, "loss": 20.25, "step": 18305 }, { "epoch": 1.2157800358637179, "grad_norm": 146.47503662109375, "learning_rate": 7.036938208835791e-07, "loss": 17.2344, "step": 18306 }, { "epoch": 1.2158464501560735, "grad_norm": 387.7962951660156, "learning_rate": 7.035911004760604e-07, "loss": 21.1094, "step": 18307 }, { "epoch": 1.2159128644484294, "grad_norm": 161.37274169921875, "learning_rate": 7.034883834972286e-07, "loss": 17.6875, "step": 18308 }, { "epoch": 1.215979278740785, "grad_norm": 204.38697814941406, "learning_rate": 7.033856699482726e-07, "loss": 19.4062, "step": 18309 }, { "epoch": 1.2160456930331407, "grad_norm": 438.8343811035156, "learning_rate": 7.0328295983038e-07, "loss": 20.9688, "step": 18310 }, { "epoch": 1.2161121073254963, "grad_norm": 299.8879089355469, "learning_rate": 7.03180253144739e-07, "loss": 16.7656, "step": 18311 }, { "epoch": 1.2161785216178522, "grad_norm": 103.9207763671875, "learning_rate": 7.030775498925374e-07, "loss": 14.2656, "step": 18312 }, { "epoch": 1.2162449359102079, "grad_norm": 299.793701171875, "learning_rate": 7.029748500749641e-07, "loss": 17.75, "step": 18313 }, { "epoch": 1.2163113502025635, "grad_norm": 156.78207397460938, "learning_rate": 7.028721536932059e-07, "loss": 14.4062, "step": 18314 }, { "epoch": 1.2163777644949194, "grad_norm": 270.4862365722656, "learning_rate": 7.027694607484518e-07, "loss": 18.7344, "step": 18315 }, { "epoch": 1.216444178787275, "grad_norm": 245.76507568359375, "learning_rate": 7.026667712418888e-07, "loss": 17.0469, "step": 18316 }, { "epoch": 1.2165105930796307, "grad_norm": 377.2717590332031, "learning_rate": 7.025640851747054e-07, "loss": 22.1562, "step": 18317 }, { "epoch": 1.2165770073719864, "grad_norm": 291.02264404296875, "learning_rate": 7.024614025480888e-07, "loss": 11.6875, "step": 18318 }, { "epoch": 1.2166434216643423, "grad_norm": 287.2084045410156, "learning_rate": 7.023587233632273e-07, "loss": 17.7812, "step": 18319 }, { "epoch": 1.216709835956698, "grad_norm": 157.07981872558594, "learning_rate": 7.022560476213085e-07, "loss": 14.4375, "step": 18320 }, { "epoch": 1.2167762502490536, "grad_norm": 147.78164672851562, "learning_rate": 7.021533753235202e-07, "loss": 18.7344, "step": 18321 }, { "epoch": 1.2168426645414092, "grad_norm": 408.2299499511719, "learning_rate": 7.020507064710495e-07, "loss": 19.1094, "step": 18322 }, { "epoch": 1.216909078833765, "grad_norm": 120.04749298095703, "learning_rate": 7.019480410650847e-07, "loss": 13.3438, "step": 18323 }, { "epoch": 1.2169754931261207, "grad_norm": 1055.7042236328125, "learning_rate": 7.01845379106813e-07, "loss": 13.4375, "step": 18324 }, { "epoch": 1.2170419074184764, "grad_norm": 134.85205078125, "learning_rate": 7.017427205974222e-07, "loss": 12.7656, "step": 18325 }, { "epoch": 1.2171083217108323, "grad_norm": 212.54112243652344, "learning_rate": 7.016400655380993e-07, "loss": 12.625, "step": 18326 }, { "epoch": 1.217174736003188, "grad_norm": 244.26690673828125, "learning_rate": 7.015374139300324e-07, "loss": 19.8281, "step": 18327 }, { "epoch": 1.2172411502955436, "grad_norm": 281.0057678222656, "learning_rate": 7.014347657744084e-07, "loss": 18.4688, "step": 18328 }, { "epoch": 1.2173075645878992, "grad_norm": 260.4472961425781, "learning_rate": 7.013321210724151e-07, "loss": 19.0625, "step": 18329 }, { "epoch": 1.2173739788802551, "grad_norm": 199.90142822265625, "learning_rate": 7.012294798252392e-07, "loss": 12.6641, "step": 18330 }, { "epoch": 1.2174403931726108, "grad_norm": 168.43702697753906, "learning_rate": 7.011268420340688e-07, "loss": 23.6875, "step": 18331 }, { "epoch": 1.2175068074649664, "grad_norm": 168.34678649902344, "learning_rate": 7.010242077000905e-07, "loss": 22.0625, "step": 18332 }, { "epoch": 1.217573221757322, "grad_norm": 198.55838012695312, "learning_rate": 7.009215768244921e-07, "loss": 16.4062, "step": 18333 }, { "epoch": 1.217639636049678, "grad_norm": 1488.953857421875, "learning_rate": 7.0081894940846e-07, "loss": 13.0469, "step": 18334 }, { "epoch": 1.2177060503420336, "grad_norm": 752.0648193359375, "learning_rate": 7.007163254531822e-07, "loss": 23.4688, "step": 18335 }, { "epoch": 1.2177724646343893, "grad_norm": 261.1750183105469, "learning_rate": 7.006137049598449e-07, "loss": 21.2656, "step": 18336 }, { "epoch": 1.2178388789267451, "grad_norm": 209.27154541015625, "learning_rate": 7.005110879296359e-07, "loss": 14.4375, "step": 18337 }, { "epoch": 1.2179052932191008, "grad_norm": 227.24896240234375, "learning_rate": 7.004084743637419e-07, "loss": 16.7188, "step": 18338 }, { "epoch": 1.2179717075114564, "grad_norm": 175.26449584960938, "learning_rate": 7.0030586426335e-07, "loss": 20.7188, "step": 18339 }, { "epoch": 1.218038121803812, "grad_norm": 341.1151428222656, "learning_rate": 7.002032576296469e-07, "loss": 22.0938, "step": 18340 }, { "epoch": 1.218104536096168, "grad_norm": 103.48307800292969, "learning_rate": 7.001006544638197e-07, "loss": 15.625, "step": 18341 }, { "epoch": 1.2181709503885236, "grad_norm": 685.2308959960938, "learning_rate": 6.999980547670553e-07, "loss": 10.0625, "step": 18342 }, { "epoch": 1.2182373646808793, "grad_norm": 285.0768127441406, "learning_rate": 6.998954585405399e-07, "loss": 19.0781, "step": 18343 }, { "epoch": 1.218303778973235, "grad_norm": 153.4168701171875, "learning_rate": 6.997928657854614e-07, "loss": 16.3906, "step": 18344 }, { "epoch": 1.2183701932655908, "grad_norm": 157.3195343017578, "learning_rate": 6.996902765030056e-07, "loss": 20.8125, "step": 18345 }, { "epoch": 1.2184366075579465, "grad_norm": 224.05032348632812, "learning_rate": 6.995876906943599e-07, "loss": 16.6797, "step": 18346 }, { "epoch": 1.2185030218503021, "grad_norm": 183.96719360351562, "learning_rate": 6.9948510836071e-07, "loss": 18.1875, "step": 18347 }, { "epoch": 1.218569436142658, "grad_norm": 251.28639221191406, "learning_rate": 6.993825295032434e-07, "loss": 20.375, "step": 18348 }, { "epoch": 1.2186358504350137, "grad_norm": 250.55191040039062, "learning_rate": 6.992799541231464e-07, "loss": 21.1406, "step": 18349 }, { "epoch": 1.2187022647273693, "grad_norm": 434.2076416015625, "learning_rate": 6.991773822216054e-07, "loss": 15.3281, "step": 18350 }, { "epoch": 1.218768679019725, "grad_norm": 175.35308837890625, "learning_rate": 6.990748137998069e-07, "loss": 14.125, "step": 18351 }, { "epoch": 1.2188350933120808, "grad_norm": 109.30919647216797, "learning_rate": 6.989722488589376e-07, "loss": 13.4531, "step": 18352 }, { "epoch": 1.2189015076044365, "grad_norm": 662.8594360351562, "learning_rate": 6.988696874001836e-07, "loss": 15.7031, "step": 18353 }, { "epoch": 1.2189679218967922, "grad_norm": 201.3497772216797, "learning_rate": 6.987671294247317e-07, "loss": 15.7812, "step": 18354 }, { "epoch": 1.2190343361891478, "grad_norm": 406.03228759765625, "learning_rate": 6.986645749337674e-07, "loss": 18.5234, "step": 18355 }, { "epoch": 1.2191007504815037, "grad_norm": 252.62571716308594, "learning_rate": 6.985620239284783e-07, "loss": 20.2969, "step": 18356 }, { "epoch": 1.2191671647738593, "grad_norm": 171.0106201171875, "learning_rate": 6.984594764100491e-07, "loss": 12.7891, "step": 18357 }, { "epoch": 1.219233579066215, "grad_norm": 189.37356567382812, "learning_rate": 6.983569323796674e-07, "loss": 18.2031, "step": 18358 }, { "epoch": 1.2192999933585709, "grad_norm": 220.9240264892578, "learning_rate": 6.982543918385185e-07, "loss": 23.4062, "step": 18359 }, { "epoch": 1.2193664076509265, "grad_norm": 170.439453125, "learning_rate": 6.981518547877889e-07, "loss": 15.8125, "step": 18360 }, { "epoch": 1.2194328219432822, "grad_norm": 187.37399291992188, "learning_rate": 6.980493212286642e-07, "loss": 21.8203, "step": 18361 }, { "epoch": 1.2194992362356378, "grad_norm": 152.37001037597656, "learning_rate": 6.979467911623314e-07, "loss": 15.6406, "step": 18362 }, { "epoch": 1.2195656505279937, "grad_norm": 966.510009765625, "learning_rate": 6.978442645899756e-07, "loss": 20.7656, "step": 18363 }, { "epoch": 1.2196320648203494, "grad_norm": 125.5551528930664, "learning_rate": 6.977417415127833e-07, "loss": 14.8438, "step": 18364 }, { "epoch": 1.219698479112705, "grad_norm": 693.786865234375, "learning_rate": 6.976392219319399e-07, "loss": 17.3125, "step": 18365 }, { "epoch": 1.2197648934050607, "grad_norm": 177.43797302246094, "learning_rate": 6.975367058486321e-07, "loss": 25.1406, "step": 18366 }, { "epoch": 1.2198313076974165, "grad_norm": 215.53341674804688, "learning_rate": 6.97434193264045e-07, "loss": 15.7812, "step": 18367 }, { "epoch": 1.2198977219897722, "grad_norm": 222.25784301757812, "learning_rate": 6.973316841793649e-07, "loss": 14.8594, "step": 18368 }, { "epoch": 1.2199641362821279, "grad_norm": 175.32757568359375, "learning_rate": 6.972291785957769e-07, "loss": 10.3906, "step": 18369 }, { "epoch": 1.2200305505744837, "grad_norm": 179.52586364746094, "learning_rate": 6.971266765144677e-07, "loss": 17.4688, "step": 18370 }, { "epoch": 1.2200969648668394, "grad_norm": 221.98330688476562, "learning_rate": 6.97024177936622e-07, "loss": 14.4844, "step": 18371 }, { "epoch": 1.220163379159195, "grad_norm": 271.5057678222656, "learning_rate": 6.969216828634264e-07, "loss": 16.6875, "step": 18372 }, { "epoch": 1.2202297934515507, "grad_norm": 520.7045288085938, "learning_rate": 6.968191912960653e-07, "loss": 20.4688, "step": 18373 }, { "epoch": 1.2202962077439066, "grad_norm": 203.4638671875, "learning_rate": 6.967167032357258e-07, "loss": 17.1875, "step": 18374 }, { "epoch": 1.2203626220362622, "grad_norm": 338.4277038574219, "learning_rate": 6.96614218683592e-07, "loss": 16.4062, "step": 18375 }, { "epoch": 1.2204290363286179, "grad_norm": 134.6614532470703, "learning_rate": 6.965117376408502e-07, "loss": 14.7656, "step": 18376 }, { "epoch": 1.2204954506209735, "grad_norm": 119.24775695800781, "learning_rate": 6.964092601086856e-07, "loss": 16.1719, "step": 18377 }, { "epoch": 1.2205618649133294, "grad_norm": 169.04403686523438, "learning_rate": 6.963067860882839e-07, "loss": 13.9219, "step": 18378 }, { "epoch": 1.220628279205685, "grad_norm": 216.57528686523438, "learning_rate": 6.962043155808295e-07, "loss": 15.0781, "step": 18379 }, { "epoch": 1.2206946934980407, "grad_norm": 579.1102294921875, "learning_rate": 6.961018485875087e-07, "loss": 12.0156, "step": 18380 }, { "epoch": 1.2207611077903966, "grad_norm": 105.41983795166016, "learning_rate": 6.959993851095067e-07, "loss": 13.0625, "step": 18381 }, { "epoch": 1.2208275220827522, "grad_norm": 563.5646362304688, "learning_rate": 6.958969251480083e-07, "loss": 26.1562, "step": 18382 }, { "epoch": 1.220893936375108, "grad_norm": 352.7172546386719, "learning_rate": 6.95794468704199e-07, "loss": 19.25, "step": 18383 }, { "epoch": 1.2209603506674636, "grad_norm": 249.7528533935547, "learning_rate": 6.956920157792639e-07, "loss": 14.0938, "step": 18384 }, { "epoch": 1.2210267649598194, "grad_norm": 333.9798583984375, "learning_rate": 6.955895663743882e-07, "loss": 20.2812, "step": 18385 }, { "epoch": 1.221093179252175, "grad_norm": 338.7722473144531, "learning_rate": 6.954871204907564e-07, "loss": 17.9062, "step": 18386 }, { "epoch": 1.2211595935445307, "grad_norm": 149.06396484375, "learning_rate": 6.953846781295545e-07, "loss": 14.2656, "step": 18387 }, { "epoch": 1.2212260078368864, "grad_norm": 195.48817443847656, "learning_rate": 6.952822392919667e-07, "loss": 16.3594, "step": 18388 }, { "epoch": 1.2212924221292423, "grad_norm": 184.57984924316406, "learning_rate": 6.951798039791785e-07, "loss": 16.1094, "step": 18389 }, { "epoch": 1.221358836421598, "grad_norm": 256.691162109375, "learning_rate": 6.950773721923742e-07, "loss": 18.9375, "step": 18390 }, { "epoch": 1.2214252507139536, "grad_norm": 243.36734008789062, "learning_rate": 6.949749439327394e-07, "loss": 17.4922, "step": 18391 }, { "epoch": 1.2214916650063095, "grad_norm": 239.1417999267578, "learning_rate": 6.948725192014583e-07, "loss": 21.1875, "step": 18392 }, { "epoch": 1.2215580792986651, "grad_norm": 257.2270812988281, "learning_rate": 6.947700979997162e-07, "loss": 18.1562, "step": 18393 }, { "epoch": 1.2216244935910208, "grad_norm": 225.29629516601562, "learning_rate": 6.946676803286971e-07, "loss": 16.2031, "step": 18394 }, { "epoch": 1.2216909078833764, "grad_norm": 272.87872314453125, "learning_rate": 6.945652661895868e-07, "loss": 19.8906, "step": 18395 }, { "epoch": 1.2217573221757323, "grad_norm": 403.7915954589844, "learning_rate": 6.944628555835691e-07, "loss": 15.5625, "step": 18396 }, { "epoch": 1.221823736468088, "grad_norm": 438.5132141113281, "learning_rate": 6.94360448511829e-07, "loss": 17.8281, "step": 18397 }, { "epoch": 1.2218901507604436, "grad_norm": 239.1940460205078, "learning_rate": 6.94258044975551e-07, "loss": 17.125, "step": 18398 }, { "epoch": 1.2219565650527993, "grad_norm": 141.95835876464844, "learning_rate": 6.941556449759196e-07, "loss": 15.9531, "step": 18399 }, { "epoch": 1.2220229793451551, "grad_norm": 210.9256134033203, "learning_rate": 6.94053248514119e-07, "loss": 12.3281, "step": 18400 }, { "epoch": 1.2220893936375108, "grad_norm": 204.87725830078125, "learning_rate": 6.939508555913346e-07, "loss": 22.6406, "step": 18401 }, { "epoch": 1.2221558079298664, "grad_norm": 264.0321960449219, "learning_rate": 6.938484662087498e-07, "loss": 16.25, "step": 18402 }, { "epoch": 1.2222222222222223, "grad_norm": 544.3114624023438, "learning_rate": 6.937460803675497e-07, "loss": 17.8594, "step": 18403 }, { "epoch": 1.222288636514578, "grad_norm": 130.5782470703125, "learning_rate": 6.93643698068918e-07, "loss": 14.3906, "step": 18404 }, { "epoch": 1.2223550508069336, "grad_norm": 547.976318359375, "learning_rate": 6.935413193140396e-07, "loss": 17.3438, "step": 18405 }, { "epoch": 1.2224214650992893, "grad_norm": 114.00392150878906, "learning_rate": 6.934389441040985e-07, "loss": 13.3125, "step": 18406 }, { "epoch": 1.2224878793916452, "grad_norm": 297.5960388183594, "learning_rate": 6.933365724402788e-07, "loss": 17.2656, "step": 18407 }, { "epoch": 1.2225542936840008, "grad_norm": 252.17764282226562, "learning_rate": 6.932342043237647e-07, "loss": 13.9375, "step": 18408 }, { "epoch": 1.2226207079763565, "grad_norm": 704.8826904296875, "learning_rate": 6.931318397557408e-07, "loss": 25.4844, "step": 18409 }, { "epoch": 1.2226871222687121, "grad_norm": 241.3828582763672, "learning_rate": 6.930294787373905e-07, "loss": 17.3125, "step": 18410 }, { "epoch": 1.222753536561068, "grad_norm": 675.64892578125, "learning_rate": 6.929271212698984e-07, "loss": 18.9062, "step": 18411 }, { "epoch": 1.2228199508534237, "grad_norm": 164.40589904785156, "learning_rate": 6.92824767354448e-07, "loss": 14.6406, "step": 18412 }, { "epoch": 1.2228863651457793, "grad_norm": 243.51104736328125, "learning_rate": 6.927224169922239e-07, "loss": 17.1406, "step": 18413 }, { "epoch": 1.2229527794381352, "grad_norm": 531.611083984375, "learning_rate": 6.926200701844095e-07, "loss": 13.3281, "step": 18414 }, { "epoch": 1.2230191937304908, "grad_norm": 265.3531799316406, "learning_rate": 6.925177269321892e-07, "loss": 21.2031, "step": 18415 }, { "epoch": 1.2230856080228465, "grad_norm": 224.85691833496094, "learning_rate": 6.924153872367459e-07, "loss": 16.625, "step": 18416 }, { "epoch": 1.2231520223152021, "grad_norm": 237.7779083251953, "learning_rate": 6.923130510992643e-07, "loss": 19.6406, "step": 18417 }, { "epoch": 1.223218436607558, "grad_norm": 393.5396423339844, "learning_rate": 6.922107185209281e-07, "loss": 14.9375, "step": 18418 }, { "epoch": 1.2232848508999137, "grad_norm": 105.34147644042969, "learning_rate": 6.921083895029207e-07, "loss": 12.6094, "step": 18419 }, { "epoch": 1.2233512651922693, "grad_norm": 368.36767578125, "learning_rate": 6.920060640464261e-07, "loss": 24.25, "step": 18420 }, { "epoch": 1.223417679484625, "grad_norm": 204.62208557128906, "learning_rate": 6.919037421526273e-07, "loss": 19.4844, "step": 18421 }, { "epoch": 1.2234840937769809, "grad_norm": 247.30465698242188, "learning_rate": 6.918014238227088e-07, "loss": 15.6406, "step": 18422 }, { "epoch": 1.2235505080693365, "grad_norm": 303.4980163574219, "learning_rate": 6.916991090578534e-07, "loss": 16.0625, "step": 18423 }, { "epoch": 1.2236169223616922, "grad_norm": 264.0089111328125, "learning_rate": 6.915967978592453e-07, "loss": 13.7656, "step": 18424 }, { "epoch": 1.223683336654048, "grad_norm": 545.4867553710938, "learning_rate": 6.914944902280672e-07, "loss": 15.7578, "step": 18425 }, { "epoch": 1.2237497509464037, "grad_norm": 753.5519409179688, "learning_rate": 6.913921861655032e-07, "loss": 15.4531, "step": 18426 }, { "epoch": 1.2238161652387594, "grad_norm": 298.9073791503906, "learning_rate": 6.912898856727362e-07, "loss": 13.9062, "step": 18427 }, { "epoch": 1.223882579531115, "grad_norm": 127.86556243896484, "learning_rate": 6.911875887509502e-07, "loss": 16.6875, "step": 18428 }, { "epoch": 1.2239489938234709, "grad_norm": 172.74496459960938, "learning_rate": 6.910852954013277e-07, "loss": 16.0312, "step": 18429 }, { "epoch": 1.2240154081158265, "grad_norm": 326.762451171875, "learning_rate": 6.909830056250526e-07, "loss": 20.9531, "step": 18430 }, { "epoch": 1.2240818224081822, "grad_norm": 97.00283813476562, "learning_rate": 6.908807194233078e-07, "loss": 12.1094, "step": 18431 }, { "epoch": 1.2241482367005379, "grad_norm": 155.7801971435547, "learning_rate": 6.907784367972768e-07, "loss": 12.75, "step": 18432 }, { "epoch": 1.2242146509928937, "grad_norm": 260.5281677246094, "learning_rate": 6.90676157748142e-07, "loss": 18.4219, "step": 18433 }, { "epoch": 1.2242810652852494, "grad_norm": 11436.056640625, "learning_rate": 6.905738822770877e-07, "loss": 16.9062, "step": 18434 }, { "epoch": 1.224347479577605, "grad_norm": 184.26766967773438, "learning_rate": 6.904716103852958e-07, "loss": 16.0469, "step": 18435 }, { "epoch": 1.224413893869961, "grad_norm": 95.55416107177734, "learning_rate": 6.903693420739503e-07, "loss": 12.3125, "step": 18436 }, { "epoch": 1.2244803081623166, "grad_norm": 167.9170379638672, "learning_rate": 6.902670773442332e-07, "loss": 12.7344, "step": 18437 }, { "epoch": 1.2245467224546722, "grad_norm": 186.60638427734375, "learning_rate": 6.901648161973284e-07, "loss": 17.5312, "step": 18438 }, { "epoch": 1.2246131367470279, "grad_norm": 263.1343688964844, "learning_rate": 6.90062558634418e-07, "loss": 19.3906, "step": 18439 }, { "epoch": 1.2246795510393838, "grad_norm": 237.4715576171875, "learning_rate": 6.899603046566853e-07, "loss": 17.5781, "step": 18440 }, { "epoch": 1.2247459653317394, "grad_norm": 184.51878356933594, "learning_rate": 6.898580542653131e-07, "loss": 15.4531, "step": 18441 }, { "epoch": 1.224812379624095, "grad_norm": 232.2737274169922, "learning_rate": 6.897558074614842e-07, "loss": 15.5, "step": 18442 }, { "epoch": 1.2248787939164507, "grad_norm": 207.0204620361328, "learning_rate": 6.896535642463808e-07, "loss": 15.875, "step": 18443 }, { "epoch": 1.2249452082088066, "grad_norm": 137.49972534179688, "learning_rate": 6.895513246211864e-07, "loss": 15.6094, "step": 18444 }, { "epoch": 1.2250116225011622, "grad_norm": 263.00067138671875, "learning_rate": 6.894490885870831e-07, "loss": 16.9531, "step": 18445 }, { "epoch": 1.225078036793518, "grad_norm": 257.6295471191406, "learning_rate": 6.893468561452538e-07, "loss": 19.875, "step": 18446 }, { "epoch": 1.2251444510858738, "grad_norm": 180.1463165283203, "learning_rate": 6.892446272968806e-07, "loss": 16.3594, "step": 18447 }, { "epoch": 1.2252108653782294, "grad_norm": 143.04043579101562, "learning_rate": 6.891424020431467e-07, "loss": 16.375, "step": 18448 }, { "epoch": 1.225277279670585, "grad_norm": 128.77223205566406, "learning_rate": 6.890401803852341e-07, "loss": 16.1094, "step": 18449 }, { "epoch": 1.2253436939629407, "grad_norm": 169.25799560546875, "learning_rate": 6.889379623243257e-07, "loss": 18.4062, "step": 18450 }, { "epoch": 1.2254101082552966, "grad_norm": 283.89990234375, "learning_rate": 6.888357478616029e-07, "loss": 15.0625, "step": 18451 }, { "epoch": 1.2254765225476523, "grad_norm": 706.5927734375, "learning_rate": 6.887335369982494e-07, "loss": 15.9844, "step": 18452 }, { "epoch": 1.225542936840008, "grad_norm": 301.0103454589844, "learning_rate": 6.886313297354465e-07, "loss": 16.1562, "step": 18453 }, { "epoch": 1.2256093511323636, "grad_norm": 232.95196533203125, "learning_rate": 6.885291260743766e-07, "loss": 16.5625, "step": 18454 }, { "epoch": 1.2256757654247195, "grad_norm": 288.103515625, "learning_rate": 6.884269260162228e-07, "loss": 10.25, "step": 18455 }, { "epoch": 1.225742179717075, "grad_norm": 368.77008056640625, "learning_rate": 6.883247295621662e-07, "loss": 24.25, "step": 18456 }, { "epoch": 1.2258085940094308, "grad_norm": 172.2616729736328, "learning_rate": 6.882225367133898e-07, "loss": 15.6719, "step": 18457 }, { "epoch": 1.2258750083017866, "grad_norm": 146.45700073242188, "learning_rate": 6.881203474710749e-07, "loss": 10.8438, "step": 18458 }, { "epoch": 1.2259414225941423, "grad_norm": 138.88168334960938, "learning_rate": 6.880181618364041e-07, "loss": 16.0781, "step": 18459 }, { "epoch": 1.226007836886498, "grad_norm": 212.38595581054688, "learning_rate": 6.879159798105593e-07, "loss": 17.75, "step": 18460 }, { "epoch": 1.2260742511788536, "grad_norm": 180.22662353515625, "learning_rate": 6.878138013947227e-07, "loss": 14.3125, "step": 18461 }, { "epoch": 1.2261406654712095, "grad_norm": 303.3546142578125, "learning_rate": 6.87711626590076e-07, "loss": 19.5156, "step": 18462 }, { "epoch": 1.2262070797635651, "grad_norm": 282.2728271484375, "learning_rate": 6.876094553978012e-07, "loss": 18.5469, "step": 18463 }, { "epoch": 1.2262734940559208, "grad_norm": 161.0057373046875, "learning_rate": 6.875072878190795e-07, "loss": 16.6406, "step": 18464 }, { "epoch": 1.2263399083482764, "grad_norm": 181.9626007080078, "learning_rate": 6.874051238550939e-07, "loss": 13.4688, "step": 18465 }, { "epoch": 1.2264063226406323, "grad_norm": 132.5433349609375, "learning_rate": 6.873029635070254e-07, "loss": 19.1406, "step": 18466 }, { "epoch": 1.226472736932988, "grad_norm": 104.82037353515625, "learning_rate": 6.87200806776056e-07, "loss": 14.875, "step": 18467 }, { "epoch": 1.2265391512253436, "grad_norm": 106.5405502319336, "learning_rate": 6.870986536633667e-07, "loss": 15.5625, "step": 18468 }, { "epoch": 1.2266055655176995, "grad_norm": 87.8199462890625, "learning_rate": 6.869965041701404e-07, "loss": 14.2031, "step": 18469 }, { "epoch": 1.2266719798100552, "grad_norm": 1099.513916015625, "learning_rate": 6.868943582975579e-07, "loss": 15.25, "step": 18470 }, { "epoch": 1.2267383941024108, "grad_norm": 216.6953887939453, "learning_rate": 6.867922160468011e-07, "loss": 15.9219, "step": 18471 }, { "epoch": 1.2268048083947665, "grad_norm": 253.74208068847656, "learning_rate": 6.866900774190507e-07, "loss": 19.9844, "step": 18472 }, { "epoch": 1.2268712226871223, "grad_norm": 278.3248596191406, "learning_rate": 6.865879424154895e-07, "loss": 20.4375, "step": 18473 }, { "epoch": 1.226937636979478, "grad_norm": 670.3837280273438, "learning_rate": 6.864858110372979e-07, "loss": 12.0781, "step": 18474 }, { "epoch": 1.2270040512718337, "grad_norm": 156.2468719482422, "learning_rate": 6.863836832856579e-07, "loss": 10.0938, "step": 18475 }, { "epoch": 1.2270704655641893, "grad_norm": 424.47314453125, "learning_rate": 6.862815591617503e-07, "loss": 17.375, "step": 18476 }, { "epoch": 1.2271368798565452, "grad_norm": 268.89404296875, "learning_rate": 6.86179438666757e-07, "loss": 15.9531, "step": 18477 }, { "epoch": 1.2272032941489008, "grad_norm": 435.7847900390625, "learning_rate": 6.860773218018588e-07, "loss": 20.9219, "step": 18478 }, { "epoch": 1.2272697084412565, "grad_norm": 147.71827697753906, "learning_rate": 6.859752085682373e-07, "loss": 13.2188, "step": 18479 }, { "epoch": 1.2273361227336124, "grad_norm": 145.68365478515625, "learning_rate": 6.858730989670732e-07, "loss": 20.0312, "step": 18480 }, { "epoch": 1.227402537025968, "grad_norm": 225.0443572998047, "learning_rate": 6.857709929995485e-07, "loss": 18.6562, "step": 18481 }, { "epoch": 1.2274689513183237, "grad_norm": 159.73492431640625, "learning_rate": 6.856688906668433e-07, "loss": 17.2656, "step": 18482 }, { "epoch": 1.2275353656106793, "grad_norm": 175.82855224609375, "learning_rate": 6.855667919701393e-07, "loss": 27.625, "step": 18483 }, { "epoch": 1.2276017799030352, "grad_norm": 646.6146240234375, "learning_rate": 6.854646969106173e-07, "loss": 16.0469, "step": 18484 }, { "epoch": 1.2276681941953909, "grad_norm": 180.32174682617188, "learning_rate": 6.853626054894583e-07, "loss": 14.1719, "step": 18485 }, { "epoch": 1.2277346084877465, "grad_norm": 117.91899108886719, "learning_rate": 6.852605177078432e-07, "loss": 15.2812, "step": 18486 }, { "epoch": 1.2278010227801022, "grad_norm": 247.30088806152344, "learning_rate": 6.851584335669531e-07, "loss": 14.3906, "step": 18487 }, { "epoch": 1.227867437072458, "grad_norm": 315.20562744140625, "learning_rate": 6.850563530679685e-07, "loss": 16.9531, "step": 18488 }, { "epoch": 1.2279338513648137, "grad_norm": 160.11663818359375, "learning_rate": 6.849542762120701e-07, "loss": 14.6719, "step": 18489 }, { "epoch": 1.2280002656571694, "grad_norm": 193.026123046875, "learning_rate": 6.848522030004395e-07, "loss": 13.2031, "step": 18490 }, { "epoch": 1.2280666799495252, "grad_norm": 276.4999694824219, "learning_rate": 6.847501334342568e-07, "loss": 15.1719, "step": 18491 }, { "epoch": 1.2281330942418809, "grad_norm": 233.5753173828125, "learning_rate": 6.846480675147028e-07, "loss": 21.8594, "step": 18492 }, { "epoch": 1.2281995085342365, "grad_norm": 277.74981689453125, "learning_rate": 6.845460052429579e-07, "loss": 22.2656, "step": 18493 }, { "epoch": 1.2282659228265922, "grad_norm": 139.09596252441406, "learning_rate": 6.844439466202033e-07, "loss": 18.0, "step": 18494 }, { "epoch": 1.228332337118948, "grad_norm": 298.7821044921875, "learning_rate": 6.843418916476187e-07, "loss": 17.0156, "step": 18495 }, { "epoch": 1.2283987514113037, "grad_norm": 123.42842864990234, "learning_rate": 6.842398403263853e-07, "loss": 17.125, "step": 18496 }, { "epoch": 1.2284651657036594, "grad_norm": 176.62310791015625, "learning_rate": 6.841377926576832e-07, "loss": 18.7188, "step": 18497 }, { "epoch": 1.228531579996015, "grad_norm": 185.3522491455078, "learning_rate": 6.840357486426934e-07, "loss": 20.7344, "step": 18498 }, { "epoch": 1.228597994288371, "grad_norm": 231.0794677734375, "learning_rate": 6.839337082825953e-07, "loss": 17.9219, "step": 18499 }, { "epoch": 1.2286644085807266, "grad_norm": 259.49700927734375, "learning_rate": 6.838316715785703e-07, "loss": 14.0312, "step": 18500 }, { "epoch": 1.2287308228730822, "grad_norm": 377.01953125, "learning_rate": 6.837296385317977e-07, "loss": 18.5469, "step": 18501 }, { "epoch": 1.228797237165438, "grad_norm": 331.1976623535156, "learning_rate": 6.83627609143459e-07, "loss": 17.5469, "step": 18502 }, { "epoch": 1.2288636514577937, "grad_norm": 226.9539794921875, "learning_rate": 6.83525583414733e-07, "loss": 18.875, "step": 18503 }, { "epoch": 1.2289300657501494, "grad_norm": 92.389404296875, "learning_rate": 6.834235613468009e-07, "loss": 17.0312, "step": 18504 }, { "epoch": 1.228996480042505, "grad_norm": 294.57135009765625, "learning_rate": 6.833215429408424e-07, "loss": 15.5156, "step": 18505 }, { "epoch": 1.229062894334861, "grad_norm": 293.0491638183594, "learning_rate": 6.832195281980379e-07, "loss": 13.3281, "step": 18506 }, { "epoch": 1.2291293086272166, "grad_norm": 188.6357421875, "learning_rate": 6.831175171195666e-07, "loss": 15.2344, "step": 18507 }, { "epoch": 1.2291957229195722, "grad_norm": 251.82334899902344, "learning_rate": 6.830155097066098e-07, "loss": 15.4531, "step": 18508 }, { "epoch": 1.229262137211928, "grad_norm": 214.9506378173828, "learning_rate": 6.829135059603466e-07, "loss": 12.2656, "step": 18509 }, { "epoch": 1.2293285515042838, "grad_norm": 168.39222717285156, "learning_rate": 6.828115058819571e-07, "loss": 19.8984, "step": 18510 }, { "epoch": 1.2293949657966394, "grad_norm": 240.97825622558594, "learning_rate": 6.82709509472621e-07, "loss": 24.6562, "step": 18511 }, { "epoch": 1.229461380088995, "grad_norm": 242.67999267578125, "learning_rate": 6.826075167335186e-07, "loss": 15.9375, "step": 18512 }, { "epoch": 1.229527794381351, "grad_norm": 220.10726928710938, "learning_rate": 6.825055276658294e-07, "loss": 15.7031, "step": 18513 }, { "epoch": 1.2295942086737066, "grad_norm": 174.50088500976562, "learning_rate": 6.824035422707332e-07, "loss": 13.8438, "step": 18514 }, { "epoch": 1.2296606229660623, "grad_norm": 174.5514678955078, "learning_rate": 6.823015605494093e-07, "loss": 11.5938, "step": 18515 }, { "epoch": 1.229727037258418, "grad_norm": 142.1597442626953, "learning_rate": 6.821995825030382e-07, "loss": 16.4531, "step": 18516 }, { "epoch": 1.2297934515507738, "grad_norm": 126.76184844970703, "learning_rate": 6.820976081327989e-07, "loss": 17.0625, "step": 18517 }, { "epoch": 1.2298598658431295, "grad_norm": 250.80612182617188, "learning_rate": 6.819956374398714e-07, "loss": 12.6953, "step": 18518 }, { "epoch": 1.229926280135485, "grad_norm": 235.1708221435547, "learning_rate": 6.818936704254345e-07, "loss": 12.7656, "step": 18519 }, { "epoch": 1.2299926944278408, "grad_norm": 180.6484832763672, "learning_rate": 6.817917070906686e-07, "loss": 15.8438, "step": 18520 }, { "epoch": 1.2300591087201966, "grad_norm": 143.36358642578125, "learning_rate": 6.816897474367526e-07, "loss": 15.0312, "step": 18521 }, { "epoch": 1.2301255230125523, "grad_norm": 214.81752014160156, "learning_rate": 6.815877914648662e-07, "loss": 15.2969, "step": 18522 }, { "epoch": 1.230191937304908, "grad_norm": 493.1742248535156, "learning_rate": 6.814858391761884e-07, "loss": 15.1562, "step": 18523 }, { "epoch": 1.2302583515972638, "grad_norm": 146.03224182128906, "learning_rate": 6.813838905718989e-07, "loss": 12.4062, "step": 18524 }, { "epoch": 1.2303247658896195, "grad_norm": 154.4071502685547, "learning_rate": 6.812819456531766e-07, "loss": 15.4844, "step": 18525 }, { "epoch": 1.2303911801819751, "grad_norm": 326.0797119140625, "learning_rate": 6.81180004421201e-07, "loss": 15.9531, "step": 18526 }, { "epoch": 1.2304575944743308, "grad_norm": 132.95379638671875, "learning_rate": 6.810780668771516e-07, "loss": 19.0312, "step": 18527 }, { "epoch": 1.2305240087666867, "grad_norm": 186.79566955566406, "learning_rate": 6.809761330222068e-07, "loss": 14.5781, "step": 18528 }, { "epoch": 1.2305904230590423, "grad_norm": 174.69927978515625, "learning_rate": 6.808742028575465e-07, "loss": 15.5781, "step": 18529 }, { "epoch": 1.230656837351398, "grad_norm": 256.7933349609375, "learning_rate": 6.807722763843492e-07, "loss": 14.7969, "step": 18530 }, { "epoch": 1.2307232516437536, "grad_norm": 157.0043487548828, "learning_rate": 6.806703536037943e-07, "loss": 13.2969, "step": 18531 }, { "epoch": 1.2307896659361095, "grad_norm": 147.25291442871094, "learning_rate": 6.805684345170604e-07, "loss": 17.2266, "step": 18532 }, { "epoch": 1.2308560802284652, "grad_norm": 144.85972595214844, "learning_rate": 6.804665191253269e-07, "loss": 15.9531, "step": 18533 }, { "epoch": 1.2309224945208208, "grad_norm": 164.58775329589844, "learning_rate": 6.803646074297722e-07, "loss": 16.9062, "step": 18534 }, { "epoch": 1.2309889088131767, "grad_norm": 194.48373413085938, "learning_rate": 6.802626994315756e-07, "loss": 13.9844, "step": 18535 }, { "epoch": 1.2310553231055323, "grad_norm": 202.84751892089844, "learning_rate": 6.801607951319155e-07, "loss": 18.0312, "step": 18536 }, { "epoch": 1.231121737397888, "grad_norm": 264.4496765136719, "learning_rate": 6.80058894531971e-07, "loss": 18.4219, "step": 18537 }, { "epoch": 1.2311881516902436, "grad_norm": 284.94580078125, "learning_rate": 6.799569976329208e-07, "loss": 25.6875, "step": 18538 }, { "epoch": 1.2312545659825995, "grad_norm": 251.6920623779297, "learning_rate": 6.798551044359436e-07, "loss": 15.0938, "step": 18539 }, { "epoch": 1.2313209802749552, "grad_norm": 189.6193084716797, "learning_rate": 6.797532149422176e-07, "loss": 17.8125, "step": 18540 }, { "epoch": 1.2313873945673108, "grad_norm": 160.55337524414062, "learning_rate": 6.79651329152922e-07, "loss": 17.0156, "step": 18541 }, { "epoch": 1.2314538088596665, "grad_norm": 231.29812622070312, "learning_rate": 6.795494470692351e-07, "loss": 15.7656, "step": 18542 }, { "epoch": 1.2315202231520224, "grad_norm": 273.2544250488281, "learning_rate": 6.794475686923355e-07, "loss": 10.5, "step": 18543 }, { "epoch": 1.231586637444378, "grad_norm": 227.407470703125, "learning_rate": 6.793456940234011e-07, "loss": 18.4375, "step": 18544 }, { "epoch": 1.2316530517367337, "grad_norm": 332.7454833984375, "learning_rate": 6.792438230636115e-07, "loss": 14.0391, "step": 18545 }, { "epoch": 1.2317194660290895, "grad_norm": 221.0797882080078, "learning_rate": 6.791419558141437e-07, "loss": 14.1719, "step": 18546 }, { "epoch": 1.2317858803214452, "grad_norm": 388.56298828125, "learning_rate": 6.790400922761775e-07, "loss": 13.7344, "step": 18547 }, { "epoch": 1.2318522946138009, "grad_norm": 193.76705932617188, "learning_rate": 6.789382324508899e-07, "loss": 19.8281, "step": 18548 }, { "epoch": 1.2319187089061565, "grad_norm": 211.92535400390625, "learning_rate": 6.7883637633946e-07, "loss": 13.9219, "step": 18549 }, { "epoch": 1.2319851231985124, "grad_norm": 306.8650207519531, "learning_rate": 6.787345239430651e-07, "loss": 15.5938, "step": 18550 }, { "epoch": 1.232051537490868, "grad_norm": 138.7302703857422, "learning_rate": 6.786326752628846e-07, "loss": 16.4531, "step": 18551 }, { "epoch": 1.2321179517832237, "grad_norm": 173.1810302734375, "learning_rate": 6.785308303000958e-07, "loss": 12.875, "step": 18552 }, { "epoch": 1.2321843660755794, "grad_norm": 221.1193084716797, "learning_rate": 6.784289890558772e-07, "loss": 22.4844, "step": 18553 }, { "epoch": 1.2322507803679352, "grad_norm": 250.61329650878906, "learning_rate": 6.783271515314061e-07, "loss": 15.2344, "step": 18554 }, { "epoch": 1.2323171946602909, "grad_norm": 171.11138916015625, "learning_rate": 6.782253177278614e-07, "loss": 17.6875, "step": 18555 }, { "epoch": 1.2323836089526465, "grad_norm": 191.3485870361328, "learning_rate": 6.781234876464206e-07, "loss": 13.2188, "step": 18556 }, { "epoch": 1.2324500232450024, "grad_norm": 165.11094665527344, "learning_rate": 6.780216612882619e-07, "loss": 15.7969, "step": 18557 }, { "epoch": 1.232516437537358, "grad_norm": 232.5052947998047, "learning_rate": 6.779198386545625e-07, "loss": 16.5703, "step": 18558 }, { "epoch": 1.2325828518297137, "grad_norm": 201.36807250976562, "learning_rate": 6.778180197465011e-07, "loss": 14.4219, "step": 18559 }, { "epoch": 1.2326492661220694, "grad_norm": 125.7235107421875, "learning_rate": 6.77716204565255e-07, "loss": 18.4844, "step": 18560 }, { "epoch": 1.2327156804144253, "grad_norm": 193.8264923095703, "learning_rate": 6.77614393112002e-07, "loss": 19.4688, "step": 18561 }, { "epoch": 1.232782094706781, "grad_norm": 204.40408325195312, "learning_rate": 6.775125853879194e-07, "loss": 16.5938, "step": 18562 }, { "epoch": 1.2328485089991366, "grad_norm": 220.45994567871094, "learning_rate": 6.774107813941855e-07, "loss": 18.4219, "step": 18563 }, { "epoch": 1.2329149232914922, "grad_norm": 193.84031677246094, "learning_rate": 6.773089811319778e-07, "loss": 11.6016, "step": 18564 }, { "epoch": 1.232981337583848, "grad_norm": 147.85739135742188, "learning_rate": 6.772071846024736e-07, "loss": 17.3906, "step": 18565 }, { "epoch": 1.2330477518762037, "grad_norm": 187.92172241210938, "learning_rate": 6.77105391806851e-07, "loss": 16.9531, "step": 18566 }, { "epoch": 1.2331141661685594, "grad_norm": 326.4048156738281, "learning_rate": 6.770036027462863e-07, "loss": 19.25, "step": 18567 }, { "epoch": 1.2331805804609153, "grad_norm": 255.3961181640625, "learning_rate": 6.769018174219583e-07, "loss": 21.2344, "step": 18568 }, { "epoch": 1.233246994753271, "grad_norm": 300.8358459472656, "learning_rate": 6.768000358350435e-07, "loss": 23.2969, "step": 18569 }, { "epoch": 1.2333134090456266, "grad_norm": 223.9627685546875, "learning_rate": 6.766982579867195e-07, "loss": 18.6406, "step": 18570 }, { "epoch": 1.2333798233379822, "grad_norm": 126.89056396484375, "learning_rate": 6.765964838781635e-07, "loss": 12.3594, "step": 18571 }, { "epoch": 1.2334462376303381, "grad_norm": 270.3037414550781, "learning_rate": 6.764947135105532e-07, "loss": 17.9688, "step": 18572 }, { "epoch": 1.2335126519226938, "grad_norm": 199.79934692382812, "learning_rate": 6.763929468850653e-07, "loss": 15.0469, "step": 18573 }, { "epoch": 1.2335790662150494, "grad_norm": 269.3251953125, "learning_rate": 6.762911840028774e-07, "loss": 16.9375, "step": 18574 }, { "epoch": 1.233645480507405, "grad_norm": 675.4781494140625, "learning_rate": 6.76189424865166e-07, "loss": 15.125, "step": 18575 }, { "epoch": 1.233711894799761, "grad_norm": 151.97317504882812, "learning_rate": 6.76087669473109e-07, "loss": 20.375, "step": 18576 }, { "epoch": 1.2337783090921166, "grad_norm": 149.44583129882812, "learning_rate": 6.759859178278828e-07, "loss": 15.0938, "step": 18577 }, { "epoch": 1.2338447233844723, "grad_norm": 117.65778350830078, "learning_rate": 6.75884169930665e-07, "loss": 18.9219, "step": 18578 }, { "epoch": 1.2339111376768281, "grad_norm": 259.17041015625, "learning_rate": 6.757824257826317e-07, "loss": 17.5781, "step": 18579 }, { "epoch": 1.2339775519691838, "grad_norm": 245.61026000976562, "learning_rate": 6.756806853849608e-07, "loss": 16.8281, "step": 18580 }, { "epoch": 1.2340439662615394, "grad_norm": 122.52361297607422, "learning_rate": 6.755789487388286e-07, "loss": 14.625, "step": 18581 }, { "epoch": 1.234110380553895, "grad_norm": 243.19686889648438, "learning_rate": 6.754772158454122e-07, "loss": 15.3594, "step": 18582 }, { "epoch": 1.234176794846251, "grad_norm": 879.6861572265625, "learning_rate": 6.753754867058879e-07, "loss": 26.0, "step": 18583 }, { "epoch": 1.2342432091386066, "grad_norm": 305.2471618652344, "learning_rate": 6.75273761321433e-07, "loss": 18.7031, "step": 18584 }, { "epoch": 1.2343096234309623, "grad_norm": 1759.0350341796875, "learning_rate": 6.751720396932238e-07, "loss": 18.9844, "step": 18585 }, { "epoch": 1.234376037723318, "grad_norm": 147.11795043945312, "learning_rate": 6.750703218224375e-07, "loss": 12.1719, "step": 18586 }, { "epoch": 1.2344424520156738, "grad_norm": 410.6148376464844, "learning_rate": 6.749686077102501e-07, "loss": 14.1719, "step": 18587 }, { "epoch": 1.2345088663080295, "grad_norm": 143.25535583496094, "learning_rate": 6.748668973578387e-07, "loss": 16.0469, "step": 18588 }, { "epoch": 1.2345752806003851, "grad_norm": 277.7967529296875, "learning_rate": 6.747651907663789e-07, "loss": 23.9375, "step": 18589 }, { "epoch": 1.234641694892741, "grad_norm": 595.0712890625, "learning_rate": 6.746634879370484e-07, "loss": 23.4531, "step": 18590 }, { "epoch": 1.2347081091850967, "grad_norm": 246.9907989501953, "learning_rate": 6.745617888710229e-07, "loss": 12.8438, "step": 18591 }, { "epoch": 1.2347745234774523, "grad_norm": 147.58021545410156, "learning_rate": 6.744600935694791e-07, "loss": 14.0312, "step": 18592 }, { "epoch": 1.234840937769808, "grad_norm": 155.6219482421875, "learning_rate": 6.743584020335928e-07, "loss": 14.4219, "step": 18593 }, { "epoch": 1.2349073520621638, "grad_norm": 513.04150390625, "learning_rate": 6.742567142645412e-07, "loss": 27.2969, "step": 18594 }, { "epoch": 1.2349737663545195, "grad_norm": 148.0772247314453, "learning_rate": 6.741550302634998e-07, "loss": 16.9531, "step": 18595 }, { "epoch": 1.2350401806468752, "grad_norm": 129.23292541503906, "learning_rate": 6.740533500316455e-07, "loss": 15.9219, "step": 18596 }, { "epoch": 1.2351065949392308, "grad_norm": 178.37240600585938, "learning_rate": 6.739516735701535e-07, "loss": 12.4062, "step": 18597 }, { "epoch": 1.2351730092315867, "grad_norm": 207.35140991210938, "learning_rate": 6.738500008802011e-07, "loss": 16.9062, "step": 18598 }, { "epoch": 1.2352394235239423, "grad_norm": 127.12202453613281, "learning_rate": 6.737483319629635e-07, "loss": 15.0, "step": 18599 }, { "epoch": 1.235305837816298, "grad_norm": 133.00096130371094, "learning_rate": 6.736466668196168e-07, "loss": 16.125, "step": 18600 }, { "epoch": 1.2353722521086539, "grad_norm": 134.5223846435547, "learning_rate": 6.735450054513379e-07, "loss": 16.1875, "step": 18601 }, { "epoch": 1.2354386664010095, "grad_norm": 236.4583282470703, "learning_rate": 6.734433478593017e-07, "loss": 16.8438, "step": 18602 }, { "epoch": 1.2355050806933652, "grad_norm": 170.47763061523438, "learning_rate": 6.733416940446848e-07, "loss": 15.5312, "step": 18603 }, { "epoch": 1.2355714949857208, "grad_norm": 335.75079345703125, "learning_rate": 6.732400440086626e-07, "loss": 21.8438, "step": 18604 }, { "epoch": 1.2356379092780767, "grad_norm": 180.1547088623047, "learning_rate": 6.731383977524113e-07, "loss": 15.7344, "step": 18605 }, { "epoch": 1.2357043235704324, "grad_norm": 200.12588500976562, "learning_rate": 6.730367552771065e-07, "loss": 14.0156, "step": 18606 }, { "epoch": 1.235770737862788, "grad_norm": 276.47674560546875, "learning_rate": 6.729351165839241e-07, "loss": 15.0625, "step": 18607 }, { "epoch": 1.2358371521551437, "grad_norm": 185.02593994140625, "learning_rate": 6.728334816740393e-07, "loss": 14.6094, "step": 18608 }, { "epoch": 1.2359035664474995, "grad_norm": 301.7026062011719, "learning_rate": 6.727318505486288e-07, "loss": 14.5, "step": 18609 }, { "epoch": 1.2359699807398552, "grad_norm": 143.84765625, "learning_rate": 6.726302232088668e-07, "loss": 17.3594, "step": 18610 }, { "epoch": 1.2360363950322109, "grad_norm": 378.6376953125, "learning_rate": 6.7252859965593e-07, "loss": 17.3125, "step": 18611 }, { "epoch": 1.2361028093245667, "grad_norm": 183.41995239257812, "learning_rate": 6.724269798909935e-07, "loss": 15.4219, "step": 18612 }, { "epoch": 1.2361692236169224, "grad_norm": 345.75201416015625, "learning_rate": 6.723253639152329e-07, "loss": 19.875, "step": 18613 }, { "epoch": 1.236235637909278, "grad_norm": 577.0604248046875, "learning_rate": 6.722237517298231e-07, "loss": 16.4688, "step": 18614 }, { "epoch": 1.236302052201634, "grad_norm": 329.2543029785156, "learning_rate": 6.721221433359405e-07, "loss": 18.8281, "step": 18615 }, { "epoch": 1.2363684664939896, "grad_norm": 204.57131958007812, "learning_rate": 6.720205387347595e-07, "loss": 24.7969, "step": 18616 }, { "epoch": 1.2364348807863452, "grad_norm": 409.5426940917969, "learning_rate": 6.719189379274561e-07, "loss": 16.9531, "step": 18617 }, { "epoch": 1.2365012950787009, "grad_norm": 496.79254150390625, "learning_rate": 6.718173409152048e-07, "loss": 25.2812, "step": 18618 }, { "epoch": 1.2365677093710565, "grad_norm": 1259.999755859375, "learning_rate": 6.717157476991817e-07, "loss": 16.0312, "step": 18619 }, { "epoch": 1.2366341236634124, "grad_norm": 212.7269744873047, "learning_rate": 6.716141582805612e-07, "loss": 14.1406, "step": 18620 }, { "epoch": 1.236700537955768, "grad_norm": 200.657958984375, "learning_rate": 6.715125726605191e-07, "loss": 14.3125, "step": 18621 }, { "epoch": 1.2367669522481237, "grad_norm": 320.8392028808594, "learning_rate": 6.714109908402297e-07, "loss": 16.9062, "step": 18622 }, { "epoch": 1.2368333665404796, "grad_norm": 201.98126220703125, "learning_rate": 6.713094128208688e-07, "loss": 11.5078, "step": 18623 }, { "epoch": 1.2368997808328352, "grad_norm": 241.08560180664062, "learning_rate": 6.71207838603611e-07, "loss": 15.0312, "step": 18624 }, { "epoch": 1.236966195125191, "grad_norm": 181.72853088378906, "learning_rate": 6.711062681896314e-07, "loss": 17.8281, "step": 18625 }, { "epoch": 1.2370326094175468, "grad_norm": 156.38003540039062, "learning_rate": 6.710047015801046e-07, "loss": 12.2188, "step": 18626 }, { "epoch": 1.2370990237099024, "grad_norm": 229.95726013183594, "learning_rate": 6.709031387762063e-07, "loss": 17.0312, "step": 18627 }, { "epoch": 1.237165438002258, "grad_norm": 1117.0516357421875, "learning_rate": 6.7080157977911e-07, "loss": 16.5625, "step": 18628 }, { "epoch": 1.2372318522946137, "grad_norm": 221.70249938964844, "learning_rate": 6.707000245899917e-07, "loss": 22.0, "step": 18629 }, { "epoch": 1.2372982665869694, "grad_norm": 212.91290283203125, "learning_rate": 6.705984732100253e-07, "loss": 13.6094, "step": 18630 }, { "epoch": 1.2373646808793253, "grad_norm": 166.04466247558594, "learning_rate": 6.704969256403861e-07, "loss": 14.0781, "step": 18631 }, { "epoch": 1.237431095171681, "grad_norm": 351.8772277832031, "learning_rate": 6.703953818822481e-07, "loss": 15.8281, "step": 18632 }, { "epoch": 1.2374975094640366, "grad_norm": 97.08809661865234, "learning_rate": 6.702938419367865e-07, "loss": 12.5938, "step": 18633 }, { "epoch": 1.2375639237563925, "grad_norm": 155.7783966064453, "learning_rate": 6.701923058051755e-07, "loss": 15.2969, "step": 18634 }, { "epoch": 1.2376303380487481, "grad_norm": 136.4520263671875, "learning_rate": 6.700907734885901e-07, "loss": 11.9688, "step": 18635 }, { "epoch": 1.2376967523411038, "grad_norm": 176.6942596435547, "learning_rate": 6.699892449882039e-07, "loss": 14.4219, "step": 18636 }, { "epoch": 1.2377631666334596, "grad_norm": 193.5024871826172, "learning_rate": 6.698877203051918e-07, "loss": 15.0938, "step": 18637 }, { "epoch": 1.2378295809258153, "grad_norm": 193.49183654785156, "learning_rate": 6.697861994407285e-07, "loss": 13.8125, "step": 18638 }, { "epoch": 1.237895995218171, "grad_norm": 363.15716552734375, "learning_rate": 6.696846823959878e-07, "loss": 18.1875, "step": 18639 }, { "epoch": 1.2379624095105266, "grad_norm": 151.0556182861328, "learning_rate": 6.695831691721446e-07, "loss": 13.9453, "step": 18640 }, { "epoch": 1.2380288238028823, "grad_norm": 157.5080108642578, "learning_rate": 6.694816597703725e-07, "loss": 22.7969, "step": 18641 }, { "epoch": 1.2380952380952381, "grad_norm": 165.9993133544922, "learning_rate": 6.693801541918461e-07, "loss": 11.0469, "step": 18642 }, { "epoch": 1.2381616523875938, "grad_norm": 815.1029663085938, "learning_rate": 6.692786524377392e-07, "loss": 14.7969, "step": 18643 }, { "epoch": 1.2382280666799494, "grad_norm": 234.9213409423828, "learning_rate": 6.691771545092264e-07, "loss": 15.5781, "step": 18644 }, { "epoch": 1.2382944809723053, "grad_norm": 234.99673461914062, "learning_rate": 6.690756604074814e-07, "loss": 16.3125, "step": 18645 }, { "epoch": 1.238360895264661, "grad_norm": 133.98728942871094, "learning_rate": 6.689741701336784e-07, "loss": 14.6172, "step": 18646 }, { "epoch": 1.2384273095570166, "grad_norm": 139.0279541015625, "learning_rate": 6.688726836889911e-07, "loss": 16.7812, "step": 18647 }, { "epoch": 1.2384937238493725, "grad_norm": 200.80892944335938, "learning_rate": 6.687712010745939e-07, "loss": 20.0312, "step": 18648 }, { "epoch": 1.2385601381417282, "grad_norm": 135.99301147460938, "learning_rate": 6.686697222916605e-07, "loss": 14.125, "step": 18649 }, { "epoch": 1.2386265524340838, "grad_norm": 145.94093322753906, "learning_rate": 6.685682473413645e-07, "loss": 12.3906, "step": 18650 }, { "epoch": 1.2386929667264395, "grad_norm": 188.17874145507812, "learning_rate": 6.684667762248801e-07, "loss": 14.5156, "step": 18651 }, { "epoch": 1.2387593810187951, "grad_norm": 147.42010498046875, "learning_rate": 6.683653089433807e-07, "loss": 17.4219, "step": 18652 }, { "epoch": 1.238825795311151, "grad_norm": 262.9342041015625, "learning_rate": 6.682638454980401e-07, "loss": 16.8438, "step": 18653 }, { "epoch": 1.2388922096035067, "grad_norm": 173.6879119873047, "learning_rate": 6.681623858900323e-07, "loss": 17.5469, "step": 18654 }, { "epoch": 1.2389586238958623, "grad_norm": 173.93023681640625, "learning_rate": 6.680609301205304e-07, "loss": 11.5, "step": 18655 }, { "epoch": 1.2390250381882182, "grad_norm": 240.12799072265625, "learning_rate": 6.679594781907085e-07, "loss": 13.2734, "step": 18656 }, { "epoch": 1.2390914524805738, "grad_norm": 307.8880310058594, "learning_rate": 6.678580301017396e-07, "loss": 23.0938, "step": 18657 }, { "epoch": 1.2391578667729295, "grad_norm": 456.22479248046875, "learning_rate": 6.677565858547977e-07, "loss": 18.2344, "step": 18658 }, { "epoch": 1.2392242810652854, "grad_norm": 193.07325744628906, "learning_rate": 6.67655145451056e-07, "loss": 21.2812, "step": 18659 }, { "epoch": 1.239290695357641, "grad_norm": 105.06246185302734, "learning_rate": 6.675537088916881e-07, "loss": 12.3281, "step": 18660 }, { "epoch": 1.2393571096499967, "grad_norm": 214.55447387695312, "learning_rate": 6.674522761778667e-07, "loss": 17.0469, "step": 18661 }, { "epoch": 1.2394235239423523, "grad_norm": 212.41409301757812, "learning_rate": 6.673508473107661e-07, "loss": 21.7188, "step": 18662 }, { "epoch": 1.239489938234708, "grad_norm": 204.23678588867188, "learning_rate": 6.672494222915588e-07, "loss": 19.0156, "step": 18663 }, { "epoch": 1.2395563525270639, "grad_norm": 150.84962463378906, "learning_rate": 6.671480011214186e-07, "loss": 19.7656, "step": 18664 }, { "epoch": 1.2396227668194195, "grad_norm": 181.78285217285156, "learning_rate": 6.670465838015179e-07, "loss": 14.0938, "step": 18665 }, { "epoch": 1.2396891811117752, "grad_norm": 774.1065673828125, "learning_rate": 6.669451703330308e-07, "loss": 17.3906, "step": 18666 }, { "epoch": 1.239755595404131, "grad_norm": 133.9020233154297, "learning_rate": 6.668437607171298e-07, "loss": 22.3125, "step": 18667 }, { "epoch": 1.2398220096964867, "grad_norm": 203.8839569091797, "learning_rate": 6.66742354954988e-07, "loss": 12.4453, "step": 18668 }, { "epoch": 1.2398884239888424, "grad_norm": 197.05374145507812, "learning_rate": 6.666409530477784e-07, "loss": 23.1094, "step": 18669 }, { "epoch": 1.2399548382811982, "grad_norm": 785.8466186523438, "learning_rate": 6.665395549966743e-07, "loss": 19.0781, "step": 18670 }, { "epoch": 1.240021252573554, "grad_norm": 194.89271545410156, "learning_rate": 6.66438160802848e-07, "loss": 19.9062, "step": 18671 }, { "epoch": 1.2400876668659095, "grad_norm": 132.4224090576172, "learning_rate": 6.66336770467473e-07, "loss": 19.5469, "step": 18672 }, { "epoch": 1.2401540811582652, "grad_norm": 256.12896728515625, "learning_rate": 6.662353839917216e-07, "loss": 14.0625, "step": 18673 }, { "epoch": 1.2402204954506209, "grad_norm": 170.98338317871094, "learning_rate": 6.661340013767667e-07, "loss": 16.7031, "step": 18674 }, { "epoch": 1.2402869097429767, "grad_norm": 274.4854431152344, "learning_rate": 6.660326226237816e-07, "loss": 17.2656, "step": 18675 }, { "epoch": 1.2403533240353324, "grad_norm": 187.6240692138672, "learning_rate": 6.659312477339383e-07, "loss": 16.875, "step": 18676 }, { "epoch": 1.240419738327688, "grad_norm": 274.4452209472656, "learning_rate": 6.658298767084099e-07, "loss": 18.1875, "step": 18677 }, { "epoch": 1.240486152620044, "grad_norm": 241.98028564453125, "learning_rate": 6.657285095483682e-07, "loss": 17.4375, "step": 18678 }, { "epoch": 1.2405525669123996, "grad_norm": 261.1669921875, "learning_rate": 6.656271462549869e-07, "loss": 17.1875, "step": 18679 }, { "epoch": 1.2406189812047552, "grad_norm": 238.1981658935547, "learning_rate": 6.655257868294378e-07, "loss": 17.0938, "step": 18680 }, { "epoch": 1.240685395497111, "grad_norm": 194.65704345703125, "learning_rate": 6.654244312728938e-07, "loss": 20.8906, "step": 18681 }, { "epoch": 1.2407518097894668, "grad_norm": 173.8130645751953, "learning_rate": 6.653230795865266e-07, "loss": 11.9531, "step": 18682 }, { "epoch": 1.2408182240818224, "grad_norm": 220.85995483398438, "learning_rate": 6.652217317715092e-07, "loss": 15.4062, "step": 18683 }, { "epoch": 1.240884638374178, "grad_norm": 305.6994934082031, "learning_rate": 6.651203878290138e-07, "loss": 11.3672, "step": 18684 }, { "epoch": 1.2409510526665337, "grad_norm": 176.50094604492188, "learning_rate": 6.650190477602129e-07, "loss": 16.1094, "step": 18685 }, { "epoch": 1.2410174669588896, "grad_norm": 215.6669464111328, "learning_rate": 6.649177115662778e-07, "loss": 15.1094, "step": 18686 }, { "epoch": 1.2410838812512452, "grad_norm": 140.9961700439453, "learning_rate": 6.64816379248382e-07, "loss": 17.5469, "step": 18687 }, { "epoch": 1.241150295543601, "grad_norm": 114.68461608886719, "learning_rate": 6.647150508076969e-07, "loss": 12.6875, "step": 18688 }, { "epoch": 1.2412167098359568, "grad_norm": 158.83331298828125, "learning_rate": 6.64613726245395e-07, "loss": 16.75, "step": 18689 }, { "epoch": 1.2412831241283124, "grad_norm": 156.77281188964844, "learning_rate": 6.645124055626474e-07, "loss": 15.0469, "step": 18690 }, { "epoch": 1.241349538420668, "grad_norm": 272.7125244140625, "learning_rate": 6.644110887606278e-07, "loss": 17.7734, "step": 18691 }, { "epoch": 1.241415952713024, "grad_norm": 169.91741943359375, "learning_rate": 6.643097758405064e-07, "loss": 12.7812, "step": 18692 }, { "epoch": 1.2414823670053796, "grad_norm": 1669.9691162109375, "learning_rate": 6.642084668034564e-07, "loss": 13.4531, "step": 18693 }, { "epoch": 1.2415487812977353, "grad_norm": 198.3609161376953, "learning_rate": 6.641071616506491e-07, "loss": 16.9531, "step": 18694 }, { "epoch": 1.241615195590091, "grad_norm": 115.33932495117188, "learning_rate": 6.640058603832566e-07, "loss": 13.8594, "step": 18695 }, { "epoch": 1.2416816098824466, "grad_norm": 936.2240600585938, "learning_rate": 6.6390456300245e-07, "loss": 16.5938, "step": 18696 }, { "epoch": 1.2417480241748025, "grad_norm": 323.8195495605469, "learning_rate": 6.638032695094022e-07, "loss": 11.7344, "step": 18697 }, { "epoch": 1.241814438467158, "grad_norm": 171.0701141357422, "learning_rate": 6.637019799052841e-07, "loss": 16.25, "step": 18698 }, { "epoch": 1.2418808527595138, "grad_norm": 173.58197021484375, "learning_rate": 6.636006941912677e-07, "loss": 14.7188, "step": 18699 }, { "epoch": 1.2419472670518696, "grad_norm": 160.66973876953125, "learning_rate": 6.634994123685242e-07, "loss": 15.4062, "step": 18700 }, { "epoch": 1.2420136813442253, "grad_norm": 184.45779418945312, "learning_rate": 6.633981344382259e-07, "loss": 16.3125, "step": 18701 }, { "epoch": 1.242080095636581, "grad_norm": 242.94122314453125, "learning_rate": 6.632968604015436e-07, "loss": 20.3125, "step": 18702 }, { "epoch": 1.2421465099289368, "grad_norm": 271.5477294921875, "learning_rate": 6.631955902596492e-07, "loss": 16.2969, "step": 18703 }, { "epoch": 1.2422129242212925, "grad_norm": 200.15057373046875, "learning_rate": 6.630943240137137e-07, "loss": 19.3125, "step": 18704 }, { "epoch": 1.2422793385136481, "grad_norm": 250.51132202148438, "learning_rate": 6.629930616649091e-07, "loss": 18.8906, "step": 18705 }, { "epoch": 1.2423457528060038, "grad_norm": 272.4100036621094, "learning_rate": 6.628918032144063e-07, "loss": 16.9062, "step": 18706 }, { "epoch": 1.2424121670983594, "grad_norm": 250.34841918945312, "learning_rate": 6.627905486633769e-07, "loss": 17.9688, "step": 18707 }, { "epoch": 1.2424785813907153, "grad_norm": 140.55203247070312, "learning_rate": 6.626892980129916e-07, "loss": 17.3438, "step": 18708 }, { "epoch": 1.242544995683071, "grad_norm": 110.68602752685547, "learning_rate": 6.625880512644224e-07, "loss": 13.8906, "step": 18709 }, { "epoch": 1.2426114099754266, "grad_norm": 106.68917083740234, "learning_rate": 6.6248680841884e-07, "loss": 14.6406, "step": 18710 }, { "epoch": 1.2426778242677825, "grad_norm": 1380.6917724609375, "learning_rate": 6.623855694774151e-07, "loss": 11.2891, "step": 18711 }, { "epoch": 1.2427442385601382, "grad_norm": 224.2925262451172, "learning_rate": 6.622843344413199e-07, "loss": 16.7969, "step": 18712 }, { "epoch": 1.2428106528524938, "grad_norm": 241.25914001464844, "learning_rate": 6.621831033117244e-07, "loss": 15.5312, "step": 18713 }, { "epoch": 1.2428770671448497, "grad_norm": 175.43174743652344, "learning_rate": 6.620818760898003e-07, "loss": 18.3438, "step": 18714 }, { "epoch": 1.2429434814372053, "grad_norm": 534.5104370117188, "learning_rate": 6.619806527767179e-07, "loss": 20.3594, "step": 18715 }, { "epoch": 1.243009895729561, "grad_norm": 437.88873291015625, "learning_rate": 6.618794333736486e-07, "loss": 12.8281, "step": 18716 }, { "epoch": 1.2430763100219167, "grad_norm": 307.2725524902344, "learning_rate": 6.617782178817627e-07, "loss": 16.0625, "step": 18717 }, { "epoch": 1.2431427243142723, "grad_norm": 127.5473403930664, "learning_rate": 6.616770063022317e-07, "loss": 13.4375, "step": 18718 }, { "epoch": 1.2432091386066282, "grad_norm": 149.41075134277344, "learning_rate": 6.615757986362259e-07, "loss": 12.2031, "step": 18719 }, { "epoch": 1.2432755528989838, "grad_norm": 129.80540466308594, "learning_rate": 6.614745948849162e-07, "loss": 15.5, "step": 18720 }, { "epoch": 1.2433419671913395, "grad_norm": 152.7552032470703, "learning_rate": 6.613733950494728e-07, "loss": 17.375, "step": 18721 }, { "epoch": 1.2434083814836954, "grad_norm": 442.6773986816406, "learning_rate": 6.612721991310671e-07, "loss": 17.375, "step": 18722 }, { "epoch": 1.243474795776051, "grad_norm": 282.2135009765625, "learning_rate": 6.611710071308691e-07, "loss": 19.4062, "step": 18723 }, { "epoch": 1.2435412100684067, "grad_norm": 838.6262817382812, "learning_rate": 6.610698190500498e-07, "loss": 29.0, "step": 18724 }, { "epoch": 1.2436076243607626, "grad_norm": 220.59141540527344, "learning_rate": 6.609686348897788e-07, "loss": 20.8906, "step": 18725 }, { "epoch": 1.2436740386531182, "grad_norm": 889.7048950195312, "learning_rate": 6.608674546512276e-07, "loss": 13.9688, "step": 18726 }, { "epoch": 1.2437404529454739, "grad_norm": 176.61952209472656, "learning_rate": 6.607662783355661e-07, "loss": 12.375, "step": 18727 }, { "epoch": 1.2438068672378295, "grad_norm": 186.12562561035156, "learning_rate": 6.606651059439648e-07, "loss": 11.1719, "step": 18728 }, { "epoch": 1.2438732815301854, "grad_norm": 235.1989288330078, "learning_rate": 6.605639374775933e-07, "loss": 18.5312, "step": 18729 }, { "epoch": 1.243939695822541, "grad_norm": 102.98391723632812, "learning_rate": 6.604627729376231e-07, "loss": 12.2656, "step": 18730 }, { "epoch": 1.2440061101148967, "grad_norm": 239.51165771484375, "learning_rate": 6.603616123252233e-07, "loss": 19.6719, "step": 18731 }, { "epoch": 1.2440725244072524, "grad_norm": 343.7413024902344, "learning_rate": 6.60260455641565e-07, "loss": 18.6094, "step": 18732 }, { "epoch": 1.2441389386996082, "grad_norm": 156.14488220214844, "learning_rate": 6.601593028878172e-07, "loss": 13.3672, "step": 18733 }, { "epoch": 1.2442053529919639, "grad_norm": 168.43931579589844, "learning_rate": 6.600581540651513e-07, "loss": 14.7812, "step": 18734 }, { "epoch": 1.2442717672843195, "grad_norm": 170.6539306640625, "learning_rate": 6.599570091747361e-07, "loss": 16.3125, "step": 18735 }, { "epoch": 1.2443381815766754, "grad_norm": 337.6177062988281, "learning_rate": 6.598558682177426e-07, "loss": 24.5, "step": 18736 }, { "epoch": 1.244404595869031, "grad_norm": 174.88877868652344, "learning_rate": 6.597547311953399e-07, "loss": 15.2812, "step": 18737 }, { "epoch": 1.2444710101613867, "grad_norm": 399.1219482421875, "learning_rate": 6.596535981086986e-07, "loss": 20.5938, "step": 18738 }, { "epoch": 1.2445374244537424, "grad_norm": 407.1099853515625, "learning_rate": 6.595524689589878e-07, "loss": 23.8906, "step": 18739 }, { "epoch": 1.2446038387460983, "grad_norm": 125.12702178955078, "learning_rate": 6.594513437473781e-07, "loss": 17.6328, "step": 18740 }, { "epoch": 1.244670253038454, "grad_norm": 291.46343994140625, "learning_rate": 6.593502224750387e-07, "loss": 18.1719, "step": 18741 }, { "epoch": 1.2447366673308096, "grad_norm": 332.4741516113281, "learning_rate": 6.592491051431397e-07, "loss": 11.7578, "step": 18742 }, { "epoch": 1.2448030816231652, "grad_norm": 168.10671997070312, "learning_rate": 6.591479917528501e-07, "loss": 15.3281, "step": 18743 }, { "epoch": 1.244869495915521, "grad_norm": 146.8859405517578, "learning_rate": 6.590468823053403e-07, "loss": 15.4688, "step": 18744 }, { "epoch": 1.2449359102078768, "grad_norm": 224.12132263183594, "learning_rate": 6.589457768017797e-07, "loss": 16.8438, "step": 18745 }, { "epoch": 1.2450023245002324, "grad_norm": 169.2655029296875, "learning_rate": 6.588446752433376e-07, "loss": 20.1562, "step": 18746 }, { "epoch": 1.2450687387925883, "grad_norm": 147.2586212158203, "learning_rate": 6.587435776311832e-07, "loss": 18.4844, "step": 18747 }, { "epoch": 1.245135153084944, "grad_norm": 319.4734191894531, "learning_rate": 6.586424839664866e-07, "loss": 16.8906, "step": 18748 }, { "epoch": 1.2452015673772996, "grad_norm": 524.7647705078125, "learning_rate": 6.585413942504171e-07, "loss": 16.2812, "step": 18749 }, { "epoch": 1.2452679816696552, "grad_norm": 147.2221221923828, "learning_rate": 6.584403084841434e-07, "loss": 12.6016, "step": 18750 }, { "epoch": 1.2453343959620111, "grad_norm": 242.45724487304688, "learning_rate": 6.583392266688356e-07, "loss": 12.9219, "step": 18751 }, { "epoch": 1.2454008102543668, "grad_norm": 258.68121337890625, "learning_rate": 6.582381488056626e-07, "loss": 33.0156, "step": 18752 }, { "epoch": 1.2454672245467224, "grad_norm": 340.0829772949219, "learning_rate": 6.581370748957937e-07, "loss": 16.7891, "step": 18753 }, { "epoch": 1.245533638839078, "grad_norm": 251.8162841796875, "learning_rate": 6.580360049403977e-07, "loss": 14.75, "step": 18754 }, { "epoch": 1.245600053131434, "grad_norm": 150.1481170654297, "learning_rate": 6.579349389406446e-07, "loss": 16.4844, "step": 18755 }, { "epoch": 1.2456664674237896, "grad_norm": 323.5795593261719, "learning_rate": 6.578338768977021e-07, "loss": 29.875, "step": 18756 }, { "epoch": 1.2457328817161453, "grad_norm": 180.22653198242188, "learning_rate": 6.577328188127404e-07, "loss": 16.9375, "step": 18757 }, { "epoch": 1.2457992960085011, "grad_norm": 168.59739685058594, "learning_rate": 6.57631764686928e-07, "loss": 16.4219, "step": 18758 }, { "epoch": 1.2458657103008568, "grad_norm": 678.8681030273438, "learning_rate": 6.575307145214343e-07, "loss": 12.6562, "step": 18759 }, { "epoch": 1.2459321245932125, "grad_norm": 141.3552703857422, "learning_rate": 6.574296683174272e-07, "loss": 13.0156, "step": 18760 }, { "epoch": 1.245998538885568, "grad_norm": 127.51820373535156, "learning_rate": 6.573286260760766e-07, "loss": 16.5156, "step": 18761 }, { "epoch": 1.246064953177924, "grad_norm": 272.6967468261719, "learning_rate": 6.572275877985507e-07, "loss": 17.6562, "step": 18762 }, { "epoch": 1.2461313674702796, "grad_norm": 190.8177947998047, "learning_rate": 6.571265534860185e-07, "loss": 21.5156, "step": 18763 }, { "epoch": 1.2461977817626353, "grad_norm": 217.68756103515625, "learning_rate": 6.570255231396483e-07, "loss": 18.6562, "step": 18764 }, { "epoch": 1.246264196054991, "grad_norm": 250.3018341064453, "learning_rate": 6.569244967606096e-07, "loss": 14.7812, "step": 18765 }, { "epoch": 1.2463306103473468, "grad_norm": 167.56350708007812, "learning_rate": 6.568234743500701e-07, "loss": 13.7344, "step": 18766 }, { "epoch": 1.2463970246397025, "grad_norm": 224.10971069335938, "learning_rate": 6.567224559091991e-07, "loss": 19.125, "step": 18767 }, { "epoch": 1.2464634389320581, "grad_norm": 299.5075988769531, "learning_rate": 6.566214414391642e-07, "loss": 21.5312, "step": 18768 }, { "epoch": 1.246529853224414, "grad_norm": 224.10614013671875, "learning_rate": 6.56520430941135e-07, "loss": 15.0703, "step": 18769 }, { "epoch": 1.2465962675167697, "grad_norm": 171.54013061523438, "learning_rate": 6.56419424416279e-07, "loss": 19.2812, "step": 18770 }, { "epoch": 1.2466626818091253, "grad_norm": 165.50784301757812, "learning_rate": 6.563184218657655e-07, "loss": 18.0312, "step": 18771 }, { "epoch": 1.246729096101481, "grad_norm": 116.08321380615234, "learning_rate": 6.562174232907617e-07, "loss": 11.3516, "step": 18772 }, { "epoch": 1.2467955103938368, "grad_norm": 745.333251953125, "learning_rate": 6.56116428692437e-07, "loss": 22.7656, "step": 18773 }, { "epoch": 1.2468619246861925, "grad_norm": 495.19189453125, "learning_rate": 6.560154380719589e-07, "loss": 16.1719, "step": 18774 }, { "epoch": 1.2469283389785482, "grad_norm": 165.2052459716797, "learning_rate": 6.559144514304962e-07, "loss": 23.2656, "step": 18775 }, { "epoch": 1.2469947532709038, "grad_norm": 538.7367553710938, "learning_rate": 6.558134687692163e-07, "loss": 27.6875, "step": 18776 }, { "epoch": 1.2470611675632597, "grad_norm": 151.02133178710938, "learning_rate": 6.557124900892881e-07, "loss": 14.0312, "step": 18777 }, { "epoch": 1.2471275818556153, "grad_norm": 373.21124267578125, "learning_rate": 6.556115153918787e-07, "loss": 20.2188, "step": 18778 }, { "epoch": 1.247193996147971, "grad_norm": 326.8614196777344, "learning_rate": 6.555105446781573e-07, "loss": 14.3125, "step": 18779 }, { "epoch": 1.2472604104403269, "grad_norm": 390.2535705566406, "learning_rate": 6.55409577949291e-07, "loss": 16.0312, "step": 18780 }, { "epoch": 1.2473268247326825, "grad_norm": 247.12362670898438, "learning_rate": 6.553086152064482e-07, "loss": 16.8203, "step": 18781 }, { "epoch": 1.2473932390250382, "grad_norm": 140.52456665039062, "learning_rate": 6.552076564507961e-07, "loss": 13.7031, "step": 18782 }, { "epoch": 1.2474596533173938, "grad_norm": 223.7956085205078, "learning_rate": 6.551067016835034e-07, "loss": 13.1719, "step": 18783 }, { "epoch": 1.2475260676097497, "grad_norm": 235.9989471435547, "learning_rate": 6.550057509057374e-07, "loss": 17.0781, "step": 18784 }, { "epoch": 1.2475924819021054, "grad_norm": 218.4537811279297, "learning_rate": 6.549048041186657e-07, "loss": 14.5625, "step": 18785 }, { "epoch": 1.247658896194461, "grad_norm": 257.2552185058594, "learning_rate": 6.548038613234568e-07, "loss": 20.4375, "step": 18786 }, { "epoch": 1.2477253104868167, "grad_norm": 300.7508544921875, "learning_rate": 6.547029225212774e-07, "loss": 32.4844, "step": 18787 }, { "epoch": 1.2477917247791726, "grad_norm": 267.9399719238281, "learning_rate": 6.546019877132957e-07, "loss": 18.8984, "step": 18788 }, { "epoch": 1.2478581390715282, "grad_norm": 1090.9151611328125, "learning_rate": 6.545010569006786e-07, "loss": 19.4609, "step": 18789 }, { "epoch": 1.2479245533638839, "grad_norm": 139.3411865234375, "learning_rate": 6.544001300845945e-07, "loss": 17.4766, "step": 18790 }, { "epoch": 1.2479909676562397, "grad_norm": 439.3275451660156, "learning_rate": 6.542992072662103e-07, "loss": 13.9062, "step": 18791 }, { "epoch": 1.2480573819485954, "grad_norm": 123.44405364990234, "learning_rate": 6.541982884466937e-07, "loss": 18.9844, "step": 18792 }, { "epoch": 1.248123796240951, "grad_norm": 227.68746948242188, "learning_rate": 6.540973736272113e-07, "loss": 24.5469, "step": 18793 }, { "epoch": 1.2481902105333067, "grad_norm": 153.93948364257812, "learning_rate": 6.539964628089316e-07, "loss": 11.375, "step": 18794 }, { "epoch": 1.2482566248256626, "grad_norm": 185.02955627441406, "learning_rate": 6.538955559930211e-07, "loss": 16.3281, "step": 18795 }, { "epoch": 1.2483230391180182, "grad_norm": 185.8226776123047, "learning_rate": 6.537946531806473e-07, "loss": 13.8125, "step": 18796 }, { "epoch": 1.2483894534103739, "grad_norm": 186.0641632080078, "learning_rate": 6.536937543729771e-07, "loss": 16.75, "step": 18797 }, { "epoch": 1.2484558677027295, "grad_norm": 171.84010314941406, "learning_rate": 6.535928595711784e-07, "loss": 17.1719, "step": 18798 }, { "epoch": 1.2485222819950854, "grad_norm": 215.5784149169922, "learning_rate": 6.534919687764171e-07, "loss": 15.7188, "step": 18799 }, { "epoch": 1.248588696287441, "grad_norm": 419.77728271484375, "learning_rate": 6.533910819898612e-07, "loss": 20.5, "step": 18800 }, { "epoch": 1.2486551105797967, "grad_norm": 145.4680938720703, "learning_rate": 6.532901992126773e-07, "loss": 12.75, "step": 18801 }, { "epoch": 1.2487215248721526, "grad_norm": 361.9874572753906, "learning_rate": 6.531893204460328e-07, "loss": 17.1172, "step": 18802 }, { "epoch": 1.2487879391645083, "grad_norm": 331.1067810058594, "learning_rate": 6.530884456910937e-07, "loss": 13.7188, "step": 18803 }, { "epoch": 1.248854353456864, "grad_norm": 115.7475814819336, "learning_rate": 6.529875749490279e-07, "loss": 13.7656, "step": 18804 }, { "epoch": 1.2489207677492196, "grad_norm": 190.2462158203125, "learning_rate": 6.528867082210015e-07, "loss": 18.9062, "step": 18805 }, { "epoch": 1.2489871820415754, "grad_norm": 257.1759338378906, "learning_rate": 6.527858455081816e-07, "loss": 18.7969, "step": 18806 }, { "epoch": 1.249053596333931, "grad_norm": 162.86447143554688, "learning_rate": 6.526849868117345e-07, "loss": 16.0, "step": 18807 }, { "epoch": 1.2491200106262867, "grad_norm": 199.04176330566406, "learning_rate": 6.525841321328276e-07, "loss": 20.8906, "step": 18808 }, { "epoch": 1.2491864249186424, "grad_norm": 409.2171936035156, "learning_rate": 6.524832814726268e-07, "loss": 22.6875, "step": 18809 }, { "epoch": 1.2492528392109983, "grad_norm": 208.2623748779297, "learning_rate": 6.523824348322995e-07, "loss": 17.375, "step": 18810 }, { "epoch": 1.249319253503354, "grad_norm": 426.3310546875, "learning_rate": 6.522815922130111e-07, "loss": 15.6719, "step": 18811 }, { "epoch": 1.2493856677957096, "grad_norm": 160.5722198486328, "learning_rate": 6.521807536159293e-07, "loss": 12.5234, "step": 18812 }, { "epoch": 1.2494520820880655, "grad_norm": 336.7317810058594, "learning_rate": 6.520799190422197e-07, "loss": 13.4688, "step": 18813 }, { "epoch": 1.2495184963804211, "grad_norm": 445.7084045410156, "learning_rate": 6.519790884930492e-07, "loss": 13.7656, "step": 18814 }, { "epoch": 1.2495849106727768, "grad_norm": 357.3811340332031, "learning_rate": 6.518782619695836e-07, "loss": 18.0312, "step": 18815 }, { "epoch": 1.2496513249651324, "grad_norm": 288.002685546875, "learning_rate": 6.517774394729897e-07, "loss": 14.8281, "step": 18816 }, { "epoch": 1.2497177392574883, "grad_norm": 166.90921020507812, "learning_rate": 6.516766210044335e-07, "loss": 15.5312, "step": 18817 }, { "epoch": 1.249784153549844, "grad_norm": 331.8792419433594, "learning_rate": 6.515758065650816e-07, "loss": 23.4062, "step": 18818 }, { "epoch": 1.2498505678421996, "grad_norm": 129.2334747314453, "learning_rate": 6.514749961560995e-07, "loss": 17.8594, "step": 18819 }, { "epoch": 1.2499169821345553, "grad_norm": 396.24066162109375, "learning_rate": 6.513741897786536e-07, "loss": 20.2188, "step": 18820 }, { "epoch": 1.2499833964269111, "grad_norm": 349.559326171875, "learning_rate": 6.512733874339102e-07, "loss": 16.5, "step": 18821 }, { "epoch": 1.2500498107192668, "grad_norm": 209.40615844726562, "learning_rate": 6.511725891230353e-07, "loss": 19.5938, "step": 18822 }, { "epoch": 1.2501162250116225, "grad_norm": 232.99322509765625, "learning_rate": 6.510717948471947e-07, "loss": 20.2031, "step": 18823 }, { "epoch": 1.2501826393039783, "grad_norm": 251.16061401367188, "learning_rate": 6.50971004607554e-07, "loss": 15.9062, "step": 18824 }, { "epoch": 1.250249053596334, "grad_norm": 816.8379516601562, "learning_rate": 6.5087021840528e-07, "loss": 16.0, "step": 18825 }, { "epoch": 1.2503154678886896, "grad_norm": 104.87820434570312, "learning_rate": 6.507694362415377e-07, "loss": 13.8906, "step": 18826 }, { "epoch": 1.2503818821810453, "grad_norm": 147.9222412109375, "learning_rate": 6.506686581174934e-07, "loss": 15.1562, "step": 18827 }, { "epoch": 1.250448296473401, "grad_norm": 176.16030883789062, "learning_rate": 6.505678840343123e-07, "loss": 13.9844, "step": 18828 }, { "epoch": 1.2505147107657568, "grad_norm": 140.27261352539062, "learning_rate": 6.504671139931607e-07, "loss": 17.9688, "step": 18829 }, { "epoch": 1.2505811250581125, "grad_norm": 180.2552947998047, "learning_rate": 6.503663479952038e-07, "loss": 13.0, "step": 18830 }, { "epoch": 1.2506475393504681, "grad_norm": 162.4140625, "learning_rate": 6.502655860416077e-07, "loss": 15.125, "step": 18831 }, { "epoch": 1.250713953642824, "grad_norm": 191.2261962890625, "learning_rate": 6.50164828133537e-07, "loss": 16.3125, "step": 18832 }, { "epoch": 1.2507803679351797, "grad_norm": 164.53819274902344, "learning_rate": 6.500640742721584e-07, "loss": 16.1094, "step": 18833 }, { "epoch": 1.2508467822275353, "grad_norm": 1710.238037109375, "learning_rate": 6.499633244586366e-07, "loss": 13.625, "step": 18834 }, { "epoch": 1.2509131965198912, "grad_norm": 271.0054016113281, "learning_rate": 6.498625786941374e-07, "loss": 17.5781, "step": 18835 }, { "epoch": 1.2509796108122468, "grad_norm": 120.44408416748047, "learning_rate": 6.497618369798255e-07, "loss": 11.2188, "step": 18836 }, { "epoch": 1.2510460251046025, "grad_norm": 154.54454040527344, "learning_rate": 6.496610993168672e-07, "loss": 16.9531, "step": 18837 }, { "epoch": 1.2511124393969582, "grad_norm": 253.79736328125, "learning_rate": 6.495603657064272e-07, "loss": 18.6875, "step": 18838 }, { "epoch": 1.2511788536893138, "grad_norm": 264.6788635253906, "learning_rate": 6.494596361496709e-07, "loss": 17.9375, "step": 18839 }, { "epoch": 1.2512452679816697, "grad_norm": 1769.2083740234375, "learning_rate": 6.493589106477632e-07, "loss": 31.6094, "step": 18840 }, { "epoch": 1.2513116822740253, "grad_norm": 301.6267395019531, "learning_rate": 6.492581892018696e-07, "loss": 17.9375, "step": 18841 }, { "epoch": 1.251378096566381, "grad_norm": 436.2171325683594, "learning_rate": 6.491574718131547e-07, "loss": 20.5938, "step": 18842 }, { "epoch": 1.2514445108587369, "grad_norm": 309.3599548339844, "learning_rate": 6.490567584827842e-07, "loss": 16.2188, "step": 18843 }, { "epoch": 1.2515109251510925, "grad_norm": 322.9537353515625, "learning_rate": 6.489560492119224e-07, "loss": 18.1406, "step": 18844 }, { "epoch": 1.2515773394434482, "grad_norm": 189.3126678466797, "learning_rate": 6.48855344001735e-07, "loss": 16.1406, "step": 18845 }, { "epoch": 1.251643753735804, "grad_norm": 238.7117919921875, "learning_rate": 6.48754642853386e-07, "loss": 13.6094, "step": 18846 }, { "epoch": 1.2517101680281597, "grad_norm": 185.0677490234375, "learning_rate": 6.486539457680413e-07, "loss": 17.3438, "step": 18847 }, { "epoch": 1.2517765823205154, "grad_norm": 259.12847900390625, "learning_rate": 6.485532527468648e-07, "loss": 12.1641, "step": 18848 }, { "epoch": 1.251842996612871, "grad_norm": 228.21920776367188, "learning_rate": 6.484525637910218e-07, "loss": 14.0312, "step": 18849 }, { "epoch": 1.2519094109052267, "grad_norm": 280.39776611328125, "learning_rate": 6.483518789016767e-07, "loss": 15.4062, "step": 18850 }, { "epoch": 1.2519758251975825, "grad_norm": 241.5608367919922, "learning_rate": 6.482511980799944e-07, "loss": 19.3125, "step": 18851 }, { "epoch": 1.2520422394899382, "grad_norm": 152.3676300048828, "learning_rate": 6.481505213271392e-07, "loss": 18.8438, "step": 18852 }, { "epoch": 1.2521086537822939, "grad_norm": 246.13499450683594, "learning_rate": 6.480498486442763e-07, "loss": 12.7891, "step": 18853 }, { "epoch": 1.2521750680746497, "grad_norm": 182.88479614257812, "learning_rate": 6.479491800325692e-07, "loss": 15.375, "step": 18854 }, { "epoch": 1.2522414823670054, "grad_norm": 226.5944366455078, "learning_rate": 6.478485154931835e-07, "loss": 14.1094, "step": 18855 }, { "epoch": 1.252307896659361, "grad_norm": 166.83934020996094, "learning_rate": 6.477478550272829e-07, "loss": 15.5, "step": 18856 }, { "epoch": 1.252374310951717, "grad_norm": 320.997802734375, "learning_rate": 6.476471986360318e-07, "loss": 12.0312, "step": 18857 }, { "epoch": 1.2524407252440726, "grad_norm": 186.8944549560547, "learning_rate": 6.475465463205951e-07, "loss": 13.9375, "step": 18858 }, { "epoch": 1.2525071395364282, "grad_norm": 121.71649932861328, "learning_rate": 6.474458980821365e-07, "loss": 13.25, "step": 18859 }, { "epoch": 1.2525735538287839, "grad_norm": 204.05564880371094, "learning_rate": 6.473452539218207e-07, "loss": 16.1484, "step": 18860 }, { "epoch": 1.2526399681211395, "grad_norm": 126.90174102783203, "learning_rate": 6.472446138408111e-07, "loss": 13.9375, "step": 18861 }, { "epoch": 1.2527063824134954, "grad_norm": 241.9027862548828, "learning_rate": 6.471439778402732e-07, "loss": 28.0, "step": 18862 }, { "epoch": 1.252772796705851, "grad_norm": 390.75433349609375, "learning_rate": 6.470433459213696e-07, "loss": 21.7031, "step": 18863 }, { "epoch": 1.2528392109982067, "grad_norm": 455.3348083496094, "learning_rate": 6.469427180852656e-07, "loss": 16.6875, "step": 18864 }, { "epoch": 1.2529056252905626, "grad_norm": 285.273193359375, "learning_rate": 6.468420943331243e-07, "loss": 13.8906, "step": 18865 }, { "epoch": 1.2529720395829183, "grad_norm": 289.49395751953125, "learning_rate": 6.467414746661102e-07, "loss": 24.6875, "step": 18866 }, { "epoch": 1.253038453875274, "grad_norm": 293.79217529296875, "learning_rate": 6.466408590853867e-07, "loss": 16.4766, "step": 18867 }, { "epoch": 1.2531048681676298, "grad_norm": 313.5965881347656, "learning_rate": 6.465402475921182e-07, "loss": 19.8438, "step": 18868 }, { "epoch": 1.2531712824599854, "grad_norm": 218.4955596923828, "learning_rate": 6.464396401874684e-07, "loss": 20.0312, "step": 18869 }, { "epoch": 1.253237696752341, "grad_norm": 197.70620727539062, "learning_rate": 6.46339036872601e-07, "loss": 13.0781, "step": 18870 }, { "epoch": 1.2533041110446967, "grad_norm": 266.6448059082031, "learning_rate": 6.462384376486792e-07, "loss": 16.3438, "step": 18871 }, { "epoch": 1.2533705253370524, "grad_norm": 430.48492431640625, "learning_rate": 6.461378425168677e-07, "loss": 17.125, "step": 18872 }, { "epoch": 1.2534369396294083, "grad_norm": 183.54208374023438, "learning_rate": 6.460372514783293e-07, "loss": 19.5, "step": 18873 }, { "epoch": 1.253503353921764, "grad_norm": 172.35740661621094, "learning_rate": 6.459366645342282e-07, "loss": 22.0938, "step": 18874 }, { "epoch": 1.2535697682141196, "grad_norm": 140.77413940429688, "learning_rate": 6.45836081685727e-07, "loss": 12.3281, "step": 18875 }, { "epoch": 1.2536361825064755, "grad_norm": 98.16136169433594, "learning_rate": 6.457355029339904e-07, "loss": 10.4062, "step": 18876 }, { "epoch": 1.2537025967988311, "grad_norm": 165.02284240722656, "learning_rate": 6.456349282801809e-07, "loss": 11.1094, "step": 18877 }, { "epoch": 1.2537690110911868, "grad_norm": 290.4444885253906, "learning_rate": 6.455343577254623e-07, "loss": 17.5, "step": 18878 }, { "epoch": 1.2538354253835426, "grad_norm": 182.70094299316406, "learning_rate": 6.454337912709978e-07, "loss": 16.6562, "step": 18879 }, { "epoch": 1.2539018396758983, "grad_norm": 222.892578125, "learning_rate": 6.453332289179507e-07, "loss": 16.3125, "step": 18880 }, { "epoch": 1.253968253968254, "grad_norm": 334.3403625488281, "learning_rate": 6.452326706674844e-07, "loss": 19.0312, "step": 18881 }, { "epoch": 1.2540346682606096, "grad_norm": 114.57257843017578, "learning_rate": 6.451321165207621e-07, "loss": 13.9609, "step": 18882 }, { "epoch": 1.2541010825529653, "grad_norm": 1506.1617431640625, "learning_rate": 6.450315664789467e-07, "loss": 13.7578, "step": 18883 }, { "epoch": 1.2541674968453211, "grad_norm": 153.88890075683594, "learning_rate": 6.449310205432017e-07, "loss": 20.6562, "step": 18884 }, { "epoch": 1.2542339111376768, "grad_norm": 197.57211303710938, "learning_rate": 6.448304787146893e-07, "loss": 14.1094, "step": 18885 }, { "epoch": 1.2543003254300324, "grad_norm": 251.8444061279297, "learning_rate": 6.447299409945737e-07, "loss": 17.7656, "step": 18886 }, { "epoch": 1.2543667397223883, "grad_norm": 612.1528930664062, "learning_rate": 6.446294073840171e-07, "loss": 17.9219, "step": 18887 }, { "epoch": 1.254433154014744, "grad_norm": 184.1601104736328, "learning_rate": 6.445288778841826e-07, "loss": 15.0469, "step": 18888 }, { "epoch": 1.2544995683070996, "grad_norm": 337.45562744140625, "learning_rate": 6.444283524962328e-07, "loss": 19.875, "step": 18889 }, { "epoch": 1.2545659825994555, "grad_norm": 175.70437622070312, "learning_rate": 6.443278312213312e-07, "loss": 12.4375, "step": 18890 }, { "epoch": 1.2546323968918112, "grad_norm": 540.2696533203125, "learning_rate": 6.4422731406064e-07, "loss": 32.6094, "step": 18891 }, { "epoch": 1.2546988111841668, "grad_norm": 345.0538024902344, "learning_rate": 6.441268010153222e-07, "loss": 21.4531, "step": 18892 }, { "epoch": 1.2547652254765225, "grad_norm": 410.2553405761719, "learning_rate": 6.4402629208654e-07, "loss": 19.2969, "step": 18893 }, { "epoch": 1.2548316397688781, "grad_norm": 259.5600280761719, "learning_rate": 6.439257872754564e-07, "loss": 16.5312, "step": 18894 }, { "epoch": 1.254898054061234, "grad_norm": 174.93502807617188, "learning_rate": 6.438252865832344e-07, "loss": 16.1406, "step": 18895 }, { "epoch": 1.2549644683535897, "grad_norm": 99.7132797241211, "learning_rate": 6.437247900110357e-07, "loss": 10.2734, "step": 18896 }, { "epoch": 1.2550308826459453, "grad_norm": 240.56130981445312, "learning_rate": 6.436242975600235e-07, "loss": 20.7188, "step": 18897 }, { "epoch": 1.2550972969383012, "grad_norm": 442.78387451171875, "learning_rate": 6.435238092313598e-07, "loss": 28.7812, "step": 18898 }, { "epoch": 1.2551637112306568, "grad_norm": 374.3314514160156, "learning_rate": 6.434233250262071e-07, "loss": 19.3125, "step": 18899 }, { "epoch": 1.2552301255230125, "grad_norm": 144.22335815429688, "learning_rate": 6.433228449457275e-07, "loss": 13.4375, "step": 18900 }, { "epoch": 1.2552965398153684, "grad_norm": 167.32666015625, "learning_rate": 6.432223689910841e-07, "loss": 19.0, "step": 18901 }, { "epoch": 1.255362954107724, "grad_norm": 153.4652557373047, "learning_rate": 6.431218971634382e-07, "loss": 12.4219, "step": 18902 }, { "epoch": 1.2554293684000797, "grad_norm": 242.18312072753906, "learning_rate": 6.430214294639528e-07, "loss": 11.8828, "step": 18903 }, { "epoch": 1.2554957826924353, "grad_norm": 121.38558197021484, "learning_rate": 6.429209658937893e-07, "loss": 14.0938, "step": 18904 }, { "epoch": 1.255562196984791, "grad_norm": 228.60060119628906, "learning_rate": 6.428205064541104e-07, "loss": 19.3594, "step": 18905 }, { "epoch": 1.2556286112771469, "grad_norm": 563.9012451171875, "learning_rate": 6.427200511460775e-07, "loss": 15.4375, "step": 18906 }, { "epoch": 1.2556950255695025, "grad_norm": 291.9184265136719, "learning_rate": 6.426195999708535e-07, "loss": 18.8438, "step": 18907 }, { "epoch": 1.2557614398618582, "grad_norm": 386.59881591796875, "learning_rate": 6.425191529295997e-07, "loss": 21.9375, "step": 18908 }, { "epoch": 1.255827854154214, "grad_norm": 225.6519775390625, "learning_rate": 6.424187100234784e-07, "loss": 20.25, "step": 18909 }, { "epoch": 1.2558942684465697, "grad_norm": 769.546630859375, "learning_rate": 6.423182712536508e-07, "loss": 24.6953, "step": 18910 }, { "epoch": 1.2559606827389254, "grad_norm": 135.22601318359375, "learning_rate": 6.422178366212796e-07, "loss": 12.1562, "step": 18911 }, { "epoch": 1.2560270970312812, "grad_norm": 796.8382568359375, "learning_rate": 6.421174061275261e-07, "loss": 18.5625, "step": 18912 }, { "epoch": 1.256093511323637, "grad_norm": 272.3433837890625, "learning_rate": 6.42016979773552e-07, "loss": 17.7969, "step": 18913 }, { "epoch": 1.2561599256159925, "grad_norm": 137.1748046875, "learning_rate": 6.419165575605188e-07, "loss": 12.5156, "step": 18914 }, { "epoch": 1.2562263399083482, "grad_norm": 136.83148193359375, "learning_rate": 6.418161394895889e-07, "loss": 14.4531, "step": 18915 }, { "epoch": 1.2562927542007039, "grad_norm": 385.31787109375, "learning_rate": 6.41715725561923e-07, "loss": 18.2812, "step": 18916 }, { "epoch": 1.2563591684930597, "grad_norm": 254.78628540039062, "learning_rate": 6.416153157786832e-07, "loss": 18.6562, "step": 18917 }, { "epoch": 1.2564255827854154, "grad_norm": 309.2674560546875, "learning_rate": 6.415149101410305e-07, "loss": 22.7812, "step": 18918 }, { "epoch": 1.256491997077771, "grad_norm": 393.8423156738281, "learning_rate": 6.414145086501268e-07, "loss": 17.125, "step": 18919 }, { "epoch": 1.256558411370127, "grad_norm": 243.62672424316406, "learning_rate": 6.413141113071332e-07, "loss": 19.4688, "step": 18920 }, { "epoch": 1.2566248256624826, "grad_norm": 386.1367492675781, "learning_rate": 6.412137181132113e-07, "loss": 21.1562, "step": 18921 }, { "epoch": 1.2566912399548382, "grad_norm": 236.27146911621094, "learning_rate": 6.411133290695219e-07, "loss": 13.2188, "step": 18922 }, { "epoch": 1.256757654247194, "grad_norm": 254.75881958007812, "learning_rate": 6.410129441772272e-07, "loss": 15.5938, "step": 18923 }, { "epoch": 1.2568240685395498, "grad_norm": 402.1883544921875, "learning_rate": 6.40912563437487e-07, "loss": 13.5547, "step": 18924 }, { "epoch": 1.2568904828319054, "grad_norm": 1421.6104736328125, "learning_rate": 6.408121868514637e-07, "loss": 14.125, "step": 18925 }, { "epoch": 1.256956897124261, "grad_norm": 306.79620361328125, "learning_rate": 6.407118144203176e-07, "loss": 13.9531, "step": 18926 }, { "epoch": 1.2570233114166167, "grad_norm": 302.2480163574219, "learning_rate": 6.406114461452103e-07, "loss": 22.4531, "step": 18927 }, { "epoch": 1.2570897257089726, "grad_norm": 217.47462463378906, "learning_rate": 6.405110820273023e-07, "loss": 20.4375, "step": 18928 }, { "epoch": 1.2571561400013282, "grad_norm": 248.52072143554688, "learning_rate": 6.40410722067755e-07, "loss": 15.1406, "step": 18929 }, { "epoch": 1.257222554293684, "grad_norm": 309.20245361328125, "learning_rate": 6.403103662677291e-07, "loss": 15.8125, "step": 18930 }, { "epoch": 1.2572889685860398, "grad_norm": 200.88682556152344, "learning_rate": 6.402100146283851e-07, "loss": 12.0938, "step": 18931 }, { "epoch": 1.2573553828783954, "grad_norm": 212.20034790039062, "learning_rate": 6.401096671508848e-07, "loss": 15.9219, "step": 18932 }, { "epoch": 1.257421797170751, "grad_norm": 90.3613052368164, "learning_rate": 6.400093238363879e-07, "loss": 10.2188, "step": 18933 }, { "epoch": 1.257488211463107, "grad_norm": 274.9687194824219, "learning_rate": 6.399089846860559e-07, "loss": 18.5, "step": 18934 }, { "epoch": 1.2575546257554626, "grad_norm": 149.4337615966797, "learning_rate": 6.398086497010487e-07, "loss": 18.6094, "step": 18935 }, { "epoch": 1.2576210400478183, "grad_norm": 142.8098602294922, "learning_rate": 6.397083188825277e-07, "loss": 18.1562, "step": 18936 }, { "epoch": 1.257687454340174, "grad_norm": 135.44943237304688, "learning_rate": 6.396079922316531e-07, "loss": 17.1562, "step": 18937 }, { "epoch": 1.2577538686325298, "grad_norm": 151.3843231201172, "learning_rate": 6.395076697495854e-07, "loss": 15.5156, "step": 18938 }, { "epoch": 1.2578202829248855, "grad_norm": 180.14706420898438, "learning_rate": 6.394073514374848e-07, "loss": 15.0625, "step": 18939 }, { "epoch": 1.2578866972172411, "grad_norm": 198.314208984375, "learning_rate": 6.393070372965123e-07, "loss": 24.0, "step": 18940 }, { "epoch": 1.2579531115095968, "grad_norm": 206.1505889892578, "learning_rate": 6.392067273278281e-07, "loss": 19.5469, "step": 18941 }, { "epoch": 1.2580195258019526, "grad_norm": 214.98548889160156, "learning_rate": 6.391064215325925e-07, "loss": 18.25, "step": 18942 }, { "epoch": 1.2580859400943083, "grad_norm": 248.20419311523438, "learning_rate": 6.390061199119652e-07, "loss": 17.0781, "step": 18943 }, { "epoch": 1.258152354386664, "grad_norm": 263.7267761230469, "learning_rate": 6.389058224671075e-07, "loss": 15.5781, "step": 18944 }, { "epoch": 1.2582187686790198, "grad_norm": 158.4006805419922, "learning_rate": 6.388055291991786e-07, "loss": 12.3047, "step": 18945 }, { "epoch": 1.2582851829713755, "grad_norm": 183.50230407714844, "learning_rate": 6.387052401093393e-07, "loss": 16.7344, "step": 18946 }, { "epoch": 1.2583515972637311, "grad_norm": 155.1532440185547, "learning_rate": 6.386049551987495e-07, "loss": 15.4688, "step": 18947 }, { "epoch": 1.2584180115560868, "grad_norm": 219.83364868164062, "learning_rate": 6.385046744685691e-07, "loss": 17.375, "step": 18948 }, { "epoch": 1.2584844258484427, "grad_norm": 185.94808959960938, "learning_rate": 6.38404397919958e-07, "loss": 12.5625, "step": 18949 }, { "epoch": 1.2585508401407983, "grad_norm": 155.2758026123047, "learning_rate": 6.383041255540767e-07, "loss": 13.1562, "step": 18950 }, { "epoch": 1.258617254433154, "grad_norm": 1293.79541015625, "learning_rate": 6.382038573720844e-07, "loss": 17.4062, "step": 18951 }, { "epoch": 1.2586836687255096, "grad_norm": 108.59557342529297, "learning_rate": 6.381035933751416e-07, "loss": 17.3594, "step": 18952 }, { "epoch": 1.2587500830178655, "grad_norm": 270.560791015625, "learning_rate": 6.38003333564407e-07, "loss": 15.8438, "step": 18953 }, { "epoch": 1.2588164973102212, "grad_norm": 288.0169372558594, "learning_rate": 6.37903077941042e-07, "loss": 14.2344, "step": 18954 }, { "epoch": 1.2588829116025768, "grad_norm": 157.3824462890625, "learning_rate": 6.378028265062047e-07, "loss": 16.9688, "step": 18955 }, { "epoch": 1.2589493258949327, "grad_norm": 246.74815368652344, "learning_rate": 6.37702579261056e-07, "loss": 16.0469, "step": 18956 }, { "epoch": 1.2590157401872883, "grad_norm": 332.591064453125, "learning_rate": 6.376023362067544e-07, "loss": 16.2031, "step": 18957 }, { "epoch": 1.259082154479644, "grad_norm": 122.59634399414062, "learning_rate": 6.375020973444605e-07, "loss": 14.2344, "step": 18958 }, { "epoch": 1.2591485687719997, "grad_norm": 324.7799987792969, "learning_rate": 6.37401862675333e-07, "loss": 17.6406, "step": 18959 }, { "epoch": 1.2592149830643555, "grad_norm": 156.3879852294922, "learning_rate": 6.37301632200532e-07, "loss": 15.0625, "step": 18960 }, { "epoch": 1.2592813973567112, "grad_norm": 119.0185546875, "learning_rate": 6.372014059212161e-07, "loss": 15.4688, "step": 18961 }, { "epoch": 1.2593478116490668, "grad_norm": 742.763427734375, "learning_rate": 6.371011838385454e-07, "loss": 15.3125, "step": 18962 }, { "epoch": 1.2594142259414225, "grad_norm": 204.06375122070312, "learning_rate": 6.37000965953679e-07, "loss": 13.6562, "step": 18963 }, { "epoch": 1.2594806402337784, "grad_norm": 237.8753662109375, "learning_rate": 6.369007522677761e-07, "loss": 18.7656, "step": 18964 }, { "epoch": 1.259547054526134, "grad_norm": 163.2668914794922, "learning_rate": 6.368005427819956e-07, "loss": 16.9141, "step": 18965 }, { "epoch": 1.2596134688184897, "grad_norm": 636.5507202148438, "learning_rate": 6.367003374974976e-07, "loss": 16.9844, "step": 18966 }, { "epoch": 1.2596798831108456, "grad_norm": 283.63836669921875, "learning_rate": 6.366001364154401e-07, "loss": 25.0781, "step": 18967 }, { "epoch": 1.2597462974032012, "grad_norm": 315.4850158691406, "learning_rate": 6.364999395369826e-07, "loss": 13.7344, "step": 18968 }, { "epoch": 1.2598127116955569, "grad_norm": 279.7513122558594, "learning_rate": 6.363997468632846e-07, "loss": 18.7812, "step": 18969 }, { "epoch": 1.2598791259879125, "grad_norm": 375.4681396484375, "learning_rate": 6.362995583955042e-07, "loss": 22.8906, "step": 18970 }, { "epoch": 1.2599455402802684, "grad_norm": 173.76779174804688, "learning_rate": 6.361993741348013e-07, "loss": 16.3438, "step": 18971 }, { "epoch": 1.260011954572624, "grad_norm": 166.32496643066406, "learning_rate": 6.360991940823339e-07, "loss": 14.9219, "step": 18972 }, { "epoch": 1.2600783688649797, "grad_norm": 239.095458984375, "learning_rate": 6.359990182392616e-07, "loss": 21.125, "step": 18973 }, { "epoch": 1.2601447831573354, "grad_norm": 181.26351928710938, "learning_rate": 6.358988466067421e-07, "loss": 20.75, "step": 18974 }, { "epoch": 1.2602111974496912, "grad_norm": 252.56146240234375, "learning_rate": 6.357986791859354e-07, "loss": 14.5156, "step": 18975 }, { "epoch": 1.2602776117420469, "grad_norm": 380.8543395996094, "learning_rate": 6.356985159779993e-07, "loss": 20.125, "step": 18976 }, { "epoch": 1.2603440260344025, "grad_norm": 611.7814331054688, "learning_rate": 6.355983569840928e-07, "loss": 17.7344, "step": 18977 }, { "epoch": 1.2604104403267584, "grad_norm": 716.378173828125, "learning_rate": 6.354982022053741e-07, "loss": 24.1094, "step": 18978 }, { "epoch": 1.260476854619114, "grad_norm": 161.95635986328125, "learning_rate": 6.353980516430024e-07, "loss": 12.7188, "step": 18979 }, { "epoch": 1.2605432689114697, "grad_norm": 759.0195922851562, "learning_rate": 6.352979052981356e-07, "loss": 13.6094, "step": 18980 }, { "epoch": 1.2606096832038254, "grad_norm": 206.7086639404297, "learning_rate": 6.351977631719326e-07, "loss": 21.9062, "step": 18981 }, { "epoch": 1.2606760974961813, "grad_norm": 108.58806610107422, "learning_rate": 6.350976252655509e-07, "loss": 17.3438, "step": 18982 }, { "epoch": 1.260742511788537, "grad_norm": 109.46709442138672, "learning_rate": 6.349974915801499e-07, "loss": 14.0312, "step": 18983 }, { "epoch": 1.2608089260808926, "grad_norm": 102.52484130859375, "learning_rate": 6.348973621168874e-07, "loss": 12.2812, "step": 18984 }, { "epoch": 1.2608753403732482, "grad_norm": 290.165771484375, "learning_rate": 6.347972368769217e-07, "loss": 15.5781, "step": 18985 }, { "epoch": 1.260941754665604, "grad_norm": 126.33294677734375, "learning_rate": 6.346971158614108e-07, "loss": 10.8594, "step": 18986 }, { "epoch": 1.2610081689579598, "grad_norm": 251.77459716796875, "learning_rate": 6.345969990715135e-07, "loss": 15.3125, "step": 18987 }, { "epoch": 1.2610745832503154, "grad_norm": 373.88250732421875, "learning_rate": 6.344968865083868e-07, "loss": 14.8594, "step": 18988 }, { "epoch": 1.2611409975426713, "grad_norm": 185.9934844970703, "learning_rate": 6.343967781731898e-07, "loss": 12.7969, "step": 18989 }, { "epoch": 1.261207411835027, "grad_norm": 242.905029296875, "learning_rate": 6.342966740670797e-07, "loss": 23.1562, "step": 18990 }, { "epoch": 1.2612738261273826, "grad_norm": 91.95494842529297, "learning_rate": 6.341965741912152e-07, "loss": 11.9375, "step": 18991 }, { "epoch": 1.2613402404197382, "grad_norm": 164.55502319335938, "learning_rate": 6.340964785467533e-07, "loss": 23.875, "step": 18992 }, { "epoch": 1.2614066547120941, "grad_norm": 178.2082977294922, "learning_rate": 6.339963871348528e-07, "loss": 19.3438, "step": 18993 }, { "epoch": 1.2614730690044498, "grad_norm": 157.32359313964844, "learning_rate": 6.338962999566708e-07, "loss": 13.2969, "step": 18994 }, { "epoch": 1.2615394832968054, "grad_norm": 210.39759826660156, "learning_rate": 6.337962170133655e-07, "loss": 15.2969, "step": 18995 }, { "epoch": 1.261605897589161, "grad_norm": 265.3944396972656, "learning_rate": 6.336961383060939e-07, "loss": 18.7969, "step": 18996 }, { "epoch": 1.261672311881517, "grad_norm": 106.41735076904297, "learning_rate": 6.335960638360147e-07, "loss": 15.7969, "step": 18997 }, { "epoch": 1.2617387261738726, "grad_norm": 188.04843139648438, "learning_rate": 6.334959936042848e-07, "loss": 17.2031, "step": 18998 }, { "epoch": 1.2618051404662283, "grad_norm": 167.060791015625, "learning_rate": 6.33395927612062e-07, "loss": 13.3906, "step": 18999 }, { "epoch": 1.2618715547585841, "grad_norm": 137.8876953125, "learning_rate": 6.332958658605032e-07, "loss": 14.7656, "step": 19000 }, { "epoch": 1.2619379690509398, "grad_norm": 197.23782348632812, "learning_rate": 6.331958083507669e-07, "loss": 17.9219, "step": 19001 }, { "epoch": 1.2620043833432955, "grad_norm": 141.23367309570312, "learning_rate": 6.330957550840097e-07, "loss": 16.7188, "step": 19002 }, { "epoch": 1.262070797635651, "grad_norm": 99.96627807617188, "learning_rate": 6.329957060613896e-07, "loss": 14.1562, "step": 19003 }, { "epoch": 1.262137211928007, "grad_norm": 217.45242309570312, "learning_rate": 6.32895661284063e-07, "loss": 18.0312, "step": 19004 }, { "epoch": 1.2622036262203626, "grad_norm": 331.1907958984375, "learning_rate": 6.327956207531879e-07, "loss": 19.5625, "step": 19005 }, { "epoch": 1.2622700405127183, "grad_norm": 281.75970458984375, "learning_rate": 6.326955844699216e-07, "loss": 30.2109, "step": 19006 }, { "epoch": 1.262336454805074, "grad_norm": 144.51161193847656, "learning_rate": 6.325955524354205e-07, "loss": 13.9531, "step": 19007 }, { "epoch": 1.2624028690974298, "grad_norm": 140.295654296875, "learning_rate": 6.32495524650843e-07, "loss": 13.5625, "step": 19008 }, { "epoch": 1.2624692833897855, "grad_norm": 169.36563110351562, "learning_rate": 6.323955011173445e-07, "loss": 12.4141, "step": 19009 }, { "epoch": 1.2625356976821411, "grad_norm": 184.64952087402344, "learning_rate": 6.322954818360834e-07, "loss": 16.3438, "step": 19010 }, { "epoch": 1.262602111974497, "grad_norm": 129.81478881835938, "learning_rate": 6.321954668082159e-07, "loss": 12.5781, "step": 19011 }, { "epoch": 1.2626685262668527, "grad_norm": 110.04569244384766, "learning_rate": 6.320954560348993e-07, "loss": 16.2969, "step": 19012 }, { "epoch": 1.2627349405592083, "grad_norm": 1323.859619140625, "learning_rate": 6.3199544951729e-07, "loss": 18.0938, "step": 19013 }, { "epoch": 1.2628013548515642, "grad_norm": 471.9472961425781, "learning_rate": 6.318954472565455e-07, "loss": 17.1094, "step": 19014 }, { "epoch": 1.2628677691439198, "grad_norm": 232.65867614746094, "learning_rate": 6.31795449253822e-07, "loss": 17.5625, "step": 19015 }, { "epoch": 1.2629341834362755, "grad_norm": 120.71823120117188, "learning_rate": 6.316954555102767e-07, "loss": 15.7969, "step": 19016 }, { "epoch": 1.2630005977286312, "grad_norm": 323.1208801269531, "learning_rate": 6.315954660270654e-07, "loss": 12.75, "step": 19017 }, { "epoch": 1.2630670120209868, "grad_norm": 342.40277099609375, "learning_rate": 6.314954808053458e-07, "loss": 17.3203, "step": 19018 }, { "epoch": 1.2631334263133427, "grad_norm": 250.70567321777344, "learning_rate": 6.31395499846274e-07, "loss": 15.875, "step": 19019 }, { "epoch": 1.2631998406056983, "grad_norm": 318.6698303222656, "learning_rate": 6.312955231510064e-07, "loss": 20.1562, "step": 19020 }, { "epoch": 1.263266254898054, "grad_norm": 122.22766876220703, "learning_rate": 6.311955507206994e-07, "loss": 14.75, "step": 19021 }, { "epoch": 1.2633326691904099, "grad_norm": 238.6510772705078, "learning_rate": 6.310955825565099e-07, "loss": 18.9844, "step": 19022 }, { "epoch": 1.2633990834827655, "grad_norm": 231.9083251953125, "learning_rate": 6.309956186595938e-07, "loss": 19.2031, "step": 19023 }, { "epoch": 1.2634654977751212, "grad_norm": 363.76287841796875, "learning_rate": 6.308956590311079e-07, "loss": 17.4219, "step": 19024 }, { "epoch": 1.263531912067477, "grad_norm": 570.5718383789062, "learning_rate": 6.307957036722076e-07, "loss": 22.2812, "step": 19025 }, { "epoch": 1.2635983263598327, "grad_norm": 239.2099609375, "learning_rate": 6.306957525840502e-07, "loss": 26.0312, "step": 19026 }, { "epoch": 1.2636647406521884, "grad_norm": 173.13958740234375, "learning_rate": 6.305958057677912e-07, "loss": 21.2344, "step": 19027 }, { "epoch": 1.263731154944544, "grad_norm": 337.6837463378906, "learning_rate": 6.30495863224587e-07, "loss": 22.3281, "step": 19028 }, { "epoch": 1.2637975692368997, "grad_norm": 182.4094696044922, "learning_rate": 6.303959249555932e-07, "loss": 19.125, "step": 19029 }, { "epoch": 1.2638639835292556, "grad_norm": 335.1646423339844, "learning_rate": 6.302959909619669e-07, "loss": 16.8125, "step": 19030 }, { "epoch": 1.2639303978216112, "grad_norm": 155.75393676757812, "learning_rate": 6.30196061244863e-07, "loss": 13.9375, "step": 19031 }, { "epoch": 1.2639968121139669, "grad_norm": 199.5777587890625, "learning_rate": 6.300961358054379e-07, "loss": 17.8125, "step": 19032 }, { "epoch": 1.2640632264063227, "grad_norm": 677.537353515625, "learning_rate": 6.299962146448472e-07, "loss": 17.3906, "step": 19033 }, { "epoch": 1.2641296406986784, "grad_norm": 131.03379821777344, "learning_rate": 6.298962977642472e-07, "loss": 13.4531, "step": 19034 }, { "epoch": 1.264196054991034, "grad_norm": 142.6115264892578, "learning_rate": 6.297963851647931e-07, "loss": 12.8906, "step": 19035 }, { "epoch": 1.26426246928339, "grad_norm": 119.50983428955078, "learning_rate": 6.296964768476413e-07, "loss": 13.4062, "step": 19036 }, { "epoch": 1.2643288835757456, "grad_norm": 134.37161254882812, "learning_rate": 6.295965728139469e-07, "loss": 15.2188, "step": 19037 }, { "epoch": 1.2643952978681012, "grad_norm": 218.88304138183594, "learning_rate": 6.294966730648661e-07, "loss": 17.1406, "step": 19038 }, { "epoch": 1.2644617121604569, "grad_norm": 210.82420349121094, "learning_rate": 6.293967776015535e-07, "loss": 17.8281, "step": 19039 }, { "epoch": 1.2645281264528125, "grad_norm": 147.08055114746094, "learning_rate": 6.292968864251657e-07, "loss": 14.7656, "step": 19040 }, { "epoch": 1.2645945407451684, "grad_norm": 255.20114135742188, "learning_rate": 6.291969995368578e-07, "loss": 15.1875, "step": 19041 }, { "epoch": 1.264660955037524, "grad_norm": 232.51934814453125, "learning_rate": 6.290971169377848e-07, "loss": 16.4375, "step": 19042 }, { "epoch": 1.2647273693298797, "grad_norm": 346.6658935546875, "learning_rate": 6.289972386291029e-07, "loss": 13.2188, "step": 19043 }, { "epoch": 1.2647937836222356, "grad_norm": 225.1104278564453, "learning_rate": 6.288973646119669e-07, "loss": 14.7031, "step": 19044 }, { "epoch": 1.2648601979145913, "grad_norm": 132.4659423828125, "learning_rate": 6.287974948875323e-07, "loss": 12.6875, "step": 19045 }, { "epoch": 1.264926612206947, "grad_norm": 105.14098358154297, "learning_rate": 6.286976294569538e-07, "loss": 16.625, "step": 19046 }, { "epoch": 1.2649930264993028, "grad_norm": 152.7415008544922, "learning_rate": 6.285977683213874e-07, "loss": 14.3594, "step": 19047 }, { "epoch": 1.2650594407916584, "grad_norm": 458.8930358886719, "learning_rate": 6.284979114819877e-07, "loss": 17.2344, "step": 19048 }, { "epoch": 1.265125855084014, "grad_norm": 169.97328186035156, "learning_rate": 6.283980589399103e-07, "loss": 20.8281, "step": 19049 }, { "epoch": 1.2651922693763697, "grad_norm": 136.4248809814453, "learning_rate": 6.282982106963093e-07, "loss": 14.9531, "step": 19050 }, { "epoch": 1.2652586836687254, "grad_norm": 554.1668701171875, "learning_rate": 6.28198366752341e-07, "loss": 15.2422, "step": 19051 }, { "epoch": 1.2653250979610813, "grad_norm": 154.46246337890625, "learning_rate": 6.280985271091587e-07, "loss": 18.1094, "step": 19052 }, { "epoch": 1.265391512253437, "grad_norm": 132.3347930908203, "learning_rate": 6.27998691767919e-07, "loss": 16.3906, "step": 19053 }, { "epoch": 1.2654579265457926, "grad_norm": 544.3385009765625, "learning_rate": 6.278988607297755e-07, "loss": 23.7188, "step": 19054 }, { "epoch": 1.2655243408381485, "grad_norm": 896.4783325195312, "learning_rate": 6.277990339958837e-07, "loss": 16.8594, "step": 19055 }, { "epoch": 1.2655907551305041, "grad_norm": 146.4831085205078, "learning_rate": 6.276992115673973e-07, "loss": 15.9062, "step": 19056 }, { "epoch": 1.2656571694228598, "grad_norm": 138.11065673828125, "learning_rate": 6.275993934454726e-07, "loss": 10.8828, "step": 19057 }, { "epoch": 1.2657235837152156, "grad_norm": 115.99187469482422, "learning_rate": 6.27499579631263e-07, "loss": 13.6094, "step": 19058 }, { "epoch": 1.2657899980075713, "grad_norm": 212.53515625, "learning_rate": 6.273997701259236e-07, "loss": 18.7656, "step": 19059 }, { "epoch": 1.265856412299927, "grad_norm": 159.24554443359375, "learning_rate": 6.272999649306085e-07, "loss": 16.0312, "step": 19060 }, { "epoch": 1.2659228265922826, "grad_norm": 647.6237182617188, "learning_rate": 6.272001640464729e-07, "loss": 14.8438, "step": 19061 }, { "epoch": 1.2659892408846383, "grad_norm": 209.73008728027344, "learning_rate": 6.271003674746705e-07, "loss": 17.9688, "step": 19062 }, { "epoch": 1.2660556551769941, "grad_norm": 226.6430206298828, "learning_rate": 6.270005752163563e-07, "loss": 19.6406, "step": 19063 }, { "epoch": 1.2661220694693498, "grad_norm": 106.24896240234375, "learning_rate": 6.26900787272684e-07, "loss": 12.5, "step": 19064 }, { "epoch": 1.2661884837617055, "grad_norm": 272.2342529296875, "learning_rate": 6.268010036448087e-07, "loss": 18.1719, "step": 19065 }, { "epoch": 1.2662548980540613, "grad_norm": 164.6473388671875, "learning_rate": 6.267012243338838e-07, "loss": 16.8594, "step": 19066 }, { "epoch": 1.266321312346417, "grad_norm": 251.87559509277344, "learning_rate": 6.266014493410643e-07, "loss": 16.5156, "step": 19067 }, { "epoch": 1.2663877266387726, "grad_norm": 263.6546630859375, "learning_rate": 6.265016786675035e-07, "loss": 18.6562, "step": 19068 }, { "epoch": 1.2664541409311285, "grad_norm": 244.72840881347656, "learning_rate": 6.264019123143563e-07, "loss": 13.9688, "step": 19069 }, { "epoch": 1.2665205552234842, "grad_norm": 91.27214813232422, "learning_rate": 6.26302150282776e-07, "loss": 16.5312, "step": 19070 }, { "epoch": 1.2665869695158398, "grad_norm": 201.7585906982422, "learning_rate": 6.262023925739173e-07, "loss": 14.1875, "step": 19071 }, { "epoch": 1.2666533838081955, "grad_norm": 482.957275390625, "learning_rate": 6.261026391889334e-07, "loss": 24.5156, "step": 19072 }, { "epoch": 1.2667197981005511, "grad_norm": 267.2728576660156, "learning_rate": 6.260028901289789e-07, "loss": 16.6719, "step": 19073 }, { "epoch": 1.266786212392907, "grad_norm": 141.4888916015625, "learning_rate": 6.259031453952068e-07, "loss": 14.4844, "step": 19074 }, { "epoch": 1.2668526266852627, "grad_norm": 245.865234375, "learning_rate": 6.258034049887719e-07, "loss": 16.9531, "step": 19075 }, { "epoch": 1.2669190409776183, "grad_norm": 145.61126708984375, "learning_rate": 6.257036689108273e-07, "loss": 14.4688, "step": 19076 }, { "epoch": 1.2669854552699742, "grad_norm": 119.7602767944336, "learning_rate": 6.256039371625269e-07, "loss": 14.1094, "step": 19077 }, { "epoch": 1.2670518695623298, "grad_norm": 207.00189208984375, "learning_rate": 6.255042097450238e-07, "loss": 15.5156, "step": 19078 }, { "epoch": 1.2671182838546855, "grad_norm": 222.56361389160156, "learning_rate": 6.254044866594725e-07, "loss": 16.6875, "step": 19079 }, { "epoch": 1.2671846981470414, "grad_norm": 249.54315185546875, "learning_rate": 6.253047679070262e-07, "loss": 15.25, "step": 19080 }, { "epoch": 1.267251112439397, "grad_norm": 306.4901123046875, "learning_rate": 6.252050534888378e-07, "loss": 14.0, "step": 19081 }, { "epoch": 1.2673175267317527, "grad_norm": 182.6408233642578, "learning_rate": 6.251053434060616e-07, "loss": 14.8906, "step": 19082 }, { "epoch": 1.2673839410241083, "grad_norm": 326.96142578125, "learning_rate": 6.250056376598505e-07, "loss": 19.9766, "step": 19083 }, { "epoch": 1.267450355316464, "grad_norm": 211.3432159423828, "learning_rate": 6.249059362513582e-07, "loss": 18.0625, "step": 19084 }, { "epoch": 1.2675167696088199, "grad_norm": 373.31683349609375, "learning_rate": 6.248062391817372e-07, "loss": 12.0312, "step": 19085 }, { "epoch": 1.2675831839011755, "grad_norm": 134.44110107421875, "learning_rate": 6.247065464521417e-07, "loss": 15.9375, "step": 19086 }, { "epoch": 1.2676495981935312, "grad_norm": 269.93017578125, "learning_rate": 6.246068580637242e-07, "loss": 15.8906, "step": 19087 }, { "epoch": 1.267716012485887, "grad_norm": 253.045166015625, "learning_rate": 6.245071740176384e-07, "loss": 17.9375, "step": 19088 }, { "epoch": 1.2677824267782427, "grad_norm": 155.9531707763672, "learning_rate": 6.244074943150368e-07, "loss": 12.7031, "step": 19089 }, { "epoch": 1.2678488410705984, "grad_norm": 103.66617584228516, "learning_rate": 6.24307818957073e-07, "loss": 14.5469, "step": 19090 }, { "epoch": 1.2679152553629542, "grad_norm": 481.3315734863281, "learning_rate": 6.242081479448996e-07, "loss": 16.9297, "step": 19091 }, { "epoch": 1.26798166965531, "grad_norm": 181.00657653808594, "learning_rate": 6.241084812796698e-07, "loss": 16.375, "step": 19092 }, { "epoch": 1.2680480839476655, "grad_norm": 316.0494079589844, "learning_rate": 6.240088189625361e-07, "loss": 23.9375, "step": 19093 }, { "epoch": 1.2681144982400212, "grad_norm": 136.04632568359375, "learning_rate": 6.239091609946518e-07, "loss": 17.875, "step": 19094 }, { "epoch": 1.2681809125323769, "grad_norm": 127.51958465576172, "learning_rate": 6.23809507377169e-07, "loss": 15.0625, "step": 19095 }, { "epoch": 1.2682473268247327, "grad_norm": 245.722900390625, "learning_rate": 6.237098581112413e-07, "loss": 16.0781, "step": 19096 }, { "epoch": 1.2683137411170884, "grad_norm": 374.5370178222656, "learning_rate": 6.236102131980209e-07, "loss": 15.4688, "step": 19097 }, { "epoch": 1.268380155409444, "grad_norm": 189.74063110351562, "learning_rate": 6.235105726386606e-07, "loss": 18.5781, "step": 19098 }, { "epoch": 1.2684465697018, "grad_norm": 174.478515625, "learning_rate": 6.234109364343125e-07, "loss": 19.75, "step": 19099 }, { "epoch": 1.2685129839941556, "grad_norm": 194.9842071533203, "learning_rate": 6.2331130458613e-07, "loss": 20.125, "step": 19100 }, { "epoch": 1.2685793982865112, "grad_norm": 380.48089599609375, "learning_rate": 6.232116770952649e-07, "loss": 16.0312, "step": 19101 }, { "epoch": 1.268645812578867, "grad_norm": 131.87815856933594, "learning_rate": 6.2311205396287e-07, "loss": 14.3125, "step": 19102 }, { "epoch": 1.2687122268712228, "grad_norm": 135.82888793945312, "learning_rate": 6.230124351900972e-07, "loss": 16.8906, "step": 19103 }, { "epoch": 1.2687786411635784, "grad_norm": 413.4534912109375, "learning_rate": 6.229128207780996e-07, "loss": 17.0, "step": 19104 }, { "epoch": 1.268845055455934, "grad_norm": 124.49569702148438, "learning_rate": 6.228132107280287e-07, "loss": 13.2969, "step": 19105 }, { "epoch": 1.2689114697482897, "grad_norm": 224.00433349609375, "learning_rate": 6.227136050410375e-07, "loss": 14.0, "step": 19106 }, { "epoch": 1.2689778840406456, "grad_norm": 251.63299560546875, "learning_rate": 6.226140037182772e-07, "loss": 16.3281, "step": 19107 }, { "epoch": 1.2690442983330013, "grad_norm": 167.33897399902344, "learning_rate": 6.22514406760901e-07, "loss": 13.1406, "step": 19108 }, { "epoch": 1.269110712625357, "grad_norm": 175.7450408935547, "learning_rate": 6.2241481417006e-07, "loss": 14.4922, "step": 19109 }, { "epoch": 1.2691771269177128, "grad_norm": 154.41993713378906, "learning_rate": 6.223152259469072e-07, "loss": 14.9062, "step": 19110 }, { "epoch": 1.2692435412100684, "grad_norm": 249.42950439453125, "learning_rate": 6.222156420925934e-07, "loss": 17.5469, "step": 19111 }, { "epoch": 1.269309955502424, "grad_norm": 200.9296112060547, "learning_rate": 6.221160626082722e-07, "loss": 19.6875, "step": 19112 }, { "epoch": 1.26937636979478, "grad_norm": 148.79270935058594, "learning_rate": 6.220164874950936e-07, "loss": 14.5469, "step": 19113 }, { "epoch": 1.2694427840871356, "grad_norm": 229.6375732421875, "learning_rate": 6.219169167542107e-07, "loss": 15.8906, "step": 19114 }, { "epoch": 1.2695091983794913, "grad_norm": 190.00027465820312, "learning_rate": 6.218173503867749e-07, "loss": 12.8594, "step": 19115 }, { "epoch": 1.269575612671847, "grad_norm": 281.09979248046875, "learning_rate": 6.217177883939375e-07, "loss": 20.6719, "step": 19116 }, { "epoch": 1.2696420269642026, "grad_norm": 240.61817932128906, "learning_rate": 6.216182307768511e-07, "loss": 15.9219, "step": 19117 }, { "epoch": 1.2697084412565585, "grad_norm": 244.21766662597656, "learning_rate": 6.215186775366666e-07, "loss": 14.6094, "step": 19118 }, { "epoch": 1.2697748555489141, "grad_norm": 297.5785827636719, "learning_rate": 6.214191286745361e-07, "loss": 26.7656, "step": 19119 }, { "epoch": 1.2698412698412698, "grad_norm": 160.0109405517578, "learning_rate": 6.213195841916104e-07, "loss": 16.3594, "step": 19120 }, { "epoch": 1.2699076841336256, "grad_norm": 221.32264709472656, "learning_rate": 6.212200440890416e-07, "loss": 16.0156, "step": 19121 }, { "epoch": 1.2699740984259813, "grad_norm": 385.4599914550781, "learning_rate": 6.211205083679809e-07, "loss": 24.4375, "step": 19122 }, { "epoch": 1.270040512718337, "grad_norm": 241.1743927001953, "learning_rate": 6.210209770295798e-07, "loss": 19.8281, "step": 19123 }, { "epoch": 1.2701069270106928, "grad_norm": 421.367919921875, "learning_rate": 6.20921450074989e-07, "loss": 20.0156, "step": 19124 }, { "epoch": 1.2701733413030485, "grad_norm": 118.6113052368164, "learning_rate": 6.208219275053608e-07, "loss": 14.0, "step": 19125 }, { "epoch": 1.2702397555954041, "grad_norm": 260.30462646484375, "learning_rate": 6.207224093218458e-07, "loss": 22.7656, "step": 19126 }, { "epoch": 1.2703061698877598, "grad_norm": 154.69192504882812, "learning_rate": 6.206228955255952e-07, "loss": 15.4531, "step": 19127 }, { "epoch": 1.2703725841801154, "grad_norm": 474.62469482421875, "learning_rate": 6.2052338611776e-07, "loss": 17.9844, "step": 19128 }, { "epoch": 1.2704389984724713, "grad_norm": 191.14732360839844, "learning_rate": 6.204238810994917e-07, "loss": 14.0625, "step": 19129 }, { "epoch": 1.270505412764827, "grad_norm": 443.1842956542969, "learning_rate": 6.203243804719412e-07, "loss": 18.0625, "step": 19130 }, { "epoch": 1.2705718270571826, "grad_norm": 211.48997497558594, "learning_rate": 6.202248842362592e-07, "loss": 16.6562, "step": 19131 }, { "epoch": 1.2706382413495385, "grad_norm": 165.0428466796875, "learning_rate": 6.201253923935964e-07, "loss": 20.1094, "step": 19132 }, { "epoch": 1.2707046556418942, "grad_norm": 130.02369689941406, "learning_rate": 6.200259049451045e-07, "loss": 10.8438, "step": 19133 }, { "epoch": 1.2707710699342498, "grad_norm": 186.2410888671875, "learning_rate": 6.199264218919335e-07, "loss": 15.6406, "step": 19134 }, { "epoch": 1.2708374842266057, "grad_norm": 276.1860656738281, "learning_rate": 6.198269432352348e-07, "loss": 21.5938, "step": 19135 }, { "epoch": 1.2709038985189614, "grad_norm": 344.1921081542969, "learning_rate": 6.197274689761584e-07, "loss": 16.5938, "step": 19136 }, { "epoch": 1.270970312811317, "grad_norm": 195.9279327392578, "learning_rate": 6.19627999115856e-07, "loss": 14.1719, "step": 19137 }, { "epoch": 1.2710367271036727, "grad_norm": 253.5518798828125, "learning_rate": 6.195285336554768e-07, "loss": 17.2969, "step": 19138 }, { "epoch": 1.2711031413960283, "grad_norm": 136.3930206298828, "learning_rate": 6.194290725961727e-07, "loss": 15.0, "step": 19139 }, { "epoch": 1.2711695556883842, "grad_norm": 171.61851501464844, "learning_rate": 6.193296159390933e-07, "loss": 14.9531, "step": 19140 }, { "epoch": 1.2712359699807398, "grad_norm": 213.11549377441406, "learning_rate": 6.192301636853896e-07, "loss": 17.0469, "step": 19141 }, { "epoch": 1.2713023842730955, "grad_norm": 127.78414154052734, "learning_rate": 6.191307158362114e-07, "loss": 17.625, "step": 19142 }, { "epoch": 1.2713687985654514, "grad_norm": 131.4542236328125, "learning_rate": 6.190312723927101e-07, "loss": 17.0156, "step": 19143 }, { "epoch": 1.271435212857807, "grad_norm": 226.20111083984375, "learning_rate": 6.189318333560349e-07, "loss": 13.5938, "step": 19144 }, { "epoch": 1.2715016271501627, "grad_norm": 374.2724609375, "learning_rate": 6.188323987273367e-07, "loss": 13.1875, "step": 19145 }, { "epoch": 1.2715680414425186, "grad_norm": 119.2178955078125, "learning_rate": 6.187329685077654e-07, "loss": 14.8281, "step": 19146 }, { "epoch": 1.2716344557348742, "grad_norm": 148.2435760498047, "learning_rate": 6.186335426984713e-07, "loss": 16.6406, "step": 19147 }, { "epoch": 1.2717008700272299, "grad_norm": 299.7029724121094, "learning_rate": 6.185341213006046e-07, "loss": 13.8594, "step": 19148 }, { "epoch": 1.2717672843195855, "grad_norm": 296.5845947265625, "learning_rate": 6.184347043153152e-07, "loss": 14.1562, "step": 19149 }, { "epoch": 1.2718336986119412, "grad_norm": 483.2769470214844, "learning_rate": 6.183352917437527e-07, "loss": 22.2031, "step": 19150 }, { "epoch": 1.271900112904297, "grad_norm": 300.9551086425781, "learning_rate": 6.182358835870681e-07, "loss": 16.6094, "step": 19151 }, { "epoch": 1.2719665271966527, "grad_norm": 237.8192596435547, "learning_rate": 6.181364798464103e-07, "loss": 16.5, "step": 19152 }, { "epoch": 1.2720329414890084, "grad_norm": 169.37847900390625, "learning_rate": 6.180370805229292e-07, "loss": 18.9062, "step": 19153 }, { "epoch": 1.2720993557813642, "grad_norm": 194.25086975097656, "learning_rate": 6.179376856177755e-07, "loss": 14.9688, "step": 19154 }, { "epoch": 1.27216577007372, "grad_norm": 283.3495788574219, "learning_rate": 6.178382951320981e-07, "loss": 13.3438, "step": 19155 }, { "epoch": 1.2722321843660755, "grad_norm": 286.7958068847656, "learning_rate": 6.17738909067047e-07, "loss": 16.25, "step": 19156 }, { "epoch": 1.2722985986584314, "grad_norm": 108.76702117919922, "learning_rate": 6.176395274237718e-07, "loss": 13.1797, "step": 19157 }, { "epoch": 1.272365012950787, "grad_norm": 188.05892944335938, "learning_rate": 6.175401502034221e-07, "loss": 15.0781, "step": 19158 }, { "epoch": 1.2724314272431427, "grad_norm": 236.94613647460938, "learning_rate": 6.17440777407147e-07, "loss": 17.3906, "step": 19159 }, { "epoch": 1.2724978415354984, "grad_norm": 129.8702392578125, "learning_rate": 6.173414090360969e-07, "loss": 14.625, "step": 19160 }, { "epoch": 1.272564255827854, "grad_norm": 324.3006591796875, "learning_rate": 6.172420450914204e-07, "loss": 16.5, "step": 19161 }, { "epoch": 1.27263067012021, "grad_norm": 241.16148376464844, "learning_rate": 6.171426855742674e-07, "loss": 14.0156, "step": 19162 }, { "epoch": 1.2726970844125656, "grad_norm": 234.30496215820312, "learning_rate": 6.170433304857867e-07, "loss": 19.0469, "step": 19163 }, { "epoch": 1.2727634987049212, "grad_norm": 185.70040893554688, "learning_rate": 6.169439798271285e-07, "loss": 15.5625, "step": 19164 }, { "epoch": 1.272829912997277, "grad_norm": 305.5137634277344, "learning_rate": 6.168446335994411e-07, "loss": 16.1719, "step": 19165 }, { "epoch": 1.2728963272896328, "grad_norm": 97.12531280517578, "learning_rate": 6.167452918038742e-07, "loss": 13.3125, "step": 19166 }, { "epoch": 1.2729627415819884, "grad_norm": 225.09120178222656, "learning_rate": 6.166459544415763e-07, "loss": 17.5, "step": 19167 }, { "epoch": 1.2730291558743443, "grad_norm": 116.11309051513672, "learning_rate": 6.165466215136975e-07, "loss": 13.1875, "step": 19168 }, { "epoch": 1.2730955701667, "grad_norm": 102.14618682861328, "learning_rate": 6.16447293021386e-07, "loss": 14.8281, "step": 19169 }, { "epoch": 1.2731619844590556, "grad_norm": 221.14166259765625, "learning_rate": 6.163479689657914e-07, "loss": 14.0938, "step": 19170 }, { "epoch": 1.2732283987514112, "grad_norm": 90.00193786621094, "learning_rate": 6.162486493480616e-07, "loss": 11.0625, "step": 19171 }, { "epoch": 1.273294813043767, "grad_norm": 159.22579956054688, "learning_rate": 6.161493341693467e-07, "loss": 13.625, "step": 19172 }, { "epoch": 1.2733612273361228, "grad_norm": 268.6111755371094, "learning_rate": 6.160500234307948e-07, "loss": 17.875, "step": 19173 }, { "epoch": 1.2734276416284784, "grad_norm": 140.721435546875, "learning_rate": 6.159507171335549e-07, "loss": 14.5156, "step": 19174 }, { "epoch": 1.273494055920834, "grad_norm": 201.58128356933594, "learning_rate": 6.158514152787754e-07, "loss": 17.0781, "step": 19175 }, { "epoch": 1.27356047021319, "grad_norm": 222.6965789794922, "learning_rate": 6.157521178676057e-07, "loss": 17.2656, "step": 19176 }, { "epoch": 1.2736268845055456, "grad_norm": 226.23243713378906, "learning_rate": 6.156528249011935e-07, "loss": 15.2031, "step": 19177 }, { "epoch": 1.2736932987979013, "grad_norm": 282.3669128417969, "learning_rate": 6.155535363806878e-07, "loss": 13.4062, "step": 19178 }, { "epoch": 1.2737597130902572, "grad_norm": 1033.4681396484375, "learning_rate": 6.154542523072372e-07, "loss": 19.4375, "step": 19179 }, { "epoch": 1.2738261273826128, "grad_norm": 1280.390625, "learning_rate": 6.153549726819904e-07, "loss": 12.3438, "step": 19180 }, { "epoch": 1.2738925416749685, "grad_norm": 290.99713134765625, "learning_rate": 6.152556975060947e-07, "loss": 13.7031, "step": 19181 }, { "epoch": 1.2739589559673241, "grad_norm": 196.72328186035156, "learning_rate": 6.151564267806999e-07, "loss": 13.8594, "step": 19182 }, { "epoch": 1.2740253702596798, "grad_norm": 487.57110595703125, "learning_rate": 6.150571605069534e-07, "loss": 24.4375, "step": 19183 }, { "epoch": 1.2740917845520356, "grad_norm": 263.4096984863281, "learning_rate": 6.14957898686004e-07, "loss": 19.7031, "step": 19184 }, { "epoch": 1.2741581988443913, "grad_norm": 534.512939453125, "learning_rate": 6.14858641318999e-07, "loss": 22.5625, "step": 19185 }, { "epoch": 1.274224613136747, "grad_norm": 232.7212677001953, "learning_rate": 6.147593884070876e-07, "loss": 18.4688, "step": 19186 }, { "epoch": 1.2742910274291028, "grad_norm": 271.6322937011719, "learning_rate": 6.146601399514173e-07, "loss": 16.6875, "step": 19187 }, { "epoch": 1.2743574417214585, "grad_norm": 208.85292053222656, "learning_rate": 6.145608959531361e-07, "loss": 20.6094, "step": 19188 }, { "epoch": 1.2744238560138141, "grad_norm": 398.9486999511719, "learning_rate": 6.144616564133927e-07, "loss": 11.75, "step": 19189 }, { "epoch": 1.27449027030617, "grad_norm": 347.4316711425781, "learning_rate": 6.143624213333342e-07, "loss": 19.7109, "step": 19190 }, { "epoch": 1.2745566845985257, "grad_norm": 148.6455841064453, "learning_rate": 6.142631907141089e-07, "loss": 14.375, "step": 19191 }, { "epoch": 1.2746230988908813, "grad_norm": 360.9189453125, "learning_rate": 6.141639645568645e-07, "loss": 13.4062, "step": 19192 }, { "epoch": 1.274689513183237, "grad_norm": 211.84959411621094, "learning_rate": 6.14064742862749e-07, "loss": 17.5625, "step": 19193 }, { "epoch": 1.2747559274755926, "grad_norm": 146.88645935058594, "learning_rate": 6.1396552563291e-07, "loss": 16.5938, "step": 19194 }, { "epoch": 1.2748223417679485, "grad_norm": 269.31365966796875, "learning_rate": 6.138663128684953e-07, "loss": 11.9844, "step": 19195 }, { "epoch": 1.2748887560603042, "grad_norm": 213.45655822753906, "learning_rate": 6.137671045706521e-07, "loss": 12.5234, "step": 19196 }, { "epoch": 1.2749551703526598, "grad_norm": 238.39797973632812, "learning_rate": 6.13667900740529e-07, "loss": 16.3906, "step": 19197 }, { "epoch": 1.2750215846450157, "grad_norm": 291.9666442871094, "learning_rate": 6.13568701379272e-07, "loss": 19.2344, "step": 19198 }, { "epoch": 1.2750879989373713, "grad_norm": 147.9047393798828, "learning_rate": 6.1346950648803e-07, "loss": 15.5312, "step": 19199 }, { "epoch": 1.275154413229727, "grad_norm": 207.37368774414062, "learning_rate": 6.133703160679496e-07, "loss": 18.8594, "step": 19200 }, { "epoch": 1.2752208275220829, "grad_norm": 256.65240478515625, "learning_rate": 6.132711301201787e-07, "loss": 17.2188, "step": 19201 }, { "epoch": 1.2752872418144385, "grad_norm": 217.55360412597656, "learning_rate": 6.13171948645864e-07, "loss": 18.875, "step": 19202 }, { "epoch": 1.2753536561067942, "grad_norm": 255.97560119628906, "learning_rate": 6.130727716461535e-07, "loss": 16.375, "step": 19203 }, { "epoch": 1.2754200703991498, "grad_norm": 169.79612731933594, "learning_rate": 6.129735991221939e-07, "loss": 15.7188, "step": 19204 }, { "epoch": 1.2754864846915055, "grad_norm": 293.9027404785156, "learning_rate": 6.128744310751326e-07, "loss": 20.6875, "step": 19205 }, { "epoch": 1.2755528989838614, "grad_norm": 198.7723388671875, "learning_rate": 6.127752675061163e-07, "loss": 14.0, "step": 19206 }, { "epoch": 1.275619313276217, "grad_norm": 102.46037292480469, "learning_rate": 6.126761084162928e-07, "loss": 9.2969, "step": 19207 }, { "epoch": 1.2756857275685727, "grad_norm": 103.09076690673828, "learning_rate": 6.125769538068086e-07, "loss": 21.0156, "step": 19208 }, { "epoch": 1.2757521418609286, "grad_norm": 437.3043212890625, "learning_rate": 6.124778036788109e-07, "loss": 13.0547, "step": 19209 }, { "epoch": 1.2758185561532842, "grad_norm": 376.7609558105469, "learning_rate": 6.123786580334462e-07, "loss": 22.0781, "step": 19210 }, { "epoch": 1.2758849704456399, "grad_norm": 165.0345001220703, "learning_rate": 6.122795168718622e-07, "loss": 15.8594, "step": 19211 }, { "epoch": 1.2759513847379957, "grad_norm": 188.59661865234375, "learning_rate": 6.121803801952048e-07, "loss": 14.0156, "step": 19212 }, { "epoch": 1.2760177990303514, "grad_norm": 198.56942749023438, "learning_rate": 6.120812480046214e-07, "loss": 15.5, "step": 19213 }, { "epoch": 1.276084213322707, "grad_norm": 870.4342651367188, "learning_rate": 6.11982120301258e-07, "loss": 22.7344, "step": 19214 }, { "epoch": 1.2761506276150627, "grad_norm": 273.8988342285156, "learning_rate": 6.11882997086262e-07, "loss": 18.4062, "step": 19215 }, { "epoch": 1.2762170419074184, "grad_norm": 207.29876708984375, "learning_rate": 6.117838783607796e-07, "loss": 14.8438, "step": 19216 }, { "epoch": 1.2762834561997742, "grad_norm": 182.63250732421875, "learning_rate": 6.116847641259577e-07, "loss": 14.9375, "step": 19217 }, { "epoch": 1.27634987049213, "grad_norm": 161.7208251953125, "learning_rate": 6.115856543829421e-07, "loss": 14.7812, "step": 19218 }, { "epoch": 1.2764162847844855, "grad_norm": 195.75897216796875, "learning_rate": 6.114865491328802e-07, "loss": 14.2656, "step": 19219 }, { "epoch": 1.2764826990768414, "grad_norm": 164.58909606933594, "learning_rate": 6.113874483769173e-07, "loss": 14.2656, "step": 19220 }, { "epoch": 1.276549113369197, "grad_norm": 330.1724548339844, "learning_rate": 6.112883521162007e-07, "loss": 18.7812, "step": 19221 }, { "epoch": 1.2766155276615527, "grad_norm": 174.6719970703125, "learning_rate": 6.11189260351876e-07, "loss": 14.2031, "step": 19222 }, { "epoch": 1.2766819419539086, "grad_norm": 319.7240905761719, "learning_rate": 6.110901730850902e-07, "loss": 19.2656, "step": 19223 }, { "epoch": 1.2767483562462643, "grad_norm": 471.70855712890625, "learning_rate": 6.109910903169883e-07, "loss": 23.1719, "step": 19224 }, { "epoch": 1.27681477053862, "grad_norm": 234.53721618652344, "learning_rate": 6.108920120487175e-07, "loss": 20.7188, "step": 19225 }, { "epoch": 1.2768811848309756, "grad_norm": 233.8823699951172, "learning_rate": 6.107929382814239e-07, "loss": 14.6406, "step": 19226 }, { "epoch": 1.2769475991233312, "grad_norm": 389.9299621582031, "learning_rate": 6.106938690162525e-07, "loss": 16.0703, "step": 19227 }, { "epoch": 1.277014013415687, "grad_norm": 117.32806396484375, "learning_rate": 6.105948042543505e-07, "loss": 16.2812, "step": 19228 }, { "epoch": 1.2770804277080428, "grad_norm": 98.77955627441406, "learning_rate": 6.10495743996863e-07, "loss": 13.5312, "step": 19229 }, { "epoch": 1.2771468420003984, "grad_norm": 431.29150390625, "learning_rate": 6.103966882449363e-07, "loss": 17.2031, "step": 19230 }, { "epoch": 1.2772132562927543, "grad_norm": 155.1864776611328, "learning_rate": 6.102976369997157e-07, "loss": 15.7969, "step": 19231 }, { "epoch": 1.27727967058511, "grad_norm": 245.3343963623047, "learning_rate": 6.101985902623477e-07, "loss": 15.6719, "step": 19232 }, { "epoch": 1.2773460848774656, "grad_norm": 178.7011260986328, "learning_rate": 6.100995480339775e-07, "loss": 15.9062, "step": 19233 }, { "epoch": 1.2774124991698215, "grad_norm": 291.2196350097656, "learning_rate": 6.10000510315751e-07, "loss": 26.3906, "step": 19234 }, { "epoch": 1.2774789134621771, "grad_norm": 131.67449951171875, "learning_rate": 6.099014771088134e-07, "loss": 11.7656, "step": 19235 }, { "epoch": 1.2775453277545328, "grad_norm": 208.83180236816406, "learning_rate": 6.09802448414311e-07, "loss": 16.6875, "step": 19236 }, { "epoch": 1.2776117420468884, "grad_norm": 177.08090209960938, "learning_rate": 6.097034242333885e-07, "loss": 10.9297, "step": 19237 }, { "epoch": 1.277678156339244, "grad_norm": 323.2721252441406, "learning_rate": 6.096044045671919e-07, "loss": 26.1562, "step": 19238 }, { "epoch": 1.2777445706316, "grad_norm": 327.1858825683594, "learning_rate": 6.095053894168662e-07, "loss": 16.8125, "step": 19239 }, { "epoch": 1.2778109849239556, "grad_norm": 268.4122009277344, "learning_rate": 6.094063787835576e-07, "loss": 16.0156, "step": 19240 }, { "epoch": 1.2778773992163113, "grad_norm": 141.35606384277344, "learning_rate": 6.093073726684101e-07, "loss": 14.1094, "step": 19241 }, { "epoch": 1.2779438135086671, "grad_norm": 282.5798645019531, "learning_rate": 6.092083710725702e-07, "loss": 14.4062, "step": 19242 }, { "epoch": 1.2780102278010228, "grad_norm": 150.7658233642578, "learning_rate": 6.09109373997182e-07, "loss": 13.4844, "step": 19243 }, { "epoch": 1.2780766420933785, "grad_norm": 136.7694854736328, "learning_rate": 6.090103814433916e-07, "loss": 11.0938, "step": 19244 }, { "epoch": 1.2781430563857343, "grad_norm": 147.34629821777344, "learning_rate": 6.089113934123431e-07, "loss": 11.5938, "step": 19245 }, { "epoch": 1.27820947067809, "grad_norm": 740.2432861328125, "learning_rate": 6.088124099051827e-07, "loss": 23.0781, "step": 19246 }, { "epoch": 1.2782758849704456, "grad_norm": 219.1007080078125, "learning_rate": 6.087134309230544e-07, "loss": 14.8906, "step": 19247 }, { "epoch": 1.2783422992628013, "grad_norm": 322.3953552246094, "learning_rate": 6.086144564671037e-07, "loss": 15.6875, "step": 19248 }, { "epoch": 1.278408713555157, "grad_norm": 334.27764892578125, "learning_rate": 6.08515486538475e-07, "loss": 23.4141, "step": 19249 }, { "epoch": 1.2784751278475128, "grad_norm": 207.29501342773438, "learning_rate": 6.084165211383137e-07, "loss": 15.5469, "step": 19250 }, { "epoch": 1.2785415421398685, "grad_norm": 189.402099609375, "learning_rate": 6.083175602677642e-07, "loss": 14.3594, "step": 19251 }, { "epoch": 1.2786079564322241, "grad_norm": 467.9245300292969, "learning_rate": 6.082186039279713e-07, "loss": 14.2031, "step": 19252 }, { "epoch": 1.27867437072458, "grad_norm": 125.24900817871094, "learning_rate": 6.081196521200794e-07, "loss": 16.0781, "step": 19253 }, { "epoch": 1.2787407850169357, "grad_norm": 249.06935119628906, "learning_rate": 6.080207048452338e-07, "loss": 16.1875, "step": 19254 }, { "epoch": 1.2788071993092913, "grad_norm": 245.1111297607422, "learning_rate": 6.079217621045785e-07, "loss": 17.2656, "step": 19255 }, { "epoch": 1.2788736136016472, "grad_norm": 198.7763671875, "learning_rate": 6.078228238992582e-07, "loss": 16.5625, "step": 19256 }, { "epoch": 1.2789400278940029, "grad_norm": 148.3837127685547, "learning_rate": 6.077238902304172e-07, "loss": 15.3281, "step": 19257 }, { "epoch": 1.2790064421863585, "grad_norm": 163.5549774169922, "learning_rate": 6.076249610992002e-07, "loss": 18.1719, "step": 19258 }, { "epoch": 1.2790728564787142, "grad_norm": 224.93692016601562, "learning_rate": 6.075260365067513e-07, "loss": 18.0938, "step": 19259 }, { "epoch": 1.2791392707710698, "grad_norm": 121.91334533691406, "learning_rate": 6.074271164542151e-07, "loss": 15.3438, "step": 19260 }, { "epoch": 1.2792056850634257, "grad_norm": 186.31861877441406, "learning_rate": 6.073282009427354e-07, "loss": 12.6875, "step": 19261 }, { "epoch": 1.2792720993557813, "grad_norm": 188.29014587402344, "learning_rate": 6.072292899734565e-07, "loss": 14.0312, "step": 19262 }, { "epoch": 1.279338513648137, "grad_norm": 196.9825439453125, "learning_rate": 6.07130383547523e-07, "loss": 17.9062, "step": 19263 }, { "epoch": 1.2794049279404929, "grad_norm": 321.1466979980469, "learning_rate": 6.070314816660787e-07, "loss": 15.5625, "step": 19264 }, { "epoch": 1.2794713422328485, "grad_norm": 164.36343383789062, "learning_rate": 6.069325843302675e-07, "loss": 12.5938, "step": 19265 }, { "epoch": 1.2795377565252042, "grad_norm": 250.0704803466797, "learning_rate": 6.068336915412333e-07, "loss": 11.9766, "step": 19266 }, { "epoch": 1.27960417081756, "grad_norm": 152.98974609375, "learning_rate": 6.067348033001206e-07, "loss": 14.4062, "step": 19267 }, { "epoch": 1.2796705851099157, "grad_norm": 199.5304412841797, "learning_rate": 6.066359196080728e-07, "loss": 15.7656, "step": 19268 }, { "epoch": 1.2797369994022714, "grad_norm": 146.60240173339844, "learning_rate": 6.06537040466234e-07, "loss": 14.7344, "step": 19269 }, { "epoch": 1.279803413694627, "grad_norm": 733.1123046875, "learning_rate": 6.064381658757473e-07, "loss": 23.4141, "step": 19270 }, { "epoch": 1.2798698279869827, "grad_norm": 232.55079650878906, "learning_rate": 6.063392958377576e-07, "loss": 18.875, "step": 19271 }, { "epoch": 1.2799362422793386, "grad_norm": 519.66943359375, "learning_rate": 6.062404303534076e-07, "loss": 30.9219, "step": 19272 }, { "epoch": 1.2800026565716942, "grad_norm": 550.7177124023438, "learning_rate": 6.061415694238414e-07, "loss": 21.2812, "step": 19273 }, { "epoch": 1.2800690708640499, "grad_norm": 144.27474975585938, "learning_rate": 6.060427130502022e-07, "loss": 14.0469, "step": 19274 }, { "epoch": 1.2801354851564057, "grad_norm": 320.5425109863281, "learning_rate": 6.059438612336339e-07, "loss": 14.8906, "step": 19275 }, { "epoch": 1.2802018994487614, "grad_norm": 242.31842041015625, "learning_rate": 6.058450139752798e-07, "loss": 15.4531, "step": 19276 }, { "epoch": 1.280268313741117, "grad_norm": 277.22052001953125, "learning_rate": 6.057461712762833e-07, "loss": 16.25, "step": 19277 }, { "epoch": 1.280334728033473, "grad_norm": 186.7403564453125, "learning_rate": 6.056473331377874e-07, "loss": 15.3125, "step": 19278 }, { "epoch": 1.2804011423258286, "grad_norm": 118.86952209472656, "learning_rate": 6.055484995609362e-07, "loss": 11.4531, "step": 19279 }, { "epoch": 1.2804675566181842, "grad_norm": 263.7388610839844, "learning_rate": 6.054496705468722e-07, "loss": 17.8594, "step": 19280 }, { "epoch": 1.2805339709105399, "grad_norm": 183.74913024902344, "learning_rate": 6.053508460967393e-07, "loss": 18.4219, "step": 19281 }, { "epoch": 1.2806003852028955, "grad_norm": 151.75558471679688, "learning_rate": 6.052520262116797e-07, "loss": 15.1328, "step": 19282 }, { "epoch": 1.2806667994952514, "grad_norm": 126.49723815917969, "learning_rate": 6.051532108928377e-07, "loss": 14.875, "step": 19283 }, { "epoch": 1.280733213787607, "grad_norm": 267.1083679199219, "learning_rate": 6.05054400141355e-07, "loss": 16.5, "step": 19284 }, { "epoch": 1.2807996280799627, "grad_norm": 121.21573638916016, "learning_rate": 6.049555939583757e-07, "loss": 14.0781, "step": 19285 }, { "epoch": 1.2808660423723186, "grad_norm": 257.146484375, "learning_rate": 6.04856792345042e-07, "loss": 20.0781, "step": 19286 }, { "epoch": 1.2809324566646743, "grad_norm": 99.86869049072266, "learning_rate": 6.047579953024975e-07, "loss": 16.625, "step": 19287 }, { "epoch": 1.28099887095703, "grad_norm": 336.48974609375, "learning_rate": 6.04659202831884e-07, "loss": 27.3125, "step": 19288 }, { "epoch": 1.2810652852493858, "grad_norm": 187.35260009765625, "learning_rate": 6.045604149343454e-07, "loss": 21.1562, "step": 19289 }, { "epoch": 1.2811316995417414, "grad_norm": 111.03961944580078, "learning_rate": 6.044616316110236e-07, "loss": 12.6719, "step": 19290 }, { "epoch": 1.281198113834097, "grad_norm": 231.54010009765625, "learning_rate": 6.043628528630618e-07, "loss": 17.2656, "step": 19291 }, { "epoch": 1.2812645281264528, "grad_norm": 136.90524291992188, "learning_rate": 6.04264078691602e-07, "loss": 13.0938, "step": 19292 }, { "epoch": 1.2813309424188084, "grad_norm": 260.4066467285156, "learning_rate": 6.041653090977876e-07, "loss": 16.6719, "step": 19293 }, { "epoch": 1.2813973567111643, "grad_norm": 123.65510559082031, "learning_rate": 6.040665440827604e-07, "loss": 14.2422, "step": 19294 }, { "epoch": 1.28146377100352, "grad_norm": 205.59837341308594, "learning_rate": 6.039677836476633e-07, "loss": 14.9688, "step": 19295 }, { "epoch": 1.2815301852958756, "grad_norm": 224.90151977539062, "learning_rate": 6.03869027793638e-07, "loss": 14.7422, "step": 19296 }, { "epoch": 1.2815965995882315, "grad_norm": 264.15985107421875, "learning_rate": 6.03770276521828e-07, "loss": 14.5, "step": 19297 }, { "epoch": 1.2816630138805871, "grad_norm": 314.27764892578125, "learning_rate": 6.036715298333745e-07, "loss": 20.6094, "step": 19298 }, { "epoch": 1.2817294281729428, "grad_norm": 1092.7750244140625, "learning_rate": 6.035727877294202e-07, "loss": 14.1406, "step": 19299 }, { "epoch": 1.2817958424652987, "grad_norm": 353.9375305175781, "learning_rate": 6.034740502111076e-07, "loss": 17.75, "step": 19300 }, { "epoch": 1.2818622567576543, "grad_norm": 169.4161376953125, "learning_rate": 6.033753172795783e-07, "loss": 13.7031, "step": 19301 }, { "epoch": 1.28192867105001, "grad_norm": 501.3122863769531, "learning_rate": 6.03276588935975e-07, "loss": 24.5859, "step": 19302 }, { "epoch": 1.2819950853423656, "grad_norm": 281.0584716796875, "learning_rate": 6.031778651814389e-07, "loss": 14.6875, "step": 19303 }, { "epoch": 1.2820614996347213, "grad_norm": 147.17855834960938, "learning_rate": 6.03079146017113e-07, "loss": 15.9688, "step": 19304 }, { "epoch": 1.2821279139270771, "grad_norm": 342.68084716796875, "learning_rate": 6.029804314441381e-07, "loss": 16.2344, "step": 19305 }, { "epoch": 1.2821943282194328, "grad_norm": 208.2516632080078, "learning_rate": 6.028817214636569e-07, "loss": 13.2031, "step": 19306 }, { "epoch": 1.2822607425117885, "grad_norm": 337.6141357421875, "learning_rate": 6.027830160768108e-07, "loss": 14.1562, "step": 19307 }, { "epoch": 1.2823271568041443, "grad_norm": 2037.3333740234375, "learning_rate": 6.02684315284742e-07, "loss": 19.0156, "step": 19308 }, { "epoch": 1.2823935710965, "grad_norm": 221.64390563964844, "learning_rate": 6.025856190885915e-07, "loss": 12.8906, "step": 19309 }, { "epoch": 1.2824599853888556, "grad_norm": 223.71641540527344, "learning_rate": 6.024869274895018e-07, "loss": 18.625, "step": 19310 }, { "epoch": 1.2825263996812115, "grad_norm": 191.155517578125, "learning_rate": 6.023882404886138e-07, "loss": 21.9688, "step": 19311 }, { "epoch": 1.2825928139735672, "grad_norm": 187.38442993164062, "learning_rate": 6.022895580870698e-07, "loss": 17.625, "step": 19312 }, { "epoch": 1.2826592282659228, "grad_norm": 428.77667236328125, "learning_rate": 6.021908802860103e-07, "loss": 16.9375, "step": 19313 }, { "epoch": 1.2827256425582785, "grad_norm": 177.83087158203125, "learning_rate": 6.020922070865777e-07, "loss": 15.9219, "step": 19314 }, { "epoch": 1.2827920568506341, "grad_norm": 209.4674835205078, "learning_rate": 6.019935384899128e-07, "loss": 14.0, "step": 19315 }, { "epoch": 1.28285847114299, "grad_norm": 200.97389221191406, "learning_rate": 6.018948744971574e-07, "loss": 15.9062, "step": 19316 }, { "epoch": 1.2829248854353457, "grad_norm": 233.9110870361328, "learning_rate": 6.01796215109452e-07, "loss": 17.2344, "step": 19317 }, { "epoch": 1.2829912997277013, "grad_norm": 206.6496124267578, "learning_rate": 6.016975603279389e-07, "loss": 17.4844, "step": 19318 }, { "epoch": 1.2830577140200572, "grad_norm": 225.61302185058594, "learning_rate": 6.015989101537586e-07, "loss": 15.0312, "step": 19319 }, { "epoch": 1.2831241283124128, "grad_norm": 206.20101928710938, "learning_rate": 6.015002645880525e-07, "loss": 17.5625, "step": 19320 }, { "epoch": 1.2831905426047685, "grad_norm": 121.43537902832031, "learning_rate": 6.01401623631961e-07, "loss": 13.1562, "step": 19321 }, { "epoch": 1.2832569568971244, "grad_norm": 718.6021118164062, "learning_rate": 6.013029872866262e-07, "loss": 19.4844, "step": 19322 }, { "epoch": 1.28332337118948, "grad_norm": 204.86000061035156, "learning_rate": 6.012043555531885e-07, "loss": 17.2188, "step": 19323 }, { "epoch": 1.2833897854818357, "grad_norm": 127.95205688476562, "learning_rate": 6.011057284327888e-07, "loss": 14.8281, "step": 19324 }, { "epoch": 1.2834561997741913, "grad_norm": 198.3691864013672, "learning_rate": 6.01007105926568e-07, "loss": 14.1562, "step": 19325 }, { "epoch": 1.283522614066547, "grad_norm": 248.5806427001953, "learning_rate": 6.00908488035667e-07, "loss": 19.0781, "step": 19326 }, { "epoch": 1.2835890283589029, "grad_norm": 322.63348388671875, "learning_rate": 6.008098747612261e-07, "loss": 19.5469, "step": 19327 }, { "epoch": 1.2836554426512585, "grad_norm": 92.72569274902344, "learning_rate": 6.00711266104387e-07, "loss": 12.8594, "step": 19328 }, { "epoch": 1.2837218569436142, "grad_norm": 203.77647399902344, "learning_rate": 6.006126620662892e-07, "loss": 15.7656, "step": 19329 }, { "epoch": 1.28378827123597, "grad_norm": 172.6302947998047, "learning_rate": 6.005140626480742e-07, "loss": 14.8906, "step": 19330 }, { "epoch": 1.2838546855283257, "grad_norm": 234.275146484375, "learning_rate": 6.004154678508818e-07, "loss": 17.4219, "step": 19331 }, { "epoch": 1.2839210998206814, "grad_norm": 199.3033905029297, "learning_rate": 6.003168776758531e-07, "loss": 14.6875, "step": 19332 }, { "epoch": 1.2839875141130372, "grad_norm": 109.40625762939453, "learning_rate": 6.002182921241282e-07, "loss": 13.7812, "step": 19333 }, { "epoch": 1.284053928405393, "grad_norm": 226.2710418701172, "learning_rate": 6.001197111968477e-07, "loss": 16.7812, "step": 19334 }, { "epoch": 1.2841203426977486, "grad_norm": 210.48915100097656, "learning_rate": 6.000211348951515e-07, "loss": 20.9531, "step": 19335 }, { "epoch": 1.2841867569901042, "grad_norm": 986.572509765625, "learning_rate": 5.999225632201804e-07, "loss": 15.7656, "step": 19336 }, { "epoch": 1.2842531712824599, "grad_norm": 125.41258239746094, "learning_rate": 5.998239961730744e-07, "loss": 13.9219, "step": 19337 }, { "epoch": 1.2843195855748157, "grad_norm": 134.42657470703125, "learning_rate": 5.997254337549733e-07, "loss": 14.875, "step": 19338 }, { "epoch": 1.2843859998671714, "grad_norm": 120.80072784423828, "learning_rate": 5.996268759670182e-07, "loss": 12.7812, "step": 19339 }, { "epoch": 1.284452414159527, "grad_norm": 105.98934173583984, "learning_rate": 5.99528322810348e-07, "loss": 10.6094, "step": 19340 }, { "epoch": 1.284518828451883, "grad_norm": 287.93609619140625, "learning_rate": 5.994297742861036e-07, "loss": 18.2969, "step": 19341 }, { "epoch": 1.2845852427442386, "grad_norm": 212.14979553222656, "learning_rate": 5.993312303954243e-07, "loss": 16.1562, "step": 19342 }, { "epoch": 1.2846516570365942, "grad_norm": 256.20770263671875, "learning_rate": 5.992326911394505e-07, "loss": 13.5156, "step": 19343 }, { "epoch": 1.28471807132895, "grad_norm": 146.68724060058594, "learning_rate": 5.991341565193218e-07, "loss": 12.0938, "step": 19344 }, { "epoch": 1.2847844856213058, "grad_norm": 291.27392578125, "learning_rate": 5.990356265361782e-07, "loss": 14.7969, "step": 19345 }, { "epoch": 1.2848508999136614, "grad_norm": 230.67135620117188, "learning_rate": 5.989371011911589e-07, "loss": 14.9531, "step": 19346 }, { "epoch": 1.284917314206017, "grad_norm": 636.2528076171875, "learning_rate": 5.988385804854043e-07, "loss": 17.3125, "step": 19347 }, { "epoch": 1.2849837284983727, "grad_norm": 162.4738006591797, "learning_rate": 5.987400644200532e-07, "loss": 13.0938, "step": 19348 }, { "epoch": 1.2850501427907286, "grad_norm": 752.1383666992188, "learning_rate": 5.986415529962461e-07, "loss": 17.2969, "step": 19349 }, { "epoch": 1.2851165570830843, "grad_norm": 176.4115753173828, "learning_rate": 5.985430462151219e-07, "loss": 18.3594, "step": 19350 }, { "epoch": 1.28518297137544, "grad_norm": 174.44668579101562, "learning_rate": 5.984445440778203e-07, "loss": 16.7812, "step": 19351 }, { "epoch": 1.2852493856677958, "grad_norm": 277.71728515625, "learning_rate": 5.983460465854804e-07, "loss": 18.3438, "step": 19352 }, { "epoch": 1.2853157999601514, "grad_norm": 212.44520568847656, "learning_rate": 5.982475537392421e-07, "loss": 17.7812, "step": 19353 }, { "epoch": 1.285382214252507, "grad_norm": 175.60350036621094, "learning_rate": 5.981490655402444e-07, "loss": 12.9688, "step": 19354 }, { "epoch": 1.285448628544863, "grad_norm": 523.1680908203125, "learning_rate": 5.980505819896267e-07, "loss": 16.7812, "step": 19355 }, { "epoch": 1.2855150428372186, "grad_norm": 331.03387451171875, "learning_rate": 5.979521030885277e-07, "loss": 11.8438, "step": 19356 }, { "epoch": 1.2855814571295743, "grad_norm": 223.87835693359375, "learning_rate": 5.978536288380874e-07, "loss": 11.8125, "step": 19357 }, { "epoch": 1.28564787142193, "grad_norm": 216.3285369873047, "learning_rate": 5.977551592394443e-07, "loss": 19.6094, "step": 19358 }, { "epoch": 1.2857142857142856, "grad_norm": 153.63156127929688, "learning_rate": 5.976566942937375e-07, "loss": 20.6719, "step": 19359 }, { "epoch": 1.2857807000066415, "grad_norm": 233.0293426513672, "learning_rate": 5.97558234002106e-07, "loss": 20.2344, "step": 19360 }, { "epoch": 1.2858471142989971, "grad_norm": 492.0638122558594, "learning_rate": 5.97459778365689e-07, "loss": 18.1875, "step": 19361 }, { "epoch": 1.2859135285913528, "grad_norm": 236.67340087890625, "learning_rate": 5.973613273856249e-07, "loss": 14.3047, "step": 19362 }, { "epoch": 1.2859799428837086, "grad_norm": 320.10565185546875, "learning_rate": 5.97262881063053e-07, "loss": 17.75, "step": 19363 }, { "epoch": 1.2860463571760643, "grad_norm": 241.49212646484375, "learning_rate": 5.971644393991116e-07, "loss": 17.0156, "step": 19364 }, { "epoch": 1.28611277146842, "grad_norm": 223.44557189941406, "learning_rate": 5.970660023949401e-07, "loss": 18.8594, "step": 19365 }, { "epoch": 1.2861791857607758, "grad_norm": 92.93214416503906, "learning_rate": 5.969675700516762e-07, "loss": 12.3906, "step": 19366 }, { "epoch": 1.2862456000531315, "grad_norm": 173.97918701171875, "learning_rate": 5.968691423704595e-07, "loss": 12.5625, "step": 19367 }, { "epoch": 1.2863120143454871, "grad_norm": 197.3688507080078, "learning_rate": 5.967707193524279e-07, "loss": 12.7344, "step": 19368 }, { "epoch": 1.2863784286378428, "grad_norm": 157.0872802734375, "learning_rate": 5.966723009987203e-07, "loss": 15.5469, "step": 19369 }, { "epoch": 1.2864448429301985, "grad_norm": 256.20904541015625, "learning_rate": 5.965738873104744e-07, "loss": 13.8984, "step": 19370 }, { "epoch": 1.2865112572225543, "grad_norm": 150.42596435546875, "learning_rate": 5.964754782888297e-07, "loss": 13.7188, "step": 19371 }, { "epoch": 1.28657767151491, "grad_norm": 206.37657165527344, "learning_rate": 5.963770739349239e-07, "loss": 18.0391, "step": 19372 }, { "epoch": 1.2866440858072656, "grad_norm": 315.3779602050781, "learning_rate": 5.962786742498949e-07, "loss": 23.6406, "step": 19373 }, { "epoch": 1.2867105000996215, "grad_norm": 147.02700805664062, "learning_rate": 5.96180279234882e-07, "loss": 17.2188, "step": 19374 }, { "epoch": 1.2867769143919772, "grad_norm": 250.40737915039062, "learning_rate": 5.960818888910224e-07, "loss": 17.1406, "step": 19375 }, { "epoch": 1.2868433286843328, "grad_norm": 267.1330871582031, "learning_rate": 5.959835032194549e-07, "loss": 14.5156, "step": 19376 }, { "epoch": 1.2869097429766887, "grad_norm": 159.2133331298828, "learning_rate": 5.958851222213168e-07, "loss": 14.8906, "step": 19377 }, { "epoch": 1.2869761572690444, "grad_norm": 681.0742797851562, "learning_rate": 5.957867458977471e-07, "loss": 17.6484, "step": 19378 }, { "epoch": 1.2870425715614, "grad_norm": 160.84286499023438, "learning_rate": 5.95688374249883e-07, "loss": 15.25, "step": 19379 }, { "epoch": 1.2871089858537557, "grad_norm": 477.6565856933594, "learning_rate": 5.955900072788629e-07, "loss": 13.0625, "step": 19380 }, { "epoch": 1.2871754001461113, "grad_norm": 100.12385559082031, "learning_rate": 5.954916449858239e-07, "loss": 12.3125, "step": 19381 }, { "epoch": 1.2872418144384672, "grad_norm": 215.99227905273438, "learning_rate": 5.953932873719047e-07, "loss": 17.3906, "step": 19382 }, { "epoch": 1.2873082287308228, "grad_norm": 206.31727600097656, "learning_rate": 5.952949344382424e-07, "loss": 14.75, "step": 19383 }, { "epoch": 1.2873746430231785, "grad_norm": 395.1739807128906, "learning_rate": 5.951965861859752e-07, "loss": 14.9219, "step": 19384 }, { "epoch": 1.2874410573155344, "grad_norm": 288.1817932128906, "learning_rate": 5.950982426162401e-07, "loss": 19.1562, "step": 19385 }, { "epoch": 1.28750747160789, "grad_norm": 201.06105041503906, "learning_rate": 5.949999037301754e-07, "loss": 16.6875, "step": 19386 }, { "epoch": 1.2875738859002457, "grad_norm": 280.9143371582031, "learning_rate": 5.949015695289182e-07, "loss": 19.7656, "step": 19387 }, { "epoch": 1.2876403001926016, "grad_norm": 202.3145751953125, "learning_rate": 5.94803240013606e-07, "loss": 17.8438, "step": 19388 }, { "epoch": 1.2877067144849572, "grad_norm": 133.66722106933594, "learning_rate": 5.947049151853763e-07, "loss": 13.9531, "step": 19389 }, { "epoch": 1.2877731287773129, "grad_norm": 117.63961791992188, "learning_rate": 5.946065950453664e-07, "loss": 19.9062, "step": 19390 }, { "epoch": 1.2878395430696685, "grad_norm": 174.51416015625, "learning_rate": 5.945082795947134e-07, "loss": 16.1562, "step": 19391 }, { "epoch": 1.2879059573620242, "grad_norm": 219.82716369628906, "learning_rate": 5.944099688345552e-07, "loss": 23.1875, "step": 19392 }, { "epoch": 1.28797237165438, "grad_norm": 217.14096069335938, "learning_rate": 5.943116627660285e-07, "loss": 18.2031, "step": 19393 }, { "epoch": 1.2880387859467357, "grad_norm": 346.5890808105469, "learning_rate": 5.942133613902707e-07, "loss": 17.875, "step": 19394 }, { "epoch": 1.2881052002390914, "grad_norm": 279.8817138671875, "learning_rate": 5.941150647084182e-07, "loss": 19.4531, "step": 19395 }, { "epoch": 1.2881716145314472, "grad_norm": 490.99755859375, "learning_rate": 5.94016772721609e-07, "loss": 12.3438, "step": 19396 }, { "epoch": 1.288238028823803, "grad_norm": 178.91656494140625, "learning_rate": 5.939184854309795e-07, "loss": 15.1719, "step": 19397 }, { "epoch": 1.2883044431161585, "grad_norm": 151.4564208984375, "learning_rate": 5.938202028376671e-07, "loss": 16.3438, "step": 19398 }, { "epoch": 1.2883708574085144, "grad_norm": 136.39991760253906, "learning_rate": 5.937219249428081e-07, "loss": 13.1094, "step": 19399 }, { "epoch": 1.28843727170087, "grad_norm": 575.3967895507812, "learning_rate": 5.936236517475397e-07, "loss": 20.7812, "step": 19400 }, { "epoch": 1.2885036859932257, "grad_norm": 248.9634246826172, "learning_rate": 5.935253832529986e-07, "loss": 18.4375, "step": 19401 }, { "epoch": 1.2885701002855814, "grad_norm": 186.6954345703125, "learning_rate": 5.934271194603216e-07, "loss": 17.9688, "step": 19402 }, { "epoch": 1.288636514577937, "grad_norm": 233.74119567871094, "learning_rate": 5.933288603706447e-07, "loss": 12.5781, "step": 19403 }, { "epoch": 1.288702928870293, "grad_norm": 203.28875732421875, "learning_rate": 5.932306059851055e-07, "loss": 12.3438, "step": 19404 }, { "epoch": 1.2887693431626486, "grad_norm": 122.88681030273438, "learning_rate": 5.9313235630484e-07, "loss": 13.5469, "step": 19405 }, { "epoch": 1.2888357574550042, "grad_norm": 177.51397705078125, "learning_rate": 5.93034111330985e-07, "loss": 13.5938, "step": 19406 }, { "epoch": 1.28890217174736, "grad_norm": 255.05979919433594, "learning_rate": 5.929358710646761e-07, "loss": 15.125, "step": 19407 }, { "epoch": 1.2889685860397158, "grad_norm": 880.55908203125, "learning_rate": 5.928376355070512e-07, "loss": 13.9375, "step": 19408 }, { "epoch": 1.2890350003320714, "grad_norm": 93.79191589355469, "learning_rate": 5.92739404659245e-07, "loss": 15.0625, "step": 19409 }, { "epoch": 1.2891014146244273, "grad_norm": 241.3519744873047, "learning_rate": 5.926411785223948e-07, "loss": 21.625, "step": 19410 }, { "epoch": 1.289167828916783, "grad_norm": 151.87086486816406, "learning_rate": 5.925429570976366e-07, "loss": 10.7656, "step": 19411 }, { "epoch": 1.2892342432091386, "grad_norm": 294.93914794921875, "learning_rate": 5.924447403861062e-07, "loss": 16.1562, "step": 19412 }, { "epoch": 1.2893006575014943, "grad_norm": 149.6160125732422, "learning_rate": 5.923465283889406e-07, "loss": 15.2969, "step": 19413 }, { "epoch": 1.28936707179385, "grad_norm": 596.5942993164062, "learning_rate": 5.922483211072749e-07, "loss": 20.1641, "step": 19414 }, { "epoch": 1.2894334860862058, "grad_norm": 220.00692749023438, "learning_rate": 5.921501185422457e-07, "loss": 16.625, "step": 19415 }, { "epoch": 1.2894999003785614, "grad_norm": 120.44850158691406, "learning_rate": 5.920519206949884e-07, "loss": 12.5391, "step": 19416 }, { "epoch": 1.289566314670917, "grad_norm": 267.6041259765625, "learning_rate": 5.919537275666397e-07, "loss": 18.0938, "step": 19417 }, { "epoch": 1.289632728963273, "grad_norm": 185.19619750976562, "learning_rate": 5.918555391583347e-07, "loss": 20.0781, "step": 19418 }, { "epoch": 1.2896991432556286, "grad_norm": 213.2294464111328, "learning_rate": 5.917573554712096e-07, "loss": 16.7188, "step": 19419 }, { "epoch": 1.2897655575479843, "grad_norm": 159.74526977539062, "learning_rate": 5.916591765063998e-07, "loss": 19.3594, "step": 19420 }, { "epoch": 1.2898319718403402, "grad_norm": 463.4560241699219, "learning_rate": 5.915610022650414e-07, "loss": 19.5312, "step": 19421 }, { "epoch": 1.2898983861326958, "grad_norm": 155.46218872070312, "learning_rate": 5.914628327482698e-07, "loss": 13.1094, "step": 19422 }, { "epoch": 1.2899648004250515, "grad_norm": 272.7480773925781, "learning_rate": 5.913646679572206e-07, "loss": 11.8594, "step": 19423 }, { "epoch": 1.2900312147174071, "grad_norm": 126.48619842529297, "learning_rate": 5.91266507893029e-07, "loss": 14.6562, "step": 19424 }, { "epoch": 1.2900976290097628, "grad_norm": 96.9917984008789, "learning_rate": 5.911683525568312e-07, "loss": 13.1875, "step": 19425 }, { "epoch": 1.2901640433021186, "grad_norm": 485.83599853515625, "learning_rate": 5.910702019497618e-07, "loss": 21.375, "step": 19426 }, { "epoch": 1.2902304575944743, "grad_norm": 209.54910278320312, "learning_rate": 5.909720560729567e-07, "loss": 26.1562, "step": 19427 }, { "epoch": 1.29029687188683, "grad_norm": 186.53843688964844, "learning_rate": 5.908739149275508e-07, "loss": 21.3438, "step": 19428 }, { "epoch": 1.2903632861791858, "grad_norm": 203.20860290527344, "learning_rate": 5.9077577851468e-07, "loss": 12.8281, "step": 19429 }, { "epoch": 1.2904297004715415, "grad_norm": 124.77392578125, "learning_rate": 5.906776468354784e-07, "loss": 13.7344, "step": 19430 }, { "epoch": 1.2904961147638971, "grad_norm": 195.97935485839844, "learning_rate": 5.905795198910821e-07, "loss": 16.1562, "step": 19431 }, { "epoch": 1.290562529056253, "grad_norm": 266.40618896484375, "learning_rate": 5.904813976826257e-07, "loss": 14.8906, "step": 19432 }, { "epoch": 1.2906289433486087, "grad_norm": 1007.6708374023438, "learning_rate": 5.903832802112446e-07, "loss": 20.5312, "step": 19433 }, { "epoch": 1.2906953576409643, "grad_norm": 234.58116149902344, "learning_rate": 5.902851674780731e-07, "loss": 21.7344, "step": 19434 }, { "epoch": 1.29076177193332, "grad_norm": 257.6639404296875, "learning_rate": 5.901870594842469e-07, "loss": 16.5156, "step": 19435 }, { "epoch": 1.2908281862256756, "grad_norm": 207.1629638671875, "learning_rate": 5.900889562309002e-07, "loss": 15.8906, "step": 19436 }, { "epoch": 1.2908946005180315, "grad_norm": 192.11972045898438, "learning_rate": 5.899908577191684e-07, "loss": 23.5, "step": 19437 }, { "epoch": 1.2909610148103872, "grad_norm": 338.2573547363281, "learning_rate": 5.898927639501855e-07, "loss": 12.0078, "step": 19438 }, { "epoch": 1.2910274291027428, "grad_norm": 227.40310668945312, "learning_rate": 5.89794674925087e-07, "loss": 16.7188, "step": 19439 }, { "epoch": 1.2910938433950987, "grad_norm": 125.21073913574219, "learning_rate": 5.89696590645007e-07, "loss": 17.0625, "step": 19440 }, { "epoch": 1.2911602576874543, "grad_norm": 732.6084594726562, "learning_rate": 5.895985111110804e-07, "loss": 30.5469, "step": 19441 }, { "epoch": 1.29122667197981, "grad_norm": 326.3605651855469, "learning_rate": 5.895004363244413e-07, "loss": 19.0312, "step": 19442 }, { "epoch": 1.2912930862721659, "grad_norm": 232.44091796875, "learning_rate": 5.894023662862246e-07, "loss": 17.5, "step": 19443 }, { "epoch": 1.2913595005645215, "grad_norm": 314.0865478515625, "learning_rate": 5.893043009975645e-07, "loss": 22.8438, "step": 19444 }, { "epoch": 1.2914259148568772, "grad_norm": 157.22337341308594, "learning_rate": 5.892062404595956e-07, "loss": 13.5156, "step": 19445 }, { "epoch": 1.2914923291492328, "grad_norm": 228.08538818359375, "learning_rate": 5.891081846734518e-07, "loss": 15.7344, "step": 19446 }, { "epoch": 1.2915587434415885, "grad_norm": 189.97706604003906, "learning_rate": 5.890101336402677e-07, "loss": 15.6875, "step": 19447 }, { "epoch": 1.2916251577339444, "grad_norm": 106.145263671875, "learning_rate": 5.889120873611775e-07, "loss": 14.0938, "step": 19448 }, { "epoch": 1.2916915720263, "grad_norm": 194.81454467773438, "learning_rate": 5.888140458373148e-07, "loss": 19.1406, "step": 19449 }, { "epoch": 1.2917579863186557, "grad_norm": 155.02655029296875, "learning_rate": 5.887160090698149e-07, "loss": 15.0938, "step": 19450 }, { "epoch": 1.2918244006110116, "grad_norm": 309.816162109375, "learning_rate": 5.886179770598103e-07, "loss": 14.9453, "step": 19451 }, { "epoch": 1.2918908149033672, "grad_norm": 257.343994140625, "learning_rate": 5.885199498084361e-07, "loss": 19.1875, "step": 19452 }, { "epoch": 1.2919572291957229, "grad_norm": 146.0023193359375, "learning_rate": 5.884219273168258e-07, "loss": 17.4531, "step": 19453 }, { "epoch": 1.2920236434880787, "grad_norm": 166.8192901611328, "learning_rate": 5.883239095861133e-07, "loss": 15.7344, "step": 19454 }, { "epoch": 1.2920900577804344, "grad_norm": 225.43592834472656, "learning_rate": 5.882258966174321e-07, "loss": 19.2656, "step": 19455 }, { "epoch": 1.29215647207279, "grad_norm": 261.01751708984375, "learning_rate": 5.881278884119168e-07, "loss": 12.9297, "step": 19456 }, { "epoch": 1.2922228863651457, "grad_norm": 1213.270263671875, "learning_rate": 5.880298849707003e-07, "loss": 16.7344, "step": 19457 }, { "epoch": 1.2922893006575014, "grad_norm": 195.85562133789062, "learning_rate": 5.879318862949167e-07, "loss": 23.1875, "step": 19458 }, { "epoch": 1.2923557149498572, "grad_norm": 194.46067810058594, "learning_rate": 5.878338923856991e-07, "loss": 18.0781, "step": 19459 }, { "epoch": 1.292422129242213, "grad_norm": 102.7956314086914, "learning_rate": 5.877359032441817e-07, "loss": 13.2344, "step": 19460 }, { "epoch": 1.2924885435345685, "grad_norm": 363.8287353515625, "learning_rate": 5.876379188714975e-07, "loss": 21.7969, "step": 19461 }, { "epoch": 1.2925549578269244, "grad_norm": 237.402099609375, "learning_rate": 5.875399392687803e-07, "loss": 18.4062, "step": 19462 }, { "epoch": 1.29262137211928, "grad_norm": 404.64849853515625, "learning_rate": 5.874419644371629e-07, "loss": 15.3281, "step": 19463 }, { "epoch": 1.2926877864116357, "grad_norm": 187.03970336914062, "learning_rate": 5.873439943777791e-07, "loss": 22.7812, "step": 19464 }, { "epoch": 1.2927542007039916, "grad_norm": 232.7110595703125, "learning_rate": 5.87246029091762e-07, "loss": 18.5938, "step": 19465 }, { "epoch": 1.2928206149963473, "grad_norm": 564.4719848632812, "learning_rate": 5.87148068580245e-07, "loss": 15.4219, "step": 19466 }, { "epoch": 1.292887029288703, "grad_norm": 284.6599426269531, "learning_rate": 5.870501128443608e-07, "loss": 16.375, "step": 19467 }, { "epoch": 1.2929534435810586, "grad_norm": 189.12307739257812, "learning_rate": 5.869521618852431e-07, "loss": 17.0625, "step": 19468 }, { "epoch": 1.2930198578734142, "grad_norm": 285.73736572265625, "learning_rate": 5.868542157040243e-07, "loss": 20.1562, "step": 19469 }, { "epoch": 1.29308627216577, "grad_norm": 172.3994140625, "learning_rate": 5.867562743018379e-07, "loss": 15.7031, "step": 19470 }, { "epoch": 1.2931526864581258, "grad_norm": 370.4728698730469, "learning_rate": 5.866583376798165e-07, "loss": 20.4375, "step": 19471 }, { "epoch": 1.2932191007504814, "grad_norm": 272.2843322753906, "learning_rate": 5.865604058390934e-07, "loss": 17.1562, "step": 19472 }, { "epoch": 1.2932855150428373, "grad_norm": 306.6036682128906, "learning_rate": 5.864624787808006e-07, "loss": 13.5781, "step": 19473 }, { "epoch": 1.293351929335193, "grad_norm": 179.16477966308594, "learning_rate": 5.863645565060719e-07, "loss": 17.3281, "step": 19474 }, { "epoch": 1.2934183436275486, "grad_norm": 170.28567504882812, "learning_rate": 5.862666390160391e-07, "loss": 23.5312, "step": 19475 }, { "epoch": 1.2934847579199045, "grad_norm": 110.77606964111328, "learning_rate": 5.861687263118357e-07, "loss": 14.5156, "step": 19476 }, { "epoch": 1.2935511722122601, "grad_norm": 377.9222717285156, "learning_rate": 5.860708183945933e-07, "loss": 27.7188, "step": 19477 }, { "epoch": 1.2936175865046158, "grad_norm": 127.1004409790039, "learning_rate": 5.859729152654454e-07, "loss": 12.875, "step": 19478 }, { "epoch": 1.2936840007969714, "grad_norm": 114.35003662109375, "learning_rate": 5.858750169255238e-07, "loss": 15.1875, "step": 19479 }, { "epoch": 1.293750415089327, "grad_norm": 109.1383285522461, "learning_rate": 5.857771233759615e-07, "loss": 14.2812, "step": 19480 }, { "epoch": 1.293816829381683, "grad_norm": 739.3878173828125, "learning_rate": 5.856792346178901e-07, "loss": 17.7969, "step": 19481 }, { "epoch": 1.2938832436740386, "grad_norm": 218.9315643310547, "learning_rate": 5.85581350652443e-07, "loss": 16.4219, "step": 19482 }, { "epoch": 1.2939496579663943, "grad_norm": 218.465087890625, "learning_rate": 5.854834714807515e-07, "loss": 16.9688, "step": 19483 }, { "epoch": 1.2940160722587501, "grad_norm": 309.2102966308594, "learning_rate": 5.85385597103948e-07, "loss": 16.7188, "step": 19484 }, { "epoch": 1.2940824865511058, "grad_norm": 387.3876953125, "learning_rate": 5.852877275231651e-07, "loss": 17.25, "step": 19485 }, { "epoch": 1.2941489008434615, "grad_norm": 486.5370178222656, "learning_rate": 5.851898627395346e-07, "loss": 22.25, "step": 19486 }, { "epoch": 1.2942153151358173, "grad_norm": 183.20492553710938, "learning_rate": 5.850920027541888e-07, "loss": 16.3438, "step": 19487 }, { "epoch": 1.294281729428173, "grad_norm": 176.9840545654297, "learning_rate": 5.84994147568259e-07, "loss": 15.1875, "step": 19488 }, { "epoch": 1.2943481437205286, "grad_norm": 248.37281799316406, "learning_rate": 5.84896297182878e-07, "loss": 19.875, "step": 19489 }, { "epoch": 1.2944145580128843, "grad_norm": 166.521728515625, "learning_rate": 5.847984515991771e-07, "loss": 12.3047, "step": 19490 }, { "epoch": 1.29448097230524, "grad_norm": 291.4526062011719, "learning_rate": 5.847006108182884e-07, "loss": 14.4219, "step": 19491 }, { "epoch": 1.2945473865975958, "grad_norm": 138.80062866210938, "learning_rate": 5.846027748413433e-07, "loss": 17.2188, "step": 19492 }, { "epoch": 1.2946138008899515, "grad_norm": 509.7909240722656, "learning_rate": 5.845049436694743e-07, "loss": 19.625, "step": 19493 }, { "epoch": 1.2946802151823071, "grad_norm": 193.0540771484375, "learning_rate": 5.844071173038118e-07, "loss": 13.2812, "step": 19494 }, { "epoch": 1.294746629474663, "grad_norm": 296.0894470214844, "learning_rate": 5.843092957454887e-07, "loss": 18.2031, "step": 19495 }, { "epoch": 1.2948130437670187, "grad_norm": 169.49378967285156, "learning_rate": 5.842114789956359e-07, "loss": 17.1406, "step": 19496 }, { "epoch": 1.2948794580593743, "grad_norm": 358.3481750488281, "learning_rate": 5.841136670553848e-07, "loss": 20.75, "step": 19497 }, { "epoch": 1.2949458723517302, "grad_norm": 187.50784301757812, "learning_rate": 5.840158599258668e-07, "loss": 17.5312, "step": 19498 }, { "epoch": 1.2950122866440859, "grad_norm": 146.86434936523438, "learning_rate": 5.839180576082138e-07, "loss": 17.1719, "step": 19499 }, { "epoch": 1.2950787009364415, "grad_norm": 138.3113250732422, "learning_rate": 5.838202601035565e-07, "loss": 16.7969, "step": 19500 }, { "epoch": 1.2951451152287972, "grad_norm": 634.7222290039062, "learning_rate": 5.837224674130268e-07, "loss": 21.5312, "step": 19501 }, { "epoch": 1.2952115295211528, "grad_norm": 215.21429443359375, "learning_rate": 5.836246795377553e-07, "loss": 15.7656, "step": 19502 }, { "epoch": 1.2952779438135087, "grad_norm": 334.82110595703125, "learning_rate": 5.835268964788734e-07, "loss": 18.9062, "step": 19503 }, { "epoch": 1.2953443581058643, "grad_norm": 116.04360961914062, "learning_rate": 5.834291182375121e-07, "loss": 13.6719, "step": 19504 }, { "epoch": 1.29541077239822, "grad_norm": 168.01976013183594, "learning_rate": 5.833313448148028e-07, "loss": 12.4688, "step": 19505 }, { "epoch": 1.2954771866905759, "grad_norm": 246.19276428222656, "learning_rate": 5.832335762118761e-07, "loss": 15.4688, "step": 19506 }, { "epoch": 1.2955436009829315, "grad_norm": 144.2580108642578, "learning_rate": 5.831358124298636e-07, "loss": 21.8594, "step": 19507 }, { "epoch": 1.2956100152752872, "grad_norm": 198.68264770507812, "learning_rate": 5.830380534698946e-07, "loss": 16.125, "step": 19508 }, { "epoch": 1.295676429567643, "grad_norm": 154.01873779296875, "learning_rate": 5.829402993331019e-07, "loss": 15.9375, "step": 19509 }, { "epoch": 1.2957428438599987, "grad_norm": 203.09449768066406, "learning_rate": 5.828425500206149e-07, "loss": 22.8594, "step": 19510 }, { "epoch": 1.2958092581523544, "grad_norm": 233.84938049316406, "learning_rate": 5.827448055335647e-07, "loss": 18.5312, "step": 19511 }, { "epoch": 1.29587567244471, "grad_norm": 172.6304931640625, "learning_rate": 5.826470658730821e-07, "loss": 19.4219, "step": 19512 }, { "epoch": 1.2959420867370657, "grad_norm": 153.02667236328125, "learning_rate": 5.82549331040298e-07, "loss": 14.625, "step": 19513 }, { "epoch": 1.2960085010294216, "grad_norm": 171.60057067871094, "learning_rate": 5.824516010363416e-07, "loss": 15.2188, "step": 19514 }, { "epoch": 1.2960749153217772, "grad_norm": 191.530029296875, "learning_rate": 5.823538758623451e-07, "loss": 14.2578, "step": 19515 }, { "epoch": 1.2961413296141329, "grad_norm": 319.1054382324219, "learning_rate": 5.822561555194379e-07, "loss": 12.7734, "step": 19516 }, { "epoch": 1.2962077439064887, "grad_norm": 91.92549133300781, "learning_rate": 5.821584400087504e-07, "loss": 15.3125, "step": 19517 }, { "epoch": 1.2962741581988444, "grad_norm": 250.53457641601562, "learning_rate": 5.820607293314132e-07, "loss": 16.3906, "step": 19518 }, { "epoch": 1.2963405724912, "grad_norm": 205.1513671875, "learning_rate": 5.819630234885564e-07, "loss": 17.125, "step": 19519 }, { "epoch": 1.296406986783556, "grad_norm": 171.0116729736328, "learning_rate": 5.818653224813105e-07, "loss": 13.9375, "step": 19520 }, { "epoch": 1.2964734010759116, "grad_norm": 137.0102996826172, "learning_rate": 5.817676263108054e-07, "loss": 18.5938, "step": 19521 }, { "epoch": 1.2965398153682672, "grad_norm": 318.1681213378906, "learning_rate": 5.816699349781711e-07, "loss": 17.75, "step": 19522 }, { "epoch": 1.2966062296606229, "grad_norm": 347.8189392089844, "learning_rate": 5.815722484845378e-07, "loss": 17.5469, "step": 19523 }, { "epoch": 1.2966726439529785, "grad_norm": 270.2369384765625, "learning_rate": 5.814745668310359e-07, "loss": 20.7969, "step": 19524 }, { "epoch": 1.2967390582453344, "grad_norm": 739.8585815429688, "learning_rate": 5.81376890018794e-07, "loss": 20.1719, "step": 19525 }, { "epoch": 1.29680547253769, "grad_norm": 168.60182189941406, "learning_rate": 5.812792180489436e-07, "loss": 14.8125, "step": 19526 }, { "epoch": 1.2968718868300457, "grad_norm": 249.2595977783203, "learning_rate": 5.811815509226135e-07, "loss": 12.6719, "step": 19527 }, { "epoch": 1.2969383011224016, "grad_norm": 166.1477508544922, "learning_rate": 5.810838886409337e-07, "loss": 12.875, "step": 19528 }, { "epoch": 1.2970047154147573, "grad_norm": 226.42596435546875, "learning_rate": 5.80986231205034e-07, "loss": 20.2812, "step": 19529 }, { "epoch": 1.297071129707113, "grad_norm": 450.52960205078125, "learning_rate": 5.808885786160438e-07, "loss": 20.5156, "step": 19530 }, { "epoch": 1.2971375439994688, "grad_norm": 219.923583984375, "learning_rate": 5.807909308750929e-07, "loss": 22.5312, "step": 19531 }, { "epoch": 1.2972039582918244, "grad_norm": 181.2451934814453, "learning_rate": 5.80693287983311e-07, "loss": 17.2188, "step": 19532 }, { "epoch": 1.29727037258418, "grad_norm": 347.2974548339844, "learning_rate": 5.805956499418269e-07, "loss": 17.9297, "step": 19533 }, { "epoch": 1.2973367868765358, "grad_norm": 144.8404083251953, "learning_rate": 5.804980167517712e-07, "loss": 15.3438, "step": 19534 }, { "epoch": 1.2974032011688914, "grad_norm": 494.3333435058594, "learning_rate": 5.80400388414272e-07, "loss": 20.5156, "step": 19535 }, { "epoch": 1.2974696154612473, "grad_norm": 179.88796997070312, "learning_rate": 5.803027649304592e-07, "loss": 17.25, "step": 19536 }, { "epoch": 1.297536029753603, "grad_norm": 322.39459228515625, "learning_rate": 5.80205146301462e-07, "loss": 16.5625, "step": 19537 }, { "epoch": 1.2976024440459586, "grad_norm": 268.61810302734375, "learning_rate": 5.801075325284099e-07, "loss": 16.8438, "step": 19538 }, { "epoch": 1.2976688583383145, "grad_norm": 132.1903533935547, "learning_rate": 5.800099236124311e-07, "loss": 15.3906, "step": 19539 }, { "epoch": 1.2977352726306701, "grad_norm": 235.143310546875, "learning_rate": 5.799123195546562e-07, "loss": 14.7656, "step": 19540 }, { "epoch": 1.2978016869230258, "grad_norm": 805.7322998046875, "learning_rate": 5.798147203562127e-07, "loss": 22.2969, "step": 19541 }, { "epoch": 1.2978681012153817, "grad_norm": 125.36444854736328, "learning_rate": 5.797171260182304e-07, "loss": 14.8438, "step": 19542 }, { "epoch": 1.2979345155077373, "grad_norm": 142.76425170898438, "learning_rate": 5.796195365418379e-07, "loss": 17.2109, "step": 19543 }, { "epoch": 1.298000929800093, "grad_norm": 116.88282012939453, "learning_rate": 5.795219519281644e-07, "loss": 14.7812, "step": 19544 }, { "epoch": 1.2980673440924486, "grad_norm": 192.15309143066406, "learning_rate": 5.794243721783384e-07, "loss": 16.3906, "step": 19545 }, { "epoch": 1.2981337583848045, "grad_norm": 179.85671997070312, "learning_rate": 5.793267972934891e-07, "loss": 16.8906, "step": 19546 }, { "epoch": 1.2982001726771601, "grad_norm": 163.0644073486328, "learning_rate": 5.792292272747441e-07, "loss": 15.3594, "step": 19547 }, { "epoch": 1.2982665869695158, "grad_norm": 326.9697265625, "learning_rate": 5.791316621232335e-07, "loss": 22.75, "step": 19548 }, { "epoch": 1.2983330012618715, "grad_norm": 107.6484146118164, "learning_rate": 5.790341018400849e-07, "loss": 11.1875, "step": 19549 }, { "epoch": 1.2983994155542273, "grad_norm": 371.9698486328125, "learning_rate": 5.789365464264269e-07, "loss": 13.9375, "step": 19550 }, { "epoch": 1.298465829846583, "grad_norm": 447.8562927246094, "learning_rate": 5.788389958833881e-07, "loss": 24.1562, "step": 19551 }, { "epoch": 1.2985322441389386, "grad_norm": 691.9668579101562, "learning_rate": 5.787414502120971e-07, "loss": 20.1094, "step": 19552 }, { "epoch": 1.2985986584312945, "grad_norm": 115.93810272216797, "learning_rate": 5.786439094136819e-07, "loss": 12.3125, "step": 19553 }, { "epoch": 1.2986650727236502, "grad_norm": 205.65283203125, "learning_rate": 5.785463734892715e-07, "loss": 18.0781, "step": 19554 }, { "epoch": 1.2987314870160058, "grad_norm": 95.1020736694336, "learning_rate": 5.784488424399928e-07, "loss": 10.6406, "step": 19555 }, { "epoch": 1.2987979013083615, "grad_norm": 201.7133331298828, "learning_rate": 5.783513162669753e-07, "loss": 19.0781, "step": 19556 }, { "epoch": 1.2988643156007174, "grad_norm": 289.653076171875, "learning_rate": 5.782537949713469e-07, "loss": 18.1406, "step": 19557 }, { "epoch": 1.298930729893073, "grad_norm": 168.9608917236328, "learning_rate": 5.781562785542351e-07, "loss": 14.7031, "step": 19558 }, { "epoch": 1.2989971441854287, "grad_norm": 1017.4030151367188, "learning_rate": 5.780587670167682e-07, "loss": 13.3906, "step": 19559 }, { "epoch": 1.2990635584777843, "grad_norm": 330.2094421386719, "learning_rate": 5.779612603600742e-07, "loss": 21.2344, "step": 19560 }, { "epoch": 1.2991299727701402, "grad_norm": 209.70130920410156, "learning_rate": 5.778637585852811e-07, "loss": 17.4219, "step": 19561 }, { "epoch": 1.2991963870624958, "grad_norm": 397.92449951171875, "learning_rate": 5.777662616935164e-07, "loss": 16.0312, "step": 19562 }, { "epoch": 1.2992628013548515, "grad_norm": 392.91302490234375, "learning_rate": 5.776687696859085e-07, "loss": 23.4062, "step": 19563 }, { "epoch": 1.2993292156472074, "grad_norm": 229.9862518310547, "learning_rate": 5.775712825635842e-07, "loss": 18.25, "step": 19564 }, { "epoch": 1.299395629939563, "grad_norm": 240.7254638671875, "learning_rate": 5.774738003276723e-07, "loss": 21.6094, "step": 19565 }, { "epoch": 1.2994620442319187, "grad_norm": 384.88922119140625, "learning_rate": 5.773763229792994e-07, "loss": 19.7031, "step": 19566 }, { "epoch": 1.2995284585242743, "grad_norm": 191.33938598632812, "learning_rate": 5.772788505195936e-07, "loss": 14.9062, "step": 19567 }, { "epoch": 1.2995948728166302, "grad_norm": 229.99073791503906, "learning_rate": 5.771813829496822e-07, "loss": 14.6562, "step": 19568 }, { "epoch": 1.2996612871089859, "grad_norm": 222.0997772216797, "learning_rate": 5.770839202706928e-07, "loss": 20.25, "step": 19569 }, { "epoch": 1.2997277014013415, "grad_norm": 477.0062561035156, "learning_rate": 5.769864624837529e-07, "loss": 22.8125, "step": 19570 }, { "epoch": 1.2997941156936972, "grad_norm": 360.1139221191406, "learning_rate": 5.768890095899899e-07, "loss": 11.625, "step": 19571 }, { "epoch": 1.299860529986053, "grad_norm": 238.85643005371094, "learning_rate": 5.767915615905301e-07, "loss": 17.125, "step": 19572 }, { "epoch": 1.2999269442784087, "grad_norm": 1148.2218017578125, "learning_rate": 5.766941184865023e-07, "loss": 23.875, "step": 19573 }, { "epoch": 1.2999933585707644, "grad_norm": 312.3917541503906, "learning_rate": 5.765966802790325e-07, "loss": 25.0625, "step": 19574 }, { "epoch": 1.3000597728631202, "grad_norm": 219.51251220703125, "learning_rate": 5.76499246969248e-07, "loss": 17.1719, "step": 19575 }, { "epoch": 1.300126187155476, "grad_norm": 189.2578125, "learning_rate": 5.764018185582762e-07, "loss": 11.8906, "step": 19576 }, { "epoch": 1.3001926014478316, "grad_norm": 255.04066467285156, "learning_rate": 5.763043950472442e-07, "loss": 17.4062, "step": 19577 }, { "epoch": 1.3002590157401872, "grad_norm": 169.03919982910156, "learning_rate": 5.762069764372778e-07, "loss": 12.4062, "step": 19578 }, { "epoch": 1.300325430032543, "grad_norm": 121.87358856201172, "learning_rate": 5.761095627295056e-07, "loss": 15.3594, "step": 19579 }, { "epoch": 1.3003918443248987, "grad_norm": 166.81712341308594, "learning_rate": 5.760121539250531e-07, "loss": 14.4062, "step": 19580 }, { "epoch": 1.3004582586172544, "grad_norm": 265.40625, "learning_rate": 5.759147500250474e-07, "loss": 12.5781, "step": 19581 }, { "epoch": 1.30052467290961, "grad_norm": 225.36810302734375, "learning_rate": 5.758173510306155e-07, "loss": 16.8906, "step": 19582 }, { "epoch": 1.300591087201966, "grad_norm": 154.2041778564453, "learning_rate": 5.757199569428839e-07, "loss": 17.6562, "step": 19583 }, { "epoch": 1.3006575014943216, "grad_norm": 372.28643798828125, "learning_rate": 5.756225677629792e-07, "loss": 20.1875, "step": 19584 }, { "epoch": 1.3007239157866772, "grad_norm": 136.77618408203125, "learning_rate": 5.755251834920281e-07, "loss": 15.0156, "step": 19585 }, { "epoch": 1.300790330079033, "grad_norm": 141.93780517578125, "learning_rate": 5.754278041311564e-07, "loss": 19.4375, "step": 19586 }, { "epoch": 1.3008567443713888, "grad_norm": 107.714111328125, "learning_rate": 5.753304296814916e-07, "loss": 14.2188, "step": 19587 }, { "epoch": 1.3009231586637444, "grad_norm": 122.11651611328125, "learning_rate": 5.752330601441593e-07, "loss": 26.2188, "step": 19588 }, { "epoch": 1.3009895729561, "grad_norm": 158.19593811035156, "learning_rate": 5.75135695520286e-07, "loss": 17.4844, "step": 19589 }, { "epoch": 1.301055987248456, "grad_norm": 200.91139221191406, "learning_rate": 5.750383358109978e-07, "loss": 11.3281, "step": 19590 }, { "epoch": 1.3011224015408116, "grad_norm": 311.75323486328125, "learning_rate": 5.749409810174212e-07, "loss": 15.7031, "step": 19591 }, { "epoch": 1.3011888158331673, "grad_norm": 112.1557388305664, "learning_rate": 5.748436311406824e-07, "loss": 15.4844, "step": 19592 }, { "epoch": 1.301255230125523, "grad_norm": 266.4021301269531, "learning_rate": 5.747462861819071e-07, "loss": 16.5156, "step": 19593 }, { "epoch": 1.3013216444178788, "grad_norm": 186.21090698242188, "learning_rate": 5.746489461422215e-07, "loss": 19.3438, "step": 19594 }, { "epoch": 1.3013880587102344, "grad_norm": 375.4872741699219, "learning_rate": 5.745516110227517e-07, "loss": 18.6094, "step": 19595 }, { "epoch": 1.30145447300259, "grad_norm": 171.65196228027344, "learning_rate": 5.74454280824624e-07, "loss": 19.9688, "step": 19596 }, { "epoch": 1.301520887294946, "grad_norm": 136.1787567138672, "learning_rate": 5.743569555489629e-07, "loss": 18.5625, "step": 19597 }, { "epoch": 1.3015873015873016, "grad_norm": 113.03465270996094, "learning_rate": 5.74259635196896e-07, "loss": 13.6875, "step": 19598 }, { "epoch": 1.3016537158796573, "grad_norm": 146.64158630371094, "learning_rate": 5.741623197695478e-07, "loss": 19.2188, "step": 19599 }, { "epoch": 1.301720130172013, "grad_norm": 106.99208068847656, "learning_rate": 5.740650092680442e-07, "loss": 13.7031, "step": 19600 }, { "epoch": 1.3017865444643688, "grad_norm": 180.36129760742188, "learning_rate": 5.739677036935108e-07, "loss": 13.4062, "step": 19601 }, { "epoch": 1.3018529587567245, "grad_norm": 255.46766662597656, "learning_rate": 5.73870403047074e-07, "loss": 14.9688, "step": 19602 }, { "epoch": 1.3019193730490801, "grad_norm": 93.67884826660156, "learning_rate": 5.73773107329858e-07, "loss": 14.8125, "step": 19603 }, { "epoch": 1.3019857873414358, "grad_norm": 219.42544555664062, "learning_rate": 5.736758165429894e-07, "loss": 14.75, "step": 19604 }, { "epoch": 1.3020522016337917, "grad_norm": 188.0276336669922, "learning_rate": 5.735785306875929e-07, "loss": 18.2969, "step": 19605 }, { "epoch": 1.3021186159261473, "grad_norm": 344.68731689453125, "learning_rate": 5.734812497647938e-07, "loss": 18.0312, "step": 19606 }, { "epoch": 1.302185030218503, "grad_norm": 506.66845703125, "learning_rate": 5.73383973775718e-07, "loss": 14.1641, "step": 19607 }, { "epoch": 1.3022514445108588, "grad_norm": 222.58079528808594, "learning_rate": 5.732867027214903e-07, "loss": 16.7891, "step": 19608 }, { "epoch": 1.3023178588032145, "grad_norm": 354.8226013183594, "learning_rate": 5.731894366032358e-07, "loss": 19.5625, "step": 19609 }, { "epoch": 1.3023842730955701, "grad_norm": 151.38916015625, "learning_rate": 5.730921754220803e-07, "loss": 13.1094, "step": 19610 }, { "epoch": 1.3024506873879258, "grad_norm": 322.0527648925781, "learning_rate": 5.729949191791475e-07, "loss": 15.4844, "step": 19611 }, { "epoch": 1.3025171016802817, "grad_norm": 1192.0787353515625, "learning_rate": 5.728976678755641e-07, "loss": 13.6328, "step": 19612 }, { "epoch": 1.3025835159726373, "grad_norm": 276.8609924316406, "learning_rate": 5.728004215124538e-07, "loss": 17.0938, "step": 19613 }, { "epoch": 1.302649930264993, "grad_norm": 739.6213989257812, "learning_rate": 5.727031800909418e-07, "loss": 15.5781, "step": 19614 }, { "epoch": 1.3027163445573486, "grad_norm": 173.63717651367188, "learning_rate": 5.726059436121532e-07, "loss": 13.2812, "step": 19615 }, { "epoch": 1.3027827588497045, "grad_norm": 240.3579559326172, "learning_rate": 5.725087120772125e-07, "loss": 17.0469, "step": 19616 }, { "epoch": 1.3028491731420602, "grad_norm": 116.1099853515625, "learning_rate": 5.724114854872444e-07, "loss": 15.25, "step": 19617 }, { "epoch": 1.3029155874344158, "grad_norm": 642.426513671875, "learning_rate": 5.723142638433743e-07, "loss": 23.5156, "step": 19618 }, { "epoch": 1.3029820017267717, "grad_norm": 173.7180633544922, "learning_rate": 5.722170471467253e-07, "loss": 15.2188, "step": 19619 }, { "epoch": 1.3030484160191274, "grad_norm": 74.1297607421875, "learning_rate": 5.721198353984236e-07, "loss": 11.75, "step": 19620 }, { "epoch": 1.303114830311483, "grad_norm": 170.5751190185547, "learning_rate": 5.720226285995924e-07, "loss": 19.0469, "step": 19621 }, { "epoch": 1.3031812446038389, "grad_norm": 170.4786376953125, "learning_rate": 5.719254267513566e-07, "loss": 15.75, "step": 19622 }, { "epoch": 1.3032476588961945, "grad_norm": 161.26182556152344, "learning_rate": 5.718282298548409e-07, "loss": 13.9141, "step": 19623 }, { "epoch": 1.3033140731885502, "grad_norm": 217.29367065429688, "learning_rate": 5.717310379111696e-07, "loss": 16.8047, "step": 19624 }, { "epoch": 1.3033804874809058, "grad_norm": 114.3887710571289, "learning_rate": 5.71633850921466e-07, "loss": 15.5156, "step": 19625 }, { "epoch": 1.3034469017732615, "grad_norm": 317.9263000488281, "learning_rate": 5.715366688868556e-07, "loss": 14.6094, "step": 19626 }, { "epoch": 1.3035133160656174, "grad_norm": 162.66311645507812, "learning_rate": 5.714394918084618e-07, "loss": 19.875, "step": 19627 }, { "epoch": 1.303579730357973, "grad_norm": 158.6868896484375, "learning_rate": 5.713423196874086e-07, "loss": 18.125, "step": 19628 }, { "epoch": 1.3036461446503287, "grad_norm": 339.193603515625, "learning_rate": 5.712451525248203e-07, "loss": 14.1406, "step": 19629 }, { "epoch": 1.3037125589426846, "grad_norm": 220.9220428466797, "learning_rate": 5.711479903218209e-07, "loss": 16.5312, "step": 19630 }, { "epoch": 1.3037789732350402, "grad_norm": 309.5283203125, "learning_rate": 5.710508330795342e-07, "loss": 13.6562, "step": 19631 }, { "epoch": 1.3038453875273959, "grad_norm": 237.31178283691406, "learning_rate": 5.709536807990842e-07, "loss": 19.0781, "step": 19632 }, { "epoch": 1.3039118018197517, "grad_norm": 266.01031494140625, "learning_rate": 5.708565334815947e-07, "loss": 20.0, "step": 19633 }, { "epoch": 1.3039782161121074, "grad_norm": 159.30775451660156, "learning_rate": 5.707593911281892e-07, "loss": 17.0156, "step": 19634 }, { "epoch": 1.304044630404463, "grad_norm": 105.58252716064453, "learning_rate": 5.70662253739992e-07, "loss": 17.8594, "step": 19635 }, { "epoch": 1.3041110446968187, "grad_norm": 134.55462646484375, "learning_rate": 5.705651213181256e-07, "loss": 14.4062, "step": 19636 }, { "epoch": 1.3041774589891744, "grad_norm": 330.464599609375, "learning_rate": 5.704679938637148e-07, "loss": 25.2969, "step": 19637 }, { "epoch": 1.3042438732815302, "grad_norm": 175.63917541503906, "learning_rate": 5.703708713778823e-07, "loss": 14.4531, "step": 19638 }, { "epoch": 1.304310287573886, "grad_norm": 212.2919158935547, "learning_rate": 5.702737538617518e-07, "loss": 14.4062, "step": 19639 }, { "epoch": 1.3043767018662416, "grad_norm": 225.488525390625, "learning_rate": 5.701766413164467e-07, "loss": 20.1094, "step": 19640 }, { "epoch": 1.3044431161585974, "grad_norm": 289.0960998535156, "learning_rate": 5.700795337430908e-07, "loss": 23.0469, "step": 19641 }, { "epoch": 1.304509530450953, "grad_norm": 364.48956298828125, "learning_rate": 5.699824311428062e-07, "loss": 20.3594, "step": 19642 }, { "epoch": 1.3045759447433087, "grad_norm": 412.3988952636719, "learning_rate": 5.698853335167175e-07, "loss": 16.9062, "step": 19643 }, { "epoch": 1.3046423590356646, "grad_norm": 265.0875244140625, "learning_rate": 5.697882408659471e-07, "loss": 10.8906, "step": 19644 }, { "epoch": 1.3047087733280203, "grad_norm": 295.81976318359375, "learning_rate": 5.696911531916181e-07, "loss": 17.3125, "step": 19645 }, { "epoch": 1.304775187620376, "grad_norm": 125.78303527832031, "learning_rate": 5.695940704948536e-07, "loss": 17.5938, "step": 19646 }, { "epoch": 1.3048416019127316, "grad_norm": 316.9349670410156, "learning_rate": 5.69496992776777e-07, "loss": 17.9375, "step": 19647 }, { "epoch": 1.3049080162050872, "grad_norm": 192.10923767089844, "learning_rate": 5.693999200385107e-07, "loss": 17.0781, "step": 19648 }, { "epoch": 1.304974430497443, "grad_norm": 283.9212341308594, "learning_rate": 5.693028522811783e-07, "loss": 15.1562, "step": 19649 }, { "epoch": 1.3050408447897988, "grad_norm": 214.9370880126953, "learning_rate": 5.692057895059014e-07, "loss": 30.875, "step": 19650 }, { "epoch": 1.3051072590821544, "grad_norm": 276.7818298339844, "learning_rate": 5.691087317138043e-07, "loss": 31.0938, "step": 19651 }, { "epoch": 1.3051736733745103, "grad_norm": 388.76214599609375, "learning_rate": 5.690116789060084e-07, "loss": 25.5156, "step": 19652 }, { "epoch": 1.305240087666866, "grad_norm": 390.85699462890625, "learning_rate": 5.68914631083637e-07, "loss": 34.0781, "step": 19653 }, { "epoch": 1.3053065019592216, "grad_norm": 116.01189422607422, "learning_rate": 5.688175882478125e-07, "loss": 13.9844, "step": 19654 }, { "epoch": 1.3053729162515775, "grad_norm": 105.58573913574219, "learning_rate": 5.687205503996576e-07, "loss": 15.1719, "step": 19655 }, { "epoch": 1.3054393305439331, "grad_norm": 139.8979034423828, "learning_rate": 5.686235175402947e-07, "loss": 20.6719, "step": 19656 }, { "epoch": 1.3055057448362888, "grad_norm": 151.60873413085938, "learning_rate": 5.685264896708465e-07, "loss": 16.125, "step": 19657 }, { "epoch": 1.3055721591286444, "grad_norm": 117.97527313232422, "learning_rate": 5.684294667924344e-07, "loss": 19.9375, "step": 19658 }, { "epoch": 1.305638573421, "grad_norm": 104.90489196777344, "learning_rate": 5.68332448906182e-07, "loss": 13.0859, "step": 19659 }, { "epoch": 1.305704987713356, "grad_norm": 184.10679626464844, "learning_rate": 5.682354360132106e-07, "loss": 14.9219, "step": 19660 }, { "epoch": 1.3057714020057116, "grad_norm": 287.5103454589844, "learning_rate": 5.681384281146426e-07, "loss": 18.5312, "step": 19661 }, { "epoch": 1.3058378162980673, "grad_norm": 146.8104248046875, "learning_rate": 5.680414252116004e-07, "loss": 17.7188, "step": 19662 }, { "epoch": 1.3059042305904232, "grad_norm": 252.31788635253906, "learning_rate": 5.679444273052063e-07, "loss": 16.0938, "step": 19663 }, { "epoch": 1.3059706448827788, "grad_norm": 498.38250732421875, "learning_rate": 5.67847434396581e-07, "loss": 18.6562, "step": 19664 }, { "epoch": 1.3060370591751345, "grad_norm": 316.7770080566406, "learning_rate": 5.677504464868483e-07, "loss": 15.0469, "step": 19665 }, { "epoch": 1.3061034734674903, "grad_norm": 105.23094177246094, "learning_rate": 5.676534635771287e-07, "loss": 12.3594, "step": 19666 }, { "epoch": 1.306169887759846, "grad_norm": 171.9326934814453, "learning_rate": 5.675564856685441e-07, "loss": 17.5156, "step": 19667 }, { "epoch": 1.3062363020522016, "grad_norm": 1138.7657470703125, "learning_rate": 5.674595127622177e-07, "loss": 18.4375, "step": 19668 }, { "epoch": 1.3063027163445573, "grad_norm": 213.281005859375, "learning_rate": 5.673625448592698e-07, "loss": 11.0781, "step": 19669 }, { "epoch": 1.306369130636913, "grad_norm": 160.9496307373047, "learning_rate": 5.672655819608223e-07, "loss": 18.7188, "step": 19670 }, { "epoch": 1.3064355449292688, "grad_norm": 257.9664001464844, "learning_rate": 5.671686240679971e-07, "loss": 16.4375, "step": 19671 }, { "epoch": 1.3065019592216245, "grad_norm": 239.98951721191406, "learning_rate": 5.670716711819158e-07, "loss": 17.0938, "step": 19672 }, { "epoch": 1.3065683735139801, "grad_norm": 399.99102783203125, "learning_rate": 5.669747233036997e-07, "loss": 21.0625, "step": 19673 }, { "epoch": 1.306634787806336, "grad_norm": 268.9463806152344, "learning_rate": 5.668777804344706e-07, "loss": 19.5469, "step": 19674 }, { "epoch": 1.3067012020986917, "grad_norm": 250.02049255371094, "learning_rate": 5.66780842575349e-07, "loss": 20.0469, "step": 19675 }, { "epoch": 1.3067676163910473, "grad_norm": 276.546630859375, "learning_rate": 5.666839097274574e-07, "loss": 17.9688, "step": 19676 }, { "epoch": 1.3068340306834032, "grad_norm": 188.34457397460938, "learning_rate": 5.665869818919163e-07, "loss": 17.4219, "step": 19677 }, { "epoch": 1.3069004449757589, "grad_norm": 243.6557159423828, "learning_rate": 5.664900590698468e-07, "loss": 18.0781, "step": 19678 }, { "epoch": 1.3069668592681145, "grad_norm": 187.39198303222656, "learning_rate": 5.663931412623705e-07, "loss": 21.0938, "step": 19679 }, { "epoch": 1.3070332735604702, "grad_norm": 261.1461486816406, "learning_rate": 5.662962284706082e-07, "loss": 15.0625, "step": 19680 }, { "epoch": 1.3070996878528258, "grad_norm": 184.61183166503906, "learning_rate": 5.661993206956811e-07, "loss": 14.4062, "step": 19681 }, { "epoch": 1.3071661021451817, "grad_norm": 921.7304077148438, "learning_rate": 5.661024179387107e-07, "loss": 15.7656, "step": 19682 }, { "epoch": 1.3072325164375374, "grad_norm": 180.66329956054688, "learning_rate": 5.660055202008164e-07, "loss": 14.6875, "step": 19683 }, { "epoch": 1.307298930729893, "grad_norm": 133.48004150390625, "learning_rate": 5.659086274831205e-07, "loss": 15.1094, "step": 19684 }, { "epoch": 1.3073653450222489, "grad_norm": 154.89617919921875, "learning_rate": 5.658117397867431e-07, "loss": 13.0312, "step": 19685 }, { "epoch": 1.3074317593146045, "grad_norm": 256.7717590332031, "learning_rate": 5.657148571128052e-07, "loss": 16.0781, "step": 19686 }, { "epoch": 1.3074981736069602, "grad_norm": 235.68540954589844, "learning_rate": 5.656179794624273e-07, "loss": 17.7188, "step": 19687 }, { "epoch": 1.307564587899316, "grad_norm": 321.07080078125, "learning_rate": 5.655211068367306e-07, "loss": 17.5938, "step": 19688 }, { "epoch": 1.3076310021916717, "grad_norm": 199.23741149902344, "learning_rate": 5.654242392368344e-07, "loss": 16.7656, "step": 19689 }, { "epoch": 1.3076974164840274, "grad_norm": 97.6949234008789, "learning_rate": 5.653273766638608e-07, "loss": 13.6797, "step": 19690 }, { "epoch": 1.307763830776383, "grad_norm": 562.1288452148438, "learning_rate": 5.65230519118929e-07, "loss": 21.0312, "step": 19691 }, { "epoch": 1.3078302450687387, "grad_norm": 283.3261413574219, "learning_rate": 5.651336666031598e-07, "loss": 19.1094, "step": 19692 }, { "epoch": 1.3078966593610946, "grad_norm": 439.5137023925781, "learning_rate": 5.650368191176738e-07, "loss": 18.7188, "step": 19693 }, { "epoch": 1.3079630736534502, "grad_norm": 274.5522766113281, "learning_rate": 5.64939976663591e-07, "loss": 17.4531, "step": 19694 }, { "epoch": 1.3080294879458059, "grad_norm": 108.98391723632812, "learning_rate": 5.648431392420316e-07, "loss": 11.1797, "step": 19695 }, { "epoch": 1.3080959022381617, "grad_norm": 788.4706420898438, "learning_rate": 5.647463068541163e-07, "loss": 20.8594, "step": 19696 }, { "epoch": 1.3081623165305174, "grad_norm": 163.27392578125, "learning_rate": 5.646494795009639e-07, "loss": 19.9531, "step": 19697 }, { "epoch": 1.308228730822873, "grad_norm": 95.67998504638672, "learning_rate": 5.64552657183696e-07, "loss": 14.2266, "step": 19698 }, { "epoch": 1.308295145115229, "grad_norm": 129.81640625, "learning_rate": 5.644558399034316e-07, "loss": 16.9531, "step": 19699 }, { "epoch": 1.3083615594075846, "grad_norm": 181.15711975097656, "learning_rate": 5.643590276612909e-07, "loss": 14.5, "step": 19700 }, { "epoch": 1.3084279736999402, "grad_norm": 935.9544067382812, "learning_rate": 5.642622204583938e-07, "loss": 15.2031, "step": 19701 }, { "epoch": 1.308494387992296, "grad_norm": 241.06997680664062, "learning_rate": 5.641654182958604e-07, "loss": 16.0, "step": 19702 }, { "epoch": 1.3085608022846515, "grad_norm": 288.5451965332031, "learning_rate": 5.640686211748092e-07, "loss": 15.7188, "step": 19703 }, { "epoch": 1.3086272165770074, "grad_norm": 78.92263793945312, "learning_rate": 5.639718290963613e-07, "loss": 18.7656, "step": 19704 }, { "epoch": 1.308693630869363, "grad_norm": 117.60413360595703, "learning_rate": 5.638750420616364e-07, "loss": 9.0781, "step": 19705 }, { "epoch": 1.3087600451617187, "grad_norm": 111.86759948730469, "learning_rate": 5.637782600717527e-07, "loss": 14.4062, "step": 19706 }, { "epoch": 1.3088264594540746, "grad_norm": 1303.0439453125, "learning_rate": 5.636814831278311e-07, "loss": 19.3594, "step": 19707 }, { "epoch": 1.3088928737464303, "grad_norm": 150.587890625, "learning_rate": 5.635847112309903e-07, "loss": 12.2969, "step": 19708 }, { "epoch": 1.308959288038786, "grad_norm": 144.40403747558594, "learning_rate": 5.634879443823499e-07, "loss": 16.0938, "step": 19709 }, { "epoch": 1.3090257023311418, "grad_norm": 377.6524658203125, "learning_rate": 5.633911825830293e-07, "loss": 17.8594, "step": 19710 }, { "epoch": 1.3090921166234974, "grad_norm": 225.174560546875, "learning_rate": 5.632944258341476e-07, "loss": 18.1406, "step": 19711 }, { "epoch": 1.309158530915853, "grad_norm": 277.4299621582031, "learning_rate": 5.631976741368243e-07, "loss": 18.4688, "step": 19712 }, { "epoch": 1.3092249452082088, "grad_norm": 224.432861328125, "learning_rate": 5.631009274921788e-07, "loss": 19.7812, "step": 19713 }, { "epoch": 1.3092913595005644, "grad_norm": 135.628173828125, "learning_rate": 5.63004185901329e-07, "loss": 18.4531, "step": 19714 }, { "epoch": 1.3093577737929203, "grad_norm": 117.36322784423828, "learning_rate": 5.629074493653956e-07, "loss": 15.0938, "step": 19715 }, { "epoch": 1.309424188085276, "grad_norm": 277.94287109375, "learning_rate": 5.628107178854963e-07, "loss": 17.7969, "step": 19716 }, { "epoch": 1.3094906023776316, "grad_norm": 199.30690002441406, "learning_rate": 5.627139914627507e-07, "loss": 17.9219, "step": 19717 }, { "epoch": 1.3095570166699875, "grad_norm": 208.1028289794922, "learning_rate": 5.626172700982774e-07, "loss": 21.5625, "step": 19718 }, { "epoch": 1.3096234309623431, "grad_norm": 319.05950927734375, "learning_rate": 5.625205537931951e-07, "loss": 16.8906, "step": 19719 }, { "epoch": 1.3096898452546988, "grad_norm": 174.32015991210938, "learning_rate": 5.624238425486232e-07, "loss": 14.2188, "step": 19720 }, { "epoch": 1.3097562595470547, "grad_norm": 198.36962890625, "learning_rate": 5.623271363656801e-07, "loss": 14.75, "step": 19721 }, { "epoch": 1.3098226738394103, "grad_norm": 133.99900817871094, "learning_rate": 5.622304352454836e-07, "loss": 14.0156, "step": 19722 }, { "epoch": 1.309889088131766, "grad_norm": 392.0896301269531, "learning_rate": 5.621337391891537e-07, "loss": 28.5938, "step": 19723 }, { "epoch": 1.3099555024241216, "grad_norm": 901.6165771484375, "learning_rate": 5.620370481978078e-07, "loss": 20.3281, "step": 19724 }, { "epoch": 1.3100219167164773, "grad_norm": 1031.065185546875, "learning_rate": 5.619403622725649e-07, "loss": 20.7812, "step": 19725 }, { "epoch": 1.3100883310088332, "grad_norm": 125.9866714477539, "learning_rate": 5.618436814145432e-07, "loss": 14.6875, "step": 19726 }, { "epoch": 1.3101547453011888, "grad_norm": 105.57986450195312, "learning_rate": 5.617470056248617e-07, "loss": 12.1875, "step": 19727 }, { "epoch": 1.3102211595935445, "grad_norm": 163.8927001953125, "learning_rate": 5.616503349046371e-07, "loss": 20.9219, "step": 19728 }, { "epoch": 1.3102875738859003, "grad_norm": 171.18052673339844, "learning_rate": 5.615536692549895e-07, "loss": 16.3906, "step": 19729 }, { "epoch": 1.310353988178256, "grad_norm": 241.1033172607422, "learning_rate": 5.614570086770359e-07, "loss": 15.2656, "step": 19730 }, { "epoch": 1.3104204024706116, "grad_norm": 193.2146759033203, "learning_rate": 5.613603531718948e-07, "loss": 16.3281, "step": 19731 }, { "epoch": 1.3104868167629675, "grad_norm": 413.8324890136719, "learning_rate": 5.612637027406841e-07, "loss": 21.8594, "step": 19732 }, { "epoch": 1.3105532310553232, "grad_norm": 292.1805114746094, "learning_rate": 5.61167057384522e-07, "loss": 16.2656, "step": 19733 }, { "epoch": 1.3106196453476788, "grad_norm": 257.7444763183594, "learning_rate": 5.610704171045262e-07, "loss": 19.25, "step": 19734 }, { "epoch": 1.3106860596400345, "grad_norm": 126.1893310546875, "learning_rate": 5.609737819018152e-07, "loss": 15.2188, "step": 19735 }, { "epoch": 1.3107524739323901, "grad_norm": 130.71792602539062, "learning_rate": 5.608771517775055e-07, "loss": 15.9531, "step": 19736 }, { "epoch": 1.310818888224746, "grad_norm": 284.66754150390625, "learning_rate": 5.607805267327166e-07, "loss": 25.5312, "step": 19737 }, { "epoch": 1.3108853025171017, "grad_norm": 255.33880615234375, "learning_rate": 5.606839067685647e-07, "loss": 13.0625, "step": 19738 }, { "epoch": 1.3109517168094573, "grad_norm": 314.4898376464844, "learning_rate": 5.605872918861681e-07, "loss": 13.3438, "step": 19739 }, { "epoch": 1.3110181311018132, "grad_norm": 267.2845153808594, "learning_rate": 5.604906820866444e-07, "loss": 18.7344, "step": 19740 }, { "epoch": 1.3110845453941689, "grad_norm": 282.29803466796875, "learning_rate": 5.603940773711112e-07, "loss": 19.3125, "step": 19741 }, { "epoch": 1.3111509596865245, "grad_norm": 282.9151306152344, "learning_rate": 5.602974777406854e-07, "loss": 18.9688, "step": 19742 }, { "epoch": 1.3112173739788804, "grad_norm": 394.7209167480469, "learning_rate": 5.602008831964853e-07, "loss": 16.0938, "step": 19743 }, { "epoch": 1.311283788271236, "grad_norm": 283.11279296875, "learning_rate": 5.601042937396274e-07, "loss": 17.3281, "step": 19744 }, { "epoch": 1.3113502025635917, "grad_norm": 200.0744171142578, "learning_rate": 5.600077093712296e-07, "loss": 20.6875, "step": 19745 }, { "epoch": 1.3114166168559473, "grad_norm": 158.62184143066406, "learning_rate": 5.599111300924093e-07, "loss": 19.3281, "step": 19746 }, { "epoch": 1.311483031148303, "grad_norm": 541.8070068359375, "learning_rate": 5.598145559042828e-07, "loss": 16.0, "step": 19747 }, { "epoch": 1.3115494454406589, "grad_norm": 150.120849609375, "learning_rate": 5.597179868079677e-07, "loss": 17.5938, "step": 19748 }, { "epoch": 1.3116158597330145, "grad_norm": 299.5580749511719, "learning_rate": 5.59621422804581e-07, "loss": 17.5156, "step": 19749 }, { "epoch": 1.3116822740253702, "grad_norm": 202.4893035888672, "learning_rate": 5.595248638952399e-07, "loss": 16.7344, "step": 19750 }, { "epoch": 1.311748688317726, "grad_norm": 267.8340148925781, "learning_rate": 5.59428310081061e-07, "loss": 12.2969, "step": 19751 }, { "epoch": 1.3118151026100817, "grad_norm": 538.2630004882812, "learning_rate": 5.593317613631619e-07, "loss": 26.0625, "step": 19752 }, { "epoch": 1.3118815169024374, "grad_norm": 264.4188232421875, "learning_rate": 5.59235217742658e-07, "loss": 15.6094, "step": 19753 }, { "epoch": 1.3119479311947932, "grad_norm": 285.22662353515625, "learning_rate": 5.591386792206678e-07, "loss": 17.7188, "step": 19754 }, { "epoch": 1.312014345487149, "grad_norm": 132.89906311035156, "learning_rate": 5.590421457983067e-07, "loss": 16.4688, "step": 19755 }, { "epoch": 1.3120807597795046, "grad_norm": 316.95037841796875, "learning_rate": 5.589456174766918e-07, "loss": 16.6094, "step": 19756 }, { "epoch": 1.3121471740718602, "grad_norm": 127.12664794921875, "learning_rate": 5.588490942569397e-07, "loss": 18.0, "step": 19757 }, { "epoch": 1.3122135883642159, "grad_norm": 112.009033203125, "learning_rate": 5.587525761401668e-07, "loss": 10.5938, "step": 19758 }, { "epoch": 1.3122800026565717, "grad_norm": 234.12179565429688, "learning_rate": 5.586560631274897e-07, "loss": 20.0938, "step": 19759 }, { "epoch": 1.3123464169489274, "grad_norm": 285.0274963378906, "learning_rate": 5.585595552200251e-07, "loss": 17.0, "step": 19760 }, { "epoch": 1.312412831241283, "grad_norm": 169.6555633544922, "learning_rate": 5.584630524188884e-07, "loss": 18.0156, "step": 19761 }, { "epoch": 1.312479245533639, "grad_norm": 387.2509765625, "learning_rate": 5.583665547251972e-07, "loss": 12.4844, "step": 19762 }, { "epoch": 1.3125456598259946, "grad_norm": 297.05633544921875, "learning_rate": 5.582700621400667e-07, "loss": 17.25, "step": 19763 }, { "epoch": 1.3126120741183502, "grad_norm": 194.90280151367188, "learning_rate": 5.581735746646134e-07, "loss": 16.375, "step": 19764 }, { "epoch": 1.3126784884107061, "grad_norm": 468.24322509765625, "learning_rate": 5.580770922999533e-07, "loss": 13.75, "step": 19765 }, { "epoch": 1.3127449027030618, "grad_norm": 155.6737060546875, "learning_rate": 5.579806150472031e-07, "loss": 12.6406, "step": 19766 }, { "epoch": 1.3128113169954174, "grad_norm": 154.82386779785156, "learning_rate": 5.578841429074773e-07, "loss": 14.0312, "step": 19767 }, { "epoch": 1.312877731287773, "grad_norm": 197.3603515625, "learning_rate": 5.577876758818938e-07, "loss": 18.6875, "step": 19768 }, { "epoch": 1.3129441455801287, "grad_norm": 95.061279296875, "learning_rate": 5.576912139715669e-07, "loss": 15.4062, "step": 19769 }, { "epoch": 1.3130105598724846, "grad_norm": 380.21026611328125, "learning_rate": 5.575947571776131e-07, "loss": 14.1562, "step": 19770 }, { "epoch": 1.3130769741648403, "grad_norm": 226.4787139892578, "learning_rate": 5.574983055011479e-07, "loss": 15.3594, "step": 19771 }, { "epoch": 1.313143388457196, "grad_norm": 185.97634887695312, "learning_rate": 5.574018589432872e-07, "loss": 17.2656, "step": 19772 }, { "epoch": 1.3132098027495518, "grad_norm": 177.39083862304688, "learning_rate": 5.573054175051467e-07, "loss": 13.6094, "step": 19773 }, { "epoch": 1.3132762170419074, "grad_norm": 249.59523010253906, "learning_rate": 5.572089811878422e-07, "loss": 16.1562, "step": 19774 }, { "epoch": 1.313342631334263, "grad_norm": 128.07284545898438, "learning_rate": 5.57112549992488e-07, "loss": 12.3125, "step": 19775 }, { "epoch": 1.313409045626619, "grad_norm": 131.92681884765625, "learning_rate": 5.570161239202014e-07, "loss": 13.1406, "step": 19776 }, { "epoch": 1.3134754599189746, "grad_norm": 154.6248016357422, "learning_rate": 5.569197029720962e-07, "loss": 16.7812, "step": 19777 }, { "epoch": 1.3135418742113303, "grad_norm": 341.7842712402344, "learning_rate": 5.568232871492882e-07, "loss": 22.1719, "step": 19778 }, { "epoch": 1.313608288503686, "grad_norm": 174.62635803222656, "learning_rate": 5.567268764528937e-07, "loss": 11.125, "step": 19779 }, { "epoch": 1.3136747027960416, "grad_norm": 216.6798858642578, "learning_rate": 5.566304708840264e-07, "loss": 15.3906, "step": 19780 }, { "epoch": 1.3137411170883975, "grad_norm": 132.604736328125, "learning_rate": 5.565340704438023e-07, "loss": 11.6094, "step": 19781 }, { "epoch": 1.3138075313807531, "grad_norm": 344.9735107421875, "learning_rate": 5.564376751333364e-07, "loss": 24.5469, "step": 19782 }, { "epoch": 1.3138739456731088, "grad_norm": 268.5818786621094, "learning_rate": 5.563412849537437e-07, "loss": 12.8359, "step": 19783 }, { "epoch": 1.3139403599654647, "grad_norm": 183.70758056640625, "learning_rate": 5.562448999061391e-07, "loss": 13.6875, "step": 19784 }, { "epoch": 1.3140067742578203, "grad_norm": 435.1949768066406, "learning_rate": 5.561485199916381e-07, "loss": 20.2812, "step": 19785 }, { "epoch": 1.314073188550176, "grad_norm": 135.49078369140625, "learning_rate": 5.560521452113542e-07, "loss": 17.1562, "step": 19786 }, { "epoch": 1.3141396028425318, "grad_norm": 180.52774047851562, "learning_rate": 5.55955775566404e-07, "loss": 14.1094, "step": 19787 }, { "epoch": 1.3142060171348875, "grad_norm": 225.04737854003906, "learning_rate": 5.558594110579009e-07, "loss": 16.2969, "step": 19788 }, { "epoch": 1.3142724314272431, "grad_norm": 232.47894287109375, "learning_rate": 5.557630516869601e-07, "loss": 21.5312, "step": 19789 }, { "epoch": 1.3143388457195988, "grad_norm": 554.558349609375, "learning_rate": 5.55666697454696e-07, "loss": 28.0938, "step": 19790 }, { "epoch": 1.3144052600119545, "grad_norm": 372.634765625, "learning_rate": 5.555703483622237e-07, "loss": 12.6094, "step": 19791 }, { "epoch": 1.3144716743043103, "grad_norm": 219.33387756347656, "learning_rate": 5.554740044106568e-07, "loss": 19.875, "step": 19792 }, { "epoch": 1.314538088596666, "grad_norm": 189.064208984375, "learning_rate": 5.553776656011108e-07, "loss": 16.1719, "step": 19793 }, { "epoch": 1.3146045028890216, "grad_norm": 140.7354736328125, "learning_rate": 5.552813319346993e-07, "loss": 16.1406, "step": 19794 }, { "epoch": 1.3146709171813775, "grad_norm": 139.149169921875, "learning_rate": 5.551850034125369e-07, "loss": 16.3125, "step": 19795 }, { "epoch": 1.3147373314737332, "grad_norm": 212.23793029785156, "learning_rate": 5.550886800357379e-07, "loss": 14.7812, "step": 19796 }, { "epoch": 1.3148037457660888, "grad_norm": 151.17874145507812, "learning_rate": 5.549923618054164e-07, "loss": 12.9688, "step": 19797 }, { "epoch": 1.3148701600584447, "grad_norm": 207.148681640625, "learning_rate": 5.548960487226869e-07, "loss": 18.0, "step": 19798 }, { "epoch": 1.3149365743508004, "grad_norm": 203.1682586669922, "learning_rate": 5.547997407886635e-07, "loss": 16.6562, "step": 19799 }, { "epoch": 1.315002988643156, "grad_norm": 164.85914611816406, "learning_rate": 5.54703438004459e-07, "loss": 13.9375, "step": 19800 }, { "epoch": 1.3150694029355117, "grad_norm": 157.56947326660156, "learning_rate": 5.546071403711893e-07, "loss": 15.2656, "step": 19801 }, { "epoch": 1.3151358172278673, "grad_norm": 134.39109802246094, "learning_rate": 5.545108478899672e-07, "loss": 10.7031, "step": 19802 }, { "epoch": 1.3152022315202232, "grad_norm": 205.29335021972656, "learning_rate": 5.544145605619064e-07, "loss": 14.0781, "step": 19803 }, { "epoch": 1.3152686458125789, "grad_norm": 487.04241943359375, "learning_rate": 5.543182783881212e-07, "loss": 21.0469, "step": 19804 }, { "epoch": 1.3153350601049345, "grad_norm": 331.3526611328125, "learning_rate": 5.542220013697252e-07, "loss": 18.375, "step": 19805 }, { "epoch": 1.3154014743972904, "grad_norm": 224.95965576171875, "learning_rate": 5.541257295078319e-07, "loss": 18.0625, "step": 19806 }, { "epoch": 1.315467888689646, "grad_norm": 247.45420837402344, "learning_rate": 5.540294628035555e-07, "loss": 13.25, "step": 19807 }, { "epoch": 1.3155343029820017, "grad_norm": 370.8572082519531, "learning_rate": 5.539332012580082e-07, "loss": 11.4375, "step": 19808 }, { "epoch": 1.3156007172743576, "grad_norm": 325.9975891113281, "learning_rate": 5.538369448723055e-07, "loss": 14.8125, "step": 19809 }, { "epoch": 1.3156671315667132, "grad_norm": 321.2232360839844, "learning_rate": 5.537406936475591e-07, "loss": 14.2344, "step": 19810 }, { "epoch": 1.3157335458590689, "grad_norm": 172.22100830078125, "learning_rate": 5.536444475848832e-07, "loss": 13.5469, "step": 19811 }, { "epoch": 1.3157999601514245, "grad_norm": 96.43368530273438, "learning_rate": 5.535482066853908e-07, "loss": 11.8359, "step": 19812 }, { "epoch": 1.3158663744437802, "grad_norm": 309.5775451660156, "learning_rate": 5.534519709501957e-07, "loss": 19.3281, "step": 19813 }, { "epoch": 1.315932788736136, "grad_norm": 133.2198944091797, "learning_rate": 5.533557403804102e-07, "loss": 14.3594, "step": 19814 }, { "epoch": 1.3159992030284917, "grad_norm": 138.62762451171875, "learning_rate": 5.53259514977148e-07, "loss": 14.3906, "step": 19815 }, { "epoch": 1.3160656173208474, "grad_norm": 216.2725830078125, "learning_rate": 5.531632947415226e-07, "loss": 18.4219, "step": 19816 }, { "epoch": 1.3161320316132032, "grad_norm": 134.73069763183594, "learning_rate": 5.530670796746457e-07, "loss": 13.5156, "step": 19817 }, { "epoch": 1.316198445905559, "grad_norm": 404.2215576171875, "learning_rate": 5.529708697776321e-07, "loss": 17.4375, "step": 19818 }, { "epoch": 1.3162648601979146, "grad_norm": 293.3344421386719, "learning_rate": 5.528746650515932e-07, "loss": 18.5, "step": 19819 }, { "epoch": 1.3163312744902704, "grad_norm": 161.83563232421875, "learning_rate": 5.527784654976423e-07, "loss": 15.3281, "step": 19820 }, { "epoch": 1.316397688782626, "grad_norm": 167.46157836914062, "learning_rate": 5.526822711168923e-07, "loss": 14.5938, "step": 19821 }, { "epoch": 1.3164641030749817, "grad_norm": 118.65617370605469, "learning_rate": 5.525860819104557e-07, "loss": 11.7422, "step": 19822 }, { "epoch": 1.3165305173673374, "grad_norm": 205.0673828125, "learning_rate": 5.524898978794456e-07, "loss": 17.5469, "step": 19823 }, { "epoch": 1.316596931659693, "grad_norm": 125.66726684570312, "learning_rate": 5.523937190249745e-07, "loss": 18.2188, "step": 19824 }, { "epoch": 1.316663345952049, "grad_norm": 96.23690795898438, "learning_rate": 5.52297545348154e-07, "loss": 13.7344, "step": 19825 }, { "epoch": 1.3167297602444046, "grad_norm": 112.30430603027344, "learning_rate": 5.522013768500981e-07, "loss": 12.7188, "step": 19826 }, { "epoch": 1.3167961745367602, "grad_norm": 178.7006378173828, "learning_rate": 5.521052135319182e-07, "loss": 21.7656, "step": 19827 }, { "epoch": 1.316862588829116, "grad_norm": 161.19741821289062, "learning_rate": 5.520090553947268e-07, "loss": 17.0, "step": 19828 }, { "epoch": 1.3169290031214718, "grad_norm": 518.5624389648438, "learning_rate": 5.519129024396363e-07, "loss": 16.125, "step": 19829 }, { "epoch": 1.3169954174138274, "grad_norm": 337.5672302246094, "learning_rate": 5.518167546677593e-07, "loss": 13.8438, "step": 19830 }, { "epoch": 1.3170618317061833, "grad_norm": 299.9923095703125, "learning_rate": 5.517206120802069e-07, "loss": 19.2969, "step": 19831 }, { "epoch": 1.317128245998539, "grad_norm": 159.87127685546875, "learning_rate": 5.51624474678093e-07, "loss": 15.3125, "step": 19832 }, { "epoch": 1.3171946602908946, "grad_norm": 189.0453338623047, "learning_rate": 5.515283424625279e-07, "loss": 15.2969, "step": 19833 }, { "epoch": 1.3172610745832503, "grad_norm": 246.6082305908203, "learning_rate": 5.514322154346245e-07, "loss": 15.7812, "step": 19834 }, { "epoch": 1.317327488875606, "grad_norm": 243.46543884277344, "learning_rate": 5.513360935954945e-07, "loss": 16.9844, "step": 19835 }, { "epoch": 1.3173939031679618, "grad_norm": 251.2429656982422, "learning_rate": 5.512399769462499e-07, "loss": 19.3125, "step": 19836 }, { "epoch": 1.3174603174603174, "grad_norm": 131.05416870117188, "learning_rate": 5.511438654880026e-07, "loss": 17.1875, "step": 19837 }, { "epoch": 1.317526731752673, "grad_norm": 713.655517578125, "learning_rate": 5.510477592218644e-07, "loss": 24.0078, "step": 19838 }, { "epoch": 1.317593146045029, "grad_norm": 180.09890747070312, "learning_rate": 5.509516581489462e-07, "loss": 18.5938, "step": 19839 }, { "epoch": 1.3176595603373846, "grad_norm": 214.47518920898438, "learning_rate": 5.50855562270361e-07, "loss": 18.9219, "step": 19840 }, { "epoch": 1.3177259746297403, "grad_norm": 113.53348541259766, "learning_rate": 5.507594715872193e-07, "loss": 14.9219, "step": 19841 }, { "epoch": 1.3177923889220962, "grad_norm": 169.18699645996094, "learning_rate": 5.50663386100633e-07, "loss": 15.4375, "step": 19842 }, { "epoch": 1.3178588032144518, "grad_norm": 243.97120666503906, "learning_rate": 5.505673058117135e-07, "loss": 17.1719, "step": 19843 }, { "epoch": 1.3179252175068075, "grad_norm": 387.46990966796875, "learning_rate": 5.504712307215723e-07, "loss": 14.2656, "step": 19844 }, { "epoch": 1.3179916317991631, "grad_norm": 396.1292724609375, "learning_rate": 5.503751608313206e-07, "loss": 20.875, "step": 19845 }, { "epoch": 1.3180580460915188, "grad_norm": 215.40281677246094, "learning_rate": 5.502790961420702e-07, "loss": 15.6094, "step": 19846 }, { "epoch": 1.3181244603838747, "grad_norm": 174.94418334960938, "learning_rate": 5.501830366549312e-07, "loss": 15.4375, "step": 19847 }, { "epoch": 1.3181908746762303, "grad_norm": 269.1221008300781, "learning_rate": 5.500869823710161e-07, "loss": 15.25, "step": 19848 }, { "epoch": 1.318257288968586, "grad_norm": 189.33363342285156, "learning_rate": 5.499909332914352e-07, "loss": 12.6797, "step": 19849 }, { "epoch": 1.3183237032609418, "grad_norm": 139.03988647460938, "learning_rate": 5.498948894172995e-07, "loss": 13.4688, "step": 19850 }, { "epoch": 1.3183901175532975, "grad_norm": 183.75604248046875, "learning_rate": 5.497988507497202e-07, "loss": 16.125, "step": 19851 }, { "epoch": 1.3184565318456531, "grad_norm": 271.6562805175781, "learning_rate": 5.497028172898081e-07, "loss": 16.3438, "step": 19852 }, { "epoch": 1.318522946138009, "grad_norm": 189.52520751953125, "learning_rate": 5.496067890386744e-07, "loss": 17.2969, "step": 19853 }, { "epoch": 1.3185893604303647, "grad_norm": 162.32579040527344, "learning_rate": 5.495107659974294e-07, "loss": 15.6719, "step": 19854 }, { "epoch": 1.3186557747227203, "grad_norm": 132.23631286621094, "learning_rate": 5.494147481671847e-07, "loss": 15.7812, "step": 19855 }, { "epoch": 1.318722189015076, "grad_norm": 67.29569244384766, "learning_rate": 5.493187355490494e-07, "loss": 13.0938, "step": 19856 }, { "epoch": 1.3187886033074316, "grad_norm": 110.49129486083984, "learning_rate": 5.492227281441359e-07, "loss": 15.8594, "step": 19857 }, { "epoch": 1.3188550175997875, "grad_norm": 557.3023071289062, "learning_rate": 5.491267259535537e-07, "loss": 25.1562, "step": 19858 }, { "epoch": 1.3189214318921432, "grad_norm": 258.1288757324219, "learning_rate": 5.490307289784134e-07, "loss": 10.0156, "step": 19859 }, { "epoch": 1.3189878461844988, "grad_norm": 156.2785186767578, "learning_rate": 5.489347372198257e-07, "loss": 13.7031, "step": 19860 }, { "epoch": 1.3190542604768547, "grad_norm": 281.91619873046875, "learning_rate": 5.488387506789007e-07, "loss": 16.0, "step": 19861 }, { "epoch": 1.3191206747692104, "grad_norm": 98.79769134521484, "learning_rate": 5.487427693567491e-07, "loss": 13.5625, "step": 19862 }, { "epoch": 1.319187089061566, "grad_norm": 250.70233154296875, "learning_rate": 5.486467932544811e-07, "loss": 13.4531, "step": 19863 }, { "epoch": 1.3192535033539219, "grad_norm": 595.0673828125, "learning_rate": 5.48550822373206e-07, "loss": 20.5, "step": 19864 }, { "epoch": 1.3193199176462775, "grad_norm": 238.2742462158203, "learning_rate": 5.484548567140356e-07, "loss": 14.375, "step": 19865 }, { "epoch": 1.3193863319386332, "grad_norm": 235.8681182861328, "learning_rate": 5.483588962780785e-07, "loss": 16.3906, "step": 19866 }, { "epoch": 1.3194527462309888, "grad_norm": 399.8672180175781, "learning_rate": 5.482629410664452e-07, "loss": 21.9844, "step": 19867 }, { "epoch": 1.3195191605233445, "grad_norm": 273.42974853515625, "learning_rate": 5.481669910802459e-07, "loss": 15.5781, "step": 19868 }, { "epoch": 1.3195855748157004, "grad_norm": 96.11540985107422, "learning_rate": 5.480710463205902e-07, "loss": 11.8672, "step": 19869 }, { "epoch": 1.319651989108056, "grad_norm": 253.3073272705078, "learning_rate": 5.479751067885881e-07, "loss": 23.4688, "step": 19870 }, { "epoch": 1.3197184034004117, "grad_norm": 136.44496154785156, "learning_rate": 5.478791724853497e-07, "loss": 13.3828, "step": 19871 }, { "epoch": 1.3197848176927676, "grad_norm": 195.5022430419922, "learning_rate": 5.477832434119834e-07, "loss": 18.1562, "step": 19872 }, { "epoch": 1.3198512319851232, "grad_norm": 317.4404602050781, "learning_rate": 5.476873195696008e-07, "loss": 15.8906, "step": 19873 }, { "epoch": 1.3199176462774789, "grad_norm": 152.2904052734375, "learning_rate": 5.475914009593099e-07, "loss": 21.7812, "step": 19874 }, { "epoch": 1.3199840605698347, "grad_norm": 175.44143676757812, "learning_rate": 5.474954875822209e-07, "loss": 13.5625, "step": 19875 }, { "epoch": 1.3200504748621904, "grad_norm": 923.7349853515625, "learning_rate": 5.47399579439443e-07, "loss": 19.7969, "step": 19876 }, { "epoch": 1.320116889154546, "grad_norm": 218.60862731933594, "learning_rate": 5.473036765320863e-07, "loss": 15.1875, "step": 19877 }, { "epoch": 1.3201833034469017, "grad_norm": 355.2365417480469, "learning_rate": 5.472077788612588e-07, "loss": 18.5938, "step": 19878 }, { "epoch": 1.3202497177392574, "grad_norm": 267.1071472167969, "learning_rate": 5.471118864280714e-07, "loss": 14.8281, "step": 19879 }, { "epoch": 1.3203161320316132, "grad_norm": 167.85995483398438, "learning_rate": 5.470159992336323e-07, "loss": 14.2031, "step": 19880 }, { "epoch": 1.320382546323969, "grad_norm": 181.24815368652344, "learning_rate": 5.469201172790507e-07, "loss": 14.25, "step": 19881 }, { "epoch": 1.3204489606163246, "grad_norm": 120.07394409179688, "learning_rate": 5.46824240565436e-07, "loss": 12.4219, "step": 19882 }, { "epoch": 1.3205153749086804, "grad_norm": 362.81793212890625, "learning_rate": 5.467283690938973e-07, "loss": 14.5781, "step": 19883 }, { "epoch": 1.320581789201036, "grad_norm": 146.18397521972656, "learning_rate": 5.466325028655433e-07, "loss": 13.6562, "step": 19884 }, { "epoch": 1.3206482034933917, "grad_norm": 298.59124755859375, "learning_rate": 5.465366418814835e-07, "loss": 10.3281, "step": 19885 }, { "epoch": 1.3207146177857476, "grad_norm": 175.47799682617188, "learning_rate": 5.464407861428256e-07, "loss": 14.0469, "step": 19886 }, { "epoch": 1.3207810320781033, "grad_norm": 127.95748138427734, "learning_rate": 5.463449356506795e-07, "loss": 14.875, "step": 19887 }, { "epoch": 1.320847446370459, "grad_norm": 402.48126220703125, "learning_rate": 5.46249090406154e-07, "loss": 16.125, "step": 19888 }, { "epoch": 1.3209138606628146, "grad_norm": 541.5050048828125, "learning_rate": 5.461532504103566e-07, "loss": 13.3281, "step": 19889 }, { "epoch": 1.3209802749551702, "grad_norm": 124.79705047607422, "learning_rate": 5.460574156643976e-07, "loss": 12.375, "step": 19890 }, { "epoch": 1.321046689247526, "grad_norm": 186.9580078125, "learning_rate": 5.459615861693841e-07, "loss": 17.5938, "step": 19891 }, { "epoch": 1.3211131035398818, "grad_norm": 211.45330810546875, "learning_rate": 5.458657619264251e-07, "loss": 15.2656, "step": 19892 }, { "epoch": 1.3211795178322374, "grad_norm": 307.8206481933594, "learning_rate": 5.457699429366293e-07, "loss": 12.2266, "step": 19893 }, { "epoch": 1.3212459321245933, "grad_norm": 256.280517578125, "learning_rate": 5.456741292011051e-07, "loss": 19.4219, "step": 19894 }, { "epoch": 1.321312346416949, "grad_norm": 178.4114227294922, "learning_rate": 5.455783207209599e-07, "loss": 20.6875, "step": 19895 }, { "epoch": 1.3213787607093046, "grad_norm": 192.30413818359375, "learning_rate": 5.454825174973034e-07, "loss": 15.4219, "step": 19896 }, { "epoch": 1.3214451750016605, "grad_norm": 528.7216796875, "learning_rate": 5.453867195312428e-07, "loss": 34.875, "step": 19897 }, { "epoch": 1.3215115892940161, "grad_norm": 309.4845275878906, "learning_rate": 5.452909268238862e-07, "loss": 19.9531, "step": 19898 }, { "epoch": 1.3215780035863718, "grad_norm": 144.53781127929688, "learning_rate": 5.451951393763421e-07, "loss": 12.4531, "step": 19899 }, { "epoch": 1.3216444178787274, "grad_norm": 246.50875854492188, "learning_rate": 5.450993571897184e-07, "loss": 16.25, "step": 19900 }, { "epoch": 1.321710832171083, "grad_norm": 101.08026123046875, "learning_rate": 5.45003580265123e-07, "loss": 13.4062, "step": 19901 }, { "epoch": 1.321777246463439, "grad_norm": 125.39637756347656, "learning_rate": 5.449078086036643e-07, "loss": 12.7031, "step": 19902 }, { "epoch": 1.3218436607557946, "grad_norm": 382.8060607910156, "learning_rate": 5.448120422064487e-07, "loss": 17.7969, "step": 19903 }, { "epoch": 1.3219100750481503, "grad_norm": 4787.93505859375, "learning_rate": 5.447162810745857e-07, "loss": 12.7031, "step": 19904 }, { "epoch": 1.3219764893405062, "grad_norm": 232.1947021484375, "learning_rate": 5.446205252091821e-07, "loss": 16.1562, "step": 19905 }, { "epoch": 1.3220429036328618, "grad_norm": 234.05870056152344, "learning_rate": 5.445247746113455e-07, "loss": 17.8125, "step": 19906 }, { "epoch": 1.3221093179252175, "grad_norm": 583.1253662109375, "learning_rate": 5.444290292821836e-07, "loss": 19.2344, "step": 19907 }, { "epoch": 1.3221757322175733, "grad_norm": 313.42724609375, "learning_rate": 5.44333289222804e-07, "loss": 21.6719, "step": 19908 }, { "epoch": 1.322242146509929, "grad_norm": 129.93026733398438, "learning_rate": 5.442375544343143e-07, "loss": 13.7344, "step": 19909 }, { "epoch": 1.3223085608022846, "grad_norm": 157.50421142578125, "learning_rate": 5.44141824917822e-07, "loss": 17.6875, "step": 19910 }, { "epoch": 1.3223749750946403, "grad_norm": 140.8392791748047, "learning_rate": 5.440461006744335e-07, "loss": 14.3906, "step": 19911 }, { "epoch": 1.322441389386996, "grad_norm": 147.0750732421875, "learning_rate": 5.439503817052575e-07, "loss": 13.8828, "step": 19912 }, { "epoch": 1.3225078036793518, "grad_norm": 206.80862426757812, "learning_rate": 5.438546680114002e-07, "loss": 15.1875, "step": 19913 }, { "epoch": 1.3225742179717075, "grad_norm": 208.0487823486328, "learning_rate": 5.437589595939691e-07, "loss": 14.9219, "step": 19914 }, { "epoch": 1.3226406322640631, "grad_norm": 184.59840393066406, "learning_rate": 5.436632564540713e-07, "loss": 15.4844, "step": 19915 }, { "epoch": 1.322707046556419, "grad_norm": 375.6140441894531, "learning_rate": 5.435675585928141e-07, "loss": 17.6562, "step": 19916 }, { "epoch": 1.3227734608487747, "grad_norm": 142.58270263671875, "learning_rate": 5.434718660113033e-07, "loss": 16.625, "step": 19917 }, { "epoch": 1.3228398751411303, "grad_norm": 950.2686157226562, "learning_rate": 5.433761787106477e-07, "loss": 8.9062, "step": 19918 }, { "epoch": 1.3229062894334862, "grad_norm": 116.75668334960938, "learning_rate": 5.432804966919528e-07, "loss": 20.0156, "step": 19919 }, { "epoch": 1.3229727037258419, "grad_norm": 350.9987487792969, "learning_rate": 5.431848199563256e-07, "loss": 16.1875, "step": 19920 }, { "epoch": 1.3230391180181975, "grad_norm": 224.79405212402344, "learning_rate": 5.43089148504873e-07, "loss": 17.2188, "step": 19921 }, { "epoch": 1.3231055323105532, "grad_norm": 225.8309783935547, "learning_rate": 5.429934823387017e-07, "loss": 19.5156, "step": 19922 }, { "epoch": 1.3231719466029088, "grad_norm": 149.80691528320312, "learning_rate": 5.428978214589183e-07, "loss": 14.4688, "step": 19923 }, { "epoch": 1.3232383608952647, "grad_norm": 306.3122253417969, "learning_rate": 5.428021658666293e-07, "loss": 19.3594, "step": 19924 }, { "epoch": 1.3233047751876204, "grad_norm": 181.18157958984375, "learning_rate": 5.427065155629411e-07, "loss": 13.3281, "step": 19925 }, { "epoch": 1.323371189479976, "grad_norm": 225.14483642578125, "learning_rate": 5.426108705489604e-07, "loss": 20.1875, "step": 19926 }, { "epoch": 1.3234376037723319, "grad_norm": 316.4626770019531, "learning_rate": 5.425152308257937e-07, "loss": 16.125, "step": 19927 }, { "epoch": 1.3235040180646875, "grad_norm": 115.330810546875, "learning_rate": 5.424195963945463e-07, "loss": 20.7812, "step": 19928 }, { "epoch": 1.3235704323570432, "grad_norm": 290.7275390625, "learning_rate": 5.423239672563258e-07, "loss": 19.2344, "step": 19929 }, { "epoch": 1.323636846649399, "grad_norm": 135.08082580566406, "learning_rate": 5.422283434122375e-07, "loss": 14.4844, "step": 19930 }, { "epoch": 1.3237032609417547, "grad_norm": 404.0526428222656, "learning_rate": 5.421327248633877e-07, "loss": 18.8125, "step": 19931 }, { "epoch": 1.3237696752341104, "grad_norm": 242.3504180908203, "learning_rate": 5.420371116108825e-07, "loss": 17.75, "step": 19932 }, { "epoch": 1.323836089526466, "grad_norm": 201.01210021972656, "learning_rate": 5.419415036558279e-07, "loss": 13.7422, "step": 19933 }, { "epoch": 1.3239025038188217, "grad_norm": 148.12591552734375, "learning_rate": 5.418459009993299e-07, "loss": 19.4219, "step": 19934 }, { "epoch": 1.3239689181111776, "grad_norm": 151.49691772460938, "learning_rate": 5.417503036424946e-07, "loss": 12.6562, "step": 19935 }, { "epoch": 1.3240353324035332, "grad_norm": 110.42251586914062, "learning_rate": 5.416547115864274e-07, "loss": 13.3281, "step": 19936 }, { "epoch": 1.3241017466958889, "grad_norm": 166.18789672851562, "learning_rate": 5.41559124832234e-07, "loss": 14.8047, "step": 19937 }, { "epoch": 1.3241681609882447, "grad_norm": 200.01358032226562, "learning_rate": 5.414635433810204e-07, "loss": 13.0469, "step": 19938 }, { "epoch": 1.3242345752806004, "grad_norm": 174.18212890625, "learning_rate": 5.41367967233892e-07, "loss": 17.8438, "step": 19939 }, { "epoch": 1.324300989572956, "grad_norm": 279.2827453613281, "learning_rate": 5.412723963919546e-07, "loss": 14.9531, "step": 19940 }, { "epoch": 1.324367403865312, "grad_norm": 126.51437377929688, "learning_rate": 5.411768308563139e-07, "loss": 19.375, "step": 19941 }, { "epoch": 1.3244338181576676, "grad_norm": 144.4769744873047, "learning_rate": 5.410812706280743e-07, "loss": 16.1562, "step": 19942 }, { "epoch": 1.3245002324500232, "grad_norm": 114.50337982177734, "learning_rate": 5.409857157083428e-07, "loss": 14.9688, "step": 19943 }, { "epoch": 1.324566646742379, "grad_norm": 250.05406188964844, "learning_rate": 5.408901660982235e-07, "loss": 16.8906, "step": 19944 }, { "epoch": 1.3246330610347345, "grad_norm": 241.82058715820312, "learning_rate": 5.407946217988219e-07, "loss": 18.0938, "step": 19945 }, { "epoch": 1.3246994753270904, "grad_norm": 267.8554992675781, "learning_rate": 5.406990828112432e-07, "loss": 14.6094, "step": 19946 }, { "epoch": 1.324765889619446, "grad_norm": 188.990966796875, "learning_rate": 5.406035491365929e-07, "loss": 24.3984, "step": 19947 }, { "epoch": 1.3248323039118017, "grad_norm": 373.1484680175781, "learning_rate": 5.405080207759758e-07, "loss": 21.3438, "step": 19948 }, { "epoch": 1.3248987182041576, "grad_norm": 163.8296661376953, "learning_rate": 5.404124977304973e-07, "loss": 17.6562, "step": 19949 }, { "epoch": 1.3249651324965133, "grad_norm": 195.62527465820312, "learning_rate": 5.403169800012611e-07, "loss": 15.0625, "step": 19950 }, { "epoch": 1.325031546788869, "grad_norm": 164.63124084472656, "learning_rate": 5.40221467589374e-07, "loss": 18.1719, "step": 19951 }, { "epoch": 1.3250979610812248, "grad_norm": 140.81105041503906, "learning_rate": 5.401259604959393e-07, "loss": 15.0312, "step": 19952 }, { "epoch": 1.3251643753735804, "grad_norm": 118.28946685791016, "learning_rate": 5.400304587220624e-07, "loss": 15.8438, "step": 19953 }, { "epoch": 1.325230789665936, "grad_norm": 395.11956787109375, "learning_rate": 5.399349622688478e-07, "loss": 18.875, "step": 19954 }, { "epoch": 1.3252972039582918, "grad_norm": 327.0892333984375, "learning_rate": 5.398394711374008e-07, "loss": 19.0312, "step": 19955 }, { "epoch": 1.3253636182506474, "grad_norm": 181.81509399414062, "learning_rate": 5.397439853288247e-07, "loss": 13.9844, "step": 19956 }, { "epoch": 1.3254300325430033, "grad_norm": 165.0601043701172, "learning_rate": 5.396485048442254e-07, "loss": 18.1875, "step": 19957 }, { "epoch": 1.325496446835359, "grad_norm": 166.36538696289062, "learning_rate": 5.395530296847065e-07, "loss": 15.125, "step": 19958 }, { "epoch": 1.3255628611277146, "grad_norm": 240.07272338867188, "learning_rate": 5.394575598513727e-07, "loss": 16.5938, "step": 19959 }, { "epoch": 1.3256292754200705, "grad_norm": 277.5164794921875, "learning_rate": 5.39362095345328e-07, "loss": 14.6719, "step": 19960 }, { "epoch": 1.3256956897124261, "grad_norm": 139.27847290039062, "learning_rate": 5.392666361676772e-07, "loss": 16.2656, "step": 19961 }, { "epoch": 1.3257621040047818, "grad_norm": 177.9167938232422, "learning_rate": 5.391711823195242e-07, "loss": 14.1406, "step": 19962 }, { "epoch": 1.3258285182971377, "grad_norm": 310.4625549316406, "learning_rate": 5.390757338019732e-07, "loss": 13.6406, "step": 19963 }, { "epoch": 1.3258949325894933, "grad_norm": 274.4516906738281, "learning_rate": 5.389802906161284e-07, "loss": 21.125, "step": 19964 }, { "epoch": 1.325961346881849, "grad_norm": 143.2996368408203, "learning_rate": 5.388848527630937e-07, "loss": 14.9375, "step": 19965 }, { "epoch": 1.3260277611742046, "grad_norm": 431.5946960449219, "learning_rate": 5.387894202439735e-07, "loss": 13.2188, "step": 19966 }, { "epoch": 1.3260941754665603, "grad_norm": 523.356201171875, "learning_rate": 5.386939930598707e-07, "loss": 27.3281, "step": 19967 }, { "epoch": 1.3261605897589162, "grad_norm": 189.58035278320312, "learning_rate": 5.385985712118905e-07, "loss": 13.9531, "step": 19968 }, { "epoch": 1.3262270040512718, "grad_norm": 231.7342529296875, "learning_rate": 5.385031547011355e-07, "loss": 23.5625, "step": 19969 }, { "epoch": 1.3262934183436275, "grad_norm": 124.72401428222656, "learning_rate": 5.3840774352871e-07, "loss": 18.6094, "step": 19970 }, { "epoch": 1.3263598326359833, "grad_norm": 176.84613037109375, "learning_rate": 5.383123376957174e-07, "loss": 12.0938, "step": 19971 }, { "epoch": 1.326426246928339, "grad_norm": 376.1812744140625, "learning_rate": 5.382169372032616e-07, "loss": 14.6719, "step": 19972 }, { "epoch": 1.3264926612206946, "grad_norm": 227.09132385253906, "learning_rate": 5.381215420524458e-07, "loss": 12.875, "step": 19973 }, { "epoch": 1.3265590755130505, "grad_norm": 175.0000762939453, "learning_rate": 5.380261522443743e-07, "loss": 12.7188, "step": 19974 }, { "epoch": 1.3266254898054062, "grad_norm": 257.3890075683594, "learning_rate": 5.37930767780149e-07, "loss": 17.2031, "step": 19975 }, { "epoch": 1.3266919040977618, "grad_norm": 277.4337158203125, "learning_rate": 5.378353886608749e-07, "loss": 15.7656, "step": 19976 }, { "epoch": 1.3267583183901175, "grad_norm": 384.548095703125, "learning_rate": 5.37740014887654e-07, "loss": 14.75, "step": 19977 }, { "epoch": 1.3268247326824731, "grad_norm": 137.2677001953125, "learning_rate": 5.376446464615903e-07, "loss": 14.2031, "step": 19978 }, { "epoch": 1.326891146974829, "grad_norm": 158.25392150878906, "learning_rate": 5.375492833837867e-07, "loss": 15.8281, "step": 19979 }, { "epoch": 1.3269575612671847, "grad_norm": 189.0699005126953, "learning_rate": 5.374539256553465e-07, "loss": 21.4375, "step": 19980 }, { "epoch": 1.3270239755595403, "grad_norm": 467.5557861328125, "learning_rate": 5.37358573277372e-07, "loss": 16.2656, "step": 19981 }, { "epoch": 1.3270903898518962, "grad_norm": 178.43023681640625, "learning_rate": 5.372632262509674e-07, "loss": 16.7031, "step": 19982 }, { "epoch": 1.3271568041442519, "grad_norm": 261.1214599609375, "learning_rate": 5.371678845772347e-07, "loss": 21.5312, "step": 19983 }, { "epoch": 1.3272232184366075, "grad_norm": 3439.030029296875, "learning_rate": 5.370725482572769e-07, "loss": 16.5156, "step": 19984 }, { "epoch": 1.3272896327289634, "grad_norm": 156.01564025878906, "learning_rate": 5.369772172921969e-07, "loss": 14.1875, "step": 19985 }, { "epoch": 1.327356047021319, "grad_norm": 294.641845703125, "learning_rate": 5.368818916830976e-07, "loss": 16.0781, "step": 19986 }, { "epoch": 1.3274224613136747, "grad_norm": 94.44239807128906, "learning_rate": 5.367865714310815e-07, "loss": 13.4375, "step": 19987 }, { "epoch": 1.3274888756060303, "grad_norm": 352.60797119140625, "learning_rate": 5.366912565372515e-07, "loss": 19.7031, "step": 19988 }, { "epoch": 1.327555289898386, "grad_norm": 249.82888793945312, "learning_rate": 5.365959470027092e-07, "loss": 17.75, "step": 19989 }, { "epoch": 1.3276217041907419, "grad_norm": 642.7070922851562, "learning_rate": 5.365006428285586e-07, "loss": 18.0781, "step": 19990 }, { "epoch": 1.3276881184830975, "grad_norm": 196.12928771972656, "learning_rate": 5.364053440159009e-07, "loss": 18.5312, "step": 19991 }, { "epoch": 1.3277545327754532, "grad_norm": 295.0911865234375, "learning_rate": 5.363100505658387e-07, "loss": 12.4219, "step": 19992 }, { "epoch": 1.327820947067809, "grad_norm": 268.3074645996094, "learning_rate": 5.362147624794746e-07, "loss": 19.4531, "step": 19993 }, { "epoch": 1.3278873613601647, "grad_norm": 129.57037353515625, "learning_rate": 5.361194797579108e-07, "loss": 15.0938, "step": 19994 }, { "epoch": 1.3279537756525204, "grad_norm": 200.925048828125, "learning_rate": 5.360242024022491e-07, "loss": 15.8281, "step": 19995 }, { "epoch": 1.3280201899448763, "grad_norm": 400.3469543457031, "learning_rate": 5.359289304135925e-07, "loss": 16.5938, "step": 19996 }, { "epoch": 1.328086604237232, "grad_norm": 324.60369873046875, "learning_rate": 5.358336637930414e-07, "loss": 10.875, "step": 19997 }, { "epoch": 1.3281530185295876, "grad_norm": 132.35202026367188, "learning_rate": 5.357384025416995e-07, "loss": 17.0, "step": 19998 }, { "epoch": 1.3282194328219432, "grad_norm": 205.08718872070312, "learning_rate": 5.356431466606683e-07, "loss": 16.25, "step": 19999 }, { "epoch": 1.3282858471142989, "grad_norm": 222.48182678222656, "learning_rate": 5.35547896151049e-07, "loss": 15.0625, "step": 20000 }, { "epoch": 1.3283522614066547, "grad_norm": 605.141357421875, "learning_rate": 5.354526510139438e-07, "loss": 15.1406, "step": 20001 }, { "epoch": 1.3284186756990104, "grad_norm": 101.74995422363281, "learning_rate": 5.353574112504545e-07, "loss": 12.7656, "step": 20002 }, { "epoch": 1.328485089991366, "grad_norm": 193.02268981933594, "learning_rate": 5.352621768616827e-07, "loss": 12.5, "step": 20003 }, { "epoch": 1.328551504283722, "grad_norm": 289.1025085449219, "learning_rate": 5.351669478487302e-07, "loss": 21.3281, "step": 20004 }, { "epoch": 1.3286179185760776, "grad_norm": 221.5342254638672, "learning_rate": 5.350717242126988e-07, "loss": 10.5156, "step": 20005 }, { "epoch": 1.3286843328684332, "grad_norm": 153.40016174316406, "learning_rate": 5.349765059546887e-07, "loss": 16.9375, "step": 20006 }, { "epoch": 1.3287507471607891, "grad_norm": 186.79786682128906, "learning_rate": 5.34881293075803e-07, "loss": 11.8203, "step": 20007 }, { "epoch": 1.3288171614531448, "grad_norm": 433.62213134765625, "learning_rate": 5.34786085577142e-07, "loss": 23.4062, "step": 20008 }, { "epoch": 1.3288835757455004, "grad_norm": 350.0341796875, "learning_rate": 5.346908834598072e-07, "loss": 13.9375, "step": 20009 }, { "epoch": 1.328949990037856, "grad_norm": 218.75527954101562, "learning_rate": 5.345956867249e-07, "loss": 14.2344, "step": 20010 }, { "epoch": 1.3290164043302117, "grad_norm": 165.35052490234375, "learning_rate": 5.345004953735214e-07, "loss": 14.8672, "step": 20011 }, { "epoch": 1.3290828186225676, "grad_norm": 427.8508605957031, "learning_rate": 5.344053094067729e-07, "loss": 16.6172, "step": 20012 }, { "epoch": 1.3291492329149233, "grad_norm": 313.9587097167969, "learning_rate": 5.343101288257556e-07, "loss": 21.8594, "step": 20013 }, { "epoch": 1.329215647207279, "grad_norm": 128.27984619140625, "learning_rate": 5.342149536315693e-07, "loss": 12.9844, "step": 20014 }, { "epoch": 1.3292820614996348, "grad_norm": 430.07110595703125, "learning_rate": 5.341197838253167e-07, "loss": 17.0938, "step": 20015 }, { "epoch": 1.3293484757919904, "grad_norm": 338.31207275390625, "learning_rate": 5.340246194080974e-07, "loss": 18.5156, "step": 20016 }, { "epoch": 1.329414890084346, "grad_norm": 295.543212890625, "learning_rate": 5.339294603810126e-07, "loss": 19.6719, "step": 20017 }, { "epoch": 1.329481304376702, "grad_norm": 192.3657989501953, "learning_rate": 5.338343067451632e-07, "loss": 13.1406, "step": 20018 }, { "epoch": 1.3295477186690576, "grad_norm": 199.6031951904297, "learning_rate": 5.337391585016499e-07, "loss": 9.6641, "step": 20019 }, { "epoch": 1.3296141329614133, "grad_norm": 166.8138885498047, "learning_rate": 5.336440156515724e-07, "loss": 17.3125, "step": 20020 }, { "epoch": 1.329680547253769, "grad_norm": 124.27967834472656, "learning_rate": 5.335488781960329e-07, "loss": 13.6406, "step": 20021 }, { "epoch": 1.3297469615461246, "grad_norm": 312.60687255859375, "learning_rate": 5.334537461361306e-07, "loss": 14.5938, "step": 20022 }, { "epoch": 1.3298133758384805, "grad_norm": 236.84556579589844, "learning_rate": 5.333586194729662e-07, "loss": 18.1562, "step": 20023 }, { "epoch": 1.3298797901308361, "grad_norm": 170.95260620117188, "learning_rate": 5.332634982076403e-07, "loss": 15.0469, "step": 20024 }, { "epoch": 1.3299462044231918, "grad_norm": 144.1439208984375, "learning_rate": 5.331683823412532e-07, "loss": 21.1875, "step": 20025 }, { "epoch": 1.3300126187155477, "grad_norm": 187.55502319335938, "learning_rate": 5.330732718749049e-07, "loss": 18.8281, "step": 20026 }, { "epoch": 1.3300790330079033, "grad_norm": 335.55413818359375, "learning_rate": 5.329781668096962e-07, "loss": 13.7266, "step": 20027 }, { "epoch": 1.330145447300259, "grad_norm": 112.07035827636719, "learning_rate": 5.328830671467261e-07, "loss": 9.4609, "step": 20028 }, { "epoch": 1.3302118615926148, "grad_norm": 448.6665954589844, "learning_rate": 5.32787972887096e-07, "loss": 21.8125, "step": 20029 }, { "epoch": 1.3302782758849705, "grad_norm": 242.1620330810547, "learning_rate": 5.326928840319049e-07, "loss": 19.9062, "step": 20030 }, { "epoch": 1.3303446901773262, "grad_norm": 226.50738525390625, "learning_rate": 5.325978005822529e-07, "loss": 18.9531, "step": 20031 }, { "epoch": 1.3304111044696818, "grad_norm": 132.07022094726562, "learning_rate": 5.325027225392402e-07, "loss": 15.1719, "step": 20032 }, { "epoch": 1.3304775187620375, "grad_norm": 261.15692138671875, "learning_rate": 5.324076499039662e-07, "loss": 20.0156, "step": 20033 }, { "epoch": 1.3305439330543933, "grad_norm": 266.51519775390625, "learning_rate": 5.323125826775313e-07, "loss": 13.4375, "step": 20034 }, { "epoch": 1.330610347346749, "grad_norm": 215.15158081054688, "learning_rate": 5.322175208610343e-07, "loss": 24.3281, "step": 20035 }, { "epoch": 1.3306767616391046, "grad_norm": 150.1745147705078, "learning_rate": 5.321224644555755e-07, "loss": 13.5781, "step": 20036 }, { "epoch": 1.3307431759314605, "grad_norm": 400.4772644042969, "learning_rate": 5.320274134622542e-07, "loss": 19.0469, "step": 20037 }, { "epoch": 1.3308095902238162, "grad_norm": 333.76556396484375, "learning_rate": 5.319323678821703e-07, "loss": 17.1094, "step": 20038 }, { "epoch": 1.3308760045161718, "grad_norm": 161.27294921875, "learning_rate": 5.318373277164221e-07, "loss": 12.625, "step": 20039 }, { "epoch": 1.3309424188085277, "grad_norm": 187.8431396484375, "learning_rate": 5.317422929661107e-07, "loss": 16.2969, "step": 20040 }, { "epoch": 1.3310088331008834, "grad_norm": 141.4316864013672, "learning_rate": 5.316472636323337e-07, "loss": 14.6406, "step": 20041 }, { "epoch": 1.331075247393239, "grad_norm": 264.1761779785156, "learning_rate": 5.315522397161913e-07, "loss": 12.3359, "step": 20042 }, { "epoch": 1.3311416616855947, "grad_norm": 219.16412353515625, "learning_rate": 5.314572212187824e-07, "loss": 21.2188, "step": 20043 }, { "epoch": 1.3312080759779503, "grad_norm": 176.08120727539062, "learning_rate": 5.313622081412067e-07, "loss": 13.2656, "step": 20044 }, { "epoch": 1.3312744902703062, "grad_norm": 171.9168243408203, "learning_rate": 5.312672004845618e-07, "loss": 15.0625, "step": 20045 }, { "epoch": 1.3313409045626619, "grad_norm": 233.9971160888672, "learning_rate": 5.311721982499487e-07, "loss": 16.3438, "step": 20046 }, { "epoch": 1.3314073188550175, "grad_norm": 166.9442901611328, "learning_rate": 5.310772014384646e-07, "loss": 11.6406, "step": 20047 }, { "epoch": 1.3314737331473734, "grad_norm": 191.3098602294922, "learning_rate": 5.30982210051209e-07, "loss": 17.3672, "step": 20048 }, { "epoch": 1.331540147439729, "grad_norm": 504.5289611816406, "learning_rate": 5.308872240892809e-07, "loss": 19.0, "step": 20049 }, { "epoch": 1.3316065617320847, "grad_norm": 233.0165252685547, "learning_rate": 5.307922435537788e-07, "loss": 15.125, "step": 20050 }, { "epoch": 1.3316729760244406, "grad_norm": 245.6305389404297, "learning_rate": 5.306972684458016e-07, "loss": 19.4688, "step": 20051 }, { "epoch": 1.3317393903167962, "grad_norm": 305.7613830566406, "learning_rate": 5.306022987664481e-07, "loss": 17.0, "step": 20052 }, { "epoch": 1.3318058046091519, "grad_norm": 265.63568115234375, "learning_rate": 5.305073345168156e-07, "loss": 15.5781, "step": 20053 }, { "epoch": 1.3318722189015075, "grad_norm": 171.82656860351562, "learning_rate": 5.304123756980045e-07, "loss": 16.2969, "step": 20054 }, { "epoch": 1.3319386331938632, "grad_norm": 142.77171325683594, "learning_rate": 5.303174223111117e-07, "loss": 13.4531, "step": 20055 }, { "epoch": 1.332005047486219, "grad_norm": 180.78033447265625, "learning_rate": 5.302224743572364e-07, "loss": 16.5625, "step": 20056 }, { "epoch": 1.3320714617785747, "grad_norm": 197.7632598876953, "learning_rate": 5.301275318374764e-07, "loss": 13.625, "step": 20057 }, { "epoch": 1.3321378760709304, "grad_norm": 246.17974853515625, "learning_rate": 5.300325947529303e-07, "loss": 22.625, "step": 20058 }, { "epoch": 1.3322042903632862, "grad_norm": 133.30886840820312, "learning_rate": 5.29937663104696e-07, "loss": 17.25, "step": 20059 }, { "epoch": 1.332270704655642, "grad_norm": 205.39797973632812, "learning_rate": 5.298427368938723e-07, "loss": 19.125, "step": 20060 }, { "epoch": 1.3323371189479976, "grad_norm": 216.32237243652344, "learning_rate": 5.297478161215558e-07, "loss": 16.2031, "step": 20061 }, { "epoch": 1.3324035332403534, "grad_norm": 200.8133087158203, "learning_rate": 5.296529007888462e-07, "loss": 17.0625, "step": 20062 }, { "epoch": 1.332469947532709, "grad_norm": 458.7415466308594, "learning_rate": 5.295579908968403e-07, "loss": 17.1875, "step": 20063 }, { "epoch": 1.3325363618250647, "grad_norm": 225.45513916015625, "learning_rate": 5.294630864466362e-07, "loss": 28.6562, "step": 20064 }, { "epoch": 1.3326027761174204, "grad_norm": 270.8951110839844, "learning_rate": 5.29368187439332e-07, "loss": 19.7188, "step": 20065 }, { "epoch": 1.332669190409776, "grad_norm": 215.5455780029297, "learning_rate": 5.292732938760254e-07, "loss": 14.8594, "step": 20066 }, { "epoch": 1.332735604702132, "grad_norm": 147.61834716796875, "learning_rate": 5.291784057578132e-07, "loss": 13.6094, "step": 20067 }, { "epoch": 1.3328020189944876, "grad_norm": 137.56170654296875, "learning_rate": 5.290835230857945e-07, "loss": 19.1562, "step": 20068 }, { "epoch": 1.3328684332868432, "grad_norm": 320.1433410644531, "learning_rate": 5.289886458610658e-07, "loss": 23.75, "step": 20069 }, { "epoch": 1.3329348475791991, "grad_norm": 451.30450439453125, "learning_rate": 5.288937740847246e-07, "loss": 22.0469, "step": 20070 }, { "epoch": 1.3330012618715548, "grad_norm": 100.31490325927734, "learning_rate": 5.287989077578686e-07, "loss": 13.5469, "step": 20071 }, { "epoch": 1.3330676761639104, "grad_norm": 232.64895629882812, "learning_rate": 5.287040468815951e-07, "loss": 16.1328, "step": 20072 }, { "epoch": 1.3331340904562663, "grad_norm": 309.3260803222656, "learning_rate": 5.286091914570017e-07, "loss": 23.7031, "step": 20073 }, { "epoch": 1.333200504748622, "grad_norm": 326.3487243652344, "learning_rate": 5.285143414851851e-07, "loss": 15.3281, "step": 20074 }, { "epoch": 1.3332669190409776, "grad_norm": 564.7393798828125, "learning_rate": 5.284194969672428e-07, "loss": 19.5469, "step": 20075 }, { "epoch": 1.3333333333333333, "grad_norm": 290.1070556640625, "learning_rate": 5.283246579042717e-07, "loss": 18.7812, "step": 20076 }, { "epoch": 1.333399747625689, "grad_norm": 250.09893798828125, "learning_rate": 5.282298242973695e-07, "loss": 17.4062, "step": 20077 }, { "epoch": 1.3334661619180448, "grad_norm": 171.35891723632812, "learning_rate": 5.281349961476318e-07, "loss": 15.1172, "step": 20078 }, { "epoch": 1.3335325762104004, "grad_norm": 132.8822021484375, "learning_rate": 5.280401734561572e-07, "loss": 15.2188, "step": 20079 }, { "epoch": 1.333598990502756, "grad_norm": 218.28585815429688, "learning_rate": 5.279453562240414e-07, "loss": 18.7812, "step": 20080 }, { "epoch": 1.333665404795112, "grad_norm": 181.74822998046875, "learning_rate": 5.278505444523816e-07, "loss": 17.2812, "step": 20081 }, { "epoch": 1.3337318190874676, "grad_norm": 483.8159484863281, "learning_rate": 5.277557381422741e-07, "loss": 14.0469, "step": 20082 }, { "epoch": 1.3337982333798233, "grad_norm": 220.06741333007812, "learning_rate": 5.276609372948166e-07, "loss": 17.7188, "step": 20083 }, { "epoch": 1.3338646476721792, "grad_norm": 242.4124755859375, "learning_rate": 5.275661419111041e-07, "loss": 15.7969, "step": 20084 }, { "epoch": 1.3339310619645348, "grad_norm": 372.8843994140625, "learning_rate": 5.274713519922349e-07, "loss": 21.5625, "step": 20085 }, { "epoch": 1.3339974762568905, "grad_norm": 208.2240447998047, "learning_rate": 5.273765675393041e-07, "loss": 18.9531, "step": 20086 }, { "epoch": 1.3340638905492461, "grad_norm": 135.76058959960938, "learning_rate": 5.272817885534088e-07, "loss": 16.8281, "step": 20087 }, { "epoch": 1.3341303048416018, "grad_norm": 329.2279968261719, "learning_rate": 5.271870150356451e-07, "loss": 15.5312, "step": 20088 }, { "epoch": 1.3341967191339577, "grad_norm": 328.2570495605469, "learning_rate": 5.270922469871094e-07, "loss": 21.5, "step": 20089 }, { "epoch": 1.3342631334263133, "grad_norm": 860.3731689453125, "learning_rate": 5.269974844088978e-07, "loss": 22.4688, "step": 20090 }, { "epoch": 1.334329547718669, "grad_norm": 352.6907653808594, "learning_rate": 5.269027273021071e-07, "loss": 12.0859, "step": 20091 }, { "epoch": 1.3343959620110248, "grad_norm": 193.0708770751953, "learning_rate": 5.26807975667832e-07, "loss": 20.875, "step": 20092 }, { "epoch": 1.3344623763033805, "grad_norm": 540.636962890625, "learning_rate": 5.267132295071701e-07, "loss": 18.1406, "step": 20093 }, { "epoch": 1.3345287905957361, "grad_norm": 172.47401428222656, "learning_rate": 5.266184888212164e-07, "loss": 20.0078, "step": 20094 }, { "epoch": 1.334595204888092, "grad_norm": 309.1150817871094, "learning_rate": 5.26523753611067e-07, "loss": 16.0469, "step": 20095 }, { "epoch": 1.3346616191804477, "grad_norm": 96.5547866821289, "learning_rate": 5.264290238778176e-07, "loss": 11.4219, "step": 20096 }, { "epoch": 1.3347280334728033, "grad_norm": 194.4946746826172, "learning_rate": 5.263342996225644e-07, "loss": 20.0312, "step": 20097 }, { "epoch": 1.334794447765159, "grad_norm": 174.6497344970703, "learning_rate": 5.262395808464029e-07, "loss": 15.6719, "step": 20098 }, { "epoch": 1.3348608620575146, "grad_norm": 562.8173828125, "learning_rate": 5.26144867550429e-07, "loss": 16.4688, "step": 20099 }, { "epoch": 1.3349272763498705, "grad_norm": 206.9641876220703, "learning_rate": 5.260501597357372e-07, "loss": 12.4062, "step": 20100 }, { "epoch": 1.3349936906422262, "grad_norm": 210.90945434570312, "learning_rate": 5.259554574034249e-07, "loss": 14.1641, "step": 20101 }, { "epoch": 1.3350601049345818, "grad_norm": 432.7358703613281, "learning_rate": 5.258607605545859e-07, "loss": 16.5703, "step": 20102 }, { "epoch": 1.3351265192269377, "grad_norm": 167.13050842285156, "learning_rate": 5.257660691903163e-07, "loss": 16.5469, "step": 20103 }, { "epoch": 1.3351929335192934, "grad_norm": 324.3852233886719, "learning_rate": 5.256713833117113e-07, "loss": 23.4688, "step": 20104 }, { "epoch": 1.335259347811649, "grad_norm": 220.46800231933594, "learning_rate": 5.255767029198666e-07, "loss": 16.3438, "step": 20105 }, { "epoch": 1.3353257621040049, "grad_norm": 318.2398681640625, "learning_rate": 5.254820280158764e-07, "loss": 16.8281, "step": 20106 }, { "epoch": 1.3353921763963605, "grad_norm": 363.6907043457031, "learning_rate": 5.253873586008371e-07, "loss": 22.7656, "step": 20107 }, { "epoch": 1.3354585906887162, "grad_norm": 88.19676971435547, "learning_rate": 5.252926946758429e-07, "loss": 13.2812, "step": 20108 }, { "epoch": 1.3355250049810719, "grad_norm": 196.0322265625, "learning_rate": 5.251980362419886e-07, "loss": 16.5469, "step": 20109 }, { "epoch": 1.3355914192734275, "grad_norm": 172.50450134277344, "learning_rate": 5.251033833003704e-07, "loss": 14.2188, "step": 20110 }, { "epoch": 1.3356578335657834, "grad_norm": 1022.3079223632812, "learning_rate": 5.250087358520821e-07, "loss": 13.5938, "step": 20111 }, { "epoch": 1.335724247858139, "grad_norm": 204.3840789794922, "learning_rate": 5.249140938982189e-07, "loss": 18.4375, "step": 20112 }, { "epoch": 1.3357906621504947, "grad_norm": 235.8435516357422, "learning_rate": 5.248194574398753e-07, "loss": 13.6719, "step": 20113 }, { "epoch": 1.3358570764428506, "grad_norm": 347.3956604003906, "learning_rate": 5.247248264781463e-07, "loss": 13.1875, "step": 20114 }, { "epoch": 1.3359234907352062, "grad_norm": 388.0047912597656, "learning_rate": 5.246302010141266e-07, "loss": 18.6094, "step": 20115 }, { "epoch": 1.3359899050275619, "grad_norm": 229.24697875976562, "learning_rate": 5.245355810489108e-07, "loss": 14.2031, "step": 20116 }, { "epoch": 1.3360563193199178, "grad_norm": 284.2703857421875, "learning_rate": 5.244409665835925e-07, "loss": 23.5312, "step": 20117 }, { "epoch": 1.3361227336122734, "grad_norm": 163.71038818359375, "learning_rate": 5.243463576192677e-07, "loss": 16.0312, "step": 20118 }, { "epoch": 1.336189147904629, "grad_norm": 235.62428283691406, "learning_rate": 5.242517541570296e-07, "loss": 15.3281, "step": 20119 }, { "epoch": 1.3362555621969847, "grad_norm": 190.98696899414062, "learning_rate": 5.241571561979729e-07, "loss": 15.9688, "step": 20120 }, { "epoch": 1.3363219764893404, "grad_norm": 447.67315673828125, "learning_rate": 5.240625637431917e-07, "loss": 14.5156, "step": 20121 }, { "epoch": 1.3363883907816962, "grad_norm": 121.07698059082031, "learning_rate": 5.239679767937804e-07, "loss": 17.9688, "step": 20122 }, { "epoch": 1.336454805074052, "grad_norm": 221.0354766845703, "learning_rate": 5.238733953508329e-07, "loss": 10.6094, "step": 20123 }, { "epoch": 1.3365212193664076, "grad_norm": 256.8255920410156, "learning_rate": 5.237788194154441e-07, "loss": 20.0625, "step": 20124 }, { "epoch": 1.3365876336587634, "grad_norm": 551.3335571289062, "learning_rate": 5.236842489887063e-07, "loss": 13.625, "step": 20125 }, { "epoch": 1.336654047951119, "grad_norm": 114.76081085205078, "learning_rate": 5.235896840717153e-07, "loss": 21.2188, "step": 20126 }, { "epoch": 1.3367204622434747, "grad_norm": 222.0771942138672, "learning_rate": 5.234951246655638e-07, "loss": 17.4062, "step": 20127 }, { "epoch": 1.3367868765358306, "grad_norm": 149.4689178466797, "learning_rate": 5.234005707713458e-07, "loss": 13.7188, "step": 20128 }, { "epoch": 1.3368532908281863, "grad_norm": 606.5719604492188, "learning_rate": 5.233060223901553e-07, "loss": 18.5156, "step": 20129 }, { "epoch": 1.336919705120542, "grad_norm": 117.21129608154297, "learning_rate": 5.232114795230862e-07, "loss": 12.5938, "step": 20130 }, { "epoch": 1.3369861194128976, "grad_norm": 295.0594177246094, "learning_rate": 5.23116942171231e-07, "loss": 13.0859, "step": 20131 }, { "epoch": 1.3370525337052532, "grad_norm": 127.0702896118164, "learning_rate": 5.230224103356848e-07, "loss": 16.0156, "step": 20132 }, { "epoch": 1.337118947997609, "grad_norm": 231.71763610839844, "learning_rate": 5.229278840175399e-07, "loss": 16.5625, "step": 20133 }, { "epoch": 1.3371853622899648, "grad_norm": 231.82794189453125, "learning_rate": 5.2283336321789e-07, "loss": 14.9688, "step": 20134 }, { "epoch": 1.3372517765823204, "grad_norm": 164.36846923828125, "learning_rate": 5.227388479378288e-07, "loss": 12.9219, "step": 20135 }, { "epoch": 1.3373181908746763, "grad_norm": 383.2608337402344, "learning_rate": 5.226443381784493e-07, "loss": 14.7656, "step": 20136 }, { "epoch": 1.337384605167032, "grad_norm": 156.1980438232422, "learning_rate": 5.225498339408449e-07, "loss": 19.8594, "step": 20137 }, { "epoch": 1.3374510194593876, "grad_norm": 469.7926330566406, "learning_rate": 5.224553352261091e-07, "loss": 15.2188, "step": 20138 }, { "epoch": 1.3375174337517435, "grad_norm": 372.978271484375, "learning_rate": 5.223608420353339e-07, "loss": 19.5938, "step": 20139 }, { "epoch": 1.3375838480440991, "grad_norm": 207.43540954589844, "learning_rate": 5.222663543696137e-07, "loss": 16.25, "step": 20140 }, { "epoch": 1.3376502623364548, "grad_norm": 166.90721130371094, "learning_rate": 5.221718722300406e-07, "loss": 12.6172, "step": 20141 }, { "epoch": 1.3377166766288104, "grad_norm": 302.5424499511719, "learning_rate": 5.220773956177076e-07, "loss": 20.2812, "step": 20142 }, { "epoch": 1.3377830909211663, "grad_norm": 283.47528076171875, "learning_rate": 5.219829245337078e-07, "loss": 16.4062, "step": 20143 }, { "epoch": 1.337849505213522, "grad_norm": 251.23049926757812, "learning_rate": 5.218884589791339e-07, "loss": 18.1875, "step": 20144 }, { "epoch": 1.3379159195058776, "grad_norm": 232.7264404296875, "learning_rate": 5.217939989550786e-07, "loss": 14.2188, "step": 20145 }, { "epoch": 1.3379823337982333, "grad_norm": 144.34573364257812, "learning_rate": 5.216995444626344e-07, "loss": 17.2812, "step": 20146 }, { "epoch": 1.3380487480905892, "grad_norm": 135.03199768066406, "learning_rate": 5.216050955028945e-07, "loss": 18.3438, "step": 20147 }, { "epoch": 1.3381151623829448, "grad_norm": 146.435791015625, "learning_rate": 5.215106520769504e-07, "loss": 13.4062, "step": 20148 }, { "epoch": 1.3381815766753005, "grad_norm": 142.0983428955078, "learning_rate": 5.214162141858958e-07, "loss": 17.1875, "step": 20149 }, { "epoch": 1.3382479909676563, "grad_norm": 999.883544921875, "learning_rate": 5.213217818308222e-07, "loss": 34.75, "step": 20150 }, { "epoch": 1.338314405260012, "grad_norm": 200.671142578125, "learning_rate": 5.212273550128221e-07, "loss": 16.6875, "step": 20151 }, { "epoch": 1.3383808195523677, "grad_norm": 138.3709259033203, "learning_rate": 5.211329337329879e-07, "loss": 16.2969, "step": 20152 }, { "epoch": 1.3384472338447233, "grad_norm": 363.83697509765625, "learning_rate": 5.210385179924118e-07, "loss": 18.0469, "step": 20153 }, { "epoch": 1.3385136481370792, "grad_norm": 186.5098419189453, "learning_rate": 5.209441077921861e-07, "loss": 13.9219, "step": 20154 }, { "epoch": 1.3385800624294348, "grad_norm": 414.7048034667969, "learning_rate": 5.208497031334029e-07, "loss": 26.375, "step": 20155 }, { "epoch": 1.3386464767217905, "grad_norm": 440.5160827636719, "learning_rate": 5.207553040171533e-07, "loss": 22.5156, "step": 20156 }, { "epoch": 1.3387128910141461, "grad_norm": 145.9416961669922, "learning_rate": 5.206609104445307e-07, "loss": 13.4062, "step": 20157 }, { "epoch": 1.338779305306502, "grad_norm": 890.2681884765625, "learning_rate": 5.20566522416626e-07, "loss": 12.4531, "step": 20158 }, { "epoch": 1.3388457195988577, "grad_norm": 294.83331298828125, "learning_rate": 5.204721399345313e-07, "loss": 16.25, "step": 20159 }, { "epoch": 1.3389121338912133, "grad_norm": 232.48956298828125, "learning_rate": 5.203777629993384e-07, "loss": 18.0625, "step": 20160 }, { "epoch": 1.3389785481835692, "grad_norm": 141.29177856445312, "learning_rate": 5.20283391612139e-07, "loss": 11.7656, "step": 20161 }, { "epoch": 1.3390449624759249, "grad_norm": 201.219482421875, "learning_rate": 5.201890257740245e-07, "loss": 18.3438, "step": 20162 }, { "epoch": 1.3391113767682805, "grad_norm": 121.89363098144531, "learning_rate": 5.200946654860872e-07, "loss": 14.6094, "step": 20163 }, { "epoch": 1.3391777910606362, "grad_norm": 163.85125732421875, "learning_rate": 5.200003107494174e-07, "loss": 18.9062, "step": 20164 }, { "epoch": 1.339244205352992, "grad_norm": 231.75665283203125, "learning_rate": 5.199059615651079e-07, "loss": 16.125, "step": 20165 }, { "epoch": 1.3393106196453477, "grad_norm": 257.5004577636719, "learning_rate": 5.198116179342491e-07, "loss": 13.4453, "step": 20166 }, { "epoch": 1.3393770339377034, "grad_norm": 109.2806396484375, "learning_rate": 5.197172798579324e-07, "loss": 12.125, "step": 20167 }, { "epoch": 1.339443448230059, "grad_norm": 209.12582397460938, "learning_rate": 5.196229473372494e-07, "loss": 16.5312, "step": 20168 }, { "epoch": 1.3395098625224149, "grad_norm": 339.9558410644531, "learning_rate": 5.195286203732916e-07, "loss": 12.4219, "step": 20169 }, { "epoch": 1.3395762768147705, "grad_norm": 137.89276123046875, "learning_rate": 5.194342989671487e-07, "loss": 15.4375, "step": 20170 }, { "epoch": 1.3396426911071262, "grad_norm": 221.8856658935547, "learning_rate": 5.193399831199136e-07, "loss": 13.2969, "step": 20171 }, { "epoch": 1.339709105399482, "grad_norm": 308.3929748535156, "learning_rate": 5.192456728326762e-07, "loss": 17.0938, "step": 20172 }, { "epoch": 1.3397755196918377, "grad_norm": 276.3221740722656, "learning_rate": 5.191513681065273e-07, "loss": 19.25, "step": 20173 }, { "epoch": 1.3398419339841934, "grad_norm": 380.0277404785156, "learning_rate": 5.190570689425584e-07, "loss": 13.0938, "step": 20174 }, { "epoch": 1.339908348276549, "grad_norm": 205.0798797607422, "learning_rate": 5.189627753418598e-07, "loss": 14.3594, "step": 20175 }, { "epoch": 1.339974762568905, "grad_norm": 771.9702758789062, "learning_rate": 5.188684873055225e-07, "loss": 20.5625, "step": 20176 }, { "epoch": 1.3400411768612606, "grad_norm": 152.99842834472656, "learning_rate": 5.187742048346375e-07, "loss": 15.3281, "step": 20177 }, { "epoch": 1.3401075911536162, "grad_norm": 200.87826538085938, "learning_rate": 5.186799279302942e-07, "loss": 17.0156, "step": 20178 }, { "epoch": 1.3401740054459719, "grad_norm": 204.45797729492188, "learning_rate": 5.185856565935848e-07, "loss": 16.5, "step": 20179 }, { "epoch": 1.3402404197383277, "grad_norm": 180.22091674804688, "learning_rate": 5.184913908255986e-07, "loss": 15.8516, "step": 20180 }, { "epoch": 1.3403068340306834, "grad_norm": 902.292236328125, "learning_rate": 5.183971306274262e-07, "loss": 19.8594, "step": 20181 }, { "epoch": 1.340373248323039, "grad_norm": 209.43226623535156, "learning_rate": 5.18302876000158e-07, "loss": 20.2344, "step": 20182 }, { "epoch": 1.340439662615395, "grad_norm": 130.5612030029297, "learning_rate": 5.182086269448844e-07, "loss": 13.6875, "step": 20183 }, { "epoch": 1.3405060769077506, "grad_norm": 173.00816345214844, "learning_rate": 5.181143834626957e-07, "loss": 14.7188, "step": 20184 }, { "epoch": 1.3405724912001062, "grad_norm": 225.89279174804688, "learning_rate": 5.18020145554682e-07, "loss": 20.375, "step": 20185 }, { "epoch": 1.340638905492462, "grad_norm": 271.5846252441406, "learning_rate": 5.179259132219331e-07, "loss": 15.3906, "step": 20186 }, { "epoch": 1.3407053197848178, "grad_norm": 144.12588500976562, "learning_rate": 5.178316864655395e-07, "loss": 15.6875, "step": 20187 }, { "epoch": 1.3407717340771734, "grad_norm": 235.53399658203125, "learning_rate": 5.177374652865912e-07, "loss": 19.3906, "step": 20188 }, { "epoch": 1.340838148369529, "grad_norm": 274.4434814453125, "learning_rate": 5.176432496861774e-07, "loss": 24.375, "step": 20189 }, { "epoch": 1.3409045626618847, "grad_norm": 159.22122192382812, "learning_rate": 5.175490396653884e-07, "loss": 15.5781, "step": 20190 }, { "epoch": 1.3409709769542406, "grad_norm": 154.86138916015625, "learning_rate": 5.174548352253138e-07, "loss": 15.0469, "step": 20191 }, { "epoch": 1.3410373912465963, "grad_norm": 133.96405029296875, "learning_rate": 5.173606363670436e-07, "loss": 12.9688, "step": 20192 }, { "epoch": 1.341103805538952, "grad_norm": 182.75885009765625, "learning_rate": 5.172664430916671e-07, "loss": 11.8203, "step": 20193 }, { "epoch": 1.3411702198313078, "grad_norm": 168.19186401367188, "learning_rate": 5.171722554002745e-07, "loss": 15.7188, "step": 20194 }, { "epoch": 1.3412366341236635, "grad_norm": 238.39535522460938, "learning_rate": 5.17078073293954e-07, "loss": 16.2656, "step": 20195 }, { "epoch": 1.341303048416019, "grad_norm": 131.81939697265625, "learning_rate": 5.169838967737966e-07, "loss": 14.5625, "step": 20196 }, { "epoch": 1.3413694627083748, "grad_norm": 177.50645446777344, "learning_rate": 5.168897258408907e-07, "loss": 12.8281, "step": 20197 }, { "epoch": 1.3414358770007306, "grad_norm": 674.8519287109375, "learning_rate": 5.167955604963258e-07, "loss": 11.8906, "step": 20198 }, { "epoch": 1.3415022912930863, "grad_norm": 151.45326232910156, "learning_rate": 5.167014007411912e-07, "loss": 15.9531, "step": 20199 }, { "epoch": 1.341568705585442, "grad_norm": 431.655029296875, "learning_rate": 5.166072465765762e-07, "loss": 19.4062, "step": 20200 }, { "epoch": 1.3416351198777976, "grad_norm": 243.06185913085938, "learning_rate": 5.165130980035696e-07, "loss": 19.6406, "step": 20201 }, { "epoch": 1.3417015341701535, "grad_norm": 446.576171875, "learning_rate": 5.164189550232614e-07, "loss": 12.1719, "step": 20202 }, { "epoch": 1.3417679484625091, "grad_norm": 80.41205596923828, "learning_rate": 5.16324817636739e-07, "loss": 10.7109, "step": 20203 }, { "epoch": 1.3418343627548648, "grad_norm": 137.10231018066406, "learning_rate": 5.162306858450929e-07, "loss": 13.3906, "step": 20204 }, { "epoch": 1.3419007770472207, "grad_norm": 227.90118408203125, "learning_rate": 5.161365596494109e-07, "loss": 19.7891, "step": 20205 }, { "epoch": 1.3419671913395763, "grad_norm": 229.2603302001953, "learning_rate": 5.160424390507823e-07, "loss": 16.75, "step": 20206 }, { "epoch": 1.342033605631932, "grad_norm": 215.46087646484375, "learning_rate": 5.159483240502955e-07, "loss": 16.8438, "step": 20207 }, { "epoch": 1.3421000199242876, "grad_norm": 180.3169403076172, "learning_rate": 5.158542146490399e-07, "loss": 13.0625, "step": 20208 }, { "epoch": 1.3421664342166435, "grad_norm": 466.0998840332031, "learning_rate": 5.157601108481027e-07, "loss": 17.2188, "step": 20209 }, { "epoch": 1.3422328485089992, "grad_norm": 111.39410400390625, "learning_rate": 5.156660126485741e-07, "loss": 14.625, "step": 20210 }, { "epoch": 1.3422992628013548, "grad_norm": 132.37799072265625, "learning_rate": 5.155719200515414e-07, "loss": 15.3594, "step": 20211 }, { "epoch": 1.3423656770937105, "grad_norm": 355.48284912109375, "learning_rate": 5.154778330580935e-07, "loss": 17.875, "step": 20212 }, { "epoch": 1.3424320913860663, "grad_norm": 119.03436279296875, "learning_rate": 5.153837516693184e-07, "loss": 15.0312, "step": 20213 }, { "epoch": 1.342498505678422, "grad_norm": 124.6065444946289, "learning_rate": 5.152896758863048e-07, "loss": 12.375, "step": 20214 }, { "epoch": 1.3425649199707776, "grad_norm": 179.73776245117188, "learning_rate": 5.151956057101408e-07, "loss": 18.8281, "step": 20215 }, { "epoch": 1.3426313342631335, "grad_norm": 186.99378967285156, "learning_rate": 5.151015411419147e-07, "loss": 15.3281, "step": 20216 }, { "epoch": 1.3426977485554892, "grad_norm": 2083.147705078125, "learning_rate": 5.150074821827138e-07, "loss": 10.2422, "step": 20217 }, { "epoch": 1.3427641628478448, "grad_norm": 202.0330810546875, "learning_rate": 5.149134288336268e-07, "loss": 20.9844, "step": 20218 }, { "epoch": 1.3428305771402007, "grad_norm": 559.391357421875, "learning_rate": 5.14819381095742e-07, "loss": 30.0156, "step": 20219 }, { "epoch": 1.3428969914325564, "grad_norm": 177.41323852539062, "learning_rate": 5.147253389701461e-07, "loss": 18.5469, "step": 20220 }, { "epoch": 1.342963405724912, "grad_norm": 191.76312255859375, "learning_rate": 5.146313024579285e-07, "loss": 19.2188, "step": 20221 }, { "epoch": 1.3430298200172677, "grad_norm": 277.254638671875, "learning_rate": 5.145372715601756e-07, "loss": 21.1875, "step": 20222 }, { "epoch": 1.3430962343096233, "grad_norm": 691.3781127929688, "learning_rate": 5.144432462779756e-07, "loss": 23.0156, "step": 20223 }, { "epoch": 1.3431626486019792, "grad_norm": 299.1681823730469, "learning_rate": 5.143492266124163e-07, "loss": 18.9062, "step": 20224 }, { "epoch": 1.3432290628943349, "grad_norm": 219.0727996826172, "learning_rate": 5.142552125645849e-07, "loss": 16.5781, "step": 20225 }, { "epoch": 1.3432954771866905, "grad_norm": 139.14120483398438, "learning_rate": 5.141612041355692e-07, "loss": 13.5156, "step": 20226 }, { "epoch": 1.3433618914790464, "grad_norm": 159.0631103515625, "learning_rate": 5.14067201326457e-07, "loss": 17.2031, "step": 20227 }, { "epoch": 1.343428305771402, "grad_norm": 166.3577117919922, "learning_rate": 5.139732041383344e-07, "loss": 15.2188, "step": 20228 }, { "epoch": 1.3434947200637577, "grad_norm": 203.24708557128906, "learning_rate": 5.138792125722901e-07, "loss": 17.6875, "step": 20229 }, { "epoch": 1.3435611343561136, "grad_norm": 180.67823791503906, "learning_rate": 5.137852266294107e-07, "loss": 17.1641, "step": 20230 }, { "epoch": 1.3436275486484692, "grad_norm": 287.17181396484375, "learning_rate": 5.136912463107832e-07, "loss": 16.0156, "step": 20231 }, { "epoch": 1.3436939629408249, "grad_norm": 553.2943115234375, "learning_rate": 5.13597271617495e-07, "loss": 21.0156, "step": 20232 }, { "epoch": 1.3437603772331805, "grad_norm": 193.50408935546875, "learning_rate": 5.135033025506334e-07, "loss": 13.3125, "step": 20233 }, { "epoch": 1.3438267915255362, "grad_norm": 352.7632751464844, "learning_rate": 5.134093391112845e-07, "loss": 26.4609, "step": 20234 }, { "epoch": 1.343893205817892, "grad_norm": 326.6852722167969, "learning_rate": 5.133153813005364e-07, "loss": 18.8281, "step": 20235 }, { "epoch": 1.3439596201102477, "grad_norm": 289.9067077636719, "learning_rate": 5.13221429119475e-07, "loss": 19.4688, "step": 20236 }, { "epoch": 1.3440260344026034, "grad_norm": 244.6708221435547, "learning_rate": 5.131274825691874e-07, "loss": 15.875, "step": 20237 }, { "epoch": 1.3440924486949593, "grad_norm": 207.41014099121094, "learning_rate": 5.130335416507603e-07, "loss": 19.4375, "step": 20238 }, { "epoch": 1.344158862987315, "grad_norm": 186.32179260253906, "learning_rate": 5.129396063652803e-07, "loss": 12.7109, "step": 20239 }, { "epoch": 1.3442252772796706, "grad_norm": 112.39946746826172, "learning_rate": 5.128456767138342e-07, "loss": 15.4531, "step": 20240 }, { "epoch": 1.3442916915720264, "grad_norm": 255.00198364257812, "learning_rate": 5.127517526975089e-07, "loss": 27.625, "step": 20241 }, { "epoch": 1.344358105864382, "grad_norm": 147.54446411132812, "learning_rate": 5.126578343173893e-07, "loss": 15.0156, "step": 20242 }, { "epoch": 1.3444245201567377, "grad_norm": 143.951416015625, "learning_rate": 5.125639215745639e-07, "loss": 12.4531, "step": 20243 }, { "epoch": 1.3444909344490934, "grad_norm": 245.2096405029297, "learning_rate": 5.124700144701174e-07, "loss": 14.0469, "step": 20244 }, { "epoch": 1.344557348741449, "grad_norm": 178.08407592773438, "learning_rate": 5.123761130051367e-07, "loss": 18.6562, "step": 20245 }, { "epoch": 1.344623763033805, "grad_norm": 114.24562072753906, "learning_rate": 5.122822171807079e-07, "loss": 13.4531, "step": 20246 }, { "epoch": 1.3446901773261606, "grad_norm": 159.3588104248047, "learning_rate": 5.121883269979174e-07, "loss": 16.3281, "step": 20247 }, { "epoch": 1.3447565916185162, "grad_norm": 168.5726318359375, "learning_rate": 5.120944424578508e-07, "loss": 14.3438, "step": 20248 }, { "epoch": 1.3448230059108721, "grad_norm": 813.1015625, "learning_rate": 5.12000563561595e-07, "loss": 16.2188, "step": 20249 }, { "epoch": 1.3448894202032278, "grad_norm": 185.83424377441406, "learning_rate": 5.119066903102342e-07, "loss": 14.8281, "step": 20250 }, { "epoch": 1.3449558344955834, "grad_norm": 116.25157928466797, "learning_rate": 5.118128227048564e-07, "loss": 17.1875, "step": 20251 }, { "epoch": 1.3450222487879393, "grad_norm": 357.4365539550781, "learning_rate": 5.117189607465458e-07, "loss": 14.3906, "step": 20252 }, { "epoch": 1.345088663080295, "grad_norm": 166.20755004882812, "learning_rate": 5.116251044363887e-07, "loss": 21.1562, "step": 20253 }, { "epoch": 1.3451550773726506, "grad_norm": 391.7178955078125, "learning_rate": 5.115312537754708e-07, "loss": 15.5469, "step": 20254 }, { "epoch": 1.3452214916650063, "grad_norm": 165.03236389160156, "learning_rate": 5.114374087648778e-07, "loss": 18.3281, "step": 20255 }, { "epoch": 1.345287905957362, "grad_norm": 149.89730834960938, "learning_rate": 5.113435694056951e-07, "loss": 11.625, "step": 20256 }, { "epoch": 1.3453543202497178, "grad_norm": 138.85910034179688, "learning_rate": 5.112497356990081e-07, "loss": 16.5625, "step": 20257 }, { "epoch": 1.3454207345420734, "grad_norm": 343.1615295410156, "learning_rate": 5.111559076459028e-07, "loss": 18.3594, "step": 20258 }, { "epoch": 1.345487148834429, "grad_norm": 249.79092407226562, "learning_rate": 5.110620852474632e-07, "loss": 29.5938, "step": 20259 }, { "epoch": 1.345553563126785, "grad_norm": 88.93585205078125, "learning_rate": 5.109682685047764e-07, "loss": 13.4531, "step": 20260 }, { "epoch": 1.3456199774191406, "grad_norm": 326.0381164550781, "learning_rate": 5.108744574189262e-07, "loss": 19.3906, "step": 20261 }, { "epoch": 1.3456863917114963, "grad_norm": 305.8288879394531, "learning_rate": 5.107806519909983e-07, "loss": 17.875, "step": 20262 }, { "epoch": 1.3457528060038522, "grad_norm": 377.2084045410156, "learning_rate": 5.106868522220775e-07, "loss": 14.2969, "step": 20263 }, { "epoch": 1.3458192202962078, "grad_norm": 388.70068359375, "learning_rate": 5.105930581132493e-07, "loss": 17.4688, "step": 20264 }, { "epoch": 1.3458856345885635, "grad_norm": 119.5181884765625, "learning_rate": 5.104992696655983e-07, "loss": 13.3125, "step": 20265 }, { "epoch": 1.3459520488809191, "grad_norm": 365.6809387207031, "learning_rate": 5.104054868802099e-07, "loss": 26.1562, "step": 20266 }, { "epoch": 1.3460184631732748, "grad_norm": 427.1648254394531, "learning_rate": 5.103117097581676e-07, "loss": 17.5, "step": 20267 }, { "epoch": 1.3460848774656307, "grad_norm": 213.4244384765625, "learning_rate": 5.102179383005581e-07, "loss": 16.6406, "step": 20268 }, { "epoch": 1.3461512917579863, "grad_norm": 94.99504089355469, "learning_rate": 5.101241725084644e-07, "loss": 12.7344, "step": 20269 }, { "epoch": 1.346217706050342, "grad_norm": 738.8635864257812, "learning_rate": 5.10030412382972e-07, "loss": 20.8594, "step": 20270 }, { "epoch": 1.3462841203426978, "grad_norm": 232.3533935546875, "learning_rate": 5.099366579251651e-07, "loss": 13.3594, "step": 20271 }, { "epoch": 1.3463505346350535, "grad_norm": 362.33660888671875, "learning_rate": 5.098429091361289e-07, "loss": 13.6953, "step": 20272 }, { "epoch": 1.3464169489274092, "grad_norm": 327.778564453125, "learning_rate": 5.097491660169464e-07, "loss": 16.8438, "step": 20273 }, { "epoch": 1.346483363219765, "grad_norm": 234.18626403808594, "learning_rate": 5.096554285687036e-07, "loss": 11.2656, "step": 20274 }, { "epoch": 1.3465497775121207, "grad_norm": 128.19186401367188, "learning_rate": 5.095616967924837e-07, "loss": 14.1406, "step": 20275 }, { "epoch": 1.3466161918044763, "grad_norm": 354.9440002441406, "learning_rate": 5.094679706893714e-07, "loss": 18.1562, "step": 20276 }, { "epoch": 1.346682606096832, "grad_norm": 194.50062561035156, "learning_rate": 5.093742502604506e-07, "loss": 22.2656, "step": 20277 }, { "epoch": 1.3467490203891876, "grad_norm": 194.4259796142578, "learning_rate": 5.092805355068057e-07, "loss": 15.7188, "step": 20278 }, { "epoch": 1.3468154346815435, "grad_norm": 134.03347778320312, "learning_rate": 5.091868264295205e-07, "loss": 11.75, "step": 20279 }, { "epoch": 1.3468818489738992, "grad_norm": 190.12945556640625, "learning_rate": 5.090931230296796e-07, "loss": 16.5625, "step": 20280 }, { "epoch": 1.3469482632662548, "grad_norm": 380.1390075683594, "learning_rate": 5.089994253083654e-07, "loss": 23.2344, "step": 20281 }, { "epoch": 1.3470146775586107, "grad_norm": 120.65806579589844, "learning_rate": 5.089057332666635e-07, "loss": 16.0156, "step": 20282 }, { "epoch": 1.3470810918509664, "grad_norm": 163.8822479248047, "learning_rate": 5.088120469056567e-07, "loss": 15.7656, "step": 20283 }, { "epoch": 1.347147506143322, "grad_norm": 418.05230712890625, "learning_rate": 5.087183662264288e-07, "loss": 25.1094, "step": 20284 }, { "epoch": 1.347213920435678, "grad_norm": 561.3832397460938, "learning_rate": 5.086246912300636e-07, "loss": 15.6875, "step": 20285 }, { "epoch": 1.3472803347280335, "grad_norm": 195.6387939453125, "learning_rate": 5.085310219176445e-07, "loss": 14.5, "step": 20286 }, { "epoch": 1.3473467490203892, "grad_norm": 197.25059509277344, "learning_rate": 5.084373582902553e-07, "loss": 11.0234, "step": 20287 }, { "epoch": 1.3474131633127449, "grad_norm": 165.3917694091797, "learning_rate": 5.083437003489796e-07, "loss": 18.7344, "step": 20288 }, { "epoch": 1.3474795776051005, "grad_norm": 494.35333251953125, "learning_rate": 5.082500480948997e-07, "loss": 15.7969, "step": 20289 }, { "epoch": 1.3475459918974564, "grad_norm": 315.02679443359375, "learning_rate": 5.081564015291005e-07, "loss": 21.2344, "step": 20290 }, { "epoch": 1.347612406189812, "grad_norm": 334.4562683105469, "learning_rate": 5.080627606526639e-07, "loss": 14.375, "step": 20291 }, { "epoch": 1.3476788204821677, "grad_norm": 268.2233581542969, "learning_rate": 5.079691254666734e-07, "loss": 16.2969, "step": 20292 }, { "epoch": 1.3477452347745236, "grad_norm": 113.24063110351562, "learning_rate": 5.078754959722129e-07, "loss": 12.0625, "step": 20293 }, { "epoch": 1.3478116490668792, "grad_norm": 300.730712890625, "learning_rate": 5.077818721703647e-07, "loss": 15.2344, "step": 20294 }, { "epoch": 1.3478780633592349, "grad_norm": 176.56509399414062, "learning_rate": 5.076882540622119e-07, "loss": 19.0312, "step": 20295 }, { "epoch": 1.3479444776515908, "grad_norm": 143.2050018310547, "learning_rate": 5.075946416488375e-07, "loss": 11.8594, "step": 20296 }, { "epoch": 1.3480108919439464, "grad_norm": 279.04217529296875, "learning_rate": 5.075010349313247e-07, "loss": 16.4844, "step": 20297 }, { "epoch": 1.348077306236302, "grad_norm": 169.95298767089844, "learning_rate": 5.074074339107552e-07, "loss": 16.0781, "step": 20298 }, { "epoch": 1.3481437205286577, "grad_norm": 147.58676147460938, "learning_rate": 5.073138385882135e-07, "loss": 17.7344, "step": 20299 }, { "epoch": 1.3482101348210134, "grad_norm": 317.4847412109375, "learning_rate": 5.072202489647804e-07, "loss": 15.2031, "step": 20300 }, { "epoch": 1.3482765491133692, "grad_norm": 204.92933654785156, "learning_rate": 5.071266650415396e-07, "loss": 18.0469, "step": 20301 }, { "epoch": 1.348342963405725, "grad_norm": 278.619140625, "learning_rate": 5.070330868195731e-07, "loss": 17.9375, "step": 20302 }, { "epoch": 1.3484093776980806, "grad_norm": 200.3526611328125, "learning_rate": 5.069395142999637e-07, "loss": 13.4219, "step": 20303 }, { "epoch": 1.3484757919904364, "grad_norm": 198.11776733398438, "learning_rate": 5.068459474837936e-07, "loss": 25.125, "step": 20304 }, { "epoch": 1.348542206282792, "grad_norm": 577.5670776367188, "learning_rate": 5.067523863721458e-07, "loss": 27.375, "step": 20305 }, { "epoch": 1.3486086205751477, "grad_norm": 149.70721435546875, "learning_rate": 5.066588309661011e-07, "loss": 14.5156, "step": 20306 }, { "epoch": 1.3486750348675036, "grad_norm": 108.4762954711914, "learning_rate": 5.065652812667433e-07, "loss": 15.0938, "step": 20307 }, { "epoch": 1.3487414491598593, "grad_norm": 463.6645202636719, "learning_rate": 5.064717372751535e-07, "loss": 32.6562, "step": 20308 }, { "epoch": 1.348807863452215, "grad_norm": 480.32672119140625, "learning_rate": 5.063781989924138e-07, "loss": 24.5312, "step": 20309 }, { "epoch": 1.3488742777445706, "grad_norm": 247.7833251953125, "learning_rate": 5.062846664196068e-07, "loss": 18.9688, "step": 20310 }, { "epoch": 1.3489406920369262, "grad_norm": 352.1844177246094, "learning_rate": 5.061911395578138e-07, "loss": 20.6562, "step": 20311 }, { "epoch": 1.3490071063292821, "grad_norm": 380.94482421875, "learning_rate": 5.060976184081172e-07, "loss": 17.7969, "step": 20312 }, { "epoch": 1.3490735206216378, "grad_norm": 231.90972900390625, "learning_rate": 5.060041029715986e-07, "loss": 20.7656, "step": 20313 }, { "epoch": 1.3491399349139934, "grad_norm": 231.15370178222656, "learning_rate": 5.059105932493393e-07, "loss": 22.8125, "step": 20314 }, { "epoch": 1.3492063492063493, "grad_norm": 249.94931030273438, "learning_rate": 5.058170892424219e-07, "loss": 13.6875, "step": 20315 }, { "epoch": 1.349272763498705, "grad_norm": 104.44503021240234, "learning_rate": 5.057235909519271e-07, "loss": 12.8906, "step": 20316 }, { "epoch": 1.3493391777910606, "grad_norm": 163.36474609375, "learning_rate": 5.056300983789368e-07, "loss": 11.1406, "step": 20317 }, { "epoch": 1.3494055920834165, "grad_norm": 236.203125, "learning_rate": 5.055366115245326e-07, "loss": 20.2188, "step": 20318 }, { "epoch": 1.3494720063757721, "grad_norm": 182.141357421875, "learning_rate": 5.054431303897959e-07, "loss": 14.3438, "step": 20319 }, { "epoch": 1.3495384206681278, "grad_norm": 128.61422729492188, "learning_rate": 5.053496549758071e-07, "loss": 18.5625, "step": 20320 }, { "epoch": 1.3496048349604834, "grad_norm": 151.92103576660156, "learning_rate": 5.052561852836492e-07, "loss": 12.5, "step": 20321 }, { "epoch": 1.349671249252839, "grad_norm": 103.45488739013672, "learning_rate": 5.05162721314402e-07, "loss": 15.5, "step": 20322 }, { "epoch": 1.349737663545195, "grad_norm": 172.38919067382812, "learning_rate": 5.050692630691471e-07, "loss": 17.2344, "step": 20323 }, { "epoch": 1.3498040778375506, "grad_norm": 244.8787841796875, "learning_rate": 5.049758105489656e-07, "loss": 16.2656, "step": 20324 }, { "epoch": 1.3498704921299063, "grad_norm": 217.07473754882812, "learning_rate": 5.048823637549384e-07, "loss": 20.5938, "step": 20325 }, { "epoch": 1.3499369064222622, "grad_norm": 193.80264282226562, "learning_rate": 5.047889226881467e-07, "loss": 15.9844, "step": 20326 }, { "epoch": 1.3500033207146178, "grad_norm": 156.3809051513672, "learning_rate": 5.046954873496713e-07, "loss": 18.2812, "step": 20327 }, { "epoch": 1.3500697350069735, "grad_norm": 157.62355041503906, "learning_rate": 5.046020577405923e-07, "loss": 19.5469, "step": 20328 }, { "epoch": 1.3501361492993293, "grad_norm": 260.5941162109375, "learning_rate": 5.045086338619915e-07, "loss": 16.625, "step": 20329 }, { "epoch": 1.350202563591685, "grad_norm": 133.40394592285156, "learning_rate": 5.044152157149493e-07, "loss": 15.4062, "step": 20330 }, { "epoch": 1.3502689778840407, "grad_norm": 204.4180450439453, "learning_rate": 5.043218033005454e-07, "loss": 12.8594, "step": 20331 }, { "epoch": 1.3503353921763963, "grad_norm": 225.84510803222656, "learning_rate": 5.04228396619862e-07, "loss": 18.2656, "step": 20332 }, { "epoch": 1.350401806468752, "grad_norm": 348.2393798828125, "learning_rate": 5.04134995673978e-07, "loss": 19.4062, "step": 20333 }, { "epoch": 1.3504682207611078, "grad_norm": 139.72702026367188, "learning_rate": 5.040416004639745e-07, "loss": 14.6875, "step": 20334 }, { "epoch": 1.3505346350534635, "grad_norm": 199.92263793945312, "learning_rate": 5.039482109909319e-07, "loss": 17.5156, "step": 20335 }, { "epoch": 1.3506010493458191, "grad_norm": 201.88624572753906, "learning_rate": 5.038548272559307e-07, "loss": 11.4688, "step": 20336 }, { "epoch": 1.350667463638175, "grad_norm": 555.2984619140625, "learning_rate": 5.037614492600498e-07, "loss": 14.5312, "step": 20337 }, { "epoch": 1.3507338779305307, "grad_norm": 410.98309326171875, "learning_rate": 5.036680770043713e-07, "loss": 20.0703, "step": 20338 }, { "epoch": 1.3508002922228863, "grad_norm": 157.9021759033203, "learning_rate": 5.035747104899738e-07, "loss": 14.9375, "step": 20339 }, { "epoch": 1.3508667065152422, "grad_norm": 253.8483123779297, "learning_rate": 5.034813497179377e-07, "loss": 26.0312, "step": 20340 }, { "epoch": 1.3509331208075979, "grad_norm": 125.33763885498047, "learning_rate": 5.033879946893434e-07, "loss": 11.5156, "step": 20341 }, { "epoch": 1.3509995350999535, "grad_norm": 176.57217407226562, "learning_rate": 5.0329464540527e-07, "loss": 16.7656, "step": 20342 }, { "epoch": 1.3510659493923092, "grad_norm": 399.1440124511719, "learning_rate": 5.03201301866798e-07, "loss": 9.8438, "step": 20343 }, { "epoch": 1.3511323636846648, "grad_norm": 769.6100463867188, "learning_rate": 5.031079640750071e-07, "loss": 20.3125, "step": 20344 }, { "epoch": 1.3511987779770207, "grad_norm": 235.2083740234375, "learning_rate": 5.03014632030976e-07, "loss": 13.4844, "step": 20345 }, { "epoch": 1.3512651922693764, "grad_norm": 135.37060546875, "learning_rate": 5.029213057357858e-07, "loss": 16.6562, "step": 20346 }, { "epoch": 1.351331606561732, "grad_norm": 838.2820434570312, "learning_rate": 5.02827985190515e-07, "loss": 13.0156, "step": 20347 }, { "epoch": 1.351398020854088, "grad_norm": 382.07025146484375, "learning_rate": 5.027346703962434e-07, "loss": 11.3594, "step": 20348 }, { "epoch": 1.3514644351464435, "grad_norm": 266.9377746582031, "learning_rate": 5.026413613540503e-07, "loss": 17.6719, "step": 20349 }, { "epoch": 1.3515308494387992, "grad_norm": 283.6220703125, "learning_rate": 5.025480580650152e-07, "loss": 12.7188, "step": 20350 }, { "epoch": 1.351597263731155, "grad_norm": 176.71324157714844, "learning_rate": 5.024547605302174e-07, "loss": 17.3438, "step": 20351 }, { "epoch": 1.3516636780235107, "grad_norm": 158.13194274902344, "learning_rate": 5.023614687507362e-07, "loss": 16.9531, "step": 20352 }, { "epoch": 1.3517300923158664, "grad_norm": 303.3149719238281, "learning_rate": 5.0226818272765e-07, "loss": 16.1406, "step": 20353 }, { "epoch": 1.351796506608222, "grad_norm": 152.92059326171875, "learning_rate": 5.02174902462039e-07, "loss": 14.0469, "step": 20354 }, { "epoch": 1.3518629209005777, "grad_norm": 363.1440124511719, "learning_rate": 5.020816279549816e-07, "loss": 25.9219, "step": 20355 }, { "epoch": 1.3519293351929336, "grad_norm": 207.09625244140625, "learning_rate": 5.019883592075566e-07, "loss": 17.9531, "step": 20356 }, { "epoch": 1.3519957494852892, "grad_norm": 131.45919799804688, "learning_rate": 5.01895096220843e-07, "loss": 14.7656, "step": 20357 }, { "epoch": 1.3520621637776449, "grad_norm": 288.03509521484375, "learning_rate": 5.018018389959203e-07, "loss": 13.4219, "step": 20358 }, { "epoch": 1.3521285780700008, "grad_norm": 129.880859375, "learning_rate": 5.017085875338658e-07, "loss": 11.25, "step": 20359 }, { "epoch": 1.3521949923623564, "grad_norm": 82.068115234375, "learning_rate": 5.016153418357598e-07, "loss": 11.9844, "step": 20360 }, { "epoch": 1.352261406654712, "grad_norm": 293.2378234863281, "learning_rate": 5.015221019026797e-07, "loss": 22.4062, "step": 20361 }, { "epoch": 1.352327820947068, "grad_norm": 167.97052001953125, "learning_rate": 5.014288677357046e-07, "loss": 15.1562, "step": 20362 }, { "epoch": 1.3523942352394236, "grad_norm": 89.83660125732422, "learning_rate": 5.013356393359127e-07, "loss": 15.125, "step": 20363 }, { "epoch": 1.3524606495317792, "grad_norm": 177.0125274658203, "learning_rate": 5.012424167043827e-07, "loss": 18.25, "step": 20364 }, { "epoch": 1.352527063824135, "grad_norm": 124.93683624267578, "learning_rate": 5.011491998421928e-07, "loss": 16.7344, "step": 20365 }, { "epoch": 1.3525934781164906, "grad_norm": 156.60887145996094, "learning_rate": 5.010559887504213e-07, "loss": 14.9062, "step": 20366 }, { "epoch": 1.3526598924088464, "grad_norm": 145.7607879638672, "learning_rate": 5.009627834301465e-07, "loss": 13.5, "step": 20367 }, { "epoch": 1.352726306701202, "grad_norm": 283.2504577636719, "learning_rate": 5.008695838824464e-07, "loss": 18.6719, "step": 20368 }, { "epoch": 1.3527927209935577, "grad_norm": 407.7585754394531, "learning_rate": 5.007763901083994e-07, "loss": 19.2812, "step": 20369 }, { "epoch": 1.3528591352859136, "grad_norm": 116.61644744873047, "learning_rate": 5.006832021090827e-07, "loss": 13.0859, "step": 20370 }, { "epoch": 1.3529255495782693, "grad_norm": 585.317138671875, "learning_rate": 5.005900198855755e-07, "loss": 14.3281, "step": 20371 }, { "epoch": 1.352991963870625, "grad_norm": 579.470703125, "learning_rate": 5.004968434389545e-07, "loss": 14.9375, "step": 20372 }, { "epoch": 1.3530583781629808, "grad_norm": 98.87105560302734, "learning_rate": 5.00403672770298e-07, "loss": 11.0625, "step": 20373 }, { "epoch": 1.3531247924553365, "grad_norm": 193.1488800048828, "learning_rate": 5.003105078806838e-07, "loss": 14.6406, "step": 20374 }, { "epoch": 1.353191206747692, "grad_norm": 304.83917236328125, "learning_rate": 5.002173487711893e-07, "loss": 20.0781, "step": 20375 }, { "epoch": 1.3532576210400478, "grad_norm": 124.42131042480469, "learning_rate": 5.001241954428925e-07, "loss": 15.4375, "step": 20376 }, { "epoch": 1.3533240353324034, "grad_norm": 209.5999298095703, "learning_rate": 5.000310478968713e-07, "loss": 15.125, "step": 20377 }, { "epoch": 1.3533904496247593, "grad_norm": 144.70321655273438, "learning_rate": 4.999379061342016e-07, "loss": 12.3906, "step": 20378 }, { "epoch": 1.353456863917115, "grad_norm": 311.1866760253906, "learning_rate": 4.998447701559627e-07, "loss": 24.1562, "step": 20379 }, { "epoch": 1.3535232782094706, "grad_norm": 125.03533172607422, "learning_rate": 4.997516399632307e-07, "loss": 16.6094, "step": 20380 }, { "epoch": 1.3535896925018265, "grad_norm": 234.9910125732422, "learning_rate": 4.996585155570832e-07, "loss": 14.9219, "step": 20381 }, { "epoch": 1.3536561067941821, "grad_norm": 151.98143005371094, "learning_rate": 4.995653969385976e-07, "loss": 14.8594, "step": 20382 }, { "epoch": 1.3537225210865378, "grad_norm": 539.9197387695312, "learning_rate": 4.994722841088513e-07, "loss": 15.2344, "step": 20383 }, { "epoch": 1.3537889353788937, "grad_norm": 196.81179809570312, "learning_rate": 4.993791770689203e-07, "loss": 13.0469, "step": 20384 }, { "epoch": 1.3538553496712493, "grad_norm": 162.8025360107422, "learning_rate": 4.992860758198829e-07, "loss": 13.0469, "step": 20385 }, { "epoch": 1.353921763963605, "grad_norm": 163.48695373535156, "learning_rate": 4.991929803628152e-07, "loss": 15.3125, "step": 20386 }, { "epoch": 1.3539881782559606, "grad_norm": 268.3045349121094, "learning_rate": 4.990998906987942e-07, "loss": 17.9375, "step": 20387 }, { "epoch": 1.3540545925483163, "grad_norm": 187.3284149169922, "learning_rate": 4.990068068288969e-07, "loss": 19.3125, "step": 20388 }, { "epoch": 1.3541210068406722, "grad_norm": 168.99159240722656, "learning_rate": 4.989137287542e-07, "loss": 21.6406, "step": 20389 }, { "epoch": 1.3541874211330278, "grad_norm": 144.1044464111328, "learning_rate": 4.988206564757801e-07, "loss": 16.5781, "step": 20390 }, { "epoch": 1.3542538354253835, "grad_norm": 215.77964782714844, "learning_rate": 4.987275899947142e-07, "loss": 15.4688, "step": 20391 }, { "epoch": 1.3543202497177393, "grad_norm": 197.35357666015625, "learning_rate": 4.986345293120777e-07, "loss": 14.1719, "step": 20392 }, { "epoch": 1.354386664010095, "grad_norm": 163.73854064941406, "learning_rate": 4.985414744289488e-07, "loss": 14.2031, "step": 20393 }, { "epoch": 1.3544530783024507, "grad_norm": 165.72669982910156, "learning_rate": 4.984484253464025e-07, "loss": 16.9844, "step": 20394 }, { "epoch": 1.3545194925948065, "grad_norm": 159.79991149902344, "learning_rate": 4.983553820655156e-07, "loss": 17.6719, "step": 20395 }, { "epoch": 1.3545859068871622, "grad_norm": 202.58840942382812, "learning_rate": 4.982623445873642e-07, "loss": 17.2188, "step": 20396 }, { "epoch": 1.3546523211795178, "grad_norm": 438.7818603515625, "learning_rate": 4.981693129130249e-07, "loss": 33.1094, "step": 20397 }, { "epoch": 1.3547187354718735, "grad_norm": 235.44960021972656, "learning_rate": 4.980762870435735e-07, "loss": 18.5312, "step": 20398 }, { "epoch": 1.3547851497642291, "grad_norm": 162.5772705078125, "learning_rate": 4.979832669800867e-07, "loss": 14.2578, "step": 20399 }, { "epoch": 1.354851564056585, "grad_norm": 1809.2657470703125, "learning_rate": 4.978902527236393e-07, "loss": 10.5781, "step": 20400 }, { "epoch": 1.3549179783489407, "grad_norm": 167.53416442871094, "learning_rate": 4.977972442753082e-07, "loss": 15.1875, "step": 20401 }, { "epoch": 1.3549843926412963, "grad_norm": 291.7483215332031, "learning_rate": 4.97704241636169e-07, "loss": 15.7812, "step": 20402 }, { "epoch": 1.3550508069336522, "grad_norm": 348.76263427734375, "learning_rate": 4.976112448072974e-07, "loss": 13.2344, "step": 20403 }, { "epoch": 1.3551172212260079, "grad_norm": 427.84283447265625, "learning_rate": 4.975182537897693e-07, "loss": 16.0469, "step": 20404 }, { "epoch": 1.3551836355183635, "grad_norm": 104.04467010498047, "learning_rate": 4.974252685846601e-07, "loss": 15.8906, "step": 20405 }, { "epoch": 1.3552500498107194, "grad_norm": 247.61529541015625, "learning_rate": 4.973322891930458e-07, "loss": 16.7188, "step": 20406 }, { "epoch": 1.355316464103075, "grad_norm": 120.5719985961914, "learning_rate": 4.972393156160017e-07, "loss": 13.0156, "step": 20407 }, { "epoch": 1.3553828783954307, "grad_norm": 232.10604858398438, "learning_rate": 4.971463478546036e-07, "loss": 11.5781, "step": 20408 }, { "epoch": 1.3554492926877864, "grad_norm": 220.92239379882812, "learning_rate": 4.970533859099258e-07, "loss": 18.1875, "step": 20409 }, { "epoch": 1.355515706980142, "grad_norm": 190.6001434326172, "learning_rate": 4.969604297830453e-07, "loss": 18.6875, "step": 20410 }, { "epoch": 1.3555821212724979, "grad_norm": 234.0193328857422, "learning_rate": 4.96867479475036e-07, "loss": 14.1406, "step": 20411 }, { "epoch": 1.3556485355648535, "grad_norm": 125.43640899658203, "learning_rate": 4.967745349869736e-07, "loss": 16.0469, "step": 20412 }, { "epoch": 1.3557149498572092, "grad_norm": 158.78611755371094, "learning_rate": 4.966815963199332e-07, "loss": 11.1641, "step": 20413 }, { "epoch": 1.355781364149565, "grad_norm": 501.61553955078125, "learning_rate": 4.965886634749899e-07, "loss": 18.4062, "step": 20414 }, { "epoch": 1.3558477784419207, "grad_norm": 120.13874816894531, "learning_rate": 4.964957364532187e-07, "loss": 11.4141, "step": 20415 }, { "epoch": 1.3559141927342764, "grad_norm": 267.9775085449219, "learning_rate": 4.964028152556948e-07, "loss": 17.6094, "step": 20416 }, { "epoch": 1.3559806070266323, "grad_norm": 303.9214782714844, "learning_rate": 4.963098998834919e-07, "loss": 17.3281, "step": 20417 }, { "epoch": 1.356047021318988, "grad_norm": 151.7750701904297, "learning_rate": 4.962169903376865e-07, "loss": 14.5469, "step": 20418 }, { "epoch": 1.3561134356113436, "grad_norm": 172.13253784179688, "learning_rate": 4.96124086619352e-07, "loss": 15.7969, "step": 20419 }, { "epoch": 1.3561798499036992, "grad_norm": 261.69781494140625, "learning_rate": 4.960311887295635e-07, "loss": 14.8906, "step": 20420 }, { "epoch": 1.3562462641960549, "grad_norm": 353.3579406738281, "learning_rate": 4.959382966693957e-07, "loss": 16.625, "step": 20421 }, { "epoch": 1.3563126784884108, "grad_norm": 330.5619812011719, "learning_rate": 4.958454104399234e-07, "loss": 20.2969, "step": 20422 }, { "epoch": 1.3563790927807664, "grad_norm": 195.39834594726562, "learning_rate": 4.957525300422199e-07, "loss": 16.3438, "step": 20423 }, { "epoch": 1.356445507073122, "grad_norm": 236.77381896972656, "learning_rate": 4.956596554773611e-07, "loss": 15.7969, "step": 20424 }, { "epoch": 1.356511921365478, "grad_norm": 221.79002380371094, "learning_rate": 4.955667867464201e-07, "loss": 13.3281, "step": 20425 }, { "epoch": 1.3565783356578336, "grad_norm": 161.4130096435547, "learning_rate": 4.954739238504718e-07, "loss": 16.9219, "step": 20426 }, { "epoch": 1.3566447499501892, "grad_norm": 256.6285095214844, "learning_rate": 4.953810667905903e-07, "loss": 15.3906, "step": 20427 }, { "epoch": 1.3567111642425451, "grad_norm": 579.1031494140625, "learning_rate": 4.952882155678495e-07, "loss": 15.8438, "step": 20428 }, { "epoch": 1.3567775785349008, "grad_norm": 220.75624084472656, "learning_rate": 4.951953701833236e-07, "loss": 20.2656, "step": 20429 }, { "epoch": 1.3568439928272564, "grad_norm": 168.68873596191406, "learning_rate": 4.95102530638087e-07, "loss": 21.0703, "step": 20430 }, { "epoch": 1.356910407119612, "grad_norm": 615.2210083007812, "learning_rate": 4.950096969332122e-07, "loss": 12.7344, "step": 20431 }, { "epoch": 1.3569768214119677, "grad_norm": 133.9051971435547, "learning_rate": 4.94916869069775e-07, "loss": 16.125, "step": 20432 }, { "epoch": 1.3570432357043236, "grad_norm": 320.5324401855469, "learning_rate": 4.948240470488479e-07, "loss": 21.0312, "step": 20433 }, { "epoch": 1.3571096499966793, "grad_norm": 216.8462677001953, "learning_rate": 4.947312308715048e-07, "loss": 12.7344, "step": 20434 }, { "epoch": 1.357176064289035, "grad_norm": 201.61264038085938, "learning_rate": 4.946384205388195e-07, "loss": 11.8594, "step": 20435 }, { "epoch": 1.3572424785813908, "grad_norm": 153.95413208007812, "learning_rate": 4.945456160518655e-07, "loss": 15.6094, "step": 20436 }, { "epoch": 1.3573088928737465, "grad_norm": 315.3316955566406, "learning_rate": 4.944528174117164e-07, "loss": 13.7812, "step": 20437 }, { "epoch": 1.357375307166102, "grad_norm": 213.84390258789062, "learning_rate": 4.94360024619446e-07, "loss": 17.7031, "step": 20438 }, { "epoch": 1.357441721458458, "grad_norm": 294.2238464355469, "learning_rate": 4.942672376761264e-07, "loss": 18.2656, "step": 20439 }, { "epoch": 1.3575081357508136, "grad_norm": 333.2209167480469, "learning_rate": 4.941744565828323e-07, "loss": 15.0703, "step": 20440 }, { "epoch": 1.3575745500431693, "grad_norm": 183.11941528320312, "learning_rate": 4.940816813406366e-07, "loss": 11.4688, "step": 20441 }, { "epoch": 1.357640964335525, "grad_norm": 192.59512329101562, "learning_rate": 4.939889119506121e-07, "loss": 14.0469, "step": 20442 }, { "epoch": 1.3577073786278806, "grad_norm": 266.40374755859375, "learning_rate": 4.93896148413832e-07, "loss": 24.2812, "step": 20443 }, { "epoch": 1.3577737929202365, "grad_norm": 225.4984130859375, "learning_rate": 4.938033907313695e-07, "loss": 14.9062, "step": 20444 }, { "epoch": 1.3578402072125921, "grad_norm": 309.2711181640625, "learning_rate": 4.937106389042973e-07, "loss": 20.6875, "step": 20445 }, { "epoch": 1.3579066215049478, "grad_norm": 217.0630645751953, "learning_rate": 4.936178929336886e-07, "loss": 15.875, "step": 20446 }, { "epoch": 1.3579730357973037, "grad_norm": 137.2581329345703, "learning_rate": 4.935251528206166e-07, "loss": 12.7969, "step": 20447 }, { "epoch": 1.3580394500896593, "grad_norm": 389.6460266113281, "learning_rate": 4.934324185661526e-07, "loss": 18.0781, "step": 20448 }, { "epoch": 1.358105864382015, "grad_norm": 138.7450714111328, "learning_rate": 4.933396901713713e-07, "loss": 14.0156, "step": 20449 }, { "epoch": 1.3581722786743708, "grad_norm": 158.53343200683594, "learning_rate": 4.932469676373436e-07, "loss": 14.0625, "step": 20450 }, { "epoch": 1.3582386929667265, "grad_norm": 291.4520568847656, "learning_rate": 4.93154250965143e-07, "loss": 14.4688, "step": 20451 }, { "epoch": 1.3583051072590822, "grad_norm": 215.42727661132812, "learning_rate": 4.930615401558417e-07, "loss": 22.3281, "step": 20452 }, { "epoch": 1.3583715215514378, "grad_norm": 840.3546752929688, "learning_rate": 4.929688352105122e-07, "loss": 16.1406, "step": 20453 }, { "epoch": 1.3584379358437935, "grad_norm": 222.36434936523438, "learning_rate": 4.928761361302268e-07, "loss": 17.7969, "step": 20454 }, { "epoch": 1.3585043501361493, "grad_norm": 230.65028381347656, "learning_rate": 4.927834429160582e-07, "loss": 15.8125, "step": 20455 }, { "epoch": 1.358570764428505, "grad_norm": 444.7425231933594, "learning_rate": 4.926907555690776e-07, "loss": 15.875, "step": 20456 }, { "epoch": 1.3586371787208606, "grad_norm": 276.6972351074219, "learning_rate": 4.925980740903585e-07, "loss": 19.9375, "step": 20457 }, { "epoch": 1.3587035930132165, "grad_norm": 86.13661193847656, "learning_rate": 4.925053984809719e-07, "loss": 11.1875, "step": 20458 }, { "epoch": 1.3587700073055722, "grad_norm": 227.09677124023438, "learning_rate": 4.924127287419902e-07, "loss": 15.5781, "step": 20459 }, { "epoch": 1.3588364215979278, "grad_norm": 147.1528778076172, "learning_rate": 4.923200648744854e-07, "loss": 11.0781, "step": 20460 }, { "epoch": 1.3589028358902837, "grad_norm": 567.3938598632812, "learning_rate": 4.922274068795298e-07, "loss": 19.0938, "step": 20461 }, { "epoch": 1.3589692501826394, "grad_norm": 301.2929382324219, "learning_rate": 4.921347547581939e-07, "loss": 14.9062, "step": 20462 }, { "epoch": 1.359035664474995, "grad_norm": 141.16253662109375, "learning_rate": 4.92042108511551e-07, "loss": 12.7031, "step": 20463 }, { "epoch": 1.3591020787673507, "grad_norm": 416.50933837890625, "learning_rate": 4.919494681406718e-07, "loss": 16.9531, "step": 20464 }, { "epoch": 1.3591684930597063, "grad_norm": 115.72482299804688, "learning_rate": 4.918568336466282e-07, "loss": 17.6406, "step": 20465 }, { "epoch": 1.3592349073520622, "grad_norm": 676.1226806640625, "learning_rate": 4.917642050304918e-07, "loss": 23.5156, "step": 20466 }, { "epoch": 1.3593013216444179, "grad_norm": 297.53741455078125, "learning_rate": 4.916715822933339e-07, "loss": 15.4375, "step": 20467 }, { "epoch": 1.3593677359367735, "grad_norm": 336.9594421386719, "learning_rate": 4.91578965436226e-07, "loss": 15.9531, "step": 20468 }, { "epoch": 1.3594341502291294, "grad_norm": 275.0187072753906, "learning_rate": 4.914863544602399e-07, "loss": 23.9531, "step": 20469 }, { "epoch": 1.359500564521485, "grad_norm": 214.56027221679688, "learning_rate": 4.913937493664458e-07, "loss": 17.625, "step": 20470 }, { "epoch": 1.3595669788138407, "grad_norm": 473.0348815917969, "learning_rate": 4.91301150155916e-07, "loss": 21.4844, "step": 20471 }, { "epoch": 1.3596333931061966, "grad_norm": 131.52626037597656, "learning_rate": 4.912085568297209e-07, "loss": 13.375, "step": 20472 }, { "epoch": 1.3596998073985522, "grad_norm": 226.7739715576172, "learning_rate": 4.911159693889319e-07, "loss": 14.5312, "step": 20473 }, { "epoch": 1.3597662216909079, "grad_norm": 1237.9820556640625, "learning_rate": 4.910233878346199e-07, "loss": 26.875, "step": 20474 }, { "epoch": 1.3598326359832635, "grad_norm": 224.99537658691406, "learning_rate": 4.909308121678558e-07, "loss": 17.5312, "step": 20475 }, { "epoch": 1.3598990502756192, "grad_norm": 213.22959899902344, "learning_rate": 4.908382423897105e-07, "loss": 11.0156, "step": 20476 }, { "epoch": 1.359965464567975, "grad_norm": 278.28228759765625, "learning_rate": 4.90745678501255e-07, "loss": 15.5625, "step": 20477 }, { "epoch": 1.3600318788603307, "grad_norm": 130.9931640625, "learning_rate": 4.906531205035596e-07, "loss": 13.6172, "step": 20478 }, { "epoch": 1.3600982931526864, "grad_norm": 349.60955810546875, "learning_rate": 4.905605683976953e-07, "loss": 15.8125, "step": 20479 }, { "epoch": 1.3601647074450423, "grad_norm": 114.01103973388672, "learning_rate": 4.90468022184733e-07, "loss": 11.7656, "step": 20480 }, { "epoch": 1.360231121737398, "grad_norm": 101.0329360961914, "learning_rate": 4.90375481865742e-07, "loss": 15.7812, "step": 20481 }, { "epoch": 1.3602975360297536, "grad_norm": 261.8243408203125, "learning_rate": 4.902829474417943e-07, "loss": 16.7188, "step": 20482 }, { "epoch": 1.3603639503221094, "grad_norm": 141.6977081298828, "learning_rate": 4.901904189139592e-07, "loss": 14.5156, "step": 20483 }, { "epoch": 1.360430364614465, "grad_norm": 125.29207611083984, "learning_rate": 4.900978962833073e-07, "loss": 13.4062, "step": 20484 }, { "epoch": 1.3604967789068207, "grad_norm": 259.79339599609375, "learning_rate": 4.90005379550909e-07, "loss": 15.75, "step": 20485 }, { "epoch": 1.3605631931991764, "grad_norm": 224.94662475585938, "learning_rate": 4.899128687178346e-07, "loss": 11.8906, "step": 20486 }, { "epoch": 1.360629607491532, "grad_norm": 102.97473907470703, "learning_rate": 4.898203637851535e-07, "loss": 15.1406, "step": 20487 }, { "epoch": 1.360696021783888, "grad_norm": 179.0933380126953, "learning_rate": 4.897278647539369e-07, "loss": 17.2969, "step": 20488 }, { "epoch": 1.3607624360762436, "grad_norm": 334.011474609375, "learning_rate": 4.896353716252537e-07, "loss": 20.0469, "step": 20489 }, { "epoch": 1.3608288503685992, "grad_norm": 157.33810424804688, "learning_rate": 4.895428844001742e-07, "loss": 18.3125, "step": 20490 }, { "epoch": 1.3608952646609551, "grad_norm": 213.64817810058594, "learning_rate": 4.894504030797684e-07, "loss": 16.0781, "step": 20491 }, { "epoch": 1.3609616789533108, "grad_norm": 96.6127700805664, "learning_rate": 4.893579276651057e-07, "loss": 14.1875, "step": 20492 }, { "epoch": 1.3610280932456664, "grad_norm": 258.44720458984375, "learning_rate": 4.892654581572563e-07, "loss": 13.1562, "step": 20493 }, { "epoch": 1.3610945075380223, "grad_norm": 228.6446075439453, "learning_rate": 4.891729945572898e-07, "loss": 14.8281, "step": 20494 }, { "epoch": 1.361160921830378, "grad_norm": 309.73553466796875, "learning_rate": 4.890805368662748e-07, "loss": 18.8438, "step": 20495 }, { "epoch": 1.3612273361227336, "grad_norm": 123.45718383789062, "learning_rate": 4.889880850852823e-07, "loss": 12.8281, "step": 20496 }, { "epoch": 1.3612937504150893, "grad_norm": 132.31690979003906, "learning_rate": 4.888956392153807e-07, "loss": 12.5938, "step": 20497 }, { "epoch": 1.361360164707445, "grad_norm": 159.3297576904297, "learning_rate": 4.888031992576395e-07, "loss": 15.4375, "step": 20498 }, { "epoch": 1.3614265789998008, "grad_norm": 139.8565673828125, "learning_rate": 4.887107652131282e-07, "loss": 17.6406, "step": 20499 }, { "epoch": 1.3614929932921565, "grad_norm": 378.7396240234375, "learning_rate": 4.886183370829158e-07, "loss": 22.8125, "step": 20500 }, { "epoch": 1.361559407584512, "grad_norm": 179.22291564941406, "learning_rate": 4.885259148680717e-07, "loss": 18.4219, "step": 20501 }, { "epoch": 1.361625821876868, "grad_norm": 164.10887145996094, "learning_rate": 4.884334985696654e-07, "loss": 16.2344, "step": 20502 }, { "epoch": 1.3616922361692236, "grad_norm": 414.8775329589844, "learning_rate": 4.883410881887644e-07, "loss": 19.125, "step": 20503 }, { "epoch": 1.3617586504615793, "grad_norm": 158.06597900390625, "learning_rate": 4.882486837264394e-07, "loss": 15.4844, "step": 20504 }, { "epoch": 1.3618250647539352, "grad_norm": 122.60484313964844, "learning_rate": 4.881562851837583e-07, "loss": 12.8594, "step": 20505 }, { "epoch": 1.3618914790462908, "grad_norm": 425.1082458496094, "learning_rate": 4.880638925617902e-07, "loss": 17.3438, "step": 20506 }, { "epoch": 1.3619578933386465, "grad_norm": 325.45037841796875, "learning_rate": 4.879715058616036e-07, "loss": 17.6875, "step": 20507 }, { "epoch": 1.3620243076310021, "grad_norm": 201.20401000976562, "learning_rate": 4.878791250842679e-07, "loss": 16.4844, "step": 20508 }, { "epoch": 1.3620907219233578, "grad_norm": 181.58636474609375, "learning_rate": 4.877867502308504e-07, "loss": 15.875, "step": 20509 }, { "epoch": 1.3621571362157137, "grad_norm": 329.6872863769531, "learning_rate": 4.876943813024211e-07, "loss": 13.8594, "step": 20510 }, { "epoch": 1.3622235505080693, "grad_norm": 182.59054565429688, "learning_rate": 4.876020183000475e-07, "loss": 19.6406, "step": 20511 }, { "epoch": 1.362289964800425, "grad_norm": 307.75054931640625, "learning_rate": 4.875096612247982e-07, "loss": 14.4062, "step": 20512 }, { "epoch": 1.3623563790927808, "grad_norm": 605.7301025390625, "learning_rate": 4.874173100777415e-07, "loss": 15.0469, "step": 20513 }, { "epoch": 1.3624227933851365, "grad_norm": 442.4604187011719, "learning_rate": 4.87324964859946e-07, "loss": 15.7891, "step": 20514 }, { "epoch": 1.3624892076774922, "grad_norm": 153.20591735839844, "learning_rate": 4.872326255724797e-07, "loss": 12.6719, "step": 20515 }, { "epoch": 1.362555621969848, "grad_norm": 190.17205810546875, "learning_rate": 4.871402922164105e-07, "loss": 13.8438, "step": 20516 }, { "epoch": 1.3626220362622037, "grad_norm": 368.8044738769531, "learning_rate": 4.870479647928068e-07, "loss": 20.3125, "step": 20517 }, { "epoch": 1.3626884505545593, "grad_norm": 176.41664123535156, "learning_rate": 4.869556433027365e-07, "loss": 18.8125, "step": 20518 }, { "epoch": 1.362754864846915, "grad_norm": 190.38714599609375, "learning_rate": 4.868633277472678e-07, "loss": 11.4688, "step": 20519 }, { "epoch": 1.3628212791392706, "grad_norm": 110.4532241821289, "learning_rate": 4.867710181274675e-07, "loss": 12.5938, "step": 20520 }, { "epoch": 1.3628876934316265, "grad_norm": 381.4853820800781, "learning_rate": 4.866787144444048e-07, "loss": 13.3594, "step": 20521 }, { "epoch": 1.3629541077239822, "grad_norm": 161.01084899902344, "learning_rate": 4.865864166991465e-07, "loss": 17.2969, "step": 20522 }, { "epoch": 1.3630205220163378, "grad_norm": 132.36236572265625, "learning_rate": 4.864941248927603e-07, "loss": 13.5312, "step": 20523 }, { "epoch": 1.3630869363086937, "grad_norm": 101.44485473632812, "learning_rate": 4.864018390263142e-07, "loss": 13.5312, "step": 20524 }, { "epoch": 1.3631533506010494, "grad_norm": 303.9380187988281, "learning_rate": 4.863095591008755e-07, "loss": 15.8281, "step": 20525 }, { "epoch": 1.363219764893405, "grad_norm": 287.53680419921875, "learning_rate": 4.862172851175111e-07, "loss": 16.8125, "step": 20526 }, { "epoch": 1.363286179185761, "grad_norm": 388.16217041015625, "learning_rate": 4.861250170772896e-07, "loss": 18.7344, "step": 20527 }, { "epoch": 1.3633525934781165, "grad_norm": 607.2109985351562, "learning_rate": 4.860327549812773e-07, "loss": 31.2188, "step": 20528 }, { "epoch": 1.3634190077704722, "grad_norm": 230.6402130126953, "learning_rate": 4.859404988305416e-07, "loss": 17.4219, "step": 20529 }, { "epoch": 1.3634854220628279, "grad_norm": 204.25814819335938, "learning_rate": 4.858482486261497e-07, "loss": 15.7969, "step": 20530 }, { "epoch": 1.3635518363551835, "grad_norm": 301.4227294921875, "learning_rate": 4.85756004369169e-07, "loss": 15.4375, "step": 20531 }, { "epoch": 1.3636182506475394, "grad_norm": 386.13543701171875, "learning_rate": 4.856637660606662e-07, "loss": 19.6875, "step": 20532 }, { "epoch": 1.363684664939895, "grad_norm": 103.8848876953125, "learning_rate": 4.855715337017087e-07, "loss": 13.0312, "step": 20533 }, { "epoch": 1.3637510792322507, "grad_norm": 243.58126831054688, "learning_rate": 4.854793072933623e-07, "loss": 17.125, "step": 20534 }, { "epoch": 1.3638174935246066, "grad_norm": 202.41262817382812, "learning_rate": 4.853870868366953e-07, "loss": 18.1562, "step": 20535 }, { "epoch": 1.3638839078169622, "grad_norm": 231.29254150390625, "learning_rate": 4.852948723327736e-07, "loss": 17.1094, "step": 20536 }, { "epoch": 1.3639503221093179, "grad_norm": 365.408203125, "learning_rate": 4.852026637826638e-07, "loss": 17.6875, "step": 20537 }, { "epoch": 1.3640167364016738, "grad_norm": 178.60643005371094, "learning_rate": 4.851104611874328e-07, "loss": 17.7188, "step": 20538 }, { "epoch": 1.3640831506940294, "grad_norm": 229.57888793945312, "learning_rate": 4.850182645481471e-07, "loss": 15.2109, "step": 20539 }, { "epoch": 1.364149564986385, "grad_norm": 246.65774536132812, "learning_rate": 4.849260738658731e-07, "loss": 14.25, "step": 20540 }, { "epoch": 1.3642159792787407, "grad_norm": 206.12109375, "learning_rate": 4.848338891416778e-07, "loss": 17.7812, "step": 20541 }, { "epoch": 1.3642823935710964, "grad_norm": 130.25315856933594, "learning_rate": 4.847417103766261e-07, "loss": 9.8125, "step": 20542 }, { "epoch": 1.3643488078634523, "grad_norm": 586.3485107421875, "learning_rate": 4.84649537571786e-07, "loss": 21.0312, "step": 20543 }, { "epoch": 1.364415222155808, "grad_norm": 213.73541259765625, "learning_rate": 4.845573707282224e-07, "loss": 17.2031, "step": 20544 }, { "epoch": 1.3644816364481636, "grad_norm": 130.62350463867188, "learning_rate": 4.844652098470021e-07, "loss": 15.6406, "step": 20545 }, { "epoch": 1.3645480507405194, "grad_norm": 206.8778839111328, "learning_rate": 4.843730549291909e-07, "loss": 13.9844, "step": 20546 }, { "epoch": 1.364614465032875, "grad_norm": 353.1333312988281, "learning_rate": 4.842809059758553e-07, "loss": 13.875, "step": 20547 }, { "epoch": 1.3646808793252307, "grad_norm": 170.04953002929688, "learning_rate": 4.841887629880599e-07, "loss": 13.1875, "step": 20548 }, { "epoch": 1.3647472936175866, "grad_norm": 168.54100036621094, "learning_rate": 4.840966259668723e-07, "loss": 13.5156, "step": 20549 }, { "epoch": 1.3648137079099423, "grad_norm": 129.64451599121094, "learning_rate": 4.84004494913357e-07, "loss": 16.2969, "step": 20550 }, { "epoch": 1.364880122202298, "grad_norm": 203.6663055419922, "learning_rate": 4.839123698285798e-07, "loss": 12.7031, "step": 20551 }, { "epoch": 1.3649465364946536, "grad_norm": 289.1405944824219, "learning_rate": 4.838202507136076e-07, "loss": 19.2344, "step": 20552 }, { "epoch": 1.3650129507870092, "grad_norm": 207.9725341796875, "learning_rate": 4.837281375695046e-07, "loss": 18.6562, "step": 20553 }, { "epoch": 1.3650793650793651, "grad_norm": 252.89352416992188, "learning_rate": 4.836360303973368e-07, "loss": 10.3594, "step": 20554 }, { "epoch": 1.3651457793717208, "grad_norm": 341.66815185546875, "learning_rate": 4.835439291981696e-07, "loss": 18.5, "step": 20555 }, { "epoch": 1.3652121936640764, "grad_norm": 257.6126708984375, "learning_rate": 4.834518339730686e-07, "loss": 18.75, "step": 20556 }, { "epoch": 1.3652786079564323, "grad_norm": 301.48968505859375, "learning_rate": 4.833597447230988e-07, "loss": 9.7734, "step": 20557 }, { "epoch": 1.365345022248788, "grad_norm": 250.3192138671875, "learning_rate": 4.83267661449326e-07, "loss": 20.6719, "step": 20558 }, { "epoch": 1.3654114365411436, "grad_norm": 365.3345642089844, "learning_rate": 4.83175584152814e-07, "loss": 10.875, "step": 20559 }, { "epoch": 1.3654778508334995, "grad_norm": 117.83358764648438, "learning_rate": 4.830835128346297e-07, "loss": 14.1094, "step": 20560 }, { "epoch": 1.3655442651258551, "grad_norm": 180.49993896484375, "learning_rate": 4.82991447495837e-07, "loss": 17.0156, "step": 20561 }, { "epoch": 1.3656106794182108, "grad_norm": 323.9657897949219, "learning_rate": 4.828993881375009e-07, "loss": 17.7031, "step": 20562 }, { "epoch": 1.3656770937105664, "grad_norm": 127.61262512207031, "learning_rate": 4.828073347606867e-07, "loss": 17.2812, "step": 20563 }, { "epoch": 1.365743508002922, "grad_norm": 154.5882568359375, "learning_rate": 4.827152873664589e-07, "loss": 16.0625, "step": 20564 }, { "epoch": 1.365809922295278, "grad_norm": 283.4111328125, "learning_rate": 4.826232459558824e-07, "loss": 15.9219, "step": 20565 }, { "epoch": 1.3658763365876336, "grad_norm": 244.47666931152344, "learning_rate": 4.825312105300223e-07, "loss": 20.5625, "step": 20566 }, { "epoch": 1.3659427508799893, "grad_norm": 310.3468322753906, "learning_rate": 4.824391810899418e-07, "loss": 20.7812, "step": 20567 }, { "epoch": 1.3660091651723452, "grad_norm": 221.99346923828125, "learning_rate": 4.823471576367073e-07, "loss": 15.1094, "step": 20568 }, { "epoch": 1.3660755794647008, "grad_norm": 260.7201232910156, "learning_rate": 4.82255140171382e-07, "loss": 19.2031, "step": 20569 }, { "epoch": 1.3661419937570565, "grad_norm": 606.7794189453125, "learning_rate": 4.821631286950308e-07, "loss": 18.9688, "step": 20570 }, { "epoch": 1.3662084080494123, "grad_norm": 186.87049865722656, "learning_rate": 4.820711232087178e-07, "loss": 12.2188, "step": 20571 }, { "epoch": 1.366274822341768, "grad_norm": 138.0307159423828, "learning_rate": 4.819791237135077e-07, "loss": 14.2031, "step": 20572 }, { "epoch": 1.3663412366341237, "grad_norm": 210.81369018554688, "learning_rate": 4.818871302104638e-07, "loss": 18.5, "step": 20573 }, { "epoch": 1.3664076509264793, "grad_norm": 241.193115234375, "learning_rate": 4.817951427006515e-07, "loss": 9.9844, "step": 20574 }, { "epoch": 1.366474065218835, "grad_norm": 127.98300170898438, "learning_rate": 4.817031611851338e-07, "loss": 17.9062, "step": 20575 }, { "epoch": 1.3665404795111908, "grad_norm": 145.19334411621094, "learning_rate": 4.816111856649751e-07, "loss": 12.6875, "step": 20576 }, { "epoch": 1.3666068938035465, "grad_norm": 1027.5494384765625, "learning_rate": 4.815192161412391e-07, "loss": 17.5469, "step": 20577 }, { "epoch": 1.3666733080959022, "grad_norm": 155.3261260986328, "learning_rate": 4.814272526149901e-07, "loss": 22.0625, "step": 20578 }, { "epoch": 1.366739722388258, "grad_norm": 131.38414001464844, "learning_rate": 4.813352950872914e-07, "loss": 15.8125, "step": 20579 }, { "epoch": 1.3668061366806137, "grad_norm": 156.85064697265625, "learning_rate": 4.812433435592074e-07, "loss": 18.0625, "step": 20580 }, { "epoch": 1.3668725509729693, "grad_norm": 303.0585021972656, "learning_rate": 4.811513980318005e-07, "loss": 14.2812, "step": 20581 }, { "epoch": 1.3669389652653252, "grad_norm": 303.83648681640625, "learning_rate": 4.810594585061358e-07, "loss": 19.7344, "step": 20582 }, { "epoch": 1.3670053795576809, "grad_norm": 119.1662826538086, "learning_rate": 4.809675249832757e-07, "loss": 10.4375, "step": 20583 }, { "epoch": 1.3670717938500365, "grad_norm": 175.41465759277344, "learning_rate": 4.80875597464284e-07, "loss": 14.5625, "step": 20584 }, { "epoch": 1.3671382081423922, "grad_norm": 110.64794158935547, "learning_rate": 4.80783675950224e-07, "loss": 16.1719, "step": 20585 }, { "epoch": 1.3672046224347478, "grad_norm": 225.89527893066406, "learning_rate": 4.80691760442159e-07, "loss": 21.5156, "step": 20586 }, { "epoch": 1.3672710367271037, "grad_norm": 146.68336486816406, "learning_rate": 4.805998509411523e-07, "loss": 16.9375, "step": 20587 }, { "epoch": 1.3673374510194594, "grad_norm": 171.0496063232422, "learning_rate": 4.80507947448267e-07, "loss": 12.3125, "step": 20588 }, { "epoch": 1.367403865311815, "grad_norm": 275.85064697265625, "learning_rate": 4.804160499645667e-07, "loss": 16.2344, "step": 20589 }, { "epoch": 1.367470279604171, "grad_norm": 575.27978515625, "learning_rate": 4.80324158491113e-07, "loss": 17.4219, "step": 20590 }, { "epoch": 1.3675366938965265, "grad_norm": 163.155517578125, "learning_rate": 4.802322730289706e-07, "loss": 17.3594, "step": 20591 }, { "epoch": 1.3676031081888822, "grad_norm": 177.4068145751953, "learning_rate": 4.801403935792012e-07, "loss": 21.8438, "step": 20592 }, { "epoch": 1.367669522481238, "grad_norm": 415.2408142089844, "learning_rate": 4.80048520142868e-07, "loss": 15.9844, "step": 20593 }, { "epoch": 1.3677359367735937, "grad_norm": 130.3599090576172, "learning_rate": 4.799566527210336e-07, "loss": 17.0469, "step": 20594 }, { "epoch": 1.3678023510659494, "grad_norm": 156.1647186279297, "learning_rate": 4.798647913147609e-07, "loss": 14.8906, "step": 20595 }, { "epoch": 1.367868765358305, "grad_norm": 118.98648834228516, "learning_rate": 4.797729359251121e-07, "loss": 13.6562, "step": 20596 }, { "epoch": 1.3679351796506607, "grad_norm": 273.88006591796875, "learning_rate": 4.796810865531507e-07, "loss": 18.4375, "step": 20597 }, { "epoch": 1.3680015939430166, "grad_norm": 294.0290832519531, "learning_rate": 4.795892431999375e-07, "loss": 20.4375, "step": 20598 }, { "epoch": 1.3680680082353722, "grad_norm": 229.48228454589844, "learning_rate": 4.794974058665367e-07, "loss": 18.1094, "step": 20599 }, { "epoch": 1.3681344225277279, "grad_norm": 538.623046875, "learning_rate": 4.794055745540093e-07, "loss": 18.8516, "step": 20600 }, { "epoch": 1.3682008368200838, "grad_norm": 242.97821044921875, "learning_rate": 4.79313749263418e-07, "loss": 16.5391, "step": 20601 }, { "epoch": 1.3682672511124394, "grad_norm": 248.91165161132812, "learning_rate": 4.792219299958251e-07, "loss": 14.8594, "step": 20602 }, { "epoch": 1.368333665404795, "grad_norm": 147.36758422851562, "learning_rate": 4.791301167522926e-07, "loss": 13.8438, "step": 20603 }, { "epoch": 1.368400079697151, "grad_norm": 188.6422882080078, "learning_rate": 4.790383095338826e-07, "loss": 12.8594, "step": 20604 }, { "epoch": 1.3684664939895066, "grad_norm": 209.15440368652344, "learning_rate": 4.789465083416573e-07, "loss": 14.9062, "step": 20605 }, { "epoch": 1.3685329082818622, "grad_norm": 471.9703063964844, "learning_rate": 4.788547131766777e-07, "loss": 14.4219, "step": 20606 }, { "epoch": 1.368599322574218, "grad_norm": 97.88948822021484, "learning_rate": 4.78762924040007e-07, "loss": 19.2656, "step": 20607 }, { "epoch": 1.3686657368665736, "grad_norm": 207.87454223632812, "learning_rate": 4.786711409327058e-07, "loss": 18.2188, "step": 20608 }, { "epoch": 1.3687321511589294, "grad_norm": 222.80194091796875, "learning_rate": 4.785793638558362e-07, "loss": 16.4922, "step": 20609 }, { "epoch": 1.368798565451285, "grad_norm": 146.39385986328125, "learning_rate": 4.784875928104598e-07, "loss": 12.1406, "step": 20610 }, { "epoch": 1.3688649797436407, "grad_norm": 116.61375427246094, "learning_rate": 4.783958277976388e-07, "loss": 12.7344, "step": 20611 }, { "epoch": 1.3689313940359966, "grad_norm": 153.49099731445312, "learning_rate": 4.783040688184331e-07, "loss": 16.5312, "step": 20612 }, { "epoch": 1.3689978083283523, "grad_norm": 231.14955139160156, "learning_rate": 4.782123158739059e-07, "loss": 15.7656, "step": 20613 }, { "epoch": 1.369064222620708, "grad_norm": 318.4695129394531, "learning_rate": 4.781205689651176e-07, "loss": 16.7188, "step": 20614 }, { "epoch": 1.3691306369130638, "grad_norm": 211.36453247070312, "learning_rate": 4.780288280931294e-07, "loss": 14.8281, "step": 20615 }, { "epoch": 1.3691970512054195, "grad_norm": 1347.0694580078125, "learning_rate": 4.779370932590028e-07, "loss": 17.5, "step": 20616 }, { "epoch": 1.3692634654977751, "grad_norm": 175.2948760986328, "learning_rate": 4.778453644637988e-07, "loss": 15.4688, "step": 20617 }, { "epoch": 1.3693298797901308, "grad_norm": 149.95071411132812, "learning_rate": 4.777536417085787e-07, "loss": 13.8438, "step": 20618 }, { "epoch": 1.3693962940824864, "grad_norm": 175.04676818847656, "learning_rate": 4.776619249944036e-07, "loss": 14.3438, "step": 20619 }, { "epoch": 1.3694627083748423, "grad_norm": 533.033203125, "learning_rate": 4.775702143223333e-07, "loss": 18.3438, "step": 20620 }, { "epoch": 1.369529122667198, "grad_norm": 119.7276382446289, "learning_rate": 4.774785096934304e-07, "loss": 14.9062, "step": 20621 }, { "epoch": 1.3695955369595536, "grad_norm": 252.5913543701172, "learning_rate": 4.773868111087545e-07, "loss": 20.1719, "step": 20622 }, { "epoch": 1.3696619512519095, "grad_norm": 161.9768524169922, "learning_rate": 4.77295118569366e-07, "loss": 13.3984, "step": 20623 }, { "epoch": 1.3697283655442651, "grad_norm": 166.41307067871094, "learning_rate": 4.772034320763271e-07, "loss": 14.8438, "step": 20624 }, { "epoch": 1.3697947798366208, "grad_norm": 154.94667053222656, "learning_rate": 4.771117516306969e-07, "loss": 16.6406, "step": 20625 }, { "epoch": 1.3698611941289767, "grad_norm": 246.6343994140625, "learning_rate": 4.770200772335365e-07, "loss": 13.25, "step": 20626 }, { "epoch": 1.3699276084213323, "grad_norm": 124.9811782836914, "learning_rate": 4.769284088859063e-07, "loss": 13.5938, "step": 20627 }, { "epoch": 1.369994022713688, "grad_norm": 297.2380676269531, "learning_rate": 4.7683674658886676e-07, "loss": 17.0156, "step": 20628 }, { "epoch": 1.3700604370060436, "grad_norm": 162.74618530273438, "learning_rate": 4.76745090343478e-07, "loss": 18.2031, "step": 20629 }, { "epoch": 1.3701268512983993, "grad_norm": 242.7025604248047, "learning_rate": 4.7665344015080057e-07, "loss": 18.8438, "step": 20630 }, { "epoch": 1.3701932655907552, "grad_norm": 461.1394958496094, "learning_rate": 4.7656179601189375e-07, "loss": 18.375, "step": 20631 }, { "epoch": 1.3702596798831108, "grad_norm": 616.326416015625, "learning_rate": 4.7647015792781884e-07, "loss": 20.3438, "step": 20632 }, { "epoch": 1.3703260941754665, "grad_norm": 172.12351989746094, "learning_rate": 4.76378525899635e-07, "loss": 14.3594, "step": 20633 }, { "epoch": 1.3703925084678223, "grad_norm": 154.6827392578125, "learning_rate": 4.7628689992840244e-07, "loss": 15.2188, "step": 20634 }, { "epoch": 1.370458922760178, "grad_norm": 406.0751647949219, "learning_rate": 4.761952800151811e-07, "loss": 18.5, "step": 20635 }, { "epoch": 1.3705253370525337, "grad_norm": 265.9720764160156, "learning_rate": 4.761036661610309e-07, "loss": 14.7031, "step": 20636 }, { "epoch": 1.3705917513448895, "grad_norm": 183.1073760986328, "learning_rate": 4.7601205836701073e-07, "loss": 15.0156, "step": 20637 }, { "epoch": 1.3706581656372452, "grad_norm": 194.77047729492188, "learning_rate": 4.7592045663418167e-07, "loss": 14.125, "step": 20638 }, { "epoch": 1.3707245799296008, "grad_norm": 116.71509552001953, "learning_rate": 4.758288609636022e-07, "loss": 12.6484, "step": 20639 }, { "epoch": 1.3707909942219565, "grad_norm": 173.96437072753906, "learning_rate": 4.7573727135633225e-07, "loss": 10.8281, "step": 20640 }, { "epoch": 1.3708574085143121, "grad_norm": 272.91107177734375, "learning_rate": 4.756456878134312e-07, "loss": 14.1406, "step": 20641 }, { "epoch": 1.370923822806668, "grad_norm": 127.78076934814453, "learning_rate": 4.755541103359585e-07, "loss": 15.8828, "step": 20642 }, { "epoch": 1.3709902370990237, "grad_norm": 126.62698364257812, "learning_rate": 4.7546253892497336e-07, "loss": 14.4688, "step": 20643 }, { "epoch": 1.3710566513913793, "grad_norm": 288.0270690917969, "learning_rate": 4.7537097358153554e-07, "loss": 23.6562, "step": 20644 }, { "epoch": 1.3711230656837352, "grad_norm": 222.7750701904297, "learning_rate": 4.7527941430670306e-07, "loss": 19.9375, "step": 20645 }, { "epoch": 1.3711894799760909, "grad_norm": 286.0065002441406, "learning_rate": 4.751878611015364e-07, "loss": 17.75, "step": 20646 }, { "epoch": 1.3712558942684465, "grad_norm": 568.2478637695312, "learning_rate": 4.750963139670936e-07, "loss": 15.8281, "step": 20647 }, { "epoch": 1.3713223085608024, "grad_norm": 708.5516967773438, "learning_rate": 4.75004772904434e-07, "loss": 14.1406, "step": 20648 }, { "epoch": 1.371388722853158, "grad_norm": 299.2532958984375, "learning_rate": 4.7491323791461643e-07, "loss": 18.0, "step": 20649 }, { "epoch": 1.3714551371455137, "grad_norm": 622.2584838867188, "learning_rate": 4.7482170899869977e-07, "loss": 13.3438, "step": 20650 }, { "epoch": 1.3715215514378694, "grad_norm": 329.2472839355469, "learning_rate": 4.7473018615774274e-07, "loss": 13.0938, "step": 20651 }, { "epoch": 1.371587965730225, "grad_norm": 222.7109832763672, "learning_rate": 4.746386693928044e-07, "loss": 13.5, "step": 20652 }, { "epoch": 1.3716543800225809, "grad_norm": 395.56036376953125, "learning_rate": 4.7454715870494257e-07, "loss": 19.7188, "step": 20653 }, { "epoch": 1.3717207943149365, "grad_norm": 170.7945556640625, "learning_rate": 4.744556540952163e-07, "loss": 18.1016, "step": 20654 }, { "epoch": 1.3717872086072922, "grad_norm": 328.00213623046875, "learning_rate": 4.743641555646839e-07, "loss": 21.3594, "step": 20655 }, { "epoch": 1.371853622899648, "grad_norm": 122.88230895996094, "learning_rate": 4.742726631144038e-07, "loss": 12.5469, "step": 20656 }, { "epoch": 1.3719200371920037, "grad_norm": 232.08787536621094, "learning_rate": 4.741811767454346e-07, "loss": 14.4688, "step": 20657 }, { "epoch": 1.3719864514843594, "grad_norm": 123.08714294433594, "learning_rate": 4.7408969645883456e-07, "loss": 15.6094, "step": 20658 }, { "epoch": 1.3720528657767153, "grad_norm": 448.603271484375, "learning_rate": 4.739982222556609e-07, "loss": 32.7031, "step": 20659 }, { "epoch": 1.372119280069071, "grad_norm": 137.25436401367188, "learning_rate": 4.739067541369729e-07, "loss": 11.5156, "step": 20660 }, { "epoch": 1.3721856943614266, "grad_norm": 192.0162811279297, "learning_rate": 4.738152921038285e-07, "loss": 18.6094, "step": 20661 }, { "epoch": 1.3722521086537822, "grad_norm": 207.28994750976562, "learning_rate": 4.7372383615728473e-07, "loss": 13.8906, "step": 20662 }, { "epoch": 1.3723185229461379, "grad_norm": 256.7312316894531, "learning_rate": 4.736323862984009e-07, "loss": 19.7812, "step": 20663 }, { "epoch": 1.3723849372384938, "grad_norm": 252.7761688232422, "learning_rate": 4.7354094252823375e-07, "loss": 18.375, "step": 20664 }, { "epoch": 1.3724513515308494, "grad_norm": 226.4068145751953, "learning_rate": 4.734495048478413e-07, "loss": 15.3281, "step": 20665 }, { "epoch": 1.372517765823205, "grad_norm": 265.64892578125, "learning_rate": 4.7335807325828146e-07, "loss": 14.8281, "step": 20666 }, { "epoch": 1.372584180115561, "grad_norm": 373.06854248046875, "learning_rate": 4.7326664776061167e-07, "loss": 14.5859, "step": 20667 }, { "epoch": 1.3726505944079166, "grad_norm": 171.38092041015625, "learning_rate": 4.7317522835588963e-07, "loss": 15.5, "step": 20668 }, { "epoch": 1.3727170087002722, "grad_norm": 131.50521850585938, "learning_rate": 4.7308381504517303e-07, "loss": 15.0625, "step": 20669 }, { "epoch": 1.3727834229926281, "grad_norm": 355.6888122558594, "learning_rate": 4.729924078295184e-07, "loss": 13.3359, "step": 20670 }, { "epoch": 1.3728498372849838, "grad_norm": 241.89193725585938, "learning_rate": 4.7290100670998445e-07, "loss": 16.25, "step": 20671 }, { "epoch": 1.3729162515773394, "grad_norm": 282.6650695800781, "learning_rate": 4.7280961168762725e-07, "loss": 21.3125, "step": 20672 }, { "epoch": 1.372982665869695, "grad_norm": 185.2205352783203, "learning_rate": 4.7271822276350446e-07, "loss": 18.25, "step": 20673 }, { "epoch": 1.3730490801620507, "grad_norm": 375.1667785644531, "learning_rate": 4.726268399386734e-07, "loss": 19.5, "step": 20674 }, { "epoch": 1.3731154944544066, "grad_norm": 238.1883544921875, "learning_rate": 4.7253546321419115e-07, "loss": 20.4688, "step": 20675 }, { "epoch": 1.3731819087467623, "grad_norm": 137.6282501220703, "learning_rate": 4.7244409259111373e-07, "loss": 12.5625, "step": 20676 }, { "epoch": 1.373248323039118, "grad_norm": 163.61001586914062, "learning_rate": 4.7235272807049973e-07, "loss": 16.4062, "step": 20677 }, { "epoch": 1.3733147373314738, "grad_norm": 110.93286895751953, "learning_rate": 4.7226136965340467e-07, "loss": 11.375, "step": 20678 }, { "epoch": 1.3733811516238295, "grad_norm": 227.3175048828125, "learning_rate": 4.721700173408858e-07, "loss": 19.0312, "step": 20679 }, { "epoch": 1.373447565916185, "grad_norm": 498.904296875, "learning_rate": 4.720786711339998e-07, "loss": 16.5781, "step": 20680 }, { "epoch": 1.373513980208541, "grad_norm": 281.25762939453125, "learning_rate": 4.7198733103380327e-07, "loss": 21.7188, "step": 20681 }, { "epoch": 1.3735803945008966, "grad_norm": 581.4718017578125, "learning_rate": 4.7189599704135286e-07, "loss": 18.25, "step": 20682 }, { "epoch": 1.3736468087932523, "grad_norm": 288.0283203125, "learning_rate": 4.7180466915770544e-07, "loss": 15.6562, "step": 20683 }, { "epoch": 1.373713223085608, "grad_norm": 141.5022430419922, "learning_rate": 4.717133473839162e-07, "loss": 14.4531, "step": 20684 }, { "epoch": 1.3737796373779636, "grad_norm": 358.5064392089844, "learning_rate": 4.716220317210431e-07, "loss": 21.1719, "step": 20685 }, { "epoch": 1.3738460516703195, "grad_norm": 160.73162841796875, "learning_rate": 4.7153072217014135e-07, "loss": 12.8438, "step": 20686 }, { "epoch": 1.3739124659626751, "grad_norm": 351.00421142578125, "learning_rate": 4.7143941873226745e-07, "loss": 14.9844, "step": 20687 }, { "epoch": 1.3739788802550308, "grad_norm": 229.9508514404297, "learning_rate": 4.7134812140847757e-07, "loss": 13.6094, "step": 20688 }, { "epoch": 1.3740452945473867, "grad_norm": 165.2521209716797, "learning_rate": 4.7125683019982777e-07, "loss": 13.9688, "step": 20689 }, { "epoch": 1.3741117088397423, "grad_norm": 227.40115356445312, "learning_rate": 4.7116554510737407e-07, "loss": 18.7812, "step": 20690 }, { "epoch": 1.374178123132098, "grad_norm": 181.9271697998047, "learning_rate": 4.7107426613217273e-07, "loss": 20.1562, "step": 20691 }, { "epoch": 1.3742445374244538, "grad_norm": 248.66090393066406, "learning_rate": 4.7098299327527856e-07, "loss": 14.5312, "step": 20692 }, { "epoch": 1.3743109517168095, "grad_norm": 401.4061279296875, "learning_rate": 4.708917265377489e-07, "loss": 19.0625, "step": 20693 }, { "epoch": 1.3743773660091652, "grad_norm": 189.08705139160156, "learning_rate": 4.7080046592063815e-07, "loss": 20.8438, "step": 20694 }, { "epoch": 1.3744437803015208, "grad_norm": 333.25433349609375, "learning_rate": 4.7070921142500255e-07, "loss": 23.4062, "step": 20695 }, { "epoch": 1.3745101945938765, "grad_norm": 135.5817108154297, "learning_rate": 4.706179630518975e-07, "loss": 18.2812, "step": 20696 }, { "epoch": 1.3745766088862323, "grad_norm": 346.0054016113281, "learning_rate": 4.7052672080237867e-07, "loss": 16.3281, "step": 20697 }, { "epoch": 1.374643023178588, "grad_norm": 323.88714599609375, "learning_rate": 4.7043548467750137e-07, "loss": 16.8125, "step": 20698 }, { "epoch": 1.3747094374709437, "grad_norm": 291.7080993652344, "learning_rate": 4.7034425467832107e-07, "loss": 16.7031, "step": 20699 }, { "epoch": 1.3747758517632995, "grad_norm": 264.3893127441406, "learning_rate": 4.702530308058934e-07, "loss": 15.0469, "step": 20700 }, { "epoch": 1.3748422660556552, "grad_norm": 178.55075073242188, "learning_rate": 4.7016181306127233e-07, "loss": 12.5469, "step": 20701 }, { "epoch": 1.3749086803480108, "grad_norm": 281.5788269042969, "learning_rate": 4.700706014455148e-07, "loss": 21.4219, "step": 20702 }, { "epoch": 1.3749750946403667, "grad_norm": 217.49533081054688, "learning_rate": 4.699793959596745e-07, "loss": 13.4844, "step": 20703 }, { "epoch": 1.3750415089327224, "grad_norm": 281.33978271484375, "learning_rate": 4.6988819660480694e-07, "loss": 22.3438, "step": 20704 }, { "epoch": 1.375107923225078, "grad_norm": 152.085693359375, "learning_rate": 4.6979700338196704e-07, "loss": 14.8438, "step": 20705 }, { "epoch": 1.3751743375174337, "grad_norm": 137.42910766601562, "learning_rate": 4.6970581629220964e-07, "loss": 21.5625, "step": 20706 }, { "epoch": 1.3752407518097893, "grad_norm": 114.46583557128906, "learning_rate": 4.696146353365896e-07, "loss": 14.3281, "step": 20707 }, { "epoch": 1.3753071661021452, "grad_norm": 153.8430633544922, "learning_rate": 4.6952346051616196e-07, "loss": 15.1562, "step": 20708 }, { "epoch": 1.3753735803945009, "grad_norm": 269.0173645019531, "learning_rate": 4.694322918319804e-07, "loss": 23.9766, "step": 20709 }, { "epoch": 1.3754399946868565, "grad_norm": 262.87115478515625, "learning_rate": 4.693411292851008e-07, "loss": 18.9531, "step": 20710 }, { "epoch": 1.3755064089792124, "grad_norm": 112.37785339355469, "learning_rate": 4.6924997287657677e-07, "loss": 12.1094, "step": 20711 }, { "epoch": 1.375572823271568, "grad_norm": 223.25128173828125, "learning_rate": 4.691588226074629e-07, "loss": 13.0312, "step": 20712 }, { "epoch": 1.3756392375639237, "grad_norm": 277.7015686035156, "learning_rate": 4.690676784788137e-07, "loss": 20.3906, "step": 20713 }, { "epoch": 1.3757056518562796, "grad_norm": 87.48615264892578, "learning_rate": 4.689765404916837e-07, "loss": 18.8594, "step": 20714 }, { "epoch": 1.3757720661486352, "grad_norm": 89.56031036376953, "learning_rate": 4.688854086471262e-07, "loss": 14.1875, "step": 20715 }, { "epoch": 1.3758384804409909, "grad_norm": 543.4163208007812, "learning_rate": 4.687942829461968e-07, "loss": 26.875, "step": 20716 }, { "epoch": 1.3759048947333465, "grad_norm": 419.31463623046875, "learning_rate": 4.6870316338994855e-07, "loss": 12.0781, "step": 20717 }, { "epoch": 1.3759713090257022, "grad_norm": 156.005126953125, "learning_rate": 4.686120499794356e-07, "loss": 14.4219, "step": 20718 }, { "epoch": 1.376037723318058, "grad_norm": 132.60211181640625, "learning_rate": 4.685209427157121e-07, "loss": 15.0469, "step": 20719 }, { "epoch": 1.3761041376104137, "grad_norm": 150.6199951171875, "learning_rate": 4.684298415998318e-07, "loss": 11.9688, "step": 20720 }, { "epoch": 1.3761705519027694, "grad_norm": 162.71859741210938, "learning_rate": 4.683387466328487e-07, "loss": 18.4688, "step": 20721 }, { "epoch": 1.3762369661951253, "grad_norm": 226.74411010742188, "learning_rate": 4.6824765781581666e-07, "loss": 16.5938, "step": 20722 }, { "epoch": 1.376303380487481, "grad_norm": 215.5417938232422, "learning_rate": 4.6815657514978835e-07, "loss": 17.2344, "step": 20723 }, { "epoch": 1.3763697947798366, "grad_norm": 355.884521484375, "learning_rate": 4.680654986358189e-07, "loss": 18.4844, "step": 20724 }, { "epoch": 1.3764362090721924, "grad_norm": 620.1341552734375, "learning_rate": 4.679744282749607e-07, "loss": 16.7969, "step": 20725 }, { "epoch": 1.376502623364548, "grad_norm": 176.77467346191406, "learning_rate": 4.6788336406826747e-07, "loss": 14.5469, "step": 20726 }, { "epoch": 1.3765690376569037, "grad_norm": 756.2445678710938, "learning_rate": 4.677923060167926e-07, "loss": 15.6562, "step": 20727 }, { "epoch": 1.3766354519492594, "grad_norm": 238.01927185058594, "learning_rate": 4.6770125412158944e-07, "loss": 15.1562, "step": 20728 }, { "epoch": 1.376701866241615, "grad_norm": 111.24945068359375, "learning_rate": 4.6761020838371133e-07, "loss": 15.3281, "step": 20729 }, { "epoch": 1.376768280533971, "grad_norm": 234.9918670654297, "learning_rate": 4.675191688042117e-07, "loss": 16.1719, "step": 20730 }, { "epoch": 1.3768346948263266, "grad_norm": 298.0594177246094, "learning_rate": 4.6742813538414247e-07, "loss": 17.6406, "step": 20731 }, { "epoch": 1.3769011091186822, "grad_norm": 234.94662475585938, "learning_rate": 4.673371081245583e-07, "loss": 20.5, "step": 20732 }, { "epoch": 1.3769675234110381, "grad_norm": 110.58985900878906, "learning_rate": 4.672460870265108e-07, "loss": 15.3594, "step": 20733 }, { "epoch": 1.3770339377033938, "grad_norm": 327.0907897949219, "learning_rate": 4.671550720910531e-07, "loss": 17.375, "step": 20734 }, { "epoch": 1.3771003519957494, "grad_norm": 183.2574005126953, "learning_rate": 4.6706406331923896e-07, "loss": 21.4844, "step": 20735 }, { "epoch": 1.3771667662881053, "grad_norm": 199.07315063476562, "learning_rate": 4.6697306071212007e-07, "loss": 16.8281, "step": 20736 }, { "epoch": 1.377233180580461, "grad_norm": 214.46009826660156, "learning_rate": 4.668820642707495e-07, "loss": 21.25, "step": 20737 }, { "epoch": 1.3772995948728166, "grad_norm": 745.7666625976562, "learning_rate": 4.6679107399617977e-07, "loss": 14.3828, "step": 20738 }, { "epoch": 1.3773660091651723, "grad_norm": 92.44598388671875, "learning_rate": 4.6670008988946376e-07, "loss": 11.3438, "step": 20739 }, { "epoch": 1.377432423457528, "grad_norm": 265.9447937011719, "learning_rate": 4.6660911195165287e-07, "loss": 14.3125, "step": 20740 }, { "epoch": 1.3774988377498838, "grad_norm": 404.35052490234375, "learning_rate": 4.6651814018380096e-07, "loss": 16.7344, "step": 20741 }, { "epoch": 1.3775652520422395, "grad_norm": 140.5889434814453, "learning_rate": 4.6642717458695926e-07, "loss": 18.2188, "step": 20742 }, { "epoch": 1.377631666334595, "grad_norm": 109.63115692138672, "learning_rate": 4.6633621516218025e-07, "loss": 16.0625, "step": 20743 }, { "epoch": 1.377698080626951, "grad_norm": 154.2010498046875, "learning_rate": 4.6624526191051616e-07, "loss": 14.6406, "step": 20744 }, { "epoch": 1.3777644949193066, "grad_norm": 176.476806640625, "learning_rate": 4.6615431483301916e-07, "loss": 13.9531, "step": 20745 }, { "epoch": 1.3778309092116623, "grad_norm": 181.29763793945312, "learning_rate": 4.6606337393074123e-07, "loss": 19.0625, "step": 20746 }, { "epoch": 1.3778973235040182, "grad_norm": 106.77181243896484, "learning_rate": 4.6597243920473463e-07, "loss": 13.8594, "step": 20747 }, { "epoch": 1.3779637377963738, "grad_norm": 150.55850219726562, "learning_rate": 4.658815106560502e-07, "loss": 12.9531, "step": 20748 }, { "epoch": 1.3780301520887295, "grad_norm": 239.8740692138672, "learning_rate": 4.657905882857411e-07, "loss": 15.0156, "step": 20749 }, { "epoch": 1.3780965663810851, "grad_norm": 124.72528839111328, "learning_rate": 4.656996720948582e-07, "loss": 13.9844, "step": 20750 }, { "epoch": 1.378162980673441, "grad_norm": 168.59849548339844, "learning_rate": 4.656087620844533e-07, "loss": 21.7188, "step": 20751 }, { "epoch": 1.3782293949657967, "grad_norm": 164.0456085205078, "learning_rate": 4.6551785825557823e-07, "loss": 19.875, "step": 20752 }, { "epoch": 1.3782958092581523, "grad_norm": 150.62933349609375, "learning_rate": 4.6542696060928424e-07, "loss": 15.3906, "step": 20753 }, { "epoch": 1.378362223550508, "grad_norm": 143.64077758789062, "learning_rate": 4.6533606914662293e-07, "loss": 14.75, "step": 20754 }, { "epoch": 1.3784286378428638, "grad_norm": 236.78970336914062, "learning_rate": 4.6524518386864606e-07, "loss": 16.125, "step": 20755 }, { "epoch": 1.3784950521352195, "grad_norm": 557.9773559570312, "learning_rate": 4.6515430477640385e-07, "loss": 21.75, "step": 20756 }, { "epoch": 1.3785614664275752, "grad_norm": 128.5286407470703, "learning_rate": 4.65063431870949e-07, "loss": 13.0156, "step": 20757 }, { "epoch": 1.378627880719931, "grad_norm": 294.94586181640625, "learning_rate": 4.649725651533314e-07, "loss": 15.9375, "step": 20758 }, { "epoch": 1.3786942950122867, "grad_norm": 250.73355102539062, "learning_rate": 4.6488170462460274e-07, "loss": 17.4062, "step": 20759 }, { "epoch": 1.3787607093046423, "grad_norm": 122.06394958496094, "learning_rate": 4.64790850285814e-07, "loss": 20.0625, "step": 20760 }, { "epoch": 1.378827123596998, "grad_norm": 342.6063232421875, "learning_rate": 4.6470000213801643e-07, "loss": 18.8438, "step": 20761 }, { "epoch": 1.3788935378893539, "grad_norm": 189.07891845703125, "learning_rate": 4.6460916018225973e-07, "loss": 11.0391, "step": 20762 }, { "epoch": 1.3789599521817095, "grad_norm": 93.51614379882812, "learning_rate": 4.645183244195964e-07, "loss": 16.1562, "step": 20763 }, { "epoch": 1.3790263664740652, "grad_norm": 476.20770263671875, "learning_rate": 4.644274948510758e-07, "loss": 25.1875, "step": 20764 }, { "epoch": 1.3790927807664208, "grad_norm": 357.9822082519531, "learning_rate": 4.643366714777492e-07, "loss": 16.6719, "step": 20765 }, { "epoch": 1.3791591950587767, "grad_norm": 470.09423828125, "learning_rate": 4.642458543006671e-07, "loss": 11.9375, "step": 20766 }, { "epoch": 1.3792256093511324, "grad_norm": 274.1295471191406, "learning_rate": 4.641550433208801e-07, "loss": 15.8594, "step": 20767 }, { "epoch": 1.379292023643488, "grad_norm": 555.259033203125, "learning_rate": 4.6406423853943845e-07, "loss": 14.7969, "step": 20768 }, { "epoch": 1.379358437935844, "grad_norm": 265.2696533203125, "learning_rate": 4.63973439957393e-07, "loss": 19.1094, "step": 20769 }, { "epoch": 1.3794248522281995, "grad_norm": 201.15841674804688, "learning_rate": 4.638826475757931e-07, "loss": 20.7656, "step": 20770 }, { "epoch": 1.3794912665205552, "grad_norm": 230.7372283935547, "learning_rate": 4.6379186139568993e-07, "loss": 15.3594, "step": 20771 }, { "epoch": 1.3795576808129109, "grad_norm": 412.4915771484375, "learning_rate": 4.637010814181336e-07, "loss": 14.2969, "step": 20772 }, { "epoch": 1.3796240951052667, "grad_norm": 123.0113754272461, "learning_rate": 4.6361030764417317e-07, "loss": 21.2812, "step": 20773 }, { "epoch": 1.3796905093976224, "grad_norm": 236.69129943847656, "learning_rate": 4.635195400748603e-07, "loss": 15.4844, "step": 20774 }, { "epoch": 1.379756923689978, "grad_norm": 172.56517028808594, "learning_rate": 4.6342877871124354e-07, "loss": 12.1719, "step": 20775 }, { "epoch": 1.3798233379823337, "grad_norm": 192.87478637695312, "learning_rate": 4.633380235543732e-07, "loss": 11.6094, "step": 20776 }, { "epoch": 1.3798897522746896, "grad_norm": 111.30952453613281, "learning_rate": 4.632472746052992e-07, "loss": 15.7656, "step": 20777 }, { "epoch": 1.3799561665670452, "grad_norm": 193.2388916015625, "learning_rate": 4.6315653186507167e-07, "loss": 17.0156, "step": 20778 }, { "epoch": 1.3800225808594009, "grad_norm": 323.6007385253906, "learning_rate": 4.6306579533473897e-07, "loss": 23.2344, "step": 20779 }, { "epoch": 1.3800889951517568, "grad_norm": 174.88023376464844, "learning_rate": 4.6297506501535244e-07, "loss": 15.6562, "step": 20780 }, { "epoch": 1.3801554094441124, "grad_norm": 270.69049072265625, "learning_rate": 4.6288434090796013e-07, "loss": 19.2656, "step": 20781 }, { "epoch": 1.380221823736468, "grad_norm": 190.45196533203125, "learning_rate": 4.6279362301361213e-07, "loss": 20.0, "step": 20782 }, { "epoch": 1.3802882380288237, "grad_norm": 168.90802001953125, "learning_rate": 4.627029113333578e-07, "loss": 16.4531, "step": 20783 }, { "epoch": 1.3803546523211796, "grad_norm": 138.15086364746094, "learning_rate": 4.6261220586824624e-07, "loss": 18.1406, "step": 20784 }, { "epoch": 1.3804210666135353, "grad_norm": 230.36781311035156, "learning_rate": 4.625215066193269e-07, "loss": 22.6875, "step": 20785 }, { "epoch": 1.380487480905891, "grad_norm": 6820.8642578125, "learning_rate": 4.6243081358764915e-07, "loss": 23.0312, "step": 20786 }, { "epoch": 1.3805538951982466, "grad_norm": 172.6246337890625, "learning_rate": 4.6234012677426104e-07, "loss": 14.8672, "step": 20787 }, { "epoch": 1.3806203094906024, "grad_norm": 182.5814666748047, "learning_rate": 4.622494461802131e-07, "loss": 14.5938, "step": 20788 }, { "epoch": 1.380686723782958, "grad_norm": 422.5094909667969, "learning_rate": 4.62158771806553e-07, "loss": 23.2188, "step": 20789 }, { "epoch": 1.3807531380753137, "grad_norm": 168.37362670898438, "learning_rate": 4.6206810365433026e-07, "loss": 16.7656, "step": 20790 }, { "epoch": 1.3808195523676696, "grad_norm": 404.3299865722656, "learning_rate": 4.6197744172459343e-07, "loss": 22.1562, "step": 20791 }, { "epoch": 1.3808859666600253, "grad_norm": 152.92977905273438, "learning_rate": 4.6188678601839126e-07, "loss": 15.3594, "step": 20792 }, { "epoch": 1.380952380952381, "grad_norm": 105.97428131103516, "learning_rate": 4.6179613653677243e-07, "loss": 11.4141, "step": 20793 }, { "epoch": 1.3810187952447366, "grad_norm": 187.24160766601562, "learning_rate": 4.61705493280786e-07, "loss": 16.4688, "step": 20794 }, { "epoch": 1.3810852095370925, "grad_norm": 155.70640563964844, "learning_rate": 4.616148562514792e-07, "loss": 19.625, "step": 20795 }, { "epoch": 1.3811516238294481, "grad_norm": 531.5556030273438, "learning_rate": 4.6152422544990223e-07, "loss": 24.0938, "step": 20796 }, { "epoch": 1.3812180381218038, "grad_norm": 155.78472900390625, "learning_rate": 4.6143360087710193e-07, "loss": 14.9688, "step": 20797 }, { "epoch": 1.3812844524141594, "grad_norm": 246.0816650390625, "learning_rate": 4.6134298253412727e-07, "loss": 21.25, "step": 20798 }, { "epoch": 1.3813508667065153, "grad_norm": 247.41159057617188, "learning_rate": 4.612523704220264e-07, "loss": 21.9531, "step": 20799 }, { "epoch": 1.381417280998871, "grad_norm": 306.3652648925781, "learning_rate": 4.6116176454184775e-07, "loss": 15.9688, "step": 20800 }, { "epoch": 1.3814836952912266, "grad_norm": 179.0397491455078, "learning_rate": 4.610711648946384e-07, "loss": 11.9219, "step": 20801 }, { "epoch": 1.3815501095835825, "grad_norm": 275.8350830078125, "learning_rate": 4.6098057148144775e-07, "loss": 16.1875, "step": 20802 }, { "epoch": 1.3816165238759381, "grad_norm": 396.66168212890625, "learning_rate": 4.608899843033227e-07, "loss": 22.6875, "step": 20803 }, { "epoch": 1.3816829381682938, "grad_norm": 100.4714126586914, "learning_rate": 4.607994033613115e-07, "loss": 12.625, "step": 20804 }, { "epoch": 1.3817493524606494, "grad_norm": 473.3411560058594, "learning_rate": 4.607088286564619e-07, "loss": 15.8438, "step": 20805 }, { "epoch": 1.3818157667530053, "grad_norm": 191.1318359375, "learning_rate": 4.606182601898214e-07, "loss": 22.125, "step": 20806 }, { "epoch": 1.381882181045361, "grad_norm": 430.6414794921875, "learning_rate": 4.60527697962438e-07, "loss": 13.9062, "step": 20807 }, { "epoch": 1.3819485953377166, "grad_norm": 217.74424743652344, "learning_rate": 4.604371419753591e-07, "loss": 11.9453, "step": 20808 }, { "epoch": 1.3820150096300723, "grad_norm": 313.5958557128906, "learning_rate": 4.603465922296321e-07, "loss": 23.1562, "step": 20809 }, { "epoch": 1.3820814239224282, "grad_norm": 134.1274871826172, "learning_rate": 4.6025604872630463e-07, "loss": 14.0781, "step": 20810 }, { "epoch": 1.3821478382147838, "grad_norm": 445.4935607910156, "learning_rate": 4.6016551146642435e-07, "loss": 11.8125, "step": 20811 }, { "epoch": 1.3822142525071395, "grad_norm": 200.9156036376953, "learning_rate": 4.6007498045103745e-07, "loss": 16.5781, "step": 20812 }, { "epoch": 1.3822806667994954, "grad_norm": 107.14260864257812, "learning_rate": 4.599844556811925e-07, "loss": 14.125, "step": 20813 }, { "epoch": 1.382347081091851, "grad_norm": 386.02294921875, "learning_rate": 4.5989393715793555e-07, "loss": 14.7031, "step": 20814 }, { "epoch": 1.3824134953842067, "grad_norm": 374.40411376953125, "learning_rate": 4.5980342488231415e-07, "loss": 20.6406, "step": 20815 }, { "epoch": 1.3824799096765623, "grad_norm": 269.66094970703125, "learning_rate": 4.5971291885537524e-07, "loss": 13.875, "step": 20816 }, { "epoch": 1.3825463239689182, "grad_norm": 370.16644287109375, "learning_rate": 4.596224190781658e-07, "loss": 18.9062, "step": 20817 }, { "epoch": 1.3826127382612738, "grad_norm": 433.31683349609375, "learning_rate": 4.5953192555173247e-07, "loss": 14.5938, "step": 20818 }, { "epoch": 1.3826791525536295, "grad_norm": 226.76466369628906, "learning_rate": 4.594414382771227e-07, "loss": 16.0469, "step": 20819 }, { "epoch": 1.3827455668459852, "grad_norm": 239.28111267089844, "learning_rate": 4.593509572553819e-07, "loss": 13.5312, "step": 20820 }, { "epoch": 1.382811981138341, "grad_norm": 189.07896423339844, "learning_rate": 4.592604824875581e-07, "loss": 22.3906, "step": 20821 }, { "epoch": 1.3828783954306967, "grad_norm": 167.11146545410156, "learning_rate": 4.591700139746969e-07, "loss": 12.7031, "step": 20822 }, { "epoch": 1.3829448097230523, "grad_norm": 362.9970703125, "learning_rate": 4.590795517178452e-07, "loss": 19.0625, "step": 20823 }, { "epoch": 1.3830112240154082, "grad_norm": 305.89794921875, "learning_rate": 4.589890957180492e-07, "loss": 15.7344, "step": 20824 }, { "epoch": 1.3830776383077639, "grad_norm": 302.10638427734375, "learning_rate": 4.5889864597635584e-07, "loss": 18.4531, "step": 20825 }, { "epoch": 1.3831440526001195, "grad_norm": 198.94113159179688, "learning_rate": 4.5880820249381016e-07, "loss": 20.4062, "step": 20826 }, { "epoch": 1.3832104668924754, "grad_norm": 211.1563720703125, "learning_rate": 4.587177652714599e-07, "loss": 15.9219, "step": 20827 }, { "epoch": 1.383276881184831, "grad_norm": 265.01318359375, "learning_rate": 4.5862733431034995e-07, "loss": 15.4844, "step": 20828 }, { "epoch": 1.3833432954771867, "grad_norm": 144.78805541992188, "learning_rate": 4.585369096115269e-07, "loss": 15.75, "step": 20829 }, { "epoch": 1.3834097097695424, "grad_norm": 437.013916015625, "learning_rate": 4.5844649117603664e-07, "loss": 19.0938, "step": 20830 }, { "epoch": 1.383476124061898, "grad_norm": 137.0869598388672, "learning_rate": 4.583560790049251e-07, "loss": 17.3125, "step": 20831 }, { "epoch": 1.383542538354254, "grad_norm": 145.62542724609375, "learning_rate": 4.5826567309923813e-07, "loss": 14.5312, "step": 20832 }, { "epoch": 1.3836089526466095, "grad_norm": 169.6269073486328, "learning_rate": 4.5817527346002184e-07, "loss": 16.75, "step": 20833 }, { "epoch": 1.3836753669389652, "grad_norm": 404.0869445800781, "learning_rate": 4.580848800883207e-07, "loss": 26.5469, "step": 20834 }, { "epoch": 1.383741781231321, "grad_norm": 133.3218994140625, "learning_rate": 4.57994492985182e-07, "loss": 14.2188, "step": 20835 }, { "epoch": 1.3838081955236767, "grad_norm": 570.1835327148438, "learning_rate": 4.5790411215165016e-07, "loss": 14.7812, "step": 20836 }, { "epoch": 1.3838746098160324, "grad_norm": 1989.3087158203125, "learning_rate": 4.5781373758877085e-07, "loss": 18.7031, "step": 20837 }, { "epoch": 1.3839410241083883, "grad_norm": 350.89202880859375, "learning_rate": 4.577233692975897e-07, "loss": 13.1562, "step": 20838 }, { "epoch": 1.384007438400744, "grad_norm": 131.6155242919922, "learning_rate": 4.576330072791518e-07, "loss": 15.75, "step": 20839 }, { "epoch": 1.3840738526930996, "grad_norm": 309.3397216796875, "learning_rate": 4.5754265153450255e-07, "loss": 23.1562, "step": 20840 }, { "epoch": 1.3841402669854552, "grad_norm": 125.32923126220703, "learning_rate": 4.5745230206468753e-07, "loss": 12.7969, "step": 20841 }, { "epoch": 1.3842066812778109, "grad_norm": 217.91217041015625, "learning_rate": 4.57361958870751e-07, "loss": 13.5781, "step": 20842 }, { "epoch": 1.3842730955701668, "grad_norm": 180.61195373535156, "learning_rate": 4.572716219537385e-07, "loss": 11.1875, "step": 20843 }, { "epoch": 1.3843395098625224, "grad_norm": 149.14260864257812, "learning_rate": 4.5718129131469486e-07, "loss": 14.0, "step": 20844 }, { "epoch": 1.384405924154878, "grad_norm": 120.61470031738281, "learning_rate": 4.570909669546651e-07, "loss": 13.3594, "step": 20845 }, { "epoch": 1.384472338447234, "grad_norm": 156.59027099609375, "learning_rate": 4.570006488746939e-07, "loss": 18.875, "step": 20846 }, { "epoch": 1.3845387527395896, "grad_norm": 157.58816528320312, "learning_rate": 4.5691033707582616e-07, "loss": 11.5469, "step": 20847 }, { "epoch": 1.3846051670319452, "grad_norm": 147.41387939453125, "learning_rate": 4.5682003155910643e-07, "loss": 16.8906, "step": 20848 }, { "epoch": 1.3846715813243011, "grad_norm": 197.64756774902344, "learning_rate": 4.567297323255793e-07, "loss": 14.9531, "step": 20849 }, { "epoch": 1.3847379956166568, "grad_norm": 167.7657470703125, "learning_rate": 4.566394393762898e-07, "loss": 12.2734, "step": 20850 }, { "epoch": 1.3848044099090124, "grad_norm": 106.38959503173828, "learning_rate": 4.565491527122812e-07, "loss": 12.2188, "step": 20851 }, { "epoch": 1.384870824201368, "grad_norm": 175.65924072265625, "learning_rate": 4.5645887233459944e-07, "loss": 17.125, "step": 20852 }, { "epoch": 1.3849372384937237, "grad_norm": 115.2229995727539, "learning_rate": 4.5636859824428753e-07, "loss": 14.0469, "step": 20853 }, { "epoch": 1.3850036527860796, "grad_norm": 701.2376708984375, "learning_rate": 4.5627833044239026e-07, "loss": 15.2656, "step": 20854 }, { "epoch": 1.3850700670784353, "grad_norm": 156.1328125, "learning_rate": 4.5618806892995175e-07, "loss": 14.7344, "step": 20855 }, { "epoch": 1.385136481370791, "grad_norm": 275.1209716796875, "learning_rate": 4.56097813708016e-07, "loss": 13.0781, "step": 20856 }, { "epoch": 1.3852028956631468, "grad_norm": 238.549072265625, "learning_rate": 4.5600756477762714e-07, "loss": 18.5, "step": 20857 }, { "epoch": 1.3852693099555025, "grad_norm": 155.38600158691406, "learning_rate": 4.5591732213982947e-07, "loss": 17.3594, "step": 20858 }, { "epoch": 1.3853357242478581, "grad_norm": 370.47979736328125, "learning_rate": 4.5582708579566573e-07, "loss": 18.25, "step": 20859 }, { "epoch": 1.385402138540214, "grad_norm": 211.5736541748047, "learning_rate": 4.5573685574618124e-07, "loss": 18.8281, "step": 20860 }, { "epoch": 1.3854685528325696, "grad_norm": 193.28201293945312, "learning_rate": 4.5564663199241857e-07, "loss": 18.7031, "step": 20861 }, { "epoch": 1.3855349671249253, "grad_norm": 105.57861328125, "learning_rate": 4.555564145354218e-07, "loss": 15.5625, "step": 20862 }, { "epoch": 1.385601381417281, "grad_norm": 129.3011932373047, "learning_rate": 4.5546620337623454e-07, "loss": 17.4688, "step": 20863 }, { "epoch": 1.3856677957096366, "grad_norm": 133.3364715576172, "learning_rate": 4.553759985159005e-07, "loss": 13.0469, "step": 20864 }, { "epoch": 1.3857342100019925, "grad_norm": 254.08372497558594, "learning_rate": 4.552857999554622e-07, "loss": 19.2188, "step": 20865 }, { "epoch": 1.3858006242943481, "grad_norm": 187.93971252441406, "learning_rate": 4.551956076959643e-07, "loss": 18.5, "step": 20866 }, { "epoch": 1.3858670385867038, "grad_norm": 334.2712097167969, "learning_rate": 4.551054217384492e-07, "loss": 22.0781, "step": 20867 }, { "epoch": 1.3859334528790597, "grad_norm": 253.5227508544922, "learning_rate": 4.5501524208396035e-07, "loss": 20.7188, "step": 20868 }, { "epoch": 1.3859998671714153, "grad_norm": 126.83136749267578, "learning_rate": 4.5492506873354087e-07, "loss": 14.4844, "step": 20869 }, { "epoch": 1.386066281463771, "grad_norm": 240.19418334960938, "learning_rate": 4.548349016882339e-07, "loss": 23.0469, "step": 20870 }, { "epoch": 1.3861326957561269, "grad_norm": 203.00503540039062, "learning_rate": 4.547447409490824e-07, "loss": 15.1094, "step": 20871 }, { "epoch": 1.3861991100484825, "grad_norm": 216.7159881591797, "learning_rate": 4.5465458651712984e-07, "loss": 13.5938, "step": 20872 }, { "epoch": 1.3862655243408382, "grad_norm": 156.48826599121094, "learning_rate": 4.5456443839341776e-07, "loss": 15.6406, "step": 20873 }, { "epoch": 1.3863319386331938, "grad_norm": 263.8896179199219, "learning_rate": 4.544742965789904e-07, "loss": 18.8438, "step": 20874 }, { "epoch": 1.3863983529255495, "grad_norm": 265.3208923339844, "learning_rate": 4.5438416107488955e-07, "loss": 17.5156, "step": 20875 }, { "epoch": 1.3864647672179053, "grad_norm": 242.00071716308594, "learning_rate": 4.5429403188215796e-07, "loss": 14.5781, "step": 20876 }, { "epoch": 1.386531181510261, "grad_norm": 195.7388916015625, "learning_rate": 4.5420390900183835e-07, "loss": 19.7344, "step": 20877 }, { "epoch": 1.3865975958026167, "grad_norm": 173.20277404785156, "learning_rate": 4.5411379243497325e-07, "loss": 14.5938, "step": 20878 }, { "epoch": 1.3866640100949725, "grad_norm": 232.2720184326172, "learning_rate": 4.540236821826051e-07, "loss": 18.8438, "step": 20879 }, { "epoch": 1.3867304243873282, "grad_norm": 157.58770751953125, "learning_rate": 4.539335782457765e-07, "loss": 17.125, "step": 20880 }, { "epoch": 1.3867968386796838, "grad_norm": 257.85650634765625, "learning_rate": 4.5384348062552855e-07, "loss": 16.1406, "step": 20881 }, { "epoch": 1.3868632529720397, "grad_norm": 197.3216552734375, "learning_rate": 4.5375338932290474e-07, "loss": 19.2031, "step": 20882 }, { "epoch": 1.3869296672643954, "grad_norm": 168.08438110351562, "learning_rate": 4.536633043389471e-07, "loss": 17.0156, "step": 20883 }, { "epoch": 1.386996081556751, "grad_norm": 356.47491455078125, "learning_rate": 4.535732256746966e-07, "loss": 16.8281, "step": 20884 }, { "epoch": 1.3870624958491067, "grad_norm": 213.90048217773438, "learning_rate": 4.5348315333119656e-07, "loss": 20.2969, "step": 20885 }, { "epoch": 1.3871289101414623, "grad_norm": 318.3908996582031, "learning_rate": 4.53393087309488e-07, "loss": 15.875, "step": 20886 }, { "epoch": 1.3871953244338182, "grad_norm": 514.8748779296875, "learning_rate": 4.53303027610613e-07, "loss": 18.4531, "step": 20887 }, { "epoch": 1.3872617387261739, "grad_norm": 168.0231170654297, "learning_rate": 4.5321297423561334e-07, "loss": 18.0781, "step": 20888 }, { "epoch": 1.3873281530185295, "grad_norm": 1390.1688232421875, "learning_rate": 4.5312292718553103e-07, "loss": 17.0938, "step": 20889 }, { "epoch": 1.3873945673108854, "grad_norm": 523.0244750976562, "learning_rate": 4.530328864614067e-07, "loss": 17.4375, "step": 20890 }, { "epoch": 1.387460981603241, "grad_norm": 156.46241760253906, "learning_rate": 4.529428520642832e-07, "loss": 11.1094, "step": 20891 }, { "epoch": 1.3875273958955967, "grad_norm": 306.9195861816406, "learning_rate": 4.52852823995201e-07, "loss": 13.2969, "step": 20892 }, { "epoch": 1.3875938101879526, "grad_norm": 437.00518798828125, "learning_rate": 4.527628022552018e-07, "loss": 17.8438, "step": 20893 }, { "epoch": 1.3876602244803082, "grad_norm": 154.47727966308594, "learning_rate": 4.5267278684532695e-07, "loss": 13.9375, "step": 20894 }, { "epoch": 1.387726638772664, "grad_norm": 122.07079315185547, "learning_rate": 4.5258277776661766e-07, "loss": 12.4219, "step": 20895 }, { "epoch": 1.3877930530650195, "grad_norm": 244.64027404785156, "learning_rate": 4.524927750201152e-07, "loss": 18.0, "step": 20896 }, { "epoch": 1.3878594673573752, "grad_norm": 804.816650390625, "learning_rate": 4.5240277860686096e-07, "loss": 23.3125, "step": 20897 }, { "epoch": 1.387925881649731, "grad_norm": 186.201416015625, "learning_rate": 4.523127885278949e-07, "loss": 13.7891, "step": 20898 }, { "epoch": 1.3879922959420867, "grad_norm": 210.0062255859375, "learning_rate": 4.5222280478425935e-07, "loss": 12.3906, "step": 20899 }, { "epoch": 1.3880587102344424, "grad_norm": 153.43650817871094, "learning_rate": 4.521328273769942e-07, "loss": 20.9531, "step": 20900 }, { "epoch": 1.3881251245267983, "grad_norm": 210.96722412109375, "learning_rate": 4.5204285630714066e-07, "loss": 16.4531, "step": 20901 }, { "epoch": 1.388191538819154, "grad_norm": 1781.6004638671875, "learning_rate": 4.5195289157573933e-07, "loss": 15.2188, "step": 20902 }, { "epoch": 1.3882579531115096, "grad_norm": 259.24658203125, "learning_rate": 4.5186293318383084e-07, "loss": 13.3906, "step": 20903 }, { "epoch": 1.3883243674038654, "grad_norm": 400.35028076171875, "learning_rate": 4.51772981132456e-07, "loss": 20.8594, "step": 20904 }, { "epoch": 1.388390781696221, "grad_norm": 210.03892517089844, "learning_rate": 4.516830354226555e-07, "loss": 14.5156, "step": 20905 }, { "epoch": 1.3884571959885768, "grad_norm": 182.44418334960938, "learning_rate": 4.5159309605546916e-07, "loss": 14.5781, "step": 20906 }, { "epoch": 1.3885236102809324, "grad_norm": 659.8961181640625, "learning_rate": 4.5150316303193756e-07, "loss": 20.6719, "step": 20907 }, { "epoch": 1.388590024573288, "grad_norm": 205.51104736328125, "learning_rate": 4.514132363531011e-07, "loss": 12.7031, "step": 20908 }, { "epoch": 1.388656438865644, "grad_norm": 184.5631103515625, "learning_rate": 4.5132331602000004e-07, "loss": 21.4844, "step": 20909 }, { "epoch": 1.3887228531579996, "grad_norm": 277.0580749511719, "learning_rate": 4.5123340203367443e-07, "loss": 19.0391, "step": 20910 }, { "epoch": 1.3887892674503552, "grad_norm": 213.71035766601562, "learning_rate": 4.5114349439516477e-07, "loss": 16.6172, "step": 20911 }, { "epoch": 1.3888556817427111, "grad_norm": 93.36849212646484, "learning_rate": 4.5105359310550993e-07, "loss": 16.7031, "step": 20912 }, { "epoch": 1.3889220960350668, "grad_norm": 166.23483276367188, "learning_rate": 4.509636981657514e-07, "loss": 15.8906, "step": 20913 }, { "epoch": 1.3889885103274224, "grad_norm": 468.28131103515625, "learning_rate": 4.5087380957692776e-07, "loss": 14.1719, "step": 20914 }, { "epoch": 1.3890549246197783, "grad_norm": 126.44261169433594, "learning_rate": 4.507839273400793e-07, "loss": 12.6875, "step": 20915 }, { "epoch": 1.389121338912134, "grad_norm": 120.69754791259766, "learning_rate": 4.506940514562456e-07, "loss": 14.2812, "step": 20916 }, { "epoch": 1.3891877532044896, "grad_norm": 261.9731140136719, "learning_rate": 4.506041819264664e-07, "loss": 12.4844, "step": 20917 }, { "epoch": 1.3892541674968453, "grad_norm": 2343.33837890625, "learning_rate": 4.5051431875178126e-07, "loss": 15.9062, "step": 20918 }, { "epoch": 1.389320581789201, "grad_norm": 219.2617950439453, "learning_rate": 4.504244619332296e-07, "loss": 14.0156, "step": 20919 }, { "epoch": 1.3893869960815568, "grad_norm": 212.26959228515625, "learning_rate": 4.503346114718508e-07, "loss": 11.0469, "step": 20920 }, { "epoch": 1.3894534103739125, "grad_norm": 209.41578674316406, "learning_rate": 4.5024476736868445e-07, "loss": 18.6719, "step": 20921 }, { "epoch": 1.389519824666268, "grad_norm": 176.53125, "learning_rate": 4.5015492962476973e-07, "loss": 17.8281, "step": 20922 }, { "epoch": 1.389586238958624, "grad_norm": 228.51956176757812, "learning_rate": 4.5006509824114526e-07, "loss": 19.5, "step": 20923 }, { "epoch": 1.3896526532509796, "grad_norm": 301.25897216796875, "learning_rate": 4.499752732188512e-07, "loss": 15.9062, "step": 20924 }, { "epoch": 1.3897190675433353, "grad_norm": 141.80406188964844, "learning_rate": 4.498854545589257e-07, "loss": 14.9922, "step": 20925 }, { "epoch": 1.3897854818356912, "grad_norm": 227.71107482910156, "learning_rate": 4.4979564226240793e-07, "loss": 15.4688, "step": 20926 }, { "epoch": 1.3898518961280468, "grad_norm": 141.4357452392578, "learning_rate": 4.49705836330337e-07, "loss": 12.5781, "step": 20927 }, { "epoch": 1.3899183104204025, "grad_norm": 153.69810485839844, "learning_rate": 4.49616036763752e-07, "loss": 14.8906, "step": 20928 }, { "epoch": 1.3899847247127581, "grad_norm": 142.90414428710938, "learning_rate": 4.4952624356369064e-07, "loss": 14.0234, "step": 20929 }, { "epoch": 1.3900511390051138, "grad_norm": 176.55889892578125, "learning_rate": 4.494364567311929e-07, "loss": 16.9688, "step": 20930 }, { "epoch": 1.3901175532974697, "grad_norm": 235.2005157470703, "learning_rate": 4.493466762672965e-07, "loss": 20.5625, "step": 20931 }, { "epoch": 1.3901839675898253, "grad_norm": 182.86380004882812, "learning_rate": 4.492569021730401e-07, "loss": 13.7656, "step": 20932 }, { "epoch": 1.390250381882181, "grad_norm": 361.4154052734375, "learning_rate": 4.4916713444946244e-07, "loss": 16.2891, "step": 20933 }, { "epoch": 1.3903167961745369, "grad_norm": 122.43397521972656, "learning_rate": 4.4907737309760154e-07, "loss": 16.3594, "step": 20934 }, { "epoch": 1.3903832104668925, "grad_norm": 178.65618896484375, "learning_rate": 4.489876181184961e-07, "loss": 13.4844, "step": 20935 }, { "epoch": 1.3904496247592482, "grad_norm": 286.9045104980469, "learning_rate": 4.4889786951318444e-07, "loss": 17.7188, "step": 20936 }, { "epoch": 1.390516039051604, "grad_norm": 1172.701171875, "learning_rate": 4.488081272827037e-07, "loss": 19.5156, "step": 20937 }, { "epoch": 1.3905824533439597, "grad_norm": 435.793701171875, "learning_rate": 4.487183914280934e-07, "loss": 17.9375, "step": 20938 }, { "epoch": 1.3906488676363153, "grad_norm": 77.85450744628906, "learning_rate": 4.4862866195039053e-07, "loss": 14.8125, "step": 20939 }, { "epoch": 1.390715281928671, "grad_norm": 162.61900329589844, "learning_rate": 4.4853893885063334e-07, "loss": 14.4844, "step": 20940 }, { "epoch": 1.3907816962210267, "grad_norm": 222.7096405029297, "learning_rate": 4.484492221298598e-07, "loss": 16.2266, "step": 20941 }, { "epoch": 1.3908481105133825, "grad_norm": 121.82087707519531, "learning_rate": 4.483595117891076e-07, "loss": 13.3281, "step": 20942 }, { "epoch": 1.3909145248057382, "grad_norm": 325.5572509765625, "learning_rate": 4.4826980782941436e-07, "loss": 14.8281, "step": 20943 }, { "epoch": 1.3909809390980938, "grad_norm": 481.5022888183594, "learning_rate": 4.4818011025181835e-07, "loss": 12.4219, "step": 20944 }, { "epoch": 1.3910473533904497, "grad_norm": 164.4126739501953, "learning_rate": 4.480904190573558e-07, "loss": 14.8125, "step": 20945 }, { "epoch": 1.3911137676828054, "grad_norm": 132.50108337402344, "learning_rate": 4.4800073424706575e-07, "loss": 14.7969, "step": 20946 }, { "epoch": 1.391180181975161, "grad_norm": 254.2638397216797, "learning_rate": 4.4791105582198466e-07, "loss": 19.4062, "step": 20947 }, { "epoch": 1.391246596267517, "grad_norm": 719.459228515625, "learning_rate": 4.478213837831499e-07, "loss": 23.9062, "step": 20948 }, { "epoch": 1.3913130105598726, "grad_norm": 650.48046875, "learning_rate": 4.4773171813159915e-07, "loss": 16.7656, "step": 20949 }, { "epoch": 1.3913794248522282, "grad_norm": 150.17648315429688, "learning_rate": 4.476420588683698e-07, "loss": 14.875, "step": 20950 }, { "epoch": 1.3914458391445839, "grad_norm": 175.39987182617188, "learning_rate": 4.4755240599449783e-07, "loss": 14.4844, "step": 20951 }, { "epoch": 1.3915122534369395, "grad_norm": 410.85247802734375, "learning_rate": 4.4746275951102177e-07, "loss": 21.0156, "step": 20952 }, { "epoch": 1.3915786677292954, "grad_norm": 202.82752990722656, "learning_rate": 4.473731194189776e-07, "loss": 16.7031, "step": 20953 }, { "epoch": 1.391645082021651, "grad_norm": 472.99090576171875, "learning_rate": 4.472834857194021e-07, "loss": 18.4062, "step": 20954 }, { "epoch": 1.3917114963140067, "grad_norm": 652.7525024414062, "learning_rate": 4.471938584133332e-07, "loss": 11.2188, "step": 20955 }, { "epoch": 1.3917779106063626, "grad_norm": 98.89765167236328, "learning_rate": 4.471042375018067e-07, "loss": 12.8281, "step": 20956 }, { "epoch": 1.3918443248987182, "grad_norm": 189.12954711914062, "learning_rate": 4.4701462298585954e-07, "loss": 15.0781, "step": 20957 }, { "epoch": 1.3919107391910739, "grad_norm": 106.20571899414062, "learning_rate": 4.469250148665283e-07, "loss": 15.0, "step": 20958 }, { "epoch": 1.3919771534834298, "grad_norm": 317.4947814941406, "learning_rate": 4.4683541314484976e-07, "loss": 17.6406, "step": 20959 }, { "epoch": 1.3920435677757854, "grad_norm": 263.1802062988281, "learning_rate": 4.4674581782185994e-07, "loss": 14.0781, "step": 20960 }, { "epoch": 1.392109982068141, "grad_norm": 154.16502380371094, "learning_rate": 4.46656228898596e-07, "loss": 14.8125, "step": 20961 }, { "epoch": 1.3921763963604967, "grad_norm": 151.1579132080078, "learning_rate": 4.4656664637609297e-07, "loss": 11.2734, "step": 20962 }, { "epoch": 1.3922428106528524, "grad_norm": 167.83416748046875, "learning_rate": 4.464770702553885e-07, "loss": 11.5625, "step": 20963 }, { "epoch": 1.3923092249452083, "grad_norm": 97.18539428710938, "learning_rate": 4.4638750053751784e-07, "loss": 16.0312, "step": 20964 }, { "epoch": 1.392375639237564, "grad_norm": 168.71128845214844, "learning_rate": 4.462979372235173e-07, "loss": 13.8125, "step": 20965 }, { "epoch": 1.3924420535299196, "grad_norm": 229.48226928710938, "learning_rate": 4.462083803144229e-07, "loss": 16.0156, "step": 20966 }, { "epoch": 1.3925084678222754, "grad_norm": 361.06610107421875, "learning_rate": 4.46118829811271e-07, "loss": 12.7812, "step": 20967 }, { "epoch": 1.392574882114631, "grad_norm": 237.87599182128906, "learning_rate": 4.460292857150965e-07, "loss": 17.1719, "step": 20968 }, { "epoch": 1.3926412964069868, "grad_norm": 460.120361328125, "learning_rate": 4.4593974802693636e-07, "loss": 15.9219, "step": 20969 }, { "epoch": 1.3927077106993426, "grad_norm": 105.88226318359375, "learning_rate": 4.458502167478253e-07, "loss": 13.4453, "step": 20970 }, { "epoch": 1.3927741249916983, "grad_norm": 426.3183898925781, "learning_rate": 4.457606918787995e-07, "loss": 18.125, "step": 20971 }, { "epoch": 1.392840539284054, "grad_norm": 277.5526123046875, "learning_rate": 4.4567117342089444e-07, "loss": 13.1562, "step": 20972 }, { "epoch": 1.3929069535764096, "grad_norm": 248.97772216796875, "learning_rate": 4.4558166137514553e-07, "loss": 15.875, "step": 20973 }, { "epoch": 1.3929733678687652, "grad_norm": 196.14535522460938, "learning_rate": 4.4549215574258837e-07, "loss": 17.9375, "step": 20974 }, { "epoch": 1.3930397821611211, "grad_norm": 167.35462951660156, "learning_rate": 4.4540265652425845e-07, "loss": 14.1875, "step": 20975 }, { "epoch": 1.3931061964534768, "grad_norm": 135.84188842773438, "learning_rate": 4.453131637211902e-07, "loss": 10.5, "step": 20976 }, { "epoch": 1.3931726107458324, "grad_norm": 175.849365234375, "learning_rate": 4.4522367733442e-07, "loss": 14.8516, "step": 20977 }, { "epoch": 1.3932390250381883, "grad_norm": 213.43272399902344, "learning_rate": 4.4513419736498215e-07, "loss": 13.5859, "step": 20978 }, { "epoch": 1.393305439330544, "grad_norm": 236.6050262451172, "learning_rate": 4.450447238139119e-07, "loss": 15.2031, "step": 20979 }, { "epoch": 1.3933718536228996, "grad_norm": 367.90240478515625, "learning_rate": 4.449552566822443e-07, "loss": 14.9688, "step": 20980 }, { "epoch": 1.3934382679152555, "grad_norm": 151.1769256591797, "learning_rate": 4.4486579597101425e-07, "loss": 12.7031, "step": 20981 }, { "epoch": 1.3935046822076111, "grad_norm": 221.69435119628906, "learning_rate": 4.4477634168125653e-07, "loss": 14.2812, "step": 20982 }, { "epoch": 1.3935710964999668, "grad_norm": 139.71597290039062, "learning_rate": 4.446868938140063e-07, "loss": 15.8594, "step": 20983 }, { "epoch": 1.3936375107923225, "grad_norm": 96.9849853515625, "learning_rate": 4.445974523702971e-07, "loss": 13.4375, "step": 20984 }, { "epoch": 1.393703925084678, "grad_norm": 184.17433166503906, "learning_rate": 4.4450801735116515e-07, "loss": 17.6875, "step": 20985 }, { "epoch": 1.393770339377034, "grad_norm": 261.83331298828125, "learning_rate": 4.444185887576437e-07, "loss": 17.5938, "step": 20986 }, { "epoch": 1.3938367536693896, "grad_norm": 149.2342071533203, "learning_rate": 4.443291665907677e-07, "loss": 16.2188, "step": 20987 }, { "epoch": 1.3939031679617453, "grad_norm": 368.2139892578125, "learning_rate": 4.442397508515714e-07, "loss": 15.7344, "step": 20988 }, { "epoch": 1.3939695822541012, "grad_norm": 159.72738647460938, "learning_rate": 4.4415034154108955e-07, "loss": 17.5781, "step": 20989 }, { "epoch": 1.3940359965464568, "grad_norm": 226.3389892578125, "learning_rate": 4.4406093866035534e-07, "loss": 14.6562, "step": 20990 }, { "epoch": 1.3941024108388125, "grad_norm": 458.7626953125, "learning_rate": 4.43971542210404e-07, "loss": 20.6719, "step": 20991 }, { "epoch": 1.3941688251311684, "grad_norm": 112.446044921875, "learning_rate": 4.4388215219226965e-07, "loss": 11.8438, "step": 20992 }, { "epoch": 1.394235239423524, "grad_norm": 176.71572875976562, "learning_rate": 4.43792768606985e-07, "loss": 17.5156, "step": 20993 }, { "epoch": 1.3943016537158797, "grad_norm": 302.05364990234375, "learning_rate": 4.437033914555858e-07, "loss": 16.4062, "step": 20994 }, { "epoch": 1.3943680680082353, "grad_norm": 195.5135040283203, "learning_rate": 4.436140207391045e-07, "loss": 14.3906, "step": 20995 }, { "epoch": 1.394434482300591, "grad_norm": 147.2781524658203, "learning_rate": 4.4352465645857537e-07, "loss": 16.7812, "step": 20996 }, { "epoch": 1.3945008965929468, "grad_norm": 145.4939422607422, "learning_rate": 4.4343529861503215e-07, "loss": 21.0312, "step": 20997 }, { "epoch": 1.3945673108853025, "grad_norm": 179.29119873046875, "learning_rate": 4.433459472095085e-07, "loss": 13.875, "step": 20998 }, { "epoch": 1.3946337251776582, "grad_norm": 347.4364013671875, "learning_rate": 4.4325660224303797e-07, "loss": 19.5312, "step": 20999 }, { "epoch": 1.394700139470014, "grad_norm": 220.51388549804688, "learning_rate": 4.4316726371665434e-07, "loss": 19.0312, "step": 21000 }, { "epoch": 1.3947665537623697, "grad_norm": 111.39277648925781, "learning_rate": 4.4307793163138993e-07, "loss": 12.9844, "step": 21001 }, { "epoch": 1.3948329680547253, "grad_norm": 290.53802490234375, "learning_rate": 4.429886059882797e-07, "loss": 19.2031, "step": 21002 }, { "epoch": 1.3948993823470812, "grad_norm": 119.35435485839844, "learning_rate": 4.428992867883556e-07, "loss": 13.5938, "step": 21003 }, { "epoch": 1.3949657966394369, "grad_norm": 113.51814270019531, "learning_rate": 4.428099740326514e-07, "loss": 17.6719, "step": 21004 }, { "epoch": 1.3950322109317925, "grad_norm": 491.7545166015625, "learning_rate": 4.4272066772220016e-07, "loss": 19.6875, "step": 21005 }, { "epoch": 1.3950986252241482, "grad_norm": 168.1320343017578, "learning_rate": 4.426313678580349e-07, "loss": 16.5156, "step": 21006 }, { "epoch": 1.3951650395165038, "grad_norm": 188.89393615722656, "learning_rate": 4.4254207444118854e-07, "loss": 17.4062, "step": 21007 }, { "epoch": 1.3952314538088597, "grad_norm": 198.0755615234375, "learning_rate": 4.424527874726943e-07, "loss": 15.3438, "step": 21008 }, { "epoch": 1.3952978681012154, "grad_norm": 244.4733123779297, "learning_rate": 4.4236350695358404e-07, "loss": 21.8281, "step": 21009 }, { "epoch": 1.395364282393571, "grad_norm": 193.3789520263672, "learning_rate": 4.42274232884892e-07, "loss": 13.7812, "step": 21010 }, { "epoch": 1.395430696685927, "grad_norm": 200.2269744873047, "learning_rate": 4.4218496526764946e-07, "loss": 16.2734, "step": 21011 }, { "epoch": 1.3954971109782826, "grad_norm": 154.94677734375, "learning_rate": 4.420957041028898e-07, "loss": 14.5469, "step": 21012 }, { "epoch": 1.3955635252706382, "grad_norm": 148.00071716308594, "learning_rate": 4.420064493916452e-07, "loss": 12.7031, "step": 21013 }, { "epoch": 1.395629939562994, "grad_norm": 103.79804992675781, "learning_rate": 4.419172011349486e-07, "loss": 12.7188, "step": 21014 }, { "epoch": 1.3956963538553497, "grad_norm": 123.5728988647461, "learning_rate": 4.4182795933383134e-07, "loss": 16.9531, "step": 21015 }, { "epoch": 1.3957627681477054, "grad_norm": 213.82984924316406, "learning_rate": 4.417387239893271e-07, "loss": 15.4375, "step": 21016 }, { "epoch": 1.395829182440061, "grad_norm": 137.32318115234375, "learning_rate": 4.4164949510246696e-07, "loss": 17.75, "step": 21017 }, { "epoch": 1.3958955967324167, "grad_norm": 597.8095703125, "learning_rate": 4.415602726742836e-07, "loss": 17.0156, "step": 21018 }, { "epoch": 1.3959620110247726, "grad_norm": 308.1307067871094, "learning_rate": 4.4147105670580896e-07, "loss": 16.0781, "step": 21019 }, { "epoch": 1.3960284253171282, "grad_norm": 154.0377960205078, "learning_rate": 4.4138184719807503e-07, "loss": 15.4375, "step": 21020 }, { "epoch": 1.3960948396094839, "grad_norm": 128.3463897705078, "learning_rate": 4.4129264415211374e-07, "loss": 12.7031, "step": 21021 }, { "epoch": 1.3961612539018398, "grad_norm": 424.5887756347656, "learning_rate": 4.4120344756895745e-07, "loss": 15.8281, "step": 21022 }, { "epoch": 1.3962276681941954, "grad_norm": 294.527099609375, "learning_rate": 4.411142574496367e-07, "loss": 14.2031, "step": 21023 }, { "epoch": 1.396294082486551, "grad_norm": 136.84971618652344, "learning_rate": 4.410250737951846e-07, "loss": 15.5156, "step": 21024 }, { "epoch": 1.396360496778907, "grad_norm": 137.3081512451172, "learning_rate": 4.409358966066319e-07, "loss": 16.3281, "step": 21025 }, { "epoch": 1.3964269110712626, "grad_norm": 551.3235473632812, "learning_rate": 4.4084672588501026e-07, "loss": 9.8438, "step": 21026 }, { "epoch": 1.3964933253636183, "grad_norm": 145.02554321289062, "learning_rate": 4.4075756163135123e-07, "loss": 15.5469, "step": 21027 }, { "epoch": 1.396559739655974, "grad_norm": 526.9899291992188, "learning_rate": 4.4066840384668637e-07, "loss": 16.5312, "step": 21028 }, { "epoch": 1.3966261539483296, "grad_norm": 130.1889190673828, "learning_rate": 4.405792525320469e-07, "loss": 12.6406, "step": 21029 }, { "epoch": 1.3966925682406854, "grad_norm": 222.10536193847656, "learning_rate": 4.4049010768846384e-07, "loss": 20.0312, "step": 21030 }, { "epoch": 1.396758982533041, "grad_norm": 122.0556411743164, "learning_rate": 4.404009693169691e-07, "loss": 13.4844, "step": 21031 }, { "epoch": 1.3968253968253967, "grad_norm": 314.99237060546875, "learning_rate": 4.4031183741859256e-07, "loss": 20.7812, "step": 21032 }, { "epoch": 1.3968918111177526, "grad_norm": 450.0475769042969, "learning_rate": 4.4022271199436665e-07, "loss": 11.75, "step": 21033 }, { "epoch": 1.3969582254101083, "grad_norm": 540.6591796875, "learning_rate": 4.4013359304532126e-07, "loss": 22.6562, "step": 21034 }, { "epoch": 1.397024639702464, "grad_norm": 241.007080078125, "learning_rate": 4.4004448057248767e-07, "loss": 15.1719, "step": 21035 }, { "epoch": 1.3970910539948198, "grad_norm": 163.15599060058594, "learning_rate": 4.399553745768966e-07, "loss": 15.7969, "step": 21036 }, { "epoch": 1.3971574682871755, "grad_norm": 111.37529754638672, "learning_rate": 4.3986627505957873e-07, "loss": 13.5625, "step": 21037 }, { "epoch": 1.3972238825795311, "grad_norm": 161.25631713867188, "learning_rate": 4.397771820215649e-07, "loss": 20.1875, "step": 21038 }, { "epoch": 1.3972902968718868, "grad_norm": 738.4248657226562, "learning_rate": 4.3968809546388597e-07, "loss": 16.9688, "step": 21039 }, { "epoch": 1.3973567111642424, "grad_norm": 145.1162872314453, "learning_rate": 4.3959901538757127e-07, "loss": 12.1406, "step": 21040 }, { "epoch": 1.3974231254565983, "grad_norm": 845.738525390625, "learning_rate": 4.3950994179365277e-07, "loss": 14.9375, "step": 21041 }, { "epoch": 1.397489539748954, "grad_norm": 156.38694763183594, "learning_rate": 4.394208746831596e-07, "loss": 16.3125, "step": 21042 }, { "epoch": 1.3975559540413096, "grad_norm": 232.11935424804688, "learning_rate": 4.393318140571226e-07, "loss": 16.9062, "step": 21043 }, { "epoch": 1.3976223683336655, "grad_norm": 123.5487289428711, "learning_rate": 4.392427599165719e-07, "loss": 12.5312, "step": 21044 }, { "epoch": 1.3976887826260211, "grad_norm": 128.8993377685547, "learning_rate": 4.391537122625375e-07, "loss": 13.5781, "step": 21045 }, { "epoch": 1.3977551969183768, "grad_norm": 104.53691101074219, "learning_rate": 4.3906467109604974e-07, "loss": 12.6016, "step": 21046 }, { "epoch": 1.3978216112107327, "grad_norm": 222.6005859375, "learning_rate": 4.3897563641813864e-07, "loss": 14.25, "step": 21047 }, { "epoch": 1.3978880255030883, "grad_norm": 186.391845703125, "learning_rate": 4.3888660822983314e-07, "loss": 13.25, "step": 21048 }, { "epoch": 1.397954439795444, "grad_norm": 124.1594009399414, "learning_rate": 4.3879758653216445e-07, "loss": 16.9609, "step": 21049 }, { "epoch": 1.3980208540877996, "grad_norm": 248.0199737548828, "learning_rate": 4.3870857132616146e-07, "loss": 19.3281, "step": 21050 }, { "epoch": 1.3980872683801553, "grad_norm": 209.0600128173828, "learning_rate": 4.3861956261285406e-07, "loss": 14.5781, "step": 21051 }, { "epoch": 1.3981536826725112, "grad_norm": 161.5651397705078, "learning_rate": 4.3853056039327175e-07, "loss": 11.1484, "step": 21052 }, { "epoch": 1.3982200969648668, "grad_norm": 287.82440185546875, "learning_rate": 4.3844156466844464e-07, "loss": 12.7031, "step": 21053 }, { "epoch": 1.3982865112572225, "grad_norm": 167.84239196777344, "learning_rate": 4.3835257543940086e-07, "loss": 13.5781, "step": 21054 }, { "epoch": 1.3983529255495784, "grad_norm": 883.4622192382812, "learning_rate": 4.382635927071714e-07, "loss": 16.75, "step": 21055 }, { "epoch": 1.398419339841934, "grad_norm": 1209.4312744140625, "learning_rate": 4.3817461647278433e-07, "loss": 17.2969, "step": 21056 }, { "epoch": 1.3984857541342897, "grad_norm": 202.37086486816406, "learning_rate": 4.3808564673726945e-07, "loss": 21.7656, "step": 21057 }, { "epoch": 1.3985521684266455, "grad_norm": 450.4248962402344, "learning_rate": 4.3799668350165576e-07, "loss": 14.8281, "step": 21058 }, { "epoch": 1.3986185827190012, "grad_norm": 84.94993591308594, "learning_rate": 4.379077267669723e-07, "loss": 13.0625, "step": 21059 }, { "epoch": 1.3986849970113568, "grad_norm": 370.8047790527344, "learning_rate": 4.3781877653424813e-07, "loss": 18.5312, "step": 21060 }, { "epoch": 1.3987514113037125, "grad_norm": 375.3484191894531, "learning_rate": 4.377298328045127e-07, "loss": 18.0312, "step": 21061 }, { "epoch": 1.3988178255960682, "grad_norm": 136.98324584960938, "learning_rate": 4.3764089557879345e-07, "loss": 16.0625, "step": 21062 }, { "epoch": 1.398884239888424, "grad_norm": 227.6046600341797, "learning_rate": 4.3755196485812085e-07, "loss": 13.9844, "step": 21063 }, { "epoch": 1.3989506541807797, "grad_norm": 671.3432006835938, "learning_rate": 4.3746304064352245e-07, "loss": 30.6719, "step": 21064 }, { "epoch": 1.3990170684731353, "grad_norm": 224.4740447998047, "learning_rate": 4.373741229360268e-07, "loss": 15.0938, "step": 21065 }, { "epoch": 1.3990834827654912, "grad_norm": 105.98361206054688, "learning_rate": 4.3728521173666356e-07, "loss": 14.7031, "step": 21066 }, { "epoch": 1.3991498970578469, "grad_norm": 245.9128875732422, "learning_rate": 4.3719630704646025e-07, "loss": 17.5938, "step": 21067 }, { "epoch": 1.3992163113502025, "grad_norm": 456.0240173339844, "learning_rate": 4.3710740886644546e-07, "loss": 17.3594, "step": 21068 }, { "epoch": 1.3992827256425584, "grad_norm": 224.13955688476562, "learning_rate": 4.370185171976476e-07, "loss": 13.0391, "step": 21069 }, { "epoch": 1.399349139934914, "grad_norm": 176.9337615966797, "learning_rate": 4.369296320410949e-07, "loss": 19.4375, "step": 21070 }, { "epoch": 1.3994155542272697, "grad_norm": 146.39540100097656, "learning_rate": 4.3684075339781555e-07, "loss": 11.8906, "step": 21071 }, { "epoch": 1.3994819685196254, "grad_norm": 368.68408203125, "learning_rate": 4.3675188126883797e-07, "loss": 14.7188, "step": 21072 }, { "epoch": 1.399548382811981, "grad_norm": 98.35704803466797, "learning_rate": 4.3666301565518917e-07, "loss": 16.0156, "step": 21073 }, { "epoch": 1.399614797104337, "grad_norm": 580.9225463867188, "learning_rate": 4.365741565578984e-07, "loss": 16.3906, "step": 21074 }, { "epoch": 1.3996812113966925, "grad_norm": 203.6622314453125, "learning_rate": 4.3648530397799257e-07, "loss": 13.2031, "step": 21075 }, { "epoch": 1.3997476256890482, "grad_norm": 168.82554626464844, "learning_rate": 4.3639645791649984e-07, "loss": 18.125, "step": 21076 }, { "epoch": 1.399814039981404, "grad_norm": 203.688232421875, "learning_rate": 4.3630761837444786e-07, "loss": 14.5156, "step": 21077 }, { "epoch": 1.3998804542737597, "grad_norm": 106.19052124023438, "learning_rate": 4.362187853528646e-07, "loss": 13.1719, "step": 21078 }, { "epoch": 1.3999468685661154, "grad_norm": 137.68557739257812, "learning_rate": 4.361299588527767e-07, "loss": 14.1094, "step": 21079 }, { "epoch": 1.4000132828584713, "grad_norm": 662.9556884765625, "learning_rate": 4.3604113887521297e-07, "loss": 12.5938, "step": 21080 }, { "epoch": 1.400079697150827, "grad_norm": 295.8665771484375, "learning_rate": 4.3595232542119984e-07, "loss": 10.6641, "step": 21081 }, { "epoch": 1.4001461114431826, "grad_norm": 192.65235900878906, "learning_rate": 4.358635184917649e-07, "loss": 16.7812, "step": 21082 }, { "epoch": 1.4002125257355382, "grad_norm": 211.081787109375, "learning_rate": 4.357747180879355e-07, "loss": 17.6719, "step": 21083 }, { "epoch": 1.4002789400278939, "grad_norm": 152.15016174316406, "learning_rate": 4.356859242107388e-07, "loss": 12.7812, "step": 21084 }, { "epoch": 1.4003453543202498, "grad_norm": 362.8421325683594, "learning_rate": 4.35597136861202e-07, "loss": 14.8438, "step": 21085 }, { "epoch": 1.4004117686126054, "grad_norm": 94.31854248046875, "learning_rate": 4.355083560403524e-07, "loss": 15.4062, "step": 21086 }, { "epoch": 1.400478182904961, "grad_norm": 155.21670532226562, "learning_rate": 4.3541958174921587e-07, "loss": 10.6562, "step": 21087 }, { "epoch": 1.400544597197317, "grad_norm": 161.70285034179688, "learning_rate": 4.3533081398882076e-07, "loss": 17.2188, "step": 21088 }, { "epoch": 1.4006110114896726, "grad_norm": 176.5744171142578, "learning_rate": 4.3524205276019286e-07, "loss": 15.4531, "step": 21089 }, { "epoch": 1.4006774257820283, "grad_norm": 222.63685607910156, "learning_rate": 4.351532980643593e-07, "loss": 16.9531, "step": 21090 }, { "epoch": 1.4007438400743841, "grad_norm": 274.2882995605469, "learning_rate": 4.3506454990234654e-07, "loss": 15.7656, "step": 21091 }, { "epoch": 1.4008102543667398, "grad_norm": 332.2604064941406, "learning_rate": 4.3497580827518145e-07, "loss": 14.7969, "step": 21092 }, { "epoch": 1.4008766686590954, "grad_norm": 160.20693969726562, "learning_rate": 4.348870731838903e-07, "loss": 19.0938, "step": 21093 }, { "epoch": 1.400943082951451, "grad_norm": 133.00897216796875, "learning_rate": 4.3479834462950004e-07, "loss": 16.7031, "step": 21094 }, { "epoch": 1.4010094972438067, "grad_norm": 192.5115509033203, "learning_rate": 4.3470962261303624e-07, "loss": 14.0, "step": 21095 }, { "epoch": 1.4010759115361626, "grad_norm": 246.72471618652344, "learning_rate": 4.3462090713552555e-07, "loss": 14.5625, "step": 21096 }, { "epoch": 1.4011423258285183, "grad_norm": 190.42193603515625, "learning_rate": 4.3453219819799416e-07, "loss": 15.2188, "step": 21097 }, { "epoch": 1.401208740120874, "grad_norm": 220.81788635253906, "learning_rate": 4.3444349580146834e-07, "loss": 19.75, "step": 21098 }, { "epoch": 1.4012751544132298, "grad_norm": 128.19705200195312, "learning_rate": 4.343547999469739e-07, "loss": 10.9688, "step": 21099 }, { "epoch": 1.4013415687055855, "grad_norm": 264.3682556152344, "learning_rate": 4.342661106355374e-07, "loss": 16.4531, "step": 21100 }, { "epoch": 1.4014079829979411, "grad_norm": 360.616943359375, "learning_rate": 4.3417742786818354e-07, "loss": 14.4688, "step": 21101 }, { "epoch": 1.401474397290297, "grad_norm": 136.15399169921875, "learning_rate": 4.340887516459394e-07, "loss": 15.8594, "step": 21102 }, { "epoch": 1.4015408115826526, "grad_norm": 181.85044860839844, "learning_rate": 4.340000819698305e-07, "loss": 20.0625, "step": 21103 }, { "epoch": 1.4016072258750083, "grad_norm": 163.52175903320312, "learning_rate": 4.339114188408817e-07, "loss": 20.5, "step": 21104 }, { "epoch": 1.401673640167364, "grad_norm": 379.0879821777344, "learning_rate": 4.338227622601197e-07, "loss": 13.0938, "step": 21105 }, { "epoch": 1.4017400544597196, "grad_norm": 132.42050170898438, "learning_rate": 4.3373411222856924e-07, "loss": 15.3438, "step": 21106 }, { "epoch": 1.4018064687520755, "grad_norm": 133.92686462402344, "learning_rate": 4.3364546874725617e-07, "loss": 14.9844, "step": 21107 }, { "epoch": 1.4018728830444311, "grad_norm": 342.977783203125, "learning_rate": 4.335568318172056e-07, "loss": 23.9375, "step": 21108 }, { "epoch": 1.4019392973367868, "grad_norm": 336.3099060058594, "learning_rate": 4.33468201439443e-07, "loss": 18.4844, "step": 21109 }, { "epoch": 1.4020057116291427, "grad_norm": 160.74671936035156, "learning_rate": 4.3337957761499355e-07, "loss": 15.2969, "step": 21110 }, { "epoch": 1.4020721259214983, "grad_norm": 192.4600372314453, "learning_rate": 4.332909603448828e-07, "loss": 13.5469, "step": 21111 }, { "epoch": 1.402138540213854, "grad_norm": 232.74562072753906, "learning_rate": 4.332023496301347e-07, "loss": 18.3594, "step": 21112 }, { "epoch": 1.4022049545062099, "grad_norm": 296.71978759765625, "learning_rate": 4.331137454717757e-07, "loss": 18.9688, "step": 21113 }, { "epoch": 1.4022713687985655, "grad_norm": 473.06744384765625, "learning_rate": 4.330251478708297e-07, "loss": 15.1562, "step": 21114 }, { "epoch": 1.4023377830909212, "grad_norm": 253.1996612548828, "learning_rate": 4.3293655682832174e-07, "loss": 25.1562, "step": 21115 }, { "epoch": 1.4024041973832768, "grad_norm": 144.24801635742188, "learning_rate": 4.328479723452768e-07, "loss": 15.9062, "step": 21116 }, { "epoch": 1.4024706116756325, "grad_norm": 377.223876953125, "learning_rate": 4.3275939442271983e-07, "loss": 22.875, "step": 21117 }, { "epoch": 1.4025370259679883, "grad_norm": 344.40283203125, "learning_rate": 4.3267082306167436e-07, "loss": 17.5469, "step": 21118 }, { "epoch": 1.402603440260344, "grad_norm": 113.73465728759766, "learning_rate": 4.325822582631663e-07, "loss": 15.3125, "step": 21119 }, { "epoch": 1.4026698545526997, "grad_norm": 148.9724884033203, "learning_rate": 4.324937000282193e-07, "loss": 10.9375, "step": 21120 }, { "epoch": 1.4027362688450555, "grad_norm": 295.18060302734375, "learning_rate": 4.324051483578578e-07, "loss": 13.8594, "step": 21121 }, { "epoch": 1.4028026831374112, "grad_norm": 186.31692504882812, "learning_rate": 4.3231660325310626e-07, "loss": 16.6719, "step": 21122 }, { "epoch": 1.4028690974297668, "grad_norm": 189.01625061035156, "learning_rate": 4.3222806471498895e-07, "loss": 19.6719, "step": 21123 }, { "epoch": 1.4029355117221227, "grad_norm": 322.69647216796875, "learning_rate": 4.3213953274452994e-07, "loss": 12.7812, "step": 21124 }, { "epoch": 1.4030019260144784, "grad_norm": 156.55023193359375, "learning_rate": 4.320510073427538e-07, "loss": 14.9219, "step": 21125 }, { "epoch": 1.403068340306834, "grad_norm": 1009.1892700195312, "learning_rate": 4.319624885106834e-07, "loss": 33.875, "step": 21126 }, { "epoch": 1.4031347545991897, "grad_norm": 177.67408752441406, "learning_rate": 4.318739762493441e-07, "loss": 21.4062, "step": 21127 }, { "epoch": 1.4032011688915453, "grad_norm": 319.50567626953125, "learning_rate": 4.3178547055975856e-07, "loss": 18.625, "step": 21128 }, { "epoch": 1.4032675831839012, "grad_norm": 199.2635498046875, "learning_rate": 4.316969714429511e-07, "loss": 15.375, "step": 21129 }, { "epoch": 1.4033339974762569, "grad_norm": 164.5000457763672, "learning_rate": 4.316084788999454e-07, "loss": 17.125, "step": 21130 }, { "epoch": 1.4034004117686125, "grad_norm": 173.24412536621094, "learning_rate": 4.315199929317651e-07, "loss": 19.8125, "step": 21131 }, { "epoch": 1.4034668260609684, "grad_norm": 210.05633544921875, "learning_rate": 4.3143151353943375e-07, "loss": 13.8438, "step": 21132 }, { "epoch": 1.403533240353324, "grad_norm": 2186.1865234375, "learning_rate": 4.31343040723975e-07, "loss": 16.0312, "step": 21133 }, { "epoch": 1.4035996546456797, "grad_norm": 152.1140899658203, "learning_rate": 4.3125457448641145e-07, "loss": 13.8828, "step": 21134 }, { "epoch": 1.4036660689380356, "grad_norm": 101.00900268554688, "learning_rate": 4.3116611482776777e-07, "loss": 14.6875, "step": 21135 }, { "epoch": 1.4037324832303912, "grad_norm": 165.6510009765625, "learning_rate": 4.31077661749066e-07, "loss": 16.25, "step": 21136 }, { "epoch": 1.403798897522747, "grad_norm": 169.6046600341797, "learning_rate": 4.309892152513298e-07, "loss": 15.6562, "step": 21137 }, { "epoch": 1.4038653118151025, "grad_norm": 175.007080078125, "learning_rate": 4.309007753355823e-07, "loss": 14.1562, "step": 21138 }, { "epoch": 1.4039317261074582, "grad_norm": 254.8363494873047, "learning_rate": 4.3081234200284646e-07, "loss": 15.4688, "step": 21139 }, { "epoch": 1.403998140399814, "grad_norm": 190.39999389648438, "learning_rate": 4.307239152541452e-07, "loss": 21.0156, "step": 21140 }, { "epoch": 1.4040645546921697, "grad_norm": 374.7518005371094, "learning_rate": 4.3063549509050146e-07, "loss": 20.7969, "step": 21141 }, { "epoch": 1.4041309689845254, "grad_norm": 185.5027618408203, "learning_rate": 4.3054708151293827e-07, "loss": 19.2344, "step": 21142 }, { "epoch": 1.4041973832768813, "grad_norm": 322.2471923828125, "learning_rate": 4.304586745224774e-07, "loss": 16.5938, "step": 21143 }, { "epoch": 1.404263797569237, "grad_norm": 177.80374145507812, "learning_rate": 4.3037027412014304e-07, "loss": 14.8906, "step": 21144 }, { "epoch": 1.4043302118615926, "grad_norm": 231.06430053710938, "learning_rate": 4.302818803069563e-07, "loss": 14.5, "step": 21145 }, { "epoch": 1.4043966261539484, "grad_norm": 269.845947265625, "learning_rate": 4.3019349308394036e-07, "loss": 16.8281, "step": 21146 }, { "epoch": 1.404463040446304, "grad_norm": 176.22752380371094, "learning_rate": 4.301051124521176e-07, "loss": 17.1406, "step": 21147 }, { "epoch": 1.4045294547386598, "grad_norm": 179.73214721679688, "learning_rate": 4.300167384125101e-07, "loss": 12.1094, "step": 21148 }, { "epoch": 1.4045958690310154, "grad_norm": 186.11181640625, "learning_rate": 4.2992837096614045e-07, "loss": 12.3906, "step": 21149 }, { "epoch": 1.404662283323371, "grad_norm": 308.6327209472656, "learning_rate": 4.2984001011403094e-07, "loss": 25.4219, "step": 21150 }, { "epoch": 1.404728697615727, "grad_norm": 210.70277404785156, "learning_rate": 4.2975165585720276e-07, "loss": 13.8594, "step": 21151 }, { "epoch": 1.4047951119080826, "grad_norm": 317.28033447265625, "learning_rate": 4.296633081966793e-07, "loss": 13.6406, "step": 21152 }, { "epoch": 1.4048615262004382, "grad_norm": 249.82518005371094, "learning_rate": 4.295749671334814e-07, "loss": 13.0781, "step": 21153 }, { "epoch": 1.4049279404927941, "grad_norm": 210.8832550048828, "learning_rate": 4.2948663266863137e-07, "loss": 21.8906, "step": 21154 }, { "epoch": 1.4049943547851498, "grad_norm": 270.170166015625, "learning_rate": 4.293983048031511e-07, "loss": 24.8125, "step": 21155 }, { "epoch": 1.4050607690775054, "grad_norm": 557.4520263671875, "learning_rate": 4.2930998353806213e-07, "loss": 18.7812, "step": 21156 }, { "epoch": 1.4051271833698613, "grad_norm": 426.017578125, "learning_rate": 4.292216688743863e-07, "loss": 16.2656, "step": 21157 }, { "epoch": 1.405193597662217, "grad_norm": 283.224609375, "learning_rate": 4.291333608131453e-07, "loss": 16.6562, "step": 21158 }, { "epoch": 1.4052600119545726, "grad_norm": 285.4650573730469, "learning_rate": 4.2904505935536004e-07, "loss": 21.1719, "step": 21159 }, { "epoch": 1.4053264262469283, "grad_norm": 139.08872985839844, "learning_rate": 4.2895676450205243e-07, "loss": 18.0625, "step": 21160 }, { "epoch": 1.405392840539284, "grad_norm": 251.7286834716797, "learning_rate": 4.288684762542436e-07, "loss": 19.3281, "step": 21161 }, { "epoch": 1.4054592548316398, "grad_norm": 135.94761657714844, "learning_rate": 4.287801946129549e-07, "loss": 11.5469, "step": 21162 }, { "epoch": 1.4055256691239955, "grad_norm": 133.15301513671875, "learning_rate": 4.2869191957920746e-07, "loss": 13.0781, "step": 21163 }, { "epoch": 1.4055920834163511, "grad_norm": 177.33741760253906, "learning_rate": 4.2860365115402296e-07, "loss": 10.3672, "step": 21164 }, { "epoch": 1.405658497708707, "grad_norm": 417.03729248046875, "learning_rate": 4.2851538933842114e-07, "loss": 29.25, "step": 21165 }, { "epoch": 1.4057249120010626, "grad_norm": 122.20012664794922, "learning_rate": 4.284271341334245e-07, "loss": 16.4219, "step": 21166 }, { "epoch": 1.4057913262934183, "grad_norm": 129.56663513183594, "learning_rate": 4.283388855400528e-07, "loss": 14.4219, "step": 21167 }, { "epoch": 1.4058577405857742, "grad_norm": 130.1270294189453, "learning_rate": 4.282506435593273e-07, "loss": 18.9219, "step": 21168 }, { "epoch": 1.4059241548781298, "grad_norm": 196.0495147705078, "learning_rate": 4.281624081922686e-07, "loss": 15.7188, "step": 21169 }, { "epoch": 1.4059905691704855, "grad_norm": 152.5256805419922, "learning_rate": 4.280741794398973e-07, "loss": 16.7031, "step": 21170 }, { "epoch": 1.4060569834628411, "grad_norm": 338.3847351074219, "learning_rate": 4.2798595730323426e-07, "loss": 17.5156, "step": 21171 }, { "epoch": 1.4061233977551968, "grad_norm": 453.4694519042969, "learning_rate": 4.278977417833002e-07, "loss": 20.2969, "step": 21172 }, { "epoch": 1.4061898120475527, "grad_norm": 172.0553436279297, "learning_rate": 4.2780953288111444e-07, "loss": 22.3438, "step": 21173 }, { "epoch": 1.4062562263399083, "grad_norm": 716.554931640625, "learning_rate": 4.2772133059769866e-07, "loss": 21.7656, "step": 21174 }, { "epoch": 1.406322640632264, "grad_norm": 262.7490234375, "learning_rate": 4.2763313493407226e-07, "loss": 13.5, "step": 21175 }, { "epoch": 1.4063890549246199, "grad_norm": 494.26617431640625, "learning_rate": 4.2754494589125545e-07, "loss": 13.6719, "step": 21176 }, { "epoch": 1.4064554692169755, "grad_norm": 200.87557983398438, "learning_rate": 4.2745676347026915e-07, "loss": 19.7188, "step": 21177 }, { "epoch": 1.4065218835093312, "grad_norm": 255.21485900878906, "learning_rate": 4.2736858767213256e-07, "loss": 15.5625, "step": 21178 }, { "epoch": 1.406588297801687, "grad_norm": 330.475341796875, "learning_rate": 4.27280418497866e-07, "loss": 13.5938, "step": 21179 }, { "epoch": 1.4066547120940427, "grad_norm": 164.5598602294922, "learning_rate": 4.271922559484893e-07, "loss": 15.7344, "step": 21180 }, { "epoch": 1.4067211263863983, "grad_norm": 239.53541564941406, "learning_rate": 4.2710410002502263e-07, "loss": 19.3359, "step": 21181 }, { "epoch": 1.406787540678754, "grad_norm": 183.14456176757812, "learning_rate": 4.2701595072848465e-07, "loss": 15.8906, "step": 21182 }, { "epoch": 1.4068539549711097, "grad_norm": 100.99901580810547, "learning_rate": 4.2692780805989647e-07, "loss": 12.3828, "step": 21183 }, { "epoch": 1.4069203692634655, "grad_norm": 157.33154296875, "learning_rate": 4.268396720202766e-07, "loss": 15.8438, "step": 21184 }, { "epoch": 1.4069867835558212, "grad_norm": 98.02777862548828, "learning_rate": 4.2675154261064497e-07, "loss": 13.0312, "step": 21185 }, { "epoch": 1.4070531978481768, "grad_norm": 228.1533203125, "learning_rate": 4.266634198320208e-07, "loss": 17.9062, "step": 21186 }, { "epoch": 1.4071196121405327, "grad_norm": 173.8975067138672, "learning_rate": 4.2657530368542373e-07, "loss": 15.7031, "step": 21187 }, { "epoch": 1.4071860264328884, "grad_norm": 290.8453063964844, "learning_rate": 4.264871941718727e-07, "loss": 14.2344, "step": 21188 }, { "epoch": 1.407252440725244, "grad_norm": 134.43975830078125, "learning_rate": 4.263990912923876e-07, "loss": 13.8594, "step": 21189 }, { "epoch": 1.4073188550176, "grad_norm": 168.31211853027344, "learning_rate": 4.263109950479864e-07, "loss": 15.5625, "step": 21190 }, { "epoch": 1.4073852693099556, "grad_norm": 616.607421875, "learning_rate": 4.262229054396894e-07, "loss": 14.3438, "step": 21191 }, { "epoch": 1.4074516836023112, "grad_norm": 176.8529052734375, "learning_rate": 4.261348224685146e-07, "loss": 15.3281, "step": 21192 }, { "epoch": 1.4075180978946669, "grad_norm": 1206.3287353515625, "learning_rate": 4.260467461354813e-07, "loss": 16.0, "step": 21193 }, { "epoch": 1.4075845121870225, "grad_norm": 276.8150329589844, "learning_rate": 4.259586764416083e-07, "loss": 14.0, "step": 21194 }, { "epoch": 1.4076509264793784, "grad_norm": 4144.37353515625, "learning_rate": 4.258706133879143e-07, "loss": 16.2812, "step": 21195 }, { "epoch": 1.407717340771734, "grad_norm": 109.27163696289062, "learning_rate": 4.25782556975418e-07, "loss": 15.5156, "step": 21196 }, { "epoch": 1.4077837550640897, "grad_norm": 344.3749694824219, "learning_rate": 4.2569450720513834e-07, "loss": 11.625, "step": 21197 }, { "epoch": 1.4078501693564456, "grad_norm": 168.75750732421875, "learning_rate": 4.2560646407809273e-07, "loss": 14.9375, "step": 21198 }, { "epoch": 1.4079165836488012, "grad_norm": 199.72804260253906, "learning_rate": 4.25518427595301e-07, "loss": 13.4531, "step": 21199 }, { "epoch": 1.4079829979411569, "grad_norm": 224.07542419433594, "learning_rate": 4.2543039775778053e-07, "loss": 14.8281, "step": 21200 }, { "epoch": 1.4080494122335128, "grad_norm": 542.6414184570312, "learning_rate": 4.2534237456654985e-07, "loss": 22.2031, "step": 21201 }, { "epoch": 1.4081158265258684, "grad_norm": 276.2975158691406, "learning_rate": 4.252543580226273e-07, "loss": 13.8516, "step": 21202 }, { "epoch": 1.408182240818224, "grad_norm": 134.4900665283203, "learning_rate": 4.251663481270312e-07, "loss": 14.6094, "step": 21203 }, { "epoch": 1.4082486551105797, "grad_norm": 159.9947509765625, "learning_rate": 4.2507834488077865e-07, "loss": 13.1875, "step": 21204 }, { "epoch": 1.4083150694029354, "grad_norm": 259.1202087402344, "learning_rate": 4.24990348284889e-07, "loss": 15.0938, "step": 21205 }, { "epoch": 1.4083814836952913, "grad_norm": 175.15455627441406, "learning_rate": 4.24902358340379e-07, "loss": 17.5156, "step": 21206 }, { "epoch": 1.408447897987647, "grad_norm": 90.92752075195312, "learning_rate": 4.24814375048267e-07, "loss": 13.6016, "step": 21207 }, { "epoch": 1.4085143122800026, "grad_norm": 437.9139709472656, "learning_rate": 4.247263984095706e-07, "loss": 19.8281, "step": 21208 }, { "epoch": 1.4085807265723584, "grad_norm": 162.31385803222656, "learning_rate": 4.246384284253075e-07, "loss": 17.9219, "step": 21209 }, { "epoch": 1.408647140864714, "grad_norm": 326.68096923828125, "learning_rate": 4.245504650964953e-07, "loss": 17.5312, "step": 21210 }, { "epoch": 1.4087135551570698, "grad_norm": 120.56192016601562, "learning_rate": 4.2446250842415177e-07, "loss": 10.2031, "step": 21211 }, { "epoch": 1.4087799694494256, "grad_norm": 179.64004516601562, "learning_rate": 4.2437455840929336e-07, "loss": 17.0156, "step": 21212 }, { "epoch": 1.4088463837417813, "grad_norm": 199.66519165039062, "learning_rate": 4.242866150529386e-07, "loss": 12.125, "step": 21213 }, { "epoch": 1.408912798034137, "grad_norm": 207.03965759277344, "learning_rate": 4.241986783561046e-07, "loss": 15.7031, "step": 21214 }, { "epoch": 1.4089792123264926, "grad_norm": 221.45509338378906, "learning_rate": 4.241107483198075e-07, "loss": 13.4688, "step": 21215 }, { "epoch": 1.4090456266188482, "grad_norm": 391.5167236328125, "learning_rate": 4.2402282494506603e-07, "loss": 19.3281, "step": 21216 }, { "epoch": 1.4091120409112041, "grad_norm": 249.7395782470703, "learning_rate": 4.23934908232896e-07, "loss": 21.2188, "step": 21217 }, { "epoch": 1.4091784552035598, "grad_norm": 129.84556579589844, "learning_rate": 4.238469981843147e-07, "loss": 15.8906, "step": 21218 }, { "epoch": 1.4092448694959154, "grad_norm": 223.22328186035156, "learning_rate": 4.237590948003391e-07, "loss": 13.6328, "step": 21219 }, { "epoch": 1.4093112837882713, "grad_norm": 290.9249267578125, "learning_rate": 4.236711980819865e-07, "loss": 17.8906, "step": 21220 }, { "epoch": 1.409377698080627, "grad_norm": 285.3485412597656, "learning_rate": 4.235833080302723e-07, "loss": 17.3906, "step": 21221 }, { "epoch": 1.4094441123729826, "grad_norm": 101.32498931884766, "learning_rate": 4.234954246462147e-07, "loss": 13.2344, "step": 21222 }, { "epoch": 1.4095105266653385, "grad_norm": 132.09341430664062, "learning_rate": 4.2340754793082933e-07, "loss": 16.0625, "step": 21223 }, { "epoch": 1.4095769409576941, "grad_norm": 308.4870910644531, "learning_rate": 4.233196778851329e-07, "loss": 16.0312, "step": 21224 }, { "epoch": 1.4096433552500498, "grad_norm": 125.85796356201172, "learning_rate": 4.23231814510142e-07, "loss": 12.1406, "step": 21225 }, { "epoch": 1.4097097695424055, "grad_norm": 172.47047424316406, "learning_rate": 4.2314395780687286e-07, "loss": 14.7031, "step": 21226 }, { "epoch": 1.409776183834761, "grad_norm": 261.6118469238281, "learning_rate": 4.230561077763417e-07, "loss": 15.5625, "step": 21227 }, { "epoch": 1.409842598127117, "grad_norm": 94.14501190185547, "learning_rate": 4.2296826441956526e-07, "loss": 13.4375, "step": 21228 }, { "epoch": 1.4099090124194726, "grad_norm": 154.6277618408203, "learning_rate": 4.228804277375584e-07, "loss": 18.875, "step": 21229 }, { "epoch": 1.4099754267118283, "grad_norm": 126.27909851074219, "learning_rate": 4.2279259773133866e-07, "loss": 9.625, "step": 21230 }, { "epoch": 1.4100418410041842, "grad_norm": 147.8565216064453, "learning_rate": 4.22704774401921e-07, "loss": 12.8594, "step": 21231 }, { "epoch": 1.4101082552965398, "grad_norm": 153.8895263671875, "learning_rate": 4.2261695775032167e-07, "loss": 12.625, "step": 21232 }, { "epoch": 1.4101746695888955, "grad_norm": 180.38351440429688, "learning_rate": 4.2252914777755633e-07, "loss": 15.9844, "step": 21233 }, { "epoch": 1.4102410838812514, "grad_norm": 831.3977661132812, "learning_rate": 4.22441344484641e-07, "loss": 20.8047, "step": 21234 }, { "epoch": 1.410307498173607, "grad_norm": 261.9345703125, "learning_rate": 4.223535478725909e-07, "loss": 20.8438, "step": 21235 }, { "epoch": 1.4103739124659627, "grad_norm": 227.52334594726562, "learning_rate": 4.2226575794242246e-07, "loss": 12.7969, "step": 21236 }, { "epoch": 1.4104403267583183, "grad_norm": 121.85443878173828, "learning_rate": 4.221779746951497e-07, "loss": 14.6875, "step": 21237 }, { "epoch": 1.410506741050674, "grad_norm": 135.88841247558594, "learning_rate": 4.220901981317898e-07, "loss": 14.75, "step": 21238 }, { "epoch": 1.4105731553430298, "grad_norm": 308.4726257324219, "learning_rate": 4.2200242825335683e-07, "loss": 14.7344, "step": 21239 }, { "epoch": 1.4106395696353855, "grad_norm": 755.3087768554688, "learning_rate": 4.219146650608665e-07, "loss": 14.9062, "step": 21240 }, { "epoch": 1.4107059839277412, "grad_norm": 185.8655548095703, "learning_rate": 4.218269085553339e-07, "loss": 19.9375, "step": 21241 }, { "epoch": 1.410772398220097, "grad_norm": 220.6723175048828, "learning_rate": 4.217391587377747e-07, "loss": 13.5938, "step": 21242 }, { "epoch": 1.4108388125124527, "grad_norm": 387.45819091796875, "learning_rate": 4.216514156092027e-07, "loss": 14.3281, "step": 21243 }, { "epoch": 1.4109052268048083, "grad_norm": 215.56007385253906, "learning_rate": 4.215636791706343e-07, "loss": 15.0391, "step": 21244 }, { "epoch": 1.4109716410971642, "grad_norm": 171.37435913085938, "learning_rate": 4.214759494230834e-07, "loss": 13.4219, "step": 21245 }, { "epoch": 1.4110380553895199, "grad_norm": 116.8936767578125, "learning_rate": 4.213882263675651e-07, "loss": 16.625, "step": 21246 }, { "epoch": 1.4111044696818755, "grad_norm": 239.14695739746094, "learning_rate": 4.2130051000509415e-07, "loss": 13.5156, "step": 21247 }, { "epoch": 1.4111708839742312, "grad_norm": 256.6103820800781, "learning_rate": 4.2121280033668526e-07, "loss": 15.8594, "step": 21248 }, { "epoch": 1.4112372982665868, "grad_norm": 198.29371643066406, "learning_rate": 4.211250973633529e-07, "loss": 15.1719, "step": 21249 }, { "epoch": 1.4113037125589427, "grad_norm": 256.2965087890625, "learning_rate": 4.2103740108611166e-07, "loss": 20.75, "step": 21250 }, { "epoch": 1.4113701268512984, "grad_norm": 187.74057006835938, "learning_rate": 4.209497115059758e-07, "loss": 13.5781, "step": 21251 }, { "epoch": 1.411436541143654, "grad_norm": 185.22694396972656, "learning_rate": 4.208620286239598e-07, "loss": 14.6797, "step": 21252 }, { "epoch": 1.41150295543601, "grad_norm": 309.970947265625, "learning_rate": 4.207743524410784e-07, "loss": 16.1406, "step": 21253 }, { "epoch": 1.4115693697283656, "grad_norm": 130.19776916503906, "learning_rate": 4.2068668295834443e-07, "loss": 14.4531, "step": 21254 }, { "epoch": 1.4116357840207212, "grad_norm": 206.2928924560547, "learning_rate": 4.2059902017677364e-07, "loss": 12.0781, "step": 21255 }, { "epoch": 1.411702198313077, "grad_norm": 252.76356506347656, "learning_rate": 4.2051136409737895e-07, "loss": 14.0938, "step": 21256 }, { "epoch": 1.4117686126054327, "grad_norm": 222.953125, "learning_rate": 4.2042371472117457e-07, "loss": 17.75, "step": 21257 }, { "epoch": 1.4118350268977884, "grad_norm": 131.4442901611328, "learning_rate": 4.2033607204917456e-07, "loss": 15.875, "step": 21258 }, { "epoch": 1.411901441190144, "grad_norm": 549.8538818359375, "learning_rate": 4.202484360823926e-07, "loss": 15.5625, "step": 21259 }, { "epoch": 1.4119678554824997, "grad_norm": 148.7657470703125, "learning_rate": 4.201608068218423e-07, "loss": 17.75, "step": 21260 }, { "epoch": 1.4120342697748556, "grad_norm": 267.68359375, "learning_rate": 4.2007318426853786e-07, "loss": 9.9375, "step": 21261 }, { "epoch": 1.4121006840672112, "grad_norm": 600.311767578125, "learning_rate": 4.1998556842349176e-07, "loss": 13.2188, "step": 21262 }, { "epoch": 1.4121670983595669, "grad_norm": 174.44964599609375, "learning_rate": 4.198979592877188e-07, "loss": 18.8125, "step": 21263 }, { "epoch": 1.4122335126519228, "grad_norm": 253.67445373535156, "learning_rate": 4.1981035686223144e-07, "loss": 17.8047, "step": 21264 }, { "epoch": 1.4122999269442784, "grad_norm": 84.37999725341797, "learning_rate": 4.1972276114804327e-07, "loss": 9.3359, "step": 21265 }, { "epoch": 1.412366341236634, "grad_norm": 2958.522216796875, "learning_rate": 4.1963517214616763e-07, "loss": 13.5156, "step": 21266 }, { "epoch": 1.41243275552899, "grad_norm": 174.75955200195312, "learning_rate": 4.1954758985761796e-07, "loss": 15.8594, "step": 21267 }, { "epoch": 1.4124991698213456, "grad_norm": 116.27259826660156, "learning_rate": 4.194600142834064e-07, "loss": 13.8594, "step": 21268 }, { "epoch": 1.4125655841137013, "grad_norm": 150.73435974121094, "learning_rate": 4.193724454245473e-07, "loss": 18.5625, "step": 21269 }, { "epoch": 1.412631998406057, "grad_norm": 340.5179748535156, "learning_rate": 4.1928488328205256e-07, "loss": 21.4844, "step": 21270 }, { "epoch": 1.4126984126984126, "grad_norm": 127.81168365478516, "learning_rate": 4.191973278569355e-07, "loss": 14.6484, "step": 21271 }, { "epoch": 1.4127648269907684, "grad_norm": 432.3318786621094, "learning_rate": 4.1910977915020884e-07, "loss": 25.5312, "step": 21272 }, { "epoch": 1.412831241283124, "grad_norm": 199.37667846679688, "learning_rate": 4.190222371628853e-07, "loss": 15.0938, "step": 21273 }, { "epoch": 1.4128976555754797, "grad_norm": 141.42251586914062, "learning_rate": 4.1893470189597744e-07, "loss": 13.1406, "step": 21274 }, { "epoch": 1.4129640698678356, "grad_norm": 105.87737274169922, "learning_rate": 4.1884717335049835e-07, "loss": 18.0156, "step": 21275 }, { "epoch": 1.4130304841601913, "grad_norm": 99.79940032958984, "learning_rate": 4.187596515274593e-07, "loss": 12.4375, "step": 21276 }, { "epoch": 1.413096898452547, "grad_norm": 1201.324951171875, "learning_rate": 4.1867213642787414e-07, "loss": 11.7969, "step": 21277 }, { "epoch": 1.4131633127449028, "grad_norm": 210.0127410888672, "learning_rate": 4.185846280527541e-07, "loss": 14.4531, "step": 21278 }, { "epoch": 1.4132297270372585, "grad_norm": 133.94143676757812, "learning_rate": 4.1849712640311187e-07, "loss": 14.7812, "step": 21279 }, { "epoch": 1.4132961413296141, "grad_norm": 120.8567886352539, "learning_rate": 4.1840963147995956e-07, "loss": 15.7969, "step": 21280 }, { "epoch": 1.4133625556219698, "grad_norm": 88.29932403564453, "learning_rate": 4.1832214328430925e-07, "loss": 14.4688, "step": 21281 }, { "epoch": 1.4134289699143254, "grad_norm": 212.49855041503906, "learning_rate": 4.1823466181717295e-07, "loss": 18.7031, "step": 21282 }, { "epoch": 1.4134953842066813, "grad_norm": 239.3525390625, "learning_rate": 4.18147187079563e-07, "loss": 14.1094, "step": 21283 }, { "epoch": 1.413561798499037, "grad_norm": 120.87491607666016, "learning_rate": 4.1805971907249047e-07, "loss": 11.3594, "step": 21284 }, { "epoch": 1.4136282127913926, "grad_norm": 168.96730041503906, "learning_rate": 4.179722577969672e-07, "loss": 16.4531, "step": 21285 }, { "epoch": 1.4136946270837485, "grad_norm": 158.15472412109375, "learning_rate": 4.178848032540061e-07, "loss": 14.8438, "step": 21286 }, { "epoch": 1.4137610413761041, "grad_norm": 240.5870361328125, "learning_rate": 4.1779735544461737e-07, "loss": 14.2891, "step": 21287 }, { "epoch": 1.4138274556684598, "grad_norm": 109.70452880859375, "learning_rate": 4.1770991436981316e-07, "loss": 10.125, "step": 21288 }, { "epoch": 1.4138938699608157, "grad_norm": 114.5600357055664, "learning_rate": 4.1762248003060494e-07, "loss": 11.75, "step": 21289 }, { "epoch": 1.4139602842531713, "grad_norm": 119.18566131591797, "learning_rate": 4.175350524280041e-07, "loss": 18.2812, "step": 21290 }, { "epoch": 1.414026698545527, "grad_norm": 1129.6929931640625, "learning_rate": 4.1744763156302185e-07, "loss": 29.8125, "step": 21291 }, { "epoch": 1.4140931128378826, "grad_norm": 116.44203186035156, "learning_rate": 4.173602174366698e-07, "loss": 15.3281, "step": 21292 }, { "epoch": 1.4141595271302383, "grad_norm": 201.4773712158203, "learning_rate": 4.1727281004995817e-07, "loss": 19.0469, "step": 21293 }, { "epoch": 1.4142259414225942, "grad_norm": 163.05941772460938, "learning_rate": 4.1718540940389926e-07, "loss": 12.5, "step": 21294 }, { "epoch": 1.4142923557149498, "grad_norm": 212.21253967285156, "learning_rate": 4.170980154995032e-07, "loss": 13.6406, "step": 21295 }, { "epoch": 1.4143587700073055, "grad_norm": 279.5554504394531, "learning_rate": 4.170106283377811e-07, "loss": 22.7031, "step": 21296 }, { "epoch": 1.4144251842996614, "grad_norm": 212.57156372070312, "learning_rate": 4.1692324791974387e-07, "loss": 15.5781, "step": 21297 }, { "epoch": 1.414491598592017, "grad_norm": 156.91677856445312, "learning_rate": 4.1683587424640234e-07, "loss": 15.5312, "step": 21298 }, { "epoch": 1.4145580128843727, "grad_norm": 233.03482055664062, "learning_rate": 4.167485073187672e-07, "loss": 23.4219, "step": 21299 }, { "epoch": 1.4146244271767285, "grad_norm": 371.42877197265625, "learning_rate": 4.1666114713784927e-07, "loss": 15.125, "step": 21300 }, { "epoch": 1.4146908414690842, "grad_norm": 199.41676330566406, "learning_rate": 4.1657379370465806e-07, "loss": 19.2188, "step": 21301 }, { "epoch": 1.4147572557614398, "grad_norm": 336.1007385253906, "learning_rate": 4.164864470202056e-07, "loss": 19.9062, "step": 21302 }, { "epoch": 1.4148236700537955, "grad_norm": 1037.357421875, "learning_rate": 4.163991070855011e-07, "loss": 19.7656, "step": 21303 }, { "epoch": 1.4148900843461512, "grad_norm": 180.6887969970703, "learning_rate": 4.1631177390155516e-07, "loss": 18.75, "step": 21304 }, { "epoch": 1.414956498638507, "grad_norm": 212.57249450683594, "learning_rate": 4.16224447469378e-07, "loss": 15.4062, "step": 21305 }, { "epoch": 1.4150229129308627, "grad_norm": 125.06675720214844, "learning_rate": 4.161371277899802e-07, "loss": 18.6875, "step": 21306 }, { "epoch": 1.4150893272232183, "grad_norm": 281.46124267578125, "learning_rate": 4.1604981486437074e-07, "loss": 14.6094, "step": 21307 }, { "epoch": 1.4151557415155742, "grad_norm": 637.5074462890625, "learning_rate": 4.1596250869356097e-07, "loss": 23.0, "step": 21308 }, { "epoch": 1.4152221558079299, "grad_norm": 207.40725708007812, "learning_rate": 4.158752092785598e-07, "loss": 13.1406, "step": 21309 }, { "epoch": 1.4152885701002855, "grad_norm": 299.516357421875, "learning_rate": 4.1578791662037736e-07, "loss": 12.375, "step": 21310 }, { "epoch": 1.4153549843926414, "grad_norm": 211.97109985351562, "learning_rate": 4.157006307200235e-07, "loss": 16.4062, "step": 21311 }, { "epoch": 1.415421398684997, "grad_norm": 211.71487426757812, "learning_rate": 4.1561335157850776e-07, "loss": 14.4531, "step": 21312 }, { "epoch": 1.4154878129773527, "grad_norm": 179.72113037109375, "learning_rate": 4.155260791968398e-07, "loss": 12.75, "step": 21313 }, { "epoch": 1.4155542272697084, "grad_norm": 298.6043395996094, "learning_rate": 4.154388135760294e-07, "loss": 16.1875, "step": 21314 }, { "epoch": 1.415620641562064, "grad_norm": 200.81454467773438, "learning_rate": 4.1535155471708517e-07, "loss": 18.2344, "step": 21315 }, { "epoch": 1.41568705585442, "grad_norm": 1019.7769775390625, "learning_rate": 4.152643026210176e-07, "loss": 15.7344, "step": 21316 }, { "epoch": 1.4157534701467756, "grad_norm": 175.05258178710938, "learning_rate": 4.151770572888351e-07, "loss": 16.3438, "step": 21317 }, { "epoch": 1.4158198844391312, "grad_norm": 148.3668212890625, "learning_rate": 4.1508981872154715e-07, "loss": 17.9531, "step": 21318 }, { "epoch": 1.415886298731487, "grad_norm": 274.9779968261719, "learning_rate": 4.15002586920163e-07, "loss": 19.5625, "step": 21319 }, { "epoch": 1.4159527130238427, "grad_norm": 167.21434020996094, "learning_rate": 4.1491536188569144e-07, "loss": 14.125, "step": 21320 }, { "epoch": 1.4160191273161984, "grad_norm": 155.38526916503906, "learning_rate": 4.1482814361914175e-07, "loss": 22.0938, "step": 21321 }, { "epoch": 1.4160855416085543, "grad_norm": 193.74859619140625, "learning_rate": 4.147409321215225e-07, "loss": 21.3438, "step": 21322 }, { "epoch": 1.41615195590091, "grad_norm": 93.57160186767578, "learning_rate": 4.1465372739384284e-07, "loss": 12.3359, "step": 21323 }, { "epoch": 1.4162183701932656, "grad_norm": 182.29757690429688, "learning_rate": 4.1456652943711124e-07, "loss": 15.0, "step": 21324 }, { "epoch": 1.4162847844856212, "grad_norm": 299.0241394042969, "learning_rate": 4.1447933825233686e-07, "loss": 16.3594, "step": 21325 }, { "epoch": 1.4163511987779769, "grad_norm": 145.56808471679688, "learning_rate": 4.1439215384052707e-07, "loss": 16.7344, "step": 21326 }, { "epoch": 1.4164176130703328, "grad_norm": 145.312255859375, "learning_rate": 4.143049762026919e-07, "loss": 13.8594, "step": 21327 }, { "epoch": 1.4164840273626884, "grad_norm": 119.45655822753906, "learning_rate": 4.142178053398386e-07, "loss": 12.625, "step": 21328 }, { "epoch": 1.416550441655044, "grad_norm": 420.0638122558594, "learning_rate": 4.1413064125297614e-07, "loss": 10.7188, "step": 21329 }, { "epoch": 1.4166168559474, "grad_norm": 148.40597534179688, "learning_rate": 4.140434839431125e-07, "loss": 15.8125, "step": 21330 }, { "epoch": 1.4166832702397556, "grad_norm": 197.1119842529297, "learning_rate": 4.139563334112562e-07, "loss": 13.9844, "step": 21331 }, { "epoch": 1.4167496845321113, "grad_norm": 339.5854797363281, "learning_rate": 4.1386918965841455e-07, "loss": 19.75, "step": 21332 }, { "epoch": 1.4168160988244671, "grad_norm": 272.079833984375, "learning_rate": 4.1378205268559675e-07, "loss": 13.4219, "step": 21333 }, { "epoch": 1.4168825131168228, "grad_norm": 135.93142700195312, "learning_rate": 4.136949224938098e-07, "loss": 14.7188, "step": 21334 }, { "epoch": 1.4169489274091784, "grad_norm": 336.5708312988281, "learning_rate": 4.1360779908406195e-07, "loss": 16.1875, "step": 21335 }, { "epoch": 1.417015341701534, "grad_norm": 244.91415405273438, "learning_rate": 4.1352068245736094e-07, "loss": 16.2969, "step": 21336 }, { "epoch": 1.4170817559938897, "grad_norm": 731.1780395507812, "learning_rate": 4.1343357261471447e-07, "loss": 14.6406, "step": 21337 }, { "epoch": 1.4171481702862456, "grad_norm": 163.83746337890625, "learning_rate": 4.1334646955713013e-07, "loss": 17.5938, "step": 21338 }, { "epoch": 1.4172145845786013, "grad_norm": 341.6339416503906, "learning_rate": 4.13259373285616e-07, "loss": 15.0938, "step": 21339 }, { "epoch": 1.417280998870957, "grad_norm": 156.31436157226562, "learning_rate": 4.131722838011783e-07, "loss": 14.7344, "step": 21340 }, { "epoch": 1.4173474131633128, "grad_norm": 280.40618896484375, "learning_rate": 4.1308520110482606e-07, "loss": 18.6562, "step": 21341 }, { "epoch": 1.4174138274556685, "grad_norm": 281.611572265625, "learning_rate": 4.129981251975654e-07, "loss": 19.5, "step": 21342 }, { "epoch": 1.4174802417480241, "grad_norm": 118.97378540039062, "learning_rate": 4.129110560804039e-07, "loss": 15.5469, "step": 21343 }, { "epoch": 1.41754665604038, "grad_norm": 217.3262939453125, "learning_rate": 4.1282399375434874e-07, "loss": 17.4062, "step": 21344 }, { "epoch": 1.4176130703327356, "grad_norm": 1675.461669921875, "learning_rate": 4.12736938220407e-07, "loss": 22.625, "step": 21345 }, { "epoch": 1.4176794846250913, "grad_norm": 233.73309326171875, "learning_rate": 4.126498894795859e-07, "loss": 17.5469, "step": 21346 }, { "epoch": 1.417745898917447, "grad_norm": 140.28573608398438, "learning_rate": 4.125628475328924e-07, "loss": 12.6719, "step": 21347 }, { "epoch": 1.4178123132098028, "grad_norm": 265.4517517089844, "learning_rate": 4.124758123813328e-07, "loss": 22.8594, "step": 21348 }, { "epoch": 1.4178787275021585, "grad_norm": 139.09129333496094, "learning_rate": 4.1238878402591427e-07, "loss": 14.0938, "step": 21349 }, { "epoch": 1.4179451417945141, "grad_norm": 187.4426727294922, "learning_rate": 4.123017624676435e-07, "loss": 15.75, "step": 21350 }, { "epoch": 1.4180115560868698, "grad_norm": 190.40115356445312, "learning_rate": 4.1221474770752696e-07, "loss": 14.1719, "step": 21351 }, { "epoch": 1.4180779703792257, "grad_norm": 169.0215606689453, "learning_rate": 4.121277397465712e-07, "loss": 18.625, "step": 21352 }, { "epoch": 1.4181443846715813, "grad_norm": 371.2151794433594, "learning_rate": 4.1204073858578327e-07, "loss": 20.7188, "step": 21353 }, { "epoch": 1.418210798963937, "grad_norm": 733.2516479492188, "learning_rate": 4.1195374422616834e-07, "loss": 21.0469, "step": 21354 }, { "epoch": 1.4182772132562929, "grad_norm": 413.1673889160156, "learning_rate": 4.1186675666873403e-07, "loss": 18.9688, "step": 21355 }, { "epoch": 1.4183436275486485, "grad_norm": 415.8843994140625, "learning_rate": 4.1177977591448565e-07, "loss": 22.5156, "step": 21356 }, { "epoch": 1.4184100418410042, "grad_norm": 117.96510314941406, "learning_rate": 4.116928019644296e-07, "loss": 12.9531, "step": 21357 }, { "epoch": 1.4184764561333598, "grad_norm": 95.407958984375, "learning_rate": 4.1160583481957203e-07, "loss": 13.4141, "step": 21358 }, { "epoch": 1.4185428704257157, "grad_norm": 219.4658660888672, "learning_rate": 4.115188744809187e-07, "loss": 16.4844, "step": 21359 }, { "epoch": 1.4186092847180714, "grad_norm": 142.13412475585938, "learning_rate": 4.1143192094947587e-07, "loss": 14.4062, "step": 21360 }, { "epoch": 1.418675699010427, "grad_norm": 384.75677490234375, "learning_rate": 4.1134497422624914e-07, "loss": 17.5, "step": 21361 }, { "epoch": 1.4187421133027827, "grad_norm": 180.85052490234375, "learning_rate": 4.1125803431224436e-07, "loss": 18.6094, "step": 21362 }, { "epoch": 1.4188085275951385, "grad_norm": 374.3603820800781, "learning_rate": 4.111711012084671e-07, "loss": 23.1562, "step": 21363 }, { "epoch": 1.4188749418874942, "grad_norm": 148.45655822753906, "learning_rate": 4.110841749159233e-07, "loss": 17.375, "step": 21364 }, { "epoch": 1.4189413561798498, "grad_norm": 181.69007873535156, "learning_rate": 4.1099725543561757e-07, "loss": 15.0625, "step": 21365 }, { "epoch": 1.4190077704722057, "grad_norm": 306.0756530761719, "learning_rate": 4.109103427685566e-07, "loss": 25.4375, "step": 21366 }, { "epoch": 1.4190741847645614, "grad_norm": 183.91014099121094, "learning_rate": 4.1082343691574473e-07, "loss": 16.5156, "step": 21367 }, { "epoch": 1.419140599056917, "grad_norm": 240.4215545654297, "learning_rate": 4.1073653787818763e-07, "loss": 18.5781, "step": 21368 }, { "epoch": 1.4192070133492727, "grad_norm": 420.4237060546875, "learning_rate": 4.1064964565689054e-07, "loss": 17.0781, "step": 21369 }, { "epoch": 1.4192734276416286, "grad_norm": 267.1736755371094, "learning_rate": 4.1056276025285876e-07, "loss": 12.8906, "step": 21370 }, { "epoch": 1.4193398419339842, "grad_norm": 245.69268798828125, "learning_rate": 4.1047588166709646e-07, "loss": 15.3906, "step": 21371 }, { "epoch": 1.4194062562263399, "grad_norm": 230.10157775878906, "learning_rate": 4.1038900990060986e-07, "loss": 15.0156, "step": 21372 }, { "epoch": 1.4194726705186955, "grad_norm": 239.37445068359375, "learning_rate": 4.1030214495440285e-07, "loss": 16.5, "step": 21373 }, { "epoch": 1.4195390848110514, "grad_norm": 297.04644775390625, "learning_rate": 4.102152868294806e-07, "loss": 16.7031, "step": 21374 }, { "epoch": 1.419605499103407, "grad_norm": 408.08795166015625, "learning_rate": 4.1012843552684784e-07, "loss": 20.4062, "step": 21375 }, { "epoch": 1.4196719133957627, "grad_norm": 602.2483520507812, "learning_rate": 4.100415910475091e-07, "loss": 24.7031, "step": 21376 }, { "epoch": 1.4197383276881186, "grad_norm": 221.49781799316406, "learning_rate": 4.099547533924692e-07, "loss": 23.5156, "step": 21377 }, { "epoch": 1.4198047419804742, "grad_norm": 264.1855163574219, "learning_rate": 4.0986792256273273e-07, "loss": 16.0312, "step": 21378 }, { "epoch": 1.41987115627283, "grad_norm": 174.06832885742188, "learning_rate": 4.097810985593032e-07, "loss": 15.9375, "step": 21379 }, { "epoch": 1.4199375705651855, "grad_norm": 113.14561462402344, "learning_rate": 4.096942813831863e-07, "loss": 10.0156, "step": 21380 }, { "epoch": 1.4200039848575414, "grad_norm": 238.8658905029297, "learning_rate": 4.096074710353852e-07, "loss": 20.6875, "step": 21381 }, { "epoch": 1.420070399149897, "grad_norm": 236.16148376464844, "learning_rate": 4.095206675169043e-07, "loss": 13.3594, "step": 21382 }, { "epoch": 1.4201368134422527, "grad_norm": 296.353515625, "learning_rate": 4.0943387082874793e-07, "loss": 19.0938, "step": 21383 }, { "epoch": 1.4202032277346084, "grad_norm": 378.2975769042969, "learning_rate": 4.0934708097192006e-07, "loss": 26.2812, "step": 21384 }, { "epoch": 1.4202696420269643, "grad_norm": 420.9532470703125, "learning_rate": 4.092602979474244e-07, "loss": 22.5781, "step": 21385 }, { "epoch": 1.42033605631932, "grad_norm": 146.302978515625, "learning_rate": 4.091735217562655e-07, "loss": 13.4531, "step": 21386 }, { "epoch": 1.4204024706116756, "grad_norm": 152.34251403808594, "learning_rate": 4.090867523994459e-07, "loss": 16.5156, "step": 21387 }, { "epoch": 1.4204688849040314, "grad_norm": 262.9516296386719, "learning_rate": 4.089999898779706e-07, "loss": 23.0, "step": 21388 }, { "epoch": 1.420535299196387, "grad_norm": 221.18125915527344, "learning_rate": 4.089132341928423e-07, "loss": 14.4688, "step": 21389 }, { "epoch": 1.4206017134887428, "grad_norm": 196.26376342773438, "learning_rate": 4.088264853450649e-07, "loss": 15.3828, "step": 21390 }, { "epoch": 1.4206681277810984, "grad_norm": 189.71632385253906, "learning_rate": 4.0873974333564185e-07, "loss": 18.2656, "step": 21391 }, { "epoch": 1.4207345420734543, "grad_norm": 541.6692504882812, "learning_rate": 4.086530081655768e-07, "loss": 22.7344, "step": 21392 }, { "epoch": 1.42080095636581, "grad_norm": 128.08914184570312, "learning_rate": 4.085662798358721e-07, "loss": 19.6719, "step": 21393 }, { "epoch": 1.4208673706581656, "grad_norm": 192.5945281982422, "learning_rate": 4.084795583475323e-07, "loss": 12.6094, "step": 21394 }, { "epoch": 1.4209337849505213, "grad_norm": 285.7419128417969, "learning_rate": 4.083928437015596e-07, "loss": 14.5938, "step": 21395 }, { "epoch": 1.4210001992428771, "grad_norm": 106.68849182128906, "learning_rate": 4.083061358989569e-07, "loss": 14.9531, "step": 21396 }, { "epoch": 1.4210666135352328, "grad_norm": 298.21246337890625, "learning_rate": 4.082194349407284e-07, "loss": 12.8125, "step": 21397 }, { "epoch": 1.4211330278275884, "grad_norm": 142.74734497070312, "learning_rate": 4.0813274082787584e-07, "loss": 12.7031, "step": 21398 }, { "epoch": 1.4211994421199443, "grad_norm": 230.05014038085938, "learning_rate": 4.080460535614025e-07, "loss": 16.0469, "step": 21399 }, { "epoch": 1.4212658564123, "grad_norm": 166.20071411132812, "learning_rate": 4.0795937314231113e-07, "loss": 19.9062, "step": 21400 }, { "epoch": 1.4213322707046556, "grad_norm": 1119.4405517578125, "learning_rate": 4.078726995716042e-07, "loss": 10.8047, "step": 21401 }, { "epoch": 1.4213986849970113, "grad_norm": 200.56607055664062, "learning_rate": 4.077860328502845e-07, "loss": 16.4219, "step": 21402 }, { "epoch": 1.4214650992893672, "grad_norm": 362.73577880859375, "learning_rate": 4.0769937297935477e-07, "loss": 15.4531, "step": 21403 }, { "epoch": 1.4215315135817228, "grad_norm": 227.04397583007812, "learning_rate": 4.076127199598165e-07, "loss": 14.3594, "step": 21404 }, { "epoch": 1.4215979278740785, "grad_norm": 259.4764404296875, "learning_rate": 4.0752607379267344e-07, "loss": 20.2969, "step": 21405 }, { "epoch": 1.4216643421664341, "grad_norm": 169.79969787597656, "learning_rate": 4.074394344789267e-07, "loss": 19.8594, "step": 21406 }, { "epoch": 1.42173075645879, "grad_norm": 149.26296997070312, "learning_rate": 4.07352802019579e-07, "loss": 17.2969, "step": 21407 }, { "epoch": 1.4217971707511456, "grad_norm": 149.05975341796875, "learning_rate": 4.072661764156322e-07, "loss": 15.7344, "step": 21408 }, { "epoch": 1.4218635850435013, "grad_norm": 266.2774658203125, "learning_rate": 4.0717955766808856e-07, "loss": 19.8438, "step": 21409 }, { "epoch": 1.4219299993358572, "grad_norm": 113.87942504882812, "learning_rate": 4.0709294577794996e-07, "loss": 11.0625, "step": 21410 }, { "epoch": 1.4219964136282128, "grad_norm": 117.19086456298828, "learning_rate": 4.0700634074621866e-07, "loss": 16.0312, "step": 21411 }, { "epoch": 1.4220628279205685, "grad_norm": 262.620849609375, "learning_rate": 4.069197425738956e-07, "loss": 16.2812, "step": 21412 }, { "epoch": 1.4221292422129241, "grad_norm": 294.21612548828125, "learning_rate": 4.0683315126198305e-07, "loss": 14.8906, "step": 21413 }, { "epoch": 1.42219565650528, "grad_norm": 244.28443908691406, "learning_rate": 4.0674656681148255e-07, "loss": 17.9688, "step": 21414 }, { "epoch": 1.4222620707976357, "grad_norm": 160.7677459716797, "learning_rate": 4.066599892233957e-07, "loss": 16.7031, "step": 21415 }, { "epoch": 1.4223284850899913, "grad_norm": 446.68841552734375, "learning_rate": 4.06573418498724e-07, "loss": 19.8594, "step": 21416 }, { "epoch": 1.422394899382347, "grad_norm": 353.062255859375, "learning_rate": 4.064868546384691e-07, "loss": 17.9375, "step": 21417 }, { "epoch": 1.4224613136747029, "grad_norm": 160.2789764404297, "learning_rate": 4.064002976436313e-07, "loss": 13.2031, "step": 21418 }, { "epoch": 1.4225277279670585, "grad_norm": 239.92739868164062, "learning_rate": 4.063137475152133e-07, "loss": 28.5469, "step": 21419 }, { "epoch": 1.4225941422594142, "grad_norm": 177.42926025390625, "learning_rate": 4.062272042542151e-07, "loss": 17.3438, "step": 21420 }, { "epoch": 1.42266055655177, "grad_norm": 194.7977294921875, "learning_rate": 4.061406678616383e-07, "loss": 19.9062, "step": 21421 }, { "epoch": 1.4227269708441257, "grad_norm": 238.31512451171875, "learning_rate": 4.060541383384838e-07, "loss": 13.8281, "step": 21422 }, { "epoch": 1.4227933851364813, "grad_norm": 134.46188354492188, "learning_rate": 4.0596761568575253e-07, "loss": 15.1406, "step": 21423 }, { "epoch": 1.4228597994288372, "grad_norm": 204.15792846679688, "learning_rate": 4.058810999044453e-07, "loss": 16.25, "step": 21424 }, { "epoch": 1.4229262137211929, "grad_norm": 113.32967376708984, "learning_rate": 4.0579459099556336e-07, "loss": 12.6406, "step": 21425 }, { "epoch": 1.4229926280135485, "grad_norm": 179.9826202392578, "learning_rate": 4.0570808896010623e-07, "loss": 13.3438, "step": 21426 }, { "epoch": 1.4230590423059042, "grad_norm": 219.77655029296875, "learning_rate": 4.056215937990759e-07, "loss": 15.1719, "step": 21427 }, { "epoch": 1.4231254565982598, "grad_norm": 193.90017700195312, "learning_rate": 4.055351055134719e-07, "loss": 14.7344, "step": 21428 }, { "epoch": 1.4231918708906157, "grad_norm": 163.32449340820312, "learning_rate": 4.05448624104295e-07, "loss": 18.375, "step": 21429 }, { "epoch": 1.4232582851829714, "grad_norm": 88.13428497314453, "learning_rate": 4.0536214957254555e-07, "loss": 12.7344, "step": 21430 }, { "epoch": 1.423324699475327, "grad_norm": 263.0592346191406, "learning_rate": 4.0527568191922434e-07, "loss": 15.5625, "step": 21431 }, { "epoch": 1.423391113767683, "grad_norm": 277.6518249511719, "learning_rate": 4.051892211453303e-07, "loss": 12.4062, "step": 21432 }, { "epoch": 1.4234575280600386, "grad_norm": 174.97653198242188, "learning_rate": 4.0510276725186464e-07, "loss": 15.2656, "step": 21433 }, { "epoch": 1.4235239423523942, "grad_norm": 96.68912506103516, "learning_rate": 4.050163202398277e-07, "loss": 11.3203, "step": 21434 }, { "epoch": 1.42359035664475, "grad_norm": 225.40866088867188, "learning_rate": 4.04929880110218e-07, "loss": 13.5156, "step": 21435 }, { "epoch": 1.4236567709371057, "grad_norm": 128.27499389648438, "learning_rate": 4.0484344686403705e-07, "loss": 13.7188, "step": 21436 }, { "epoch": 1.4237231852294614, "grad_norm": 120.7593994140625, "learning_rate": 4.0475702050228357e-07, "loss": 11.1094, "step": 21437 }, { "epoch": 1.423789599521817, "grad_norm": 217.67596435546875, "learning_rate": 4.0467060102595764e-07, "loss": 15.3438, "step": 21438 }, { "epoch": 1.4238560138141727, "grad_norm": 290.3516540527344, "learning_rate": 4.045841884360589e-07, "loss": 18.5312, "step": 21439 }, { "epoch": 1.4239224281065286, "grad_norm": 328.5914611816406, "learning_rate": 4.04497782733587e-07, "loss": 15.8125, "step": 21440 }, { "epoch": 1.4239888423988842, "grad_norm": 151.9022674560547, "learning_rate": 4.0441138391954134e-07, "loss": 13.1406, "step": 21441 }, { "epoch": 1.42405525669124, "grad_norm": 232.79226684570312, "learning_rate": 4.0432499199492163e-07, "loss": 16.625, "step": 21442 }, { "epoch": 1.4241216709835958, "grad_norm": 324.78338623046875, "learning_rate": 4.042386069607263e-07, "loss": 18.25, "step": 21443 }, { "epoch": 1.4241880852759514, "grad_norm": 312.6924743652344, "learning_rate": 4.0415222881795596e-07, "loss": 14.4219, "step": 21444 }, { "epoch": 1.424254499568307, "grad_norm": 197.52700805664062, "learning_rate": 4.040658575676086e-07, "loss": 13.5312, "step": 21445 }, { "epoch": 1.424320913860663, "grad_norm": 161.90423583984375, "learning_rate": 4.039794932106838e-07, "loss": 15.4688, "step": 21446 }, { "epoch": 1.4243873281530186, "grad_norm": 576.2627563476562, "learning_rate": 4.0389313574818064e-07, "loss": 21.1719, "step": 21447 }, { "epoch": 1.4244537424453743, "grad_norm": 489.87445068359375, "learning_rate": 4.038067851810978e-07, "loss": 12.0, "step": 21448 }, { "epoch": 1.42452015673773, "grad_norm": 485.6136779785156, "learning_rate": 4.0372044151043437e-07, "loss": 16.9219, "step": 21449 }, { "epoch": 1.4245865710300856, "grad_norm": 134.82081604003906, "learning_rate": 4.036341047371894e-07, "loss": 15.3906, "step": 21450 }, { "epoch": 1.4246529853224414, "grad_norm": 145.7252197265625, "learning_rate": 4.035477748623605e-07, "loss": 15.5938, "step": 21451 }, { "epoch": 1.424719399614797, "grad_norm": 173.36390686035156, "learning_rate": 4.034614518869476e-07, "loss": 11.9375, "step": 21452 }, { "epoch": 1.4247858139071528, "grad_norm": 146.82598876953125, "learning_rate": 4.0337513581194835e-07, "loss": 17.7188, "step": 21453 }, { "epoch": 1.4248522281995086, "grad_norm": 439.92974853515625, "learning_rate": 4.032888266383616e-07, "loss": 18.125, "step": 21454 }, { "epoch": 1.4249186424918643, "grad_norm": 249.7708282470703, "learning_rate": 4.032025243671855e-07, "loss": 17.0938, "step": 21455 }, { "epoch": 1.42498505678422, "grad_norm": 752.6512451171875, "learning_rate": 4.031162289994189e-07, "loss": 15.7031, "step": 21456 }, { "epoch": 1.4250514710765758, "grad_norm": 193.4949188232422, "learning_rate": 4.0302994053605885e-07, "loss": 13.4141, "step": 21457 }, { "epoch": 1.4251178853689315, "grad_norm": 116.53724670410156, "learning_rate": 4.029436589781049e-07, "loss": 15.0, "step": 21458 }, { "epoch": 1.4251842996612871, "grad_norm": 159.7466278076172, "learning_rate": 4.0285738432655416e-07, "loss": 16.5938, "step": 21459 }, { "epoch": 1.4252507139536428, "grad_norm": 153.13070678710938, "learning_rate": 4.027711165824047e-07, "loss": 16.4375, "step": 21460 }, { "epoch": 1.4253171282459984, "grad_norm": 209.95364379882812, "learning_rate": 4.0268485574665466e-07, "loss": 15.6719, "step": 21461 }, { "epoch": 1.4253835425383543, "grad_norm": 263.9261474609375, "learning_rate": 4.0259860182030183e-07, "loss": 13.6953, "step": 21462 }, { "epoch": 1.42544995683071, "grad_norm": 211.74081420898438, "learning_rate": 4.0251235480434385e-07, "loss": 15.9219, "step": 21463 }, { "epoch": 1.4255163711230656, "grad_norm": 107.96439361572266, "learning_rate": 4.0242611469977873e-07, "loss": 12.4219, "step": 21464 }, { "epoch": 1.4255827854154215, "grad_norm": 251.72879028320312, "learning_rate": 4.023398815076031e-07, "loss": 16.0938, "step": 21465 }, { "epoch": 1.4256491997077771, "grad_norm": 135.07762145996094, "learning_rate": 4.022536552288157e-07, "loss": 18.0469, "step": 21466 }, { "epoch": 1.4257156140001328, "grad_norm": 139.18392944335938, "learning_rate": 4.02167435864413e-07, "loss": 17.2812, "step": 21467 }, { "epoch": 1.4257820282924887, "grad_norm": 82.96822357177734, "learning_rate": 4.0208122341539274e-07, "loss": 17.8594, "step": 21468 }, { "epoch": 1.4258484425848443, "grad_norm": 131.79714965820312, "learning_rate": 4.019950178827521e-07, "loss": 15.375, "step": 21469 }, { "epoch": 1.4259148568772, "grad_norm": 271.0296630859375, "learning_rate": 4.0190881926748833e-07, "loss": 17.9062, "step": 21470 }, { "epoch": 1.4259812711695556, "grad_norm": 358.86627197265625, "learning_rate": 4.018226275705984e-07, "loss": 17.7344, "step": 21471 }, { "epoch": 1.4260476854619113, "grad_norm": 200.7002716064453, "learning_rate": 4.017364427930793e-07, "loss": 17.1875, "step": 21472 }, { "epoch": 1.4261140997542672, "grad_norm": 422.1856689453125, "learning_rate": 4.016502649359281e-07, "loss": 16.7656, "step": 21473 }, { "epoch": 1.4261805140466228, "grad_norm": 249.72216796875, "learning_rate": 4.015640940001417e-07, "loss": 16.5, "step": 21474 }, { "epoch": 1.4262469283389785, "grad_norm": 196.40357971191406, "learning_rate": 4.01477929986717e-07, "loss": 17.75, "step": 21475 }, { "epoch": 1.4263133426313344, "grad_norm": 101.38619995117188, "learning_rate": 4.0139177289665017e-07, "loss": 17.7969, "step": 21476 }, { "epoch": 1.42637975692369, "grad_norm": 384.2064514160156, "learning_rate": 4.013056227309382e-07, "loss": 12.2344, "step": 21477 }, { "epoch": 1.4264461712160457, "grad_norm": 143.21173095703125, "learning_rate": 4.0121947949057745e-07, "loss": 13.7188, "step": 21478 }, { "epoch": 1.4265125855084015, "grad_norm": 126.57659912109375, "learning_rate": 4.0113334317656454e-07, "loss": 13.3281, "step": 21479 }, { "epoch": 1.4265789998007572, "grad_norm": 305.58453369140625, "learning_rate": 4.010472137898958e-07, "loss": 17.0781, "step": 21480 }, { "epoch": 1.4266454140931129, "grad_norm": 281.8880310058594, "learning_rate": 4.0096109133156787e-07, "loss": 19.1719, "step": 21481 }, { "epoch": 1.4267118283854685, "grad_norm": 209.2202911376953, "learning_rate": 4.008749758025759e-07, "loss": 15.7969, "step": 21482 }, { "epoch": 1.4267782426778242, "grad_norm": 168.0069580078125, "learning_rate": 4.0078886720391746e-07, "loss": 16.3906, "step": 21483 }, { "epoch": 1.42684465697018, "grad_norm": 977.8242797851562, "learning_rate": 4.0070276553658744e-07, "loss": 23.375, "step": 21484 }, { "epoch": 1.4269110712625357, "grad_norm": 289.70758056640625, "learning_rate": 4.0061667080158236e-07, "loss": 16.1875, "step": 21485 }, { "epoch": 1.4269774855548913, "grad_norm": 148.1892852783203, "learning_rate": 4.0053058299989793e-07, "loss": 19.75, "step": 21486 }, { "epoch": 1.4270438998472472, "grad_norm": 154.56036376953125, "learning_rate": 4.0044450213253014e-07, "loss": 15.5781, "step": 21487 }, { "epoch": 1.4271103141396029, "grad_norm": 123.9550552368164, "learning_rate": 4.003584282004745e-07, "loss": 17.3438, "step": 21488 }, { "epoch": 1.4271767284319585, "grad_norm": 209.5193634033203, "learning_rate": 4.002723612047272e-07, "loss": 14.6094, "step": 21489 }, { "epoch": 1.4272431427243144, "grad_norm": 124.10037231445312, "learning_rate": 4.001863011462827e-07, "loss": 13.0781, "step": 21490 }, { "epoch": 1.42730955701667, "grad_norm": 222.17083740234375, "learning_rate": 4.0010024802613794e-07, "loss": 17.6875, "step": 21491 }, { "epoch": 1.4273759713090257, "grad_norm": 102.35050964355469, "learning_rate": 4.000142018452871e-07, "loss": 14.0, "step": 21492 }, { "epoch": 1.4274423856013814, "grad_norm": 318.9403991699219, "learning_rate": 3.9992816260472616e-07, "loss": 18.0938, "step": 21493 }, { "epoch": 1.427508799893737, "grad_norm": 240.29132080078125, "learning_rate": 3.998421303054501e-07, "loss": 14.0625, "step": 21494 }, { "epoch": 1.427575214186093, "grad_norm": 555.600830078125, "learning_rate": 3.997561049484547e-07, "loss": 17.4062, "step": 21495 }, { "epoch": 1.4276416284784486, "grad_norm": 91.0055160522461, "learning_rate": 3.9967008653473367e-07, "loss": 14.0781, "step": 21496 }, { "epoch": 1.4277080427708042, "grad_norm": 217.94566345214844, "learning_rate": 3.995840750652838e-07, "loss": 15.1875, "step": 21497 }, { "epoch": 1.42777445706316, "grad_norm": 339.8049621582031, "learning_rate": 3.994980705410986e-07, "loss": 17.6875, "step": 21498 }, { "epoch": 1.4278408713555157, "grad_norm": 178.1328582763672, "learning_rate": 3.994120729631736e-07, "loss": 13.875, "step": 21499 }, { "epoch": 1.4279072856478714, "grad_norm": 193.79055786132812, "learning_rate": 3.993260823325033e-07, "loss": 14.9844, "step": 21500 }, { "epoch": 1.4279736999402273, "grad_norm": 249.1285858154297, "learning_rate": 3.9924009865008255e-07, "loss": 17.3281, "step": 21501 }, { "epoch": 1.428040114232583, "grad_norm": 182.8563232421875, "learning_rate": 3.99154121916906e-07, "loss": 16.2656, "step": 21502 }, { "epoch": 1.4281065285249386, "grad_norm": 164.17286682128906, "learning_rate": 3.990681521339684e-07, "loss": 13.9219, "step": 21503 }, { "epoch": 1.4281729428172942, "grad_norm": 307.73553466796875, "learning_rate": 3.989821893022631e-07, "loss": 17.3906, "step": 21504 }, { "epoch": 1.4282393571096499, "grad_norm": 132.62142944335938, "learning_rate": 3.988962334227861e-07, "loss": 14.0469, "step": 21505 }, { "epoch": 1.4283057714020058, "grad_norm": 222.9137725830078, "learning_rate": 3.9881028449653043e-07, "loss": 12.2656, "step": 21506 }, { "epoch": 1.4283721856943614, "grad_norm": 152.56082153320312, "learning_rate": 3.987243425244904e-07, "loss": 20.5625, "step": 21507 }, { "epoch": 1.428438599986717, "grad_norm": 6278.8505859375, "learning_rate": 3.986384075076612e-07, "loss": 16.9219, "step": 21508 }, { "epoch": 1.428505014279073, "grad_norm": 332.5272216796875, "learning_rate": 3.9855247944703574e-07, "loss": 16.8438, "step": 21509 }, { "epoch": 1.4285714285714286, "grad_norm": 300.45758056640625, "learning_rate": 3.9846655834360845e-07, "loss": 20.2812, "step": 21510 }, { "epoch": 1.4286378428637843, "grad_norm": 277.2286682128906, "learning_rate": 3.98380644198373e-07, "loss": 17.8438, "step": 21511 }, { "epoch": 1.4287042571561401, "grad_norm": 244.23297119140625, "learning_rate": 3.982947370123235e-07, "loss": 12.2969, "step": 21512 }, { "epoch": 1.4287706714484958, "grad_norm": 259.50494384765625, "learning_rate": 3.9820883678645345e-07, "loss": 18.2812, "step": 21513 }, { "epoch": 1.4288370857408514, "grad_norm": 349.427490234375, "learning_rate": 3.981229435217569e-07, "loss": 20.8281, "step": 21514 }, { "epoch": 1.428903500033207, "grad_norm": 126.61548614501953, "learning_rate": 3.980370572192264e-07, "loss": 18.3125, "step": 21515 }, { "epoch": 1.4289699143255628, "grad_norm": 122.48611450195312, "learning_rate": 3.9795117787985677e-07, "loss": 14.6875, "step": 21516 }, { "epoch": 1.4290363286179186, "grad_norm": 305.6504211425781, "learning_rate": 3.978653055046405e-07, "loss": 15.3125, "step": 21517 }, { "epoch": 1.4291027429102743, "grad_norm": 173.4949188232422, "learning_rate": 3.977794400945711e-07, "loss": 15.625, "step": 21518 }, { "epoch": 1.42916915720263, "grad_norm": 207.60733032226562, "learning_rate": 3.976935816506418e-07, "loss": 14.7969, "step": 21519 }, { "epoch": 1.4292355714949858, "grad_norm": 291.87335205078125, "learning_rate": 3.976077301738463e-07, "loss": 14.2656, "step": 21520 }, { "epoch": 1.4293019857873415, "grad_norm": 230.30780029296875, "learning_rate": 3.9752188566517655e-07, "loss": 12.4375, "step": 21521 }, { "epoch": 1.4293684000796971, "grad_norm": 319.0339050292969, "learning_rate": 3.974360481256268e-07, "loss": 12.2656, "step": 21522 }, { "epoch": 1.429434814372053, "grad_norm": 123.18231964111328, "learning_rate": 3.9735021755618914e-07, "loss": 15.0781, "step": 21523 }, { "epoch": 1.4295012286644087, "grad_norm": 580.763427734375, "learning_rate": 3.972643939578566e-07, "loss": 21.9688, "step": 21524 }, { "epoch": 1.4295676429567643, "grad_norm": 584.2811279296875, "learning_rate": 3.9717857733162196e-07, "loss": 15.7109, "step": 21525 }, { "epoch": 1.42963405724912, "grad_norm": 219.4449005126953, "learning_rate": 3.97092767678478e-07, "loss": 21.625, "step": 21526 }, { "epoch": 1.4297004715414756, "grad_norm": 317.52203369140625, "learning_rate": 3.970069649994172e-07, "loss": 33.5781, "step": 21527 }, { "epoch": 1.4297668858338315, "grad_norm": 199.9571533203125, "learning_rate": 3.9692116929543253e-07, "loss": 22.7188, "step": 21528 }, { "epoch": 1.4298333001261871, "grad_norm": 778.8333129882812, "learning_rate": 3.968353805675152e-07, "loss": 14.8438, "step": 21529 }, { "epoch": 1.4298997144185428, "grad_norm": 344.0706481933594, "learning_rate": 3.9674959881665927e-07, "loss": 11.3594, "step": 21530 }, { "epoch": 1.4299661287108987, "grad_norm": 1701.313720703125, "learning_rate": 3.9666382404385556e-07, "loss": 19.5625, "step": 21531 }, { "epoch": 1.4300325430032543, "grad_norm": 232.33778381347656, "learning_rate": 3.9657805625009697e-07, "loss": 17.5625, "step": 21532 }, { "epoch": 1.43009895729561, "grad_norm": 378.80426025390625, "learning_rate": 3.964922954363754e-07, "loss": 14.2578, "step": 21533 }, { "epoch": 1.4301653715879659, "grad_norm": 210.93751525878906, "learning_rate": 3.96406541603683e-07, "loss": 13.5, "step": 21534 }, { "epoch": 1.4302317858803215, "grad_norm": 222.81654357910156, "learning_rate": 3.963207947530117e-07, "loss": 11.3906, "step": 21535 }, { "epoch": 1.4302982001726772, "grad_norm": 130.84715270996094, "learning_rate": 3.962350548853537e-07, "loss": 11.0938, "step": 21536 }, { "epoch": 1.4303646144650328, "grad_norm": 140.37899780273438, "learning_rate": 3.9614932200170003e-07, "loss": 13.6719, "step": 21537 }, { "epoch": 1.4304310287573885, "grad_norm": 106.73178100585938, "learning_rate": 3.9606359610304286e-07, "loss": 10.0938, "step": 21538 }, { "epoch": 1.4304974430497444, "grad_norm": 125.07070922851562, "learning_rate": 3.959778771903737e-07, "loss": 16.2344, "step": 21539 }, { "epoch": 1.4305638573421, "grad_norm": 1081.138671875, "learning_rate": 3.958921652646842e-07, "loss": 13.0625, "step": 21540 }, { "epoch": 1.4306302716344557, "grad_norm": 326.6932067871094, "learning_rate": 3.9580646032696575e-07, "loss": 21.0938, "step": 21541 }, { "epoch": 1.4306966859268115, "grad_norm": 200.8943328857422, "learning_rate": 3.957207623782102e-07, "loss": 18.5625, "step": 21542 }, { "epoch": 1.4307631002191672, "grad_norm": 240.8571014404297, "learning_rate": 3.9563507141940754e-07, "loss": 16.75, "step": 21543 }, { "epoch": 1.4308295145115228, "grad_norm": 280.33056640625, "learning_rate": 3.955493874515504e-07, "loss": 17.6875, "step": 21544 }, { "epoch": 1.4308959288038787, "grad_norm": 550.5269165039062, "learning_rate": 3.9546371047562963e-07, "loss": 24.9375, "step": 21545 }, { "epoch": 1.4309623430962344, "grad_norm": 133.0355224609375, "learning_rate": 3.9537804049263544e-07, "loss": 14.7812, "step": 21546 }, { "epoch": 1.43102875738859, "grad_norm": 427.3402099609375, "learning_rate": 3.9529237750356017e-07, "loss": 18.5859, "step": 21547 }, { "epoch": 1.4310951716809457, "grad_norm": 191.55517578125, "learning_rate": 3.952067215093936e-07, "loss": 15.9062, "step": 21548 }, { "epoch": 1.4311615859733013, "grad_norm": 129.14927673339844, "learning_rate": 3.951210725111269e-07, "loss": 15.7266, "step": 21549 }, { "epoch": 1.4312280002656572, "grad_norm": 176.93478393554688, "learning_rate": 3.9503543050975085e-07, "loss": 12.3906, "step": 21550 }, { "epoch": 1.4312944145580129, "grad_norm": 494.40008544921875, "learning_rate": 3.9494979550625605e-07, "loss": 11.3438, "step": 21551 }, { "epoch": 1.4313608288503685, "grad_norm": 225.61473083496094, "learning_rate": 3.948641675016332e-07, "loss": 23.9219, "step": 21552 }, { "epoch": 1.4314272431427244, "grad_norm": 141.4418487548828, "learning_rate": 3.9477854649687303e-07, "loss": 11.2969, "step": 21553 }, { "epoch": 1.43149365743508, "grad_norm": 286.7761535644531, "learning_rate": 3.9469293249296496e-07, "loss": 17.7656, "step": 21554 }, { "epoch": 1.4315600717274357, "grad_norm": 150.54261779785156, "learning_rate": 3.946073254909007e-07, "loss": 11.75, "step": 21555 }, { "epoch": 1.4316264860197916, "grad_norm": 282.3536682128906, "learning_rate": 3.945217254916695e-07, "loss": 14.9375, "step": 21556 }, { "epoch": 1.4316929003121472, "grad_norm": 146.36988830566406, "learning_rate": 3.9443613249626176e-07, "loss": 11.2656, "step": 21557 }, { "epoch": 1.431759314604503, "grad_norm": 159.26974487304688, "learning_rate": 3.9435054650566766e-07, "loss": 12.3047, "step": 21558 }, { "epoch": 1.4318257288968586, "grad_norm": 157.43865966796875, "learning_rate": 3.9426496752087757e-07, "loss": 17.75, "step": 21559 }, { "epoch": 1.4318921431892142, "grad_norm": 532.9611206054688, "learning_rate": 3.941793955428804e-07, "loss": 17.5, "step": 21560 }, { "epoch": 1.43195855748157, "grad_norm": 346.9306945800781, "learning_rate": 3.940938305726673e-07, "loss": 13.7422, "step": 21561 }, { "epoch": 1.4320249717739257, "grad_norm": 432.03973388671875, "learning_rate": 3.94008272611227e-07, "loss": 10.3438, "step": 21562 }, { "epoch": 1.4320913860662814, "grad_norm": 138.46786499023438, "learning_rate": 3.9392272165954966e-07, "loss": 12.875, "step": 21563 }, { "epoch": 1.4321578003586373, "grad_norm": 191.11387634277344, "learning_rate": 3.938371777186248e-07, "loss": 16.75, "step": 21564 }, { "epoch": 1.432224214650993, "grad_norm": 194.10923767089844, "learning_rate": 3.937516407894418e-07, "loss": 21.2812, "step": 21565 }, { "epoch": 1.4322906289433486, "grad_norm": 329.1239929199219, "learning_rate": 3.936661108729903e-07, "loss": 17.9922, "step": 21566 }, { "epoch": 1.4323570432357045, "grad_norm": 120.78173065185547, "learning_rate": 3.9358058797025996e-07, "loss": 11.5, "step": 21567 }, { "epoch": 1.43242345752806, "grad_norm": 91.3944320678711, "learning_rate": 3.93495072082239e-07, "loss": 14.9844, "step": 21568 }, { "epoch": 1.4324898718204158, "grad_norm": 255.16307067871094, "learning_rate": 3.9340956320991803e-07, "loss": 18.0469, "step": 21569 }, { "epoch": 1.4325562861127714, "grad_norm": 483.2792053222656, "learning_rate": 3.933240613542851e-07, "loss": 17.8594, "step": 21570 }, { "epoch": 1.432622700405127, "grad_norm": 193.1833953857422, "learning_rate": 3.9323856651632956e-07, "loss": 16.5156, "step": 21571 }, { "epoch": 1.432689114697483, "grad_norm": 138.52072143554688, "learning_rate": 3.931530786970404e-07, "loss": 13.9453, "step": 21572 }, { "epoch": 1.4327555289898386, "grad_norm": 125.69623565673828, "learning_rate": 3.930675978974065e-07, "loss": 17.2188, "step": 21573 }, { "epoch": 1.4328219432821943, "grad_norm": 278.1724853515625, "learning_rate": 3.929821241184167e-07, "loss": 17.2344, "step": 21574 }, { "epoch": 1.4328883575745501, "grad_norm": 243.5821533203125, "learning_rate": 3.9289665736106005e-07, "loss": 14.8594, "step": 21575 }, { "epoch": 1.4329547718669058, "grad_norm": 140.66119384765625, "learning_rate": 3.92811197626324e-07, "loss": 16.0469, "step": 21576 }, { "epoch": 1.4330211861592614, "grad_norm": 194.12249755859375, "learning_rate": 3.9272574491519863e-07, "loss": 15.0938, "step": 21577 }, { "epoch": 1.4330876004516173, "grad_norm": 166.55479431152344, "learning_rate": 3.9264029922867123e-07, "loss": 15.1094, "step": 21578 }, { "epoch": 1.433154014743973, "grad_norm": 325.34930419921875, "learning_rate": 3.9255486056773077e-07, "loss": 11.9844, "step": 21579 }, { "epoch": 1.4332204290363286, "grad_norm": 199.33871459960938, "learning_rate": 3.924694289333653e-07, "loss": 16.9688, "step": 21580 }, { "epoch": 1.4332868433286843, "grad_norm": 475.32891845703125, "learning_rate": 3.9238400432656327e-07, "loss": 17.0938, "step": 21581 }, { "epoch": 1.43335325762104, "grad_norm": 301.0372009277344, "learning_rate": 3.922985867483126e-07, "loss": 23.75, "step": 21582 }, { "epoch": 1.4334196719133958, "grad_norm": 116.27230834960938, "learning_rate": 3.922131761996015e-07, "loss": 17.3906, "step": 21583 }, { "epoch": 1.4334860862057515, "grad_norm": 138.2311553955078, "learning_rate": 3.921277726814183e-07, "loss": 16.0938, "step": 21584 }, { "epoch": 1.4335525004981071, "grad_norm": 345.5082702636719, "learning_rate": 3.9204237619474965e-07, "loss": 19.0156, "step": 21585 }, { "epoch": 1.433618914790463, "grad_norm": 146.50970458984375, "learning_rate": 3.919569867405851e-07, "loss": 15.1406, "step": 21586 }, { "epoch": 1.4336853290828186, "grad_norm": 238.12728881835938, "learning_rate": 3.918716043199111e-07, "loss": 20.0, "step": 21587 }, { "epoch": 1.4337517433751743, "grad_norm": 185.93728637695312, "learning_rate": 3.917862289337156e-07, "loss": 13.9219, "step": 21588 }, { "epoch": 1.4338181576675302, "grad_norm": 164.22496032714844, "learning_rate": 3.9170086058298646e-07, "loss": 11.9219, "step": 21589 }, { "epoch": 1.4338845719598858, "grad_norm": 286.6055603027344, "learning_rate": 3.9161549926871097e-07, "loss": 15.0938, "step": 21590 }, { "epoch": 1.4339509862522415, "grad_norm": 443.9713134765625, "learning_rate": 3.9153014499187643e-07, "loss": 22.8438, "step": 21591 }, { "epoch": 1.4340174005445971, "grad_norm": 346.03265380859375, "learning_rate": 3.9144479775347085e-07, "loss": 18.9219, "step": 21592 }, { "epoch": 1.4340838148369528, "grad_norm": 298.359375, "learning_rate": 3.9135945755448007e-07, "loss": 12.3125, "step": 21593 }, { "epoch": 1.4341502291293087, "grad_norm": 328.9067077636719, "learning_rate": 3.912741243958929e-07, "loss": 21.4688, "step": 21594 }, { "epoch": 1.4342166434216643, "grad_norm": 126.79708862304688, "learning_rate": 3.911887982786953e-07, "loss": 15.2344, "step": 21595 }, { "epoch": 1.43428305771402, "grad_norm": 519.1548461914062, "learning_rate": 3.9110347920387466e-07, "loss": 11.2734, "step": 21596 }, { "epoch": 1.4343494720063759, "grad_norm": 221.23492431640625, "learning_rate": 3.910181671724178e-07, "loss": 18.2031, "step": 21597 }, { "epoch": 1.4344158862987315, "grad_norm": 105.16552734375, "learning_rate": 3.909328621853116e-07, "loss": 13.0781, "step": 21598 }, { "epoch": 1.4344823005910872, "grad_norm": 211.54428100585938, "learning_rate": 3.908475642435429e-07, "loss": 18.75, "step": 21599 }, { "epoch": 1.434548714883443, "grad_norm": 332.5491943359375, "learning_rate": 3.9076227334809864e-07, "loss": 14.8594, "step": 21600 }, { "epoch": 1.4346151291757987, "grad_norm": 372.1975402832031, "learning_rate": 3.9067698949996473e-07, "loss": 19.9531, "step": 21601 }, { "epoch": 1.4346815434681544, "grad_norm": 166.35324096679688, "learning_rate": 3.9059171270012813e-07, "loss": 16.2969, "step": 21602 }, { "epoch": 1.43474795776051, "grad_norm": 217.16885375976562, "learning_rate": 3.9050644294957516e-07, "loss": 13.125, "step": 21603 }, { "epoch": 1.4348143720528657, "grad_norm": 558.7383422851562, "learning_rate": 3.9042118024929214e-07, "loss": 10.6719, "step": 21604 }, { "epoch": 1.4348807863452215, "grad_norm": 132.927001953125, "learning_rate": 3.9033592460026543e-07, "loss": 12.1719, "step": 21605 }, { "epoch": 1.4349472006375772, "grad_norm": 137.72793579101562, "learning_rate": 3.9025067600348163e-07, "loss": 15.2578, "step": 21606 }, { "epoch": 1.4350136149299328, "grad_norm": 315.0135803222656, "learning_rate": 3.9016543445992556e-07, "loss": 16.9688, "step": 21607 }, { "epoch": 1.4350800292222887, "grad_norm": 369.9947204589844, "learning_rate": 3.900801999705849e-07, "loss": 14.5156, "step": 21608 }, { "epoch": 1.4351464435146444, "grad_norm": 228.2156524658203, "learning_rate": 3.8999497253644444e-07, "loss": 15.6875, "step": 21609 }, { "epoch": 1.435212857807, "grad_norm": 174.25003051757812, "learning_rate": 3.899097521584903e-07, "loss": 17.5312, "step": 21610 }, { "epoch": 1.435279272099356, "grad_norm": 122.33177947998047, "learning_rate": 3.8982453883770836e-07, "loss": 16.7344, "step": 21611 }, { "epoch": 1.4353456863917116, "grad_norm": 176.5886993408203, "learning_rate": 3.897393325750844e-07, "loss": 16.7031, "step": 21612 }, { "epoch": 1.4354121006840672, "grad_norm": 76.68738555908203, "learning_rate": 3.896541333716039e-07, "loss": 10.8906, "step": 21613 }, { "epoch": 1.4354785149764229, "grad_norm": 167.51516723632812, "learning_rate": 3.895689412282528e-07, "loss": 23.7969, "step": 21614 }, { "epoch": 1.4355449292687785, "grad_norm": 217.2816925048828, "learning_rate": 3.894837561460155e-07, "loss": 20.6875, "step": 21615 }, { "epoch": 1.4356113435611344, "grad_norm": 134.70411682128906, "learning_rate": 3.893985781258784e-07, "loss": 17.0312, "step": 21616 }, { "epoch": 1.43567775785349, "grad_norm": 161.50994873046875, "learning_rate": 3.8931340716882675e-07, "loss": 23.3594, "step": 21617 }, { "epoch": 1.4357441721458457, "grad_norm": 176.8242645263672, "learning_rate": 3.892282432758448e-07, "loss": 18.75, "step": 21618 }, { "epoch": 1.4358105864382016, "grad_norm": 1147.8743896484375, "learning_rate": 3.8914308644791915e-07, "loss": 14.2656, "step": 21619 }, { "epoch": 1.4358770007305572, "grad_norm": 287.68731689453125, "learning_rate": 3.890579366860336e-07, "loss": 12.1953, "step": 21620 }, { "epoch": 1.435943415022913, "grad_norm": 232.52745056152344, "learning_rate": 3.8897279399117343e-07, "loss": 24.3125, "step": 21621 }, { "epoch": 1.4360098293152688, "grad_norm": 280.6029968261719, "learning_rate": 3.888876583643238e-07, "loss": 15.5312, "step": 21622 }, { "epoch": 1.4360762436076244, "grad_norm": 179.65992736816406, "learning_rate": 3.8880252980646965e-07, "loss": 11.1094, "step": 21623 }, { "epoch": 1.43614265789998, "grad_norm": 406.44696044921875, "learning_rate": 3.887174083185947e-07, "loss": 15.1719, "step": 21624 }, { "epoch": 1.4362090721923357, "grad_norm": 119.04481506347656, "learning_rate": 3.8863229390168496e-07, "loss": 17.3594, "step": 21625 }, { "epoch": 1.4362754864846914, "grad_norm": 275.4898681640625, "learning_rate": 3.8854718655672403e-07, "loss": 19.2344, "step": 21626 }, { "epoch": 1.4363419007770473, "grad_norm": 188.5235595703125, "learning_rate": 3.884620862846966e-07, "loss": 19.0781, "step": 21627 }, { "epoch": 1.436408315069403, "grad_norm": 153.37681579589844, "learning_rate": 3.8837699308658713e-07, "loss": 15.3125, "step": 21628 }, { "epoch": 1.4364747293617586, "grad_norm": 858.2308959960938, "learning_rate": 3.882919069633799e-07, "loss": 12.1875, "step": 21629 }, { "epoch": 1.4365411436541144, "grad_norm": 874.3844604492188, "learning_rate": 3.882068279160592e-07, "loss": 20.5, "step": 21630 }, { "epoch": 1.43660755794647, "grad_norm": 320.06219482421875, "learning_rate": 3.881217559456095e-07, "loss": 23.5469, "step": 21631 }, { "epoch": 1.4366739722388258, "grad_norm": 231.8853759765625, "learning_rate": 3.880366910530137e-07, "loss": 14.7344, "step": 21632 }, { "epoch": 1.4367403865311816, "grad_norm": 166.6202850341797, "learning_rate": 3.879516332392574e-07, "loss": 13.9062, "step": 21633 }, { "epoch": 1.4368068008235373, "grad_norm": 106.50958251953125, "learning_rate": 3.878665825053233e-07, "loss": 17.4844, "step": 21634 }, { "epoch": 1.436873215115893, "grad_norm": 355.065185546875, "learning_rate": 3.877815388521956e-07, "loss": 16.75, "step": 21635 }, { "epoch": 1.4369396294082486, "grad_norm": 204.491455078125, "learning_rate": 3.8769650228085794e-07, "loss": 14.5625, "step": 21636 }, { "epoch": 1.4370060437006043, "grad_norm": 191.13246154785156, "learning_rate": 3.8761147279229414e-07, "loss": 18.7344, "step": 21637 }, { "epoch": 1.4370724579929601, "grad_norm": 363.6687316894531, "learning_rate": 3.875264503874878e-07, "loss": 18.3438, "step": 21638 }, { "epoch": 1.4371388722853158, "grad_norm": 167.65731811523438, "learning_rate": 3.8744143506742244e-07, "loss": 16.7969, "step": 21639 }, { "epoch": 1.4372052865776714, "grad_norm": 155.83262634277344, "learning_rate": 3.873564268330808e-07, "loss": 16.0625, "step": 21640 }, { "epoch": 1.4372717008700273, "grad_norm": 299.847412109375, "learning_rate": 3.8727142568544736e-07, "loss": 17.7188, "step": 21641 }, { "epoch": 1.437338115162383, "grad_norm": 192.89476013183594, "learning_rate": 3.8718643162550444e-07, "loss": 18.0938, "step": 21642 }, { "epoch": 1.4374045294547386, "grad_norm": 141.9440155029297, "learning_rate": 3.8710144465423556e-07, "loss": 16.1406, "step": 21643 }, { "epoch": 1.4374709437470945, "grad_norm": 268.5964050292969, "learning_rate": 3.870164647726236e-07, "loss": 17.8594, "step": 21644 }, { "epoch": 1.4375373580394502, "grad_norm": 161.09576416015625, "learning_rate": 3.869314919816522e-07, "loss": 12.9453, "step": 21645 }, { "epoch": 1.4376037723318058, "grad_norm": 208.5874481201172, "learning_rate": 3.8684652628230306e-07, "loss": 17.5156, "step": 21646 }, { "epoch": 1.4376701866241615, "grad_norm": 226.42286682128906, "learning_rate": 3.8676156767556047e-07, "loss": 14.3125, "step": 21647 }, { "epoch": 1.4377366009165171, "grad_norm": 167.25021362304688, "learning_rate": 3.86676616162406e-07, "loss": 19.75, "step": 21648 }, { "epoch": 1.437803015208873, "grad_norm": 611.3985595703125, "learning_rate": 3.8659167174382277e-07, "loss": 17.6484, "step": 21649 }, { "epoch": 1.4378694295012286, "grad_norm": 750.5625610351562, "learning_rate": 3.865067344207935e-07, "loss": 15.3594, "step": 21650 }, { "epoch": 1.4379358437935843, "grad_norm": 124.00607299804688, "learning_rate": 3.864218041943005e-07, "loss": 14.5469, "step": 21651 }, { "epoch": 1.4380022580859402, "grad_norm": 1900.490478515625, "learning_rate": 3.8633688106532627e-07, "loss": 15.5, "step": 21652 }, { "epoch": 1.4380686723782958, "grad_norm": 318.95672607421875, "learning_rate": 3.86251965034853e-07, "loss": 13.0156, "step": 21653 }, { "epoch": 1.4381350866706515, "grad_norm": 158.26409912109375, "learning_rate": 3.861670561038632e-07, "loss": 18.5781, "step": 21654 }, { "epoch": 1.4382015009630074, "grad_norm": 1946.955810546875, "learning_rate": 3.86082154273339e-07, "loss": 14.9219, "step": 21655 }, { "epoch": 1.438267915255363, "grad_norm": 275.1334533691406, "learning_rate": 3.8599725954426277e-07, "loss": 14.0156, "step": 21656 }, { "epoch": 1.4383343295477187, "grad_norm": 211.1836700439453, "learning_rate": 3.859123719176155e-07, "loss": 15.6875, "step": 21657 }, { "epoch": 1.4384007438400743, "grad_norm": 536.79833984375, "learning_rate": 3.858274913943804e-07, "loss": 14.7188, "step": 21658 }, { "epoch": 1.43846715813243, "grad_norm": 135.25262451171875, "learning_rate": 3.857426179755384e-07, "loss": 17.8281, "step": 21659 }, { "epoch": 1.4385335724247859, "grad_norm": 327.4281311035156, "learning_rate": 3.8565775166207173e-07, "loss": 19.9062, "step": 21660 }, { "epoch": 1.4385999867171415, "grad_norm": 270.9872131347656, "learning_rate": 3.855728924549618e-07, "loss": 14.0625, "step": 21661 }, { "epoch": 1.4386664010094972, "grad_norm": 290.76641845703125, "learning_rate": 3.854880403551903e-07, "loss": 15.0156, "step": 21662 }, { "epoch": 1.438732815301853, "grad_norm": 369.5111999511719, "learning_rate": 3.85403195363739e-07, "loss": 14.9219, "step": 21663 }, { "epoch": 1.4387992295942087, "grad_norm": 204.1041717529297, "learning_rate": 3.8531835748158935e-07, "loss": 17.5625, "step": 21664 }, { "epoch": 1.4388656438865643, "grad_norm": 163.76637268066406, "learning_rate": 3.852335267097222e-07, "loss": 14.3906, "step": 21665 }, { "epoch": 1.4389320581789202, "grad_norm": 214.5346221923828, "learning_rate": 3.8514870304911905e-07, "loss": 16.7656, "step": 21666 }, { "epoch": 1.4389984724712759, "grad_norm": 89.34271240234375, "learning_rate": 3.8506388650076127e-07, "loss": 12.0469, "step": 21667 }, { "epoch": 1.4390648867636315, "grad_norm": 655.0751953125, "learning_rate": 3.849790770656297e-07, "loss": 15.8438, "step": 21668 }, { "epoch": 1.4391313010559872, "grad_norm": 109.86851501464844, "learning_rate": 3.848942747447057e-07, "loss": 16.0625, "step": 21669 }, { "epoch": 1.4391977153483428, "grad_norm": 161.4040985107422, "learning_rate": 3.8480947953897027e-07, "loss": 13.7656, "step": 21670 }, { "epoch": 1.4392641296406987, "grad_norm": 112.7725601196289, "learning_rate": 3.847246914494033e-07, "loss": 15.7188, "step": 21671 }, { "epoch": 1.4393305439330544, "grad_norm": 499.9411926269531, "learning_rate": 3.846399104769872e-07, "loss": 14.7344, "step": 21672 }, { "epoch": 1.43939695822541, "grad_norm": 230.06329345703125, "learning_rate": 3.8455513662270134e-07, "loss": 22.6406, "step": 21673 }, { "epoch": 1.439463372517766, "grad_norm": 240.95880126953125, "learning_rate": 3.844703698875267e-07, "loss": 17.8281, "step": 21674 }, { "epoch": 1.4395297868101216, "grad_norm": 216.2925262451172, "learning_rate": 3.84385610272444e-07, "loss": 13.2344, "step": 21675 }, { "epoch": 1.4395962011024772, "grad_norm": 1263.8121337890625, "learning_rate": 3.8430085777843345e-07, "loss": 15.0, "step": 21676 }, { "epoch": 1.439662615394833, "grad_norm": 490.924072265625, "learning_rate": 3.8421611240647556e-07, "loss": 16.0156, "step": 21677 }, { "epoch": 1.4397290296871887, "grad_norm": 182.2388458251953, "learning_rate": 3.8413137415755103e-07, "loss": 17.375, "step": 21678 }, { "epoch": 1.4397954439795444, "grad_norm": 140.22816467285156, "learning_rate": 3.840466430326389e-07, "loss": 13.0938, "step": 21679 }, { "epoch": 1.4398618582719, "grad_norm": 279.15496826171875, "learning_rate": 3.839619190327208e-07, "loss": 15.4531, "step": 21680 }, { "epoch": 1.4399282725642557, "grad_norm": 259.7588195800781, "learning_rate": 3.838772021587755e-07, "loss": 17.3594, "step": 21681 }, { "epoch": 1.4399946868566116, "grad_norm": 289.6504821777344, "learning_rate": 3.837924924117836e-07, "loss": 18.5469, "step": 21682 }, { "epoch": 1.4400611011489672, "grad_norm": 94.73438262939453, "learning_rate": 3.8370778979272465e-07, "loss": 12.1406, "step": 21683 }, { "epoch": 1.440127515441323, "grad_norm": 319.7076416015625, "learning_rate": 3.836230943025791e-07, "loss": 17.4062, "step": 21684 }, { "epoch": 1.4401939297336788, "grad_norm": 164.12820434570312, "learning_rate": 3.8353840594232547e-07, "loss": 16.9688, "step": 21685 }, { "epoch": 1.4402603440260344, "grad_norm": 305.8216247558594, "learning_rate": 3.834537247129447e-07, "loss": 12.6719, "step": 21686 }, { "epoch": 1.44032675831839, "grad_norm": 187.89418029785156, "learning_rate": 3.8336905061541535e-07, "loss": 13.3125, "step": 21687 }, { "epoch": 1.440393172610746, "grad_norm": 605.5340576171875, "learning_rate": 3.832843836507174e-07, "loss": 18.3281, "step": 21688 }, { "epoch": 1.4404595869031016, "grad_norm": 187.5144805908203, "learning_rate": 3.8319972381983e-07, "loss": 14.4531, "step": 21689 }, { "epoch": 1.4405260011954573, "grad_norm": 137.47030639648438, "learning_rate": 3.8311507112373253e-07, "loss": 14.8125, "step": 21690 }, { "epoch": 1.440592415487813, "grad_norm": 103.45067596435547, "learning_rate": 3.8303042556340414e-07, "loss": 15.2969, "step": 21691 }, { "epoch": 1.4406588297801686, "grad_norm": 174.2623748779297, "learning_rate": 3.8294578713982406e-07, "loss": 17.0312, "step": 21692 }, { "epoch": 1.4407252440725244, "grad_norm": 158.73031616210938, "learning_rate": 3.828611558539714e-07, "loss": 16.8281, "step": 21693 }, { "epoch": 1.44079165836488, "grad_norm": 161.22276306152344, "learning_rate": 3.827765317068249e-07, "loss": 17.5781, "step": 21694 }, { "epoch": 1.4408580726572358, "grad_norm": 647.1658935546875, "learning_rate": 3.8269191469936403e-07, "loss": 13.5, "step": 21695 }, { "epoch": 1.4409244869495916, "grad_norm": 475.0168762207031, "learning_rate": 3.8260730483256634e-07, "loss": 12.9297, "step": 21696 }, { "epoch": 1.4409909012419473, "grad_norm": 176.96337890625, "learning_rate": 3.82522702107412e-07, "loss": 18.1562, "step": 21697 }, { "epoch": 1.441057315534303, "grad_norm": 271.4530944824219, "learning_rate": 3.824381065248787e-07, "loss": 24.9375, "step": 21698 }, { "epoch": 1.4411237298266588, "grad_norm": 371.21929931640625, "learning_rate": 3.823535180859453e-07, "loss": 16.0469, "step": 21699 }, { "epoch": 1.4411901441190145, "grad_norm": 169.05886840820312, "learning_rate": 3.8226893679159025e-07, "loss": 16.2969, "step": 21700 }, { "epoch": 1.4412565584113701, "grad_norm": 215.39263916015625, "learning_rate": 3.8218436264279186e-07, "loss": 12.8906, "step": 21701 }, { "epoch": 1.4413229727037258, "grad_norm": 193.0854949951172, "learning_rate": 3.8209979564052866e-07, "loss": 12.7031, "step": 21702 }, { "epoch": 1.4413893869960814, "grad_norm": 169.12086486816406, "learning_rate": 3.8201523578577886e-07, "loss": 14.0938, "step": 21703 }, { "epoch": 1.4414558012884373, "grad_norm": 274.80615234375, "learning_rate": 3.819306830795199e-07, "loss": 15.4219, "step": 21704 }, { "epoch": 1.441522215580793, "grad_norm": 174.61062622070312, "learning_rate": 3.818461375227311e-07, "loss": 13.4688, "step": 21705 }, { "epoch": 1.4415886298731486, "grad_norm": 230.2692413330078, "learning_rate": 3.817615991163894e-07, "loss": 11.7344, "step": 21706 }, { "epoch": 1.4416550441655045, "grad_norm": 313.3333740234375, "learning_rate": 3.816770678614729e-07, "loss": 18.2969, "step": 21707 }, { "epoch": 1.4417214584578601, "grad_norm": 235.49851989746094, "learning_rate": 3.8159254375895966e-07, "loss": 14.2344, "step": 21708 }, { "epoch": 1.4417878727502158, "grad_norm": 220.41111755371094, "learning_rate": 3.815080268098276e-07, "loss": 18.5312, "step": 21709 }, { "epoch": 1.4418542870425717, "grad_norm": 189.9764862060547, "learning_rate": 3.8142351701505324e-07, "loss": 15.9062, "step": 21710 }, { "epoch": 1.4419207013349273, "grad_norm": 411.255859375, "learning_rate": 3.813390143756158e-07, "loss": 16.4062, "step": 21711 }, { "epoch": 1.441987115627283, "grad_norm": 365.1995544433594, "learning_rate": 3.812545188924915e-07, "loss": 23.3438, "step": 21712 }, { "epoch": 1.4420535299196386, "grad_norm": 259.8045654296875, "learning_rate": 3.811700305666581e-07, "loss": 16.7969, "step": 21713 }, { "epoch": 1.4421199442119943, "grad_norm": 137.77066040039062, "learning_rate": 3.81085549399093e-07, "loss": 15.1562, "step": 21714 }, { "epoch": 1.4421863585043502, "grad_norm": 189.08599853515625, "learning_rate": 3.810010753907733e-07, "loss": 17.5, "step": 21715 }, { "epoch": 1.4422527727967058, "grad_norm": 243.0769805908203, "learning_rate": 3.8091660854267625e-07, "loss": 20.0312, "step": 21716 }, { "epoch": 1.4423191870890615, "grad_norm": 191.38754272460938, "learning_rate": 3.808321488557793e-07, "loss": 18.625, "step": 21717 }, { "epoch": 1.4423856013814174, "grad_norm": 183.84288024902344, "learning_rate": 3.807476963310583e-07, "loss": 19.5625, "step": 21718 }, { "epoch": 1.442452015673773, "grad_norm": 126.85111236572266, "learning_rate": 3.8066325096949147e-07, "loss": 11.9375, "step": 21719 }, { "epoch": 1.4425184299661287, "grad_norm": 244.19554138183594, "learning_rate": 3.805788127720548e-07, "loss": 15.3125, "step": 21720 }, { "epoch": 1.4425848442584845, "grad_norm": 271.979736328125, "learning_rate": 3.8049438173972515e-07, "loss": 19.7344, "step": 21721 }, { "epoch": 1.4426512585508402, "grad_norm": 1826.3533935546875, "learning_rate": 3.804099578734794e-07, "loss": 13.5156, "step": 21722 }, { "epoch": 1.4427176728431959, "grad_norm": 158.08428955078125, "learning_rate": 3.8032554117429385e-07, "loss": 20.0312, "step": 21723 }, { "epoch": 1.4427840871355515, "grad_norm": 384.5963439941406, "learning_rate": 3.802411316431452e-07, "loss": 13.7812, "step": 21724 }, { "epoch": 1.4428505014279072, "grad_norm": 148.73426818847656, "learning_rate": 3.8015672928101004e-07, "loss": 13.9531, "step": 21725 }, { "epoch": 1.442916915720263, "grad_norm": 262.1831359863281, "learning_rate": 3.8007233408886375e-07, "loss": 12.375, "step": 21726 }, { "epoch": 1.4429833300126187, "grad_norm": 181.84010314941406, "learning_rate": 3.7998794606768356e-07, "loss": 17.1562, "step": 21727 }, { "epoch": 1.4430497443049743, "grad_norm": 293.6138610839844, "learning_rate": 3.799035652184456e-07, "loss": 17.7812, "step": 21728 }, { "epoch": 1.4431161585973302, "grad_norm": 234.55763244628906, "learning_rate": 3.798191915421254e-07, "loss": 14.1094, "step": 21729 }, { "epoch": 1.4431825728896859, "grad_norm": 177.7098388671875, "learning_rate": 3.797348250396991e-07, "loss": 15.8281, "step": 21730 }, { "epoch": 1.4432489871820415, "grad_norm": 166.23577880859375, "learning_rate": 3.7965046571214267e-07, "loss": 15.3281, "step": 21731 }, { "epoch": 1.4433154014743974, "grad_norm": 226.941650390625, "learning_rate": 3.7956611356043187e-07, "loss": 13.6719, "step": 21732 }, { "epoch": 1.443381815766753, "grad_norm": 214.74049377441406, "learning_rate": 3.794817685855427e-07, "loss": 22.1094, "step": 21733 }, { "epoch": 1.4434482300591087, "grad_norm": 266.21417236328125, "learning_rate": 3.793974307884508e-07, "loss": 12.1094, "step": 21734 }, { "epoch": 1.4435146443514644, "grad_norm": 828.8252563476562, "learning_rate": 3.7931310017013096e-07, "loss": 12.9375, "step": 21735 }, { "epoch": 1.44358105864382, "grad_norm": 287.4404296875, "learning_rate": 3.7922877673155996e-07, "loss": 15.1406, "step": 21736 }, { "epoch": 1.443647472936176, "grad_norm": 218.0610809326172, "learning_rate": 3.791444604737121e-07, "loss": 15.6562, "step": 21737 }, { "epoch": 1.4437138872285316, "grad_norm": 196.19264221191406, "learning_rate": 3.790601513975631e-07, "loss": 11.6719, "step": 21738 }, { "epoch": 1.4437803015208872, "grad_norm": 123.09015655517578, "learning_rate": 3.789758495040882e-07, "loss": 16.0469, "step": 21739 }, { "epoch": 1.443846715813243, "grad_norm": 218.5467987060547, "learning_rate": 3.788915547942627e-07, "loss": 17.1562, "step": 21740 }, { "epoch": 1.4439131301055987, "grad_norm": 571.0867919921875, "learning_rate": 3.788072672690615e-07, "loss": 18.8906, "step": 21741 }, { "epoch": 1.4439795443979544, "grad_norm": 165.12725830078125, "learning_rate": 3.787229869294599e-07, "loss": 23.0938, "step": 21742 }, { "epoch": 1.4440459586903103, "grad_norm": 404.2750549316406, "learning_rate": 3.7863871377643195e-07, "loss": 20.5625, "step": 21743 }, { "epoch": 1.444112372982666, "grad_norm": 166.318359375, "learning_rate": 3.785544478109537e-07, "loss": 14.0625, "step": 21744 }, { "epoch": 1.4441787872750216, "grad_norm": 137.200927734375, "learning_rate": 3.7847018903399887e-07, "loss": 14.6875, "step": 21745 }, { "epoch": 1.4442452015673772, "grad_norm": 279.747314453125, "learning_rate": 3.7838593744654256e-07, "loss": 17.2188, "step": 21746 }, { "epoch": 1.444311615859733, "grad_norm": 105.9854965209961, "learning_rate": 3.7830169304955926e-07, "loss": 15.5312, "step": 21747 }, { "epoch": 1.4443780301520888, "grad_norm": 1558.04833984375, "learning_rate": 3.7821745584402385e-07, "loss": 14.6094, "step": 21748 }, { "epoch": 1.4444444444444444, "grad_norm": 193.61802673339844, "learning_rate": 3.781332258309097e-07, "loss": 16.0625, "step": 21749 }, { "epoch": 1.4445108587368, "grad_norm": 146.4017791748047, "learning_rate": 3.7804900301119233e-07, "loss": 16.0469, "step": 21750 }, { "epoch": 1.444577273029156, "grad_norm": 168.7356414794922, "learning_rate": 3.7796478738584524e-07, "loss": 15.6875, "step": 21751 }, { "epoch": 1.4446436873215116, "grad_norm": 154.5121307373047, "learning_rate": 3.7788057895584293e-07, "loss": 14.6719, "step": 21752 }, { "epoch": 1.4447101016138673, "grad_norm": 243.94386291503906, "learning_rate": 3.777963777221591e-07, "loss": 16.5625, "step": 21753 }, { "epoch": 1.4447765159062231, "grad_norm": 215.09320068359375, "learning_rate": 3.777121836857682e-07, "loss": 20.375, "step": 21754 }, { "epoch": 1.4448429301985788, "grad_norm": 332.7795715332031, "learning_rate": 3.776279968476438e-07, "loss": 23.8125, "step": 21755 }, { "epoch": 1.4449093444909344, "grad_norm": 143.61517333984375, "learning_rate": 3.7754381720876017e-07, "loss": 17.7344, "step": 21756 }, { "epoch": 1.44497575878329, "grad_norm": 147.92323303222656, "learning_rate": 3.774596447700902e-07, "loss": 19.9844, "step": 21757 }, { "epoch": 1.4450421730756458, "grad_norm": 104.43898010253906, "learning_rate": 3.773754795326086e-07, "loss": 14.5469, "step": 21758 }, { "epoch": 1.4451085873680016, "grad_norm": 247.75111389160156, "learning_rate": 3.7729132149728816e-07, "loss": 21.3516, "step": 21759 }, { "epoch": 1.4451750016603573, "grad_norm": 245.70323181152344, "learning_rate": 3.7720717066510264e-07, "loss": 24.2656, "step": 21760 }, { "epoch": 1.445241415952713, "grad_norm": 192.7710723876953, "learning_rate": 3.7712302703702545e-07, "loss": 14.5625, "step": 21761 }, { "epoch": 1.4453078302450688, "grad_norm": 110.90586853027344, "learning_rate": 3.7703889061402995e-07, "loss": 12.4141, "step": 21762 }, { "epoch": 1.4453742445374245, "grad_norm": 122.55046844482422, "learning_rate": 3.7695476139708927e-07, "loss": 15.875, "step": 21763 }, { "epoch": 1.4454406588297801, "grad_norm": 227.29202270507812, "learning_rate": 3.7687063938717664e-07, "loss": 14.3438, "step": 21764 }, { "epoch": 1.445507073122136, "grad_norm": 390.7480773925781, "learning_rate": 3.7678652458526516e-07, "loss": 18.125, "step": 21765 }, { "epoch": 1.4455734874144917, "grad_norm": 503.3343200683594, "learning_rate": 3.7670241699232785e-07, "loss": 16.1719, "step": 21766 }, { "epoch": 1.4456399017068473, "grad_norm": 502.9233093261719, "learning_rate": 3.7661831660933785e-07, "loss": 18.2031, "step": 21767 }, { "epoch": 1.445706315999203, "grad_norm": 148.79721069335938, "learning_rate": 3.7653422343726706e-07, "loss": 16.4062, "step": 21768 }, { "epoch": 1.4457727302915586, "grad_norm": 787.2274169921875, "learning_rate": 3.7645013747708943e-07, "loss": 17.5625, "step": 21769 }, { "epoch": 1.4458391445839145, "grad_norm": 225.93551635742188, "learning_rate": 3.7636605872977677e-07, "loss": 21.25, "step": 21770 }, { "epoch": 1.4459055588762701, "grad_norm": 182.17645263671875, "learning_rate": 3.76281987196302e-07, "loss": 12.375, "step": 21771 }, { "epoch": 1.4459719731686258, "grad_norm": 162.8289031982422, "learning_rate": 3.7619792287763754e-07, "loss": 17.0156, "step": 21772 }, { "epoch": 1.4460383874609817, "grad_norm": 140.37997436523438, "learning_rate": 3.7611386577475614e-07, "loss": 17.3906, "step": 21773 }, { "epoch": 1.4461048017533373, "grad_norm": 115.48721313476562, "learning_rate": 3.76029815888629e-07, "loss": 14.0938, "step": 21774 }, { "epoch": 1.446171216045693, "grad_norm": 421.8968811035156, "learning_rate": 3.7594577322022994e-07, "loss": 20.3125, "step": 21775 }, { "epoch": 1.4462376303380489, "grad_norm": 182.44081115722656, "learning_rate": 3.7586173777052986e-07, "loss": 16.0156, "step": 21776 }, { "epoch": 1.4463040446304045, "grad_norm": 178.59918212890625, "learning_rate": 3.7577770954050146e-07, "loss": 13.7656, "step": 21777 }, { "epoch": 1.4463704589227602, "grad_norm": 229.07687377929688, "learning_rate": 3.756936885311165e-07, "loss": 12.7344, "step": 21778 }, { "epoch": 1.4464368732151158, "grad_norm": 97.60514831542969, "learning_rate": 3.7560967474334695e-07, "loss": 15.0625, "step": 21779 }, { "epoch": 1.4465032875074715, "grad_norm": 646.635986328125, "learning_rate": 3.755256681781647e-07, "loss": 14.9062, "step": 21780 }, { "epoch": 1.4465697017998274, "grad_norm": 135.02841186523438, "learning_rate": 3.754416688365416e-07, "loss": 15.8125, "step": 21781 }, { "epoch": 1.446636116092183, "grad_norm": 249.47679138183594, "learning_rate": 3.7535767671944864e-07, "loss": 20.4844, "step": 21782 }, { "epoch": 1.4467025303845387, "grad_norm": 127.37657928466797, "learning_rate": 3.752736918278584e-07, "loss": 13.625, "step": 21783 }, { "epoch": 1.4467689446768945, "grad_norm": 118.82147216796875, "learning_rate": 3.751897141627416e-07, "loss": 17.1719, "step": 21784 }, { "epoch": 1.4468353589692502, "grad_norm": 153.603515625, "learning_rate": 3.7510574372507e-07, "loss": 14.0156, "step": 21785 }, { "epoch": 1.4469017732616059, "grad_norm": 134.46630859375, "learning_rate": 3.7502178051581467e-07, "loss": 13.3906, "step": 21786 }, { "epoch": 1.4469681875539617, "grad_norm": 141.15525817871094, "learning_rate": 3.74937824535947e-07, "loss": 13.3125, "step": 21787 }, { "epoch": 1.4470346018463174, "grad_norm": 364.5652770996094, "learning_rate": 3.7485387578643825e-07, "loss": 16.2812, "step": 21788 }, { "epoch": 1.447101016138673, "grad_norm": 109.50534057617188, "learning_rate": 3.7476993426825966e-07, "loss": 16.8438, "step": 21789 }, { "epoch": 1.4471674304310287, "grad_norm": 238.03443908691406, "learning_rate": 3.746859999823816e-07, "loss": 17.1094, "step": 21790 }, { "epoch": 1.4472338447233843, "grad_norm": 221.65753173828125, "learning_rate": 3.746020729297753e-07, "loss": 14.8594, "step": 21791 }, { "epoch": 1.4473002590157402, "grad_norm": 210.8195343017578, "learning_rate": 3.7451815311141164e-07, "loss": 18.8594, "step": 21792 }, { "epoch": 1.4473666733080959, "grad_norm": 261.97332763671875, "learning_rate": 3.7443424052826134e-07, "loss": 22.0625, "step": 21793 }, { "epoch": 1.4474330876004515, "grad_norm": 248.6901397705078, "learning_rate": 3.7435033518129497e-07, "loss": 19.1562, "step": 21794 }, { "epoch": 1.4474995018928074, "grad_norm": 140.70904541015625, "learning_rate": 3.742664370714835e-07, "loss": 16.4531, "step": 21795 }, { "epoch": 1.447565916185163, "grad_norm": 117.5841064453125, "learning_rate": 3.741825461997964e-07, "loss": 15.2656, "step": 21796 }, { "epoch": 1.4476323304775187, "grad_norm": 172.48712158203125, "learning_rate": 3.7409866256720546e-07, "loss": 12.2656, "step": 21797 }, { "epoch": 1.4476987447698746, "grad_norm": 142.98580932617188, "learning_rate": 3.7401478617467984e-07, "loss": 8.7109, "step": 21798 }, { "epoch": 1.4477651590622302, "grad_norm": 195.86961364746094, "learning_rate": 3.7393091702319023e-07, "loss": 13.4688, "step": 21799 }, { "epoch": 1.447831573354586, "grad_norm": 245.1080780029297, "learning_rate": 3.7384705511370685e-07, "loss": 17.5938, "step": 21800 }, { "epoch": 1.4478979876469416, "grad_norm": 539.3285522460938, "learning_rate": 3.7376320044719956e-07, "loss": 13.8438, "step": 21801 }, { "epoch": 1.4479644019392972, "grad_norm": 518.36328125, "learning_rate": 3.7367935302463847e-07, "loss": 17.8125, "step": 21802 }, { "epoch": 1.448030816231653, "grad_norm": 171.20799255371094, "learning_rate": 3.7359551284699354e-07, "loss": 15.0469, "step": 21803 }, { "epoch": 1.4480972305240087, "grad_norm": 494.6412658691406, "learning_rate": 3.7351167991523447e-07, "loss": 16.5312, "step": 21804 }, { "epoch": 1.4481636448163644, "grad_norm": 427.4228515625, "learning_rate": 3.7342785423033105e-07, "loss": 19.6562, "step": 21805 }, { "epoch": 1.4482300591087203, "grad_norm": 266.1409606933594, "learning_rate": 3.7334403579325324e-07, "loss": 17.3906, "step": 21806 }, { "epoch": 1.448296473401076, "grad_norm": 160.0298309326172, "learning_rate": 3.7326022460496963e-07, "loss": 16.4531, "step": 21807 }, { "epoch": 1.4483628876934316, "grad_norm": 152.9410400390625, "learning_rate": 3.731764206664509e-07, "loss": 12.7344, "step": 21808 }, { "epoch": 1.4484293019857875, "grad_norm": 225.2731475830078, "learning_rate": 3.730926239786657e-07, "loss": 15.2031, "step": 21809 }, { "epoch": 1.448495716278143, "grad_norm": 473.7994079589844, "learning_rate": 3.7300883454258345e-07, "loss": 18.8125, "step": 21810 }, { "epoch": 1.4485621305704988, "grad_norm": 102.69921875, "learning_rate": 3.729250523591735e-07, "loss": 12.4219, "step": 21811 }, { "epoch": 1.4486285448628544, "grad_norm": 188.12173461914062, "learning_rate": 3.728412774294053e-07, "loss": 17.0156, "step": 21812 }, { "epoch": 1.44869495915521, "grad_norm": 80.99212646484375, "learning_rate": 3.727575097542468e-07, "loss": 11.625, "step": 21813 }, { "epoch": 1.448761373447566, "grad_norm": 151.07028198242188, "learning_rate": 3.7267374933466844e-07, "loss": 15.9688, "step": 21814 }, { "epoch": 1.4488277877399216, "grad_norm": 437.30657958984375, "learning_rate": 3.725899961716381e-07, "loss": 15.8906, "step": 21815 }, { "epoch": 1.4488942020322773, "grad_norm": 113.19160461425781, "learning_rate": 3.7250625026612493e-07, "loss": 17.5469, "step": 21816 }, { "epoch": 1.4489606163246331, "grad_norm": 152.062255859375, "learning_rate": 3.7242251161909754e-07, "loss": 14.0781, "step": 21817 }, { "epoch": 1.4490270306169888, "grad_norm": 181.34475708007812, "learning_rate": 3.723387802315248e-07, "loss": 14.4531, "step": 21818 }, { "epoch": 1.4490934449093444, "grad_norm": 906.8052978515625, "learning_rate": 3.7225505610437493e-07, "loss": 20.8906, "step": 21819 }, { "epoch": 1.4491598592017003, "grad_norm": 584.5882568359375, "learning_rate": 3.721713392386171e-07, "loss": 17.0156, "step": 21820 }, { "epoch": 1.449226273494056, "grad_norm": 546.14501953125, "learning_rate": 3.7208762963521846e-07, "loss": 21.5938, "step": 21821 }, { "epoch": 1.4492926877864116, "grad_norm": 180.09176635742188, "learning_rate": 3.7200392729514863e-07, "loss": 12.9375, "step": 21822 }, { "epoch": 1.4493591020787673, "grad_norm": 193.2418212890625, "learning_rate": 3.71920232219375e-07, "loss": 17.2344, "step": 21823 }, { "epoch": 1.449425516371123, "grad_norm": 237.68101501464844, "learning_rate": 3.718365444088659e-07, "loss": 13.0156, "step": 21824 }, { "epoch": 1.4494919306634788, "grad_norm": 330.95452880859375, "learning_rate": 3.717528638645895e-07, "loss": 12.4688, "step": 21825 }, { "epoch": 1.4495583449558345, "grad_norm": 95.88558197021484, "learning_rate": 3.7166919058751366e-07, "loss": 13.9766, "step": 21826 }, { "epoch": 1.4496247592481901, "grad_norm": 410.14312744140625, "learning_rate": 3.7158552457860627e-07, "loss": 12.0078, "step": 21827 }, { "epoch": 1.449691173540546, "grad_norm": 121.21723175048828, "learning_rate": 3.715018658388356e-07, "loss": 13.7969, "step": 21828 }, { "epoch": 1.4497575878329017, "grad_norm": 156.93997192382812, "learning_rate": 3.714182143691682e-07, "loss": 13.7812, "step": 21829 }, { "epoch": 1.4498240021252573, "grad_norm": 86.7434310913086, "learning_rate": 3.7133457017057315e-07, "loss": 14.8438, "step": 21830 }, { "epoch": 1.4498904164176132, "grad_norm": 595.7027587890625, "learning_rate": 3.712509332440169e-07, "loss": 16.5938, "step": 21831 }, { "epoch": 1.4499568307099688, "grad_norm": 157.6796112060547, "learning_rate": 3.711673035904673e-07, "loss": 18.8438, "step": 21832 }, { "epoch": 1.4500232450023245, "grad_norm": 191.72027587890625, "learning_rate": 3.7108368121089175e-07, "loss": 12.7344, "step": 21833 }, { "epoch": 1.4500896592946801, "grad_norm": 169.02293395996094, "learning_rate": 3.710000661062578e-07, "loss": 17.9531, "step": 21834 }, { "epoch": 1.4501560735870358, "grad_norm": 274.4909973144531, "learning_rate": 3.709164582775317e-07, "loss": 14.2344, "step": 21835 }, { "epoch": 1.4502224878793917, "grad_norm": 218.16143798828125, "learning_rate": 3.7083285772568206e-07, "loss": 15.9375, "step": 21836 }, { "epoch": 1.4502889021717473, "grad_norm": 275.86431884765625, "learning_rate": 3.7074926445167475e-07, "loss": 15.0, "step": 21837 }, { "epoch": 1.450355316464103, "grad_norm": 215.62037658691406, "learning_rate": 3.7066567845647666e-07, "loss": 16.0781, "step": 21838 }, { "epoch": 1.4504217307564589, "grad_norm": 114.07166290283203, "learning_rate": 3.705820997410559e-07, "loss": 17.25, "step": 21839 }, { "epoch": 1.4504881450488145, "grad_norm": 253.28729248046875, "learning_rate": 3.7049852830637806e-07, "loss": 14.5625, "step": 21840 }, { "epoch": 1.4505545593411702, "grad_norm": 247.31683349609375, "learning_rate": 3.704149641534102e-07, "loss": 18.9844, "step": 21841 }, { "epoch": 1.450620973633526, "grad_norm": 1592.516357421875, "learning_rate": 3.703314072831192e-07, "loss": 19.4531, "step": 21842 }, { "epoch": 1.4506873879258817, "grad_norm": 234.86082458496094, "learning_rate": 3.702478576964712e-07, "loss": 21.625, "step": 21843 }, { "epoch": 1.4507538022182374, "grad_norm": 714.0208740234375, "learning_rate": 3.701643153944329e-07, "loss": 30.9062, "step": 21844 }, { "epoch": 1.450820216510593, "grad_norm": 124.56126403808594, "learning_rate": 3.70080780377971e-07, "loss": 17.5156, "step": 21845 }, { "epoch": 1.4508866308029487, "grad_norm": 353.3447265625, "learning_rate": 3.699972526480506e-07, "loss": 16.1406, "step": 21846 }, { "epoch": 1.4509530450953045, "grad_norm": 240.9296875, "learning_rate": 3.699137322056395e-07, "loss": 17.0781, "step": 21847 }, { "epoch": 1.4510194593876602, "grad_norm": 226.6404571533203, "learning_rate": 3.6983021905170265e-07, "loss": 13.3594, "step": 21848 }, { "epoch": 1.4510858736800158, "grad_norm": 163.50030517578125, "learning_rate": 3.6974671318720643e-07, "loss": 11.5078, "step": 21849 }, { "epoch": 1.4511522879723717, "grad_norm": 331.7916564941406, "learning_rate": 3.696632146131168e-07, "loss": 23.6875, "step": 21850 }, { "epoch": 1.4512187022647274, "grad_norm": 117.19430541992188, "learning_rate": 3.6957972333039966e-07, "loss": 13.6719, "step": 21851 }, { "epoch": 1.451285116557083, "grad_norm": 254.70924377441406, "learning_rate": 3.694962393400207e-07, "loss": 20.1719, "step": 21852 }, { "epoch": 1.451351530849439, "grad_norm": 164.51832580566406, "learning_rate": 3.6941276264294617e-07, "loss": 17.75, "step": 21853 }, { "epoch": 1.4514179451417946, "grad_norm": 162.87135314941406, "learning_rate": 3.6932929324014074e-07, "loss": 14.0312, "step": 21854 }, { "epoch": 1.4514843594341502, "grad_norm": 160.9585723876953, "learning_rate": 3.6924583113257036e-07, "loss": 13.3281, "step": 21855 }, { "epoch": 1.4515507737265059, "grad_norm": 171.12571716308594, "learning_rate": 3.691623763212005e-07, "loss": 16.8125, "step": 21856 }, { "epoch": 1.4516171880188615, "grad_norm": 225.42518615722656, "learning_rate": 3.6907892880699655e-07, "loss": 17.8906, "step": 21857 }, { "epoch": 1.4516836023112174, "grad_norm": 126.30559539794922, "learning_rate": 3.6899548859092367e-07, "loss": 14.0938, "step": 21858 }, { "epoch": 1.451750016603573, "grad_norm": 165.49620056152344, "learning_rate": 3.6891205567394746e-07, "loss": 18.5312, "step": 21859 }, { "epoch": 1.4518164308959287, "grad_norm": 155.97679138183594, "learning_rate": 3.68828630057032e-07, "loss": 12.2031, "step": 21860 }, { "epoch": 1.4518828451882846, "grad_norm": 227.96188354492188, "learning_rate": 3.6874521174114383e-07, "loss": 15.2656, "step": 21861 }, { "epoch": 1.4519492594806402, "grad_norm": 180.4437255859375, "learning_rate": 3.686618007272465e-07, "loss": 15.4531, "step": 21862 }, { "epoch": 1.452015673772996, "grad_norm": 281.5980224609375, "learning_rate": 3.6857839701630553e-07, "loss": 18.3438, "step": 21863 }, { "epoch": 1.4520820880653518, "grad_norm": 214.7698974609375, "learning_rate": 3.6849500060928553e-07, "loss": 21.0625, "step": 21864 }, { "epoch": 1.4521485023577074, "grad_norm": 190.31027221679688, "learning_rate": 3.6841161150715126e-07, "loss": 16.5156, "step": 21865 }, { "epoch": 1.452214916650063, "grad_norm": 414.7479248046875, "learning_rate": 3.683282297108672e-07, "loss": 21.25, "step": 21866 }, { "epoch": 1.4522813309424187, "grad_norm": 226.03753662109375, "learning_rate": 3.682448552213984e-07, "loss": 14.8125, "step": 21867 }, { "epoch": 1.4523477452347744, "grad_norm": 171.4469757080078, "learning_rate": 3.6816148803970804e-07, "loss": 15.7656, "step": 21868 }, { "epoch": 1.4524141595271303, "grad_norm": 303.7409362792969, "learning_rate": 3.6807812816676187e-07, "loss": 9.9609, "step": 21869 }, { "epoch": 1.452480573819486, "grad_norm": 242.62841796875, "learning_rate": 3.679947756035233e-07, "loss": 20.3125, "step": 21870 }, { "epoch": 1.4525469881118416, "grad_norm": 296.7081604003906, "learning_rate": 3.6791143035095663e-07, "loss": 17.7344, "step": 21871 }, { "epoch": 1.4526134024041975, "grad_norm": 96.85482788085938, "learning_rate": 3.678280924100261e-07, "loss": 12.5156, "step": 21872 }, { "epoch": 1.452679816696553, "grad_norm": 227.3785858154297, "learning_rate": 3.677447617816961e-07, "loss": 13.8281, "step": 21873 }, { "epoch": 1.4527462309889088, "grad_norm": 339.93829345703125, "learning_rate": 3.676614384669293e-07, "loss": 17.25, "step": 21874 }, { "epoch": 1.4528126452812646, "grad_norm": 294.7012634277344, "learning_rate": 3.675781224666907e-07, "loss": 16.6875, "step": 21875 }, { "epoch": 1.4528790595736203, "grad_norm": 131.98385620117188, "learning_rate": 3.6749481378194414e-07, "loss": 15.2344, "step": 21876 }, { "epoch": 1.452945473865976, "grad_norm": 337.05108642578125, "learning_rate": 3.674115124136521e-07, "loss": 15.0312, "step": 21877 }, { "epoch": 1.4530118881583316, "grad_norm": 123.16968536376953, "learning_rate": 3.673282183627796e-07, "loss": 26.1719, "step": 21878 }, { "epoch": 1.4530783024506873, "grad_norm": 334.51373291015625, "learning_rate": 3.672449316302891e-07, "loss": 15.6875, "step": 21879 }, { "epoch": 1.4531447167430431, "grad_norm": 208.04908752441406, "learning_rate": 3.6716165221714444e-07, "loss": 14.4531, "step": 21880 }, { "epoch": 1.4532111310353988, "grad_norm": 191.52053833007812, "learning_rate": 3.670783801243088e-07, "loss": 12.7266, "step": 21881 }, { "epoch": 1.4532775453277544, "grad_norm": 160.918212890625, "learning_rate": 3.669951153527455e-07, "loss": 15.875, "step": 21882 }, { "epoch": 1.4533439596201103, "grad_norm": 212.6705322265625, "learning_rate": 3.669118579034177e-07, "loss": 15.4375, "step": 21883 }, { "epoch": 1.453410373912466, "grad_norm": 202.50173950195312, "learning_rate": 3.668286077772889e-07, "loss": 15.5781, "step": 21884 }, { "epoch": 1.4534767882048216, "grad_norm": 195.99615478515625, "learning_rate": 3.6674536497532084e-07, "loss": 17.6719, "step": 21885 }, { "epoch": 1.4535432024971775, "grad_norm": 112.08353424072266, "learning_rate": 3.6666212949847796e-07, "loss": 12.6406, "step": 21886 }, { "epoch": 1.4536096167895332, "grad_norm": 233.8577423095703, "learning_rate": 3.665789013477221e-07, "loss": 18.6719, "step": 21887 }, { "epoch": 1.4536760310818888, "grad_norm": 116.9768295288086, "learning_rate": 3.6649568052401614e-07, "loss": 17.5625, "step": 21888 }, { "epoch": 1.4537424453742445, "grad_norm": 149.229248046875, "learning_rate": 3.6641246702832294e-07, "loss": 14.5469, "step": 21889 }, { "epoch": 1.4538088596666001, "grad_norm": 147.48843383789062, "learning_rate": 3.6632926086160497e-07, "loss": 17.7344, "step": 21890 }, { "epoch": 1.453875273958956, "grad_norm": 163.02178955078125, "learning_rate": 3.662460620248247e-07, "loss": 13.2031, "step": 21891 }, { "epoch": 1.4539416882513116, "grad_norm": 826.2056274414062, "learning_rate": 3.6616287051894477e-07, "loss": 17.9375, "step": 21892 }, { "epoch": 1.4540081025436673, "grad_norm": 1173.4140625, "learning_rate": 3.6607968634492667e-07, "loss": 16.6016, "step": 21893 }, { "epoch": 1.4540745168360232, "grad_norm": 161.1675262451172, "learning_rate": 3.6599650950373395e-07, "loss": 17.8125, "step": 21894 }, { "epoch": 1.4541409311283788, "grad_norm": 139.88922119140625, "learning_rate": 3.6591333999632765e-07, "loss": 17.3594, "step": 21895 }, { "epoch": 1.4542073454207345, "grad_norm": 152.74160766601562, "learning_rate": 3.658301778236702e-07, "loss": 14.1719, "step": 21896 }, { "epoch": 1.4542737597130904, "grad_norm": 149.8483123779297, "learning_rate": 3.6574702298672356e-07, "loss": 16.0, "step": 21897 }, { "epoch": 1.454340174005446, "grad_norm": 273.30084228515625, "learning_rate": 3.6566387548644994e-07, "loss": 15.2188, "step": 21898 }, { "epoch": 1.4544065882978017, "grad_norm": 240.3262939453125, "learning_rate": 3.6558073532381015e-07, "loss": 13.9062, "step": 21899 }, { "epoch": 1.4544730025901573, "grad_norm": 218.38970947265625, "learning_rate": 3.6549760249976735e-07, "loss": 16.3281, "step": 21900 }, { "epoch": 1.454539416882513, "grad_norm": 177.2173614501953, "learning_rate": 3.6541447701528193e-07, "loss": 13.4531, "step": 21901 }, { "epoch": 1.4546058311748689, "grad_norm": 147.11647033691406, "learning_rate": 3.6533135887131606e-07, "loss": 16.4141, "step": 21902 }, { "epoch": 1.4546722454672245, "grad_norm": 120.09070587158203, "learning_rate": 3.652482480688309e-07, "loss": 17.9375, "step": 21903 }, { "epoch": 1.4547386597595802, "grad_norm": 130.4036865234375, "learning_rate": 3.651651446087881e-07, "loss": 14.9844, "step": 21904 }, { "epoch": 1.454805074051936, "grad_norm": 310.19525146484375, "learning_rate": 3.6508204849214886e-07, "loss": 16.6875, "step": 21905 }, { "epoch": 1.4548714883442917, "grad_norm": 250.06297302246094, "learning_rate": 3.649989597198747e-07, "loss": 12.4766, "step": 21906 }, { "epoch": 1.4549379026366474, "grad_norm": 170.09864807128906, "learning_rate": 3.649158782929257e-07, "loss": 15.8125, "step": 21907 }, { "epoch": 1.4550043169290032, "grad_norm": 242.52357482910156, "learning_rate": 3.6483280421226426e-07, "loss": 20.1562, "step": 21908 }, { "epoch": 1.4550707312213589, "grad_norm": 278.7584228515625, "learning_rate": 3.647497374788504e-07, "loss": 14.6641, "step": 21909 }, { "epoch": 1.4551371455137145, "grad_norm": 109.3176040649414, "learning_rate": 3.646666780936453e-07, "loss": 14.3125, "step": 21910 }, { "epoch": 1.4552035598060702, "grad_norm": 156.71214294433594, "learning_rate": 3.645836260576096e-07, "loss": 12.2422, "step": 21911 }, { "epoch": 1.4552699740984258, "grad_norm": 188.21511840820312, "learning_rate": 3.6450058137170416e-07, "loss": 17.0938, "step": 21912 }, { "epoch": 1.4553363883907817, "grad_norm": 248.1499481201172, "learning_rate": 3.644175440368895e-07, "loss": 23.2812, "step": 21913 }, { "epoch": 1.4554028026831374, "grad_norm": 174.91702270507812, "learning_rate": 3.643345140541263e-07, "loss": 16.2031, "step": 21914 }, { "epoch": 1.455469216975493, "grad_norm": 403.50311279296875, "learning_rate": 3.6425149142437484e-07, "loss": 18.6797, "step": 21915 }, { "epoch": 1.455535631267849, "grad_norm": 212.98411560058594, "learning_rate": 3.6416847614859545e-07, "loss": 16.7969, "step": 21916 }, { "epoch": 1.4556020455602046, "grad_norm": 158.51243591308594, "learning_rate": 3.64085468227749e-07, "loss": 20.125, "step": 21917 }, { "epoch": 1.4556684598525602, "grad_norm": 503.8112487792969, "learning_rate": 3.6400246766279474e-07, "loss": 14.7656, "step": 21918 }, { "epoch": 1.455734874144916, "grad_norm": 120.3080062866211, "learning_rate": 3.6391947445469315e-07, "loss": 14.1719, "step": 21919 }, { "epoch": 1.4558012884372717, "grad_norm": 174.67906188964844, "learning_rate": 3.638364886044043e-07, "loss": 20.5625, "step": 21920 }, { "epoch": 1.4558677027296274, "grad_norm": 108.79928588867188, "learning_rate": 3.6375351011288825e-07, "loss": 17.7344, "step": 21921 }, { "epoch": 1.455934117021983, "grad_norm": 203.92575073242188, "learning_rate": 3.6367053898110457e-07, "loss": 18.5625, "step": 21922 }, { "epoch": 1.4560005313143387, "grad_norm": 140.5149688720703, "learning_rate": 3.635875752100136e-07, "loss": 17.5156, "step": 21923 }, { "epoch": 1.4560669456066946, "grad_norm": 201.2375030517578, "learning_rate": 3.6350461880057384e-07, "loss": 12.7188, "step": 21924 }, { "epoch": 1.4561333598990502, "grad_norm": 149.26611328125, "learning_rate": 3.634216697537463e-07, "loss": 21.8125, "step": 21925 }, { "epoch": 1.456199774191406, "grad_norm": 178.6811981201172, "learning_rate": 3.6333872807048946e-07, "loss": 16.9219, "step": 21926 }, { "epoch": 1.4562661884837618, "grad_norm": 268.9923095703125, "learning_rate": 3.632557937517632e-07, "loss": 15.0625, "step": 21927 }, { "epoch": 1.4563326027761174, "grad_norm": 148.36468505859375, "learning_rate": 3.6317286679852666e-07, "loss": 10.5312, "step": 21928 }, { "epoch": 1.456399017068473, "grad_norm": 178.69654846191406, "learning_rate": 3.6308994721173915e-07, "loss": 16.1094, "step": 21929 }, { "epoch": 1.456465431360829, "grad_norm": 1769.764404296875, "learning_rate": 3.6300703499236e-07, "loss": 16.2812, "step": 21930 }, { "epoch": 1.4565318456531846, "grad_norm": 208.59744262695312, "learning_rate": 3.629241301413484e-07, "loss": 16.0469, "step": 21931 }, { "epoch": 1.4565982599455403, "grad_norm": 459.41156005859375, "learning_rate": 3.628412326596624e-07, "loss": 14.0156, "step": 21932 }, { "epoch": 1.456664674237896, "grad_norm": 240.03488159179688, "learning_rate": 3.627583425482623e-07, "loss": 17.8438, "step": 21933 }, { "epoch": 1.4567310885302516, "grad_norm": 156.29197692871094, "learning_rate": 3.6267545980810597e-07, "loss": 16.8594, "step": 21934 }, { "epoch": 1.4567975028226074, "grad_norm": 364.677978515625, "learning_rate": 3.625925844401524e-07, "loss": 18.375, "step": 21935 }, { "epoch": 1.456863917114963, "grad_norm": 3737.8876953125, "learning_rate": 3.625097164453602e-07, "loss": 15.4531, "step": 21936 }, { "epoch": 1.4569303314073188, "grad_norm": 113.28179931640625, "learning_rate": 3.6242685582468835e-07, "loss": 15.375, "step": 21937 }, { "epoch": 1.4569967456996746, "grad_norm": 204.85450744628906, "learning_rate": 3.6234400257909436e-07, "loss": 14.8594, "step": 21938 }, { "epoch": 1.4570631599920303, "grad_norm": 212.97410583496094, "learning_rate": 3.6226115670953793e-07, "loss": 14.4844, "step": 21939 }, { "epoch": 1.457129574284386, "grad_norm": 132.94175720214844, "learning_rate": 3.621783182169763e-07, "loss": 15.1406, "step": 21940 }, { "epoch": 1.4571959885767418, "grad_norm": 355.8705749511719, "learning_rate": 3.6209548710236815e-07, "loss": 19.1875, "step": 21941 }, { "epoch": 1.4572624028690975, "grad_norm": 110.51246643066406, "learning_rate": 3.620126633666716e-07, "loss": 14.6094, "step": 21942 }, { "epoch": 1.4573288171614531, "grad_norm": 269.7581481933594, "learning_rate": 3.619298470108446e-07, "loss": 15.9219, "step": 21943 }, { "epoch": 1.4573952314538088, "grad_norm": 266.3907775878906, "learning_rate": 3.6184703803584525e-07, "loss": 17.5469, "step": 21944 }, { "epoch": 1.4574616457461644, "grad_norm": 167.93850708007812, "learning_rate": 3.617642364426318e-07, "loss": 14.9688, "step": 21945 }, { "epoch": 1.4575280600385203, "grad_norm": 136.6689453125, "learning_rate": 3.616814422321608e-07, "loss": 18.4688, "step": 21946 }, { "epoch": 1.457594474330876, "grad_norm": 139.95887756347656, "learning_rate": 3.6159865540539113e-07, "loss": 17.5938, "step": 21947 }, { "epoch": 1.4576608886232316, "grad_norm": 357.0291748046875, "learning_rate": 3.615158759632806e-07, "loss": 16.3125, "step": 21948 }, { "epoch": 1.4577273029155875, "grad_norm": 157.90582275390625, "learning_rate": 3.614331039067854e-07, "loss": 17.3906, "step": 21949 }, { "epoch": 1.4577937172079432, "grad_norm": 132.9464569091797, "learning_rate": 3.6135033923686467e-07, "loss": 15.4844, "step": 21950 }, { "epoch": 1.4578601315002988, "grad_norm": 235.1529998779297, "learning_rate": 3.612675819544745e-07, "loss": 14.8281, "step": 21951 }, { "epoch": 1.4579265457926547, "grad_norm": 456.767822265625, "learning_rate": 3.611848320605726e-07, "loss": 19.0781, "step": 21952 }, { "epoch": 1.4579929600850103, "grad_norm": 243.7066650390625, "learning_rate": 3.6110208955611623e-07, "loss": 18.8125, "step": 21953 }, { "epoch": 1.458059374377366, "grad_norm": 174.22012329101562, "learning_rate": 3.6101935444206255e-07, "loss": 13.7031, "step": 21954 }, { "epoch": 1.4581257886697216, "grad_norm": 213.32400512695312, "learning_rate": 3.609366267193684e-07, "loss": 20.2656, "step": 21955 }, { "epoch": 1.4581922029620775, "grad_norm": 211.11563110351562, "learning_rate": 3.608539063889913e-07, "loss": 11.1094, "step": 21956 }, { "epoch": 1.4582586172544332, "grad_norm": 220.6664581298828, "learning_rate": 3.607711934518869e-07, "loss": 16.5156, "step": 21957 }, { "epoch": 1.4583250315467888, "grad_norm": 174.00367736816406, "learning_rate": 3.606884879090135e-07, "loss": 15.1406, "step": 21958 }, { "epoch": 1.4583914458391445, "grad_norm": 645.214111328125, "learning_rate": 3.6060578976132663e-07, "loss": 34.5469, "step": 21959 }, { "epoch": 1.4584578601315004, "grad_norm": 995.4210205078125, "learning_rate": 3.605230990097833e-07, "loss": 13.2578, "step": 21960 }, { "epoch": 1.458524274423856, "grad_norm": 245.0083770751953, "learning_rate": 3.6044041565534e-07, "loss": 17.0312, "step": 21961 }, { "epoch": 1.4585906887162117, "grad_norm": 370.62408447265625, "learning_rate": 3.6035773969895357e-07, "loss": 19.5312, "step": 21962 }, { "epoch": 1.4586571030085675, "grad_norm": 312.7749328613281, "learning_rate": 3.602750711415792e-07, "loss": 19.3438, "step": 21963 }, { "epoch": 1.4587235173009232, "grad_norm": 393.2178955078125, "learning_rate": 3.601924099841747e-07, "loss": 11.9062, "step": 21964 }, { "epoch": 1.4587899315932789, "grad_norm": 208.98077392578125, "learning_rate": 3.6010975622769503e-07, "loss": 18.2969, "step": 21965 }, { "epoch": 1.4588563458856345, "grad_norm": 212.74037170410156, "learning_rate": 3.600271098730967e-07, "loss": 18.0, "step": 21966 }, { "epoch": 1.4589227601779904, "grad_norm": 218.06692504882812, "learning_rate": 3.5994447092133584e-07, "loss": 16.875, "step": 21967 }, { "epoch": 1.458989174470346, "grad_norm": 328.1406555175781, "learning_rate": 3.598618393733682e-07, "loss": 17.2812, "step": 21968 }, { "epoch": 1.4590555887627017, "grad_norm": 92.6484375, "learning_rate": 3.5977921523014967e-07, "loss": 16.625, "step": 21969 }, { "epoch": 1.4591220030550573, "grad_norm": 232.31607055664062, "learning_rate": 3.5969659849263645e-07, "loss": 13.9453, "step": 21970 }, { "epoch": 1.4591884173474132, "grad_norm": 251.6407012939453, "learning_rate": 3.5961398916178297e-07, "loss": 16.4531, "step": 21971 }, { "epoch": 1.4592548316397689, "grad_norm": 137.1525115966797, "learning_rate": 3.595313872385463e-07, "loss": 15.4062, "step": 21972 }, { "epoch": 1.4593212459321245, "grad_norm": 349.5707092285156, "learning_rate": 3.5944879272388083e-07, "loss": 18.5938, "step": 21973 }, { "epoch": 1.4593876602244804, "grad_norm": 178.85304260253906, "learning_rate": 3.5936620561874245e-07, "loss": 17.9531, "step": 21974 }, { "epoch": 1.459454074516836, "grad_norm": 248.3751220703125, "learning_rate": 3.5928362592408636e-07, "loss": 16.1875, "step": 21975 }, { "epoch": 1.4595204888091917, "grad_norm": 106.1175537109375, "learning_rate": 3.592010536408678e-07, "loss": 15.8125, "step": 21976 }, { "epoch": 1.4595869031015474, "grad_norm": 180.1316375732422, "learning_rate": 3.59118488770042e-07, "loss": 13.0156, "step": 21977 }, { "epoch": 1.4596533173939032, "grad_norm": 221.58541870117188, "learning_rate": 3.590359313125644e-07, "loss": 12.7031, "step": 21978 }, { "epoch": 1.459719731686259, "grad_norm": 362.34832763671875, "learning_rate": 3.5895338126938867e-07, "loss": 27.3906, "step": 21979 }, { "epoch": 1.4597861459786146, "grad_norm": 132.12362670898438, "learning_rate": 3.588708386414714e-07, "loss": 16.2812, "step": 21980 }, { "epoch": 1.4598525602709702, "grad_norm": 321.8169860839844, "learning_rate": 3.587883034297662e-07, "loss": 14.1562, "step": 21981 }, { "epoch": 1.459918974563326, "grad_norm": 157.7259521484375, "learning_rate": 3.5870577563522806e-07, "loss": 15.625, "step": 21982 }, { "epoch": 1.4599853888556817, "grad_norm": 156.3186798095703, "learning_rate": 3.586232552588118e-07, "loss": 14.0938, "step": 21983 }, { "epoch": 1.4600518031480374, "grad_norm": 185.8882598876953, "learning_rate": 3.585407423014719e-07, "loss": 13.5781, "step": 21984 }, { "epoch": 1.4601182174403933, "grad_norm": 623.6843872070312, "learning_rate": 3.5845823676416274e-07, "loss": 20.75, "step": 21985 }, { "epoch": 1.460184631732749, "grad_norm": 255.96714782714844, "learning_rate": 3.5837573864783886e-07, "loss": 18.7031, "step": 21986 }, { "epoch": 1.4602510460251046, "grad_norm": 109.40625762939453, "learning_rate": 3.5829324795345474e-07, "loss": 11.75, "step": 21987 }, { "epoch": 1.4603174603174602, "grad_norm": 288.1189880371094, "learning_rate": 3.5821076468196353e-07, "loss": 12.5156, "step": 21988 }, { "epoch": 1.4603838746098161, "grad_norm": 552.6396484375, "learning_rate": 3.581282888343209e-07, "loss": 13.0469, "step": 21989 }, { "epoch": 1.4604502889021718, "grad_norm": 368.674560546875, "learning_rate": 3.580458204114798e-07, "loss": 19.5781, "step": 21990 }, { "epoch": 1.4605167031945274, "grad_norm": 197.80990600585938, "learning_rate": 3.5796335941439437e-07, "loss": 15.9531, "step": 21991 }, { "epoch": 1.460583117486883, "grad_norm": 142.64181518554688, "learning_rate": 3.5788090584401865e-07, "loss": 12.4375, "step": 21992 }, { "epoch": 1.460649531779239, "grad_norm": 208.6486053466797, "learning_rate": 3.577984597013063e-07, "loss": 15.5156, "step": 21993 }, { "epoch": 1.4607159460715946, "grad_norm": 187.64413452148438, "learning_rate": 3.577160209872111e-07, "loss": 20.3438, "step": 21994 }, { "epoch": 1.4607823603639503, "grad_norm": 627.7403564453125, "learning_rate": 3.57633589702687e-07, "loss": 14.8281, "step": 21995 }, { "epoch": 1.4608487746563061, "grad_norm": 179.92359924316406, "learning_rate": 3.5755116584868626e-07, "loss": 16.8594, "step": 21996 }, { "epoch": 1.4609151889486618, "grad_norm": 161.19032287597656, "learning_rate": 3.5746874942616403e-07, "loss": 13.5312, "step": 21997 }, { "epoch": 1.4609816032410174, "grad_norm": 149.08485412597656, "learning_rate": 3.573863404360724e-07, "loss": 11.8516, "step": 21998 }, { "epoch": 1.461048017533373, "grad_norm": 217.3249053955078, "learning_rate": 3.5730393887936504e-07, "loss": 21.5312, "step": 21999 }, { "epoch": 1.461114431825729, "grad_norm": 89.63908386230469, "learning_rate": 3.5722154475699505e-07, "loss": 15.9844, "step": 22000 }, { "epoch": 1.4611808461180846, "grad_norm": 138.14495849609375, "learning_rate": 3.57139158069916e-07, "loss": 18.2188, "step": 22001 }, { "epoch": 1.4612472604104403, "grad_norm": 260.8353271484375, "learning_rate": 3.570567788190798e-07, "loss": 15.5469, "step": 22002 }, { "epoch": 1.461313674702796, "grad_norm": 207.9426727294922, "learning_rate": 3.569744070054407e-07, "loss": 15.9688, "step": 22003 }, { "epoch": 1.4613800889951518, "grad_norm": 100.74918365478516, "learning_rate": 3.5689204262995054e-07, "loss": 11.9688, "step": 22004 }, { "epoch": 1.4614465032875075, "grad_norm": 170.81781005859375, "learning_rate": 3.568096856935623e-07, "loss": 18.3594, "step": 22005 }, { "epoch": 1.4615129175798631, "grad_norm": 151.86260986328125, "learning_rate": 3.5672733619722883e-07, "loss": 16.0, "step": 22006 }, { "epoch": 1.461579331872219, "grad_norm": 173.506591796875, "learning_rate": 3.5664499414190264e-07, "loss": 14.3125, "step": 22007 }, { "epoch": 1.4616457461645747, "grad_norm": 436.0241394042969, "learning_rate": 3.5656265952853603e-07, "loss": 15.0938, "step": 22008 }, { "epoch": 1.4617121604569303, "grad_norm": 161.0034637451172, "learning_rate": 3.564803323580821e-07, "loss": 14.9297, "step": 22009 }, { "epoch": 1.461778574749286, "grad_norm": 251.0955047607422, "learning_rate": 3.5639801263149173e-07, "loss": 18.6562, "step": 22010 }, { "epoch": 1.4618449890416418, "grad_norm": 1427.73388671875, "learning_rate": 3.563157003497188e-07, "loss": 18.1406, "step": 22011 }, { "epoch": 1.4619114033339975, "grad_norm": 342.724853515625, "learning_rate": 3.5623339551371435e-07, "loss": 17.3906, "step": 22012 }, { "epoch": 1.4619778176263531, "grad_norm": 104.90536499023438, "learning_rate": 3.5615109812443066e-07, "loss": 15.5, "step": 22013 }, { "epoch": 1.4620442319187088, "grad_norm": 150.20440673828125, "learning_rate": 3.5606880818281994e-07, "loss": 16.3594, "step": 22014 }, { "epoch": 1.4621106462110647, "grad_norm": 1033.5955810546875, "learning_rate": 3.559865256898338e-07, "loss": 16.9375, "step": 22015 }, { "epoch": 1.4621770605034203, "grad_norm": 115.94377136230469, "learning_rate": 3.5590425064642427e-07, "loss": 12.75, "step": 22016 }, { "epoch": 1.462243474795776, "grad_norm": 191.4342803955078, "learning_rate": 3.558219830535433e-07, "loss": 16.875, "step": 22017 }, { "epoch": 1.4623098890881319, "grad_norm": 153.6360626220703, "learning_rate": 3.557397229121415e-07, "loss": 17.7656, "step": 22018 }, { "epoch": 1.4623763033804875, "grad_norm": 227.40725708007812, "learning_rate": 3.556574702231717e-07, "loss": 18.2656, "step": 22019 }, { "epoch": 1.4624427176728432, "grad_norm": 418.474609375, "learning_rate": 3.555752249875844e-07, "loss": 14.8281, "step": 22020 }, { "epoch": 1.462509131965199, "grad_norm": 214.09584045410156, "learning_rate": 3.5549298720633103e-07, "loss": 16.5625, "step": 22021 }, { "epoch": 1.4625755462575547, "grad_norm": 111.84638214111328, "learning_rate": 3.554107568803638e-07, "loss": 14.1562, "step": 22022 }, { "epoch": 1.4626419605499104, "grad_norm": 443.3147277832031, "learning_rate": 3.553285340106329e-07, "loss": 21.2031, "step": 22023 }, { "epoch": 1.462708374842266, "grad_norm": 284.66546630859375, "learning_rate": 3.5524631859808974e-07, "loss": 14.2031, "step": 22024 }, { "epoch": 1.4627747891346217, "grad_norm": 240.8098602294922, "learning_rate": 3.5516411064368537e-07, "loss": 17.3906, "step": 22025 }, { "epoch": 1.4628412034269775, "grad_norm": 419.6111145019531, "learning_rate": 3.550819101483711e-07, "loss": 10.9688, "step": 22026 }, { "epoch": 1.4629076177193332, "grad_norm": 245.0516357421875, "learning_rate": 3.549997171130967e-07, "loss": 15.5781, "step": 22027 }, { "epoch": 1.4629740320116889, "grad_norm": 487.2283630371094, "learning_rate": 3.5491753153881443e-07, "loss": 16.4922, "step": 22028 }, { "epoch": 1.4630404463040447, "grad_norm": 152.45550537109375, "learning_rate": 3.5483535342647373e-07, "loss": 17.9844, "step": 22029 }, { "epoch": 1.4631068605964004, "grad_norm": 363.0357971191406, "learning_rate": 3.547531827770257e-07, "loss": 17.4219, "step": 22030 }, { "epoch": 1.463173274888756, "grad_norm": 584.7924194335938, "learning_rate": 3.5467101959142077e-07, "loss": 16.2188, "step": 22031 }, { "epoch": 1.463239689181112, "grad_norm": 267.3214416503906, "learning_rate": 3.545888638706094e-07, "loss": 20.0938, "step": 22032 }, { "epoch": 1.4633061034734676, "grad_norm": 228.73411560058594, "learning_rate": 3.545067156155418e-07, "loss": 15.1562, "step": 22033 }, { "epoch": 1.4633725177658232, "grad_norm": 167.5081024169922, "learning_rate": 3.544245748271687e-07, "loss": 16.7188, "step": 22034 }, { "epoch": 1.4634389320581789, "grad_norm": 290.2420959472656, "learning_rate": 3.543424415064392e-07, "loss": 16.75, "step": 22035 }, { "epoch": 1.4635053463505345, "grad_norm": 235.14791870117188, "learning_rate": 3.542603156543047e-07, "loss": 13.2969, "step": 22036 }, { "epoch": 1.4635717606428904, "grad_norm": 182.6507568359375, "learning_rate": 3.5417819727171407e-07, "loss": 17.0156, "step": 22037 }, { "epoch": 1.463638174935246, "grad_norm": 162.08192443847656, "learning_rate": 3.5409608635961775e-07, "loss": 18.6562, "step": 22038 }, { "epoch": 1.4637045892276017, "grad_norm": 216.98728942871094, "learning_rate": 3.5401398291896544e-07, "loss": 22.8281, "step": 22039 }, { "epoch": 1.4637710035199576, "grad_norm": 106.28771209716797, "learning_rate": 3.539318869507069e-07, "loss": 11.9297, "step": 22040 }, { "epoch": 1.4638374178123132, "grad_norm": 124.3142318725586, "learning_rate": 3.538497984557917e-07, "loss": 20.3125, "step": 22041 }, { "epoch": 1.463903832104669, "grad_norm": 182.8014678955078, "learning_rate": 3.5376771743516974e-07, "loss": 14.9219, "step": 22042 }, { "epoch": 1.4639702463970248, "grad_norm": 190.57896423339844, "learning_rate": 3.5368564388978996e-07, "loss": 14.7188, "step": 22043 }, { "epoch": 1.4640366606893804, "grad_norm": 227.71630859375, "learning_rate": 3.536035778206019e-07, "loss": 11.8906, "step": 22044 }, { "epoch": 1.464103074981736, "grad_norm": 215.2360382080078, "learning_rate": 3.53521519228555e-07, "loss": 19.4062, "step": 22045 }, { "epoch": 1.4641694892740917, "grad_norm": 189.58180236816406, "learning_rate": 3.5343946811459826e-07, "loss": 17.5625, "step": 22046 }, { "epoch": 1.4642359035664474, "grad_norm": 237.36309814453125, "learning_rate": 3.5335742447968096e-07, "loss": 10.3828, "step": 22047 }, { "epoch": 1.4643023178588033, "grad_norm": 364.59149169921875, "learning_rate": 3.5327538832475243e-07, "loss": 16.75, "step": 22048 }, { "epoch": 1.464368732151159, "grad_norm": 519.7266235351562, "learning_rate": 3.531933596507607e-07, "loss": 16.625, "step": 22049 }, { "epoch": 1.4644351464435146, "grad_norm": 182.77760314941406, "learning_rate": 3.531113384586558e-07, "loss": 16.7969, "step": 22050 }, { "epoch": 1.4645015607358705, "grad_norm": 191.70254516601562, "learning_rate": 3.5302932474938563e-07, "loss": 14.9219, "step": 22051 }, { "epoch": 1.464567975028226, "grad_norm": 252.2295379638672, "learning_rate": 3.5294731852389915e-07, "loss": 20.4062, "step": 22052 }, { "epoch": 1.4646343893205818, "grad_norm": 189.9642791748047, "learning_rate": 3.5286531978314493e-07, "loss": 11.8984, "step": 22053 }, { "epoch": 1.4647008036129376, "grad_norm": 519.33349609375, "learning_rate": 3.527833285280716e-07, "loss": 25.0625, "step": 22054 }, { "epoch": 1.4647672179052933, "grad_norm": 331.3438415527344, "learning_rate": 3.5270134475962754e-07, "loss": 22.0625, "step": 22055 }, { "epoch": 1.464833632197649, "grad_norm": 186.72381591796875, "learning_rate": 3.5261936847876126e-07, "loss": 17.1562, "step": 22056 }, { "epoch": 1.4649000464900046, "grad_norm": 151.22967529296875, "learning_rate": 3.5253739968642025e-07, "loss": 17.7812, "step": 22057 }, { "epoch": 1.4649664607823603, "grad_norm": 358.87384033203125, "learning_rate": 3.524554383835536e-07, "loss": 17.1875, "step": 22058 }, { "epoch": 1.4650328750747161, "grad_norm": 252.87074279785156, "learning_rate": 3.523734845711094e-07, "loss": 12.5156, "step": 22059 }, { "epoch": 1.4650992893670718, "grad_norm": 239.02613830566406, "learning_rate": 3.5229153825003466e-07, "loss": 12.7109, "step": 22060 }, { "epoch": 1.4651657036594274, "grad_norm": 165.2266387939453, "learning_rate": 3.522095994212785e-07, "loss": 19.2812, "step": 22061 }, { "epoch": 1.4652321179517833, "grad_norm": 153.3227996826172, "learning_rate": 3.5212766808578774e-07, "loss": 13.1172, "step": 22062 }, { "epoch": 1.465298532244139, "grad_norm": 129.54977416992188, "learning_rate": 3.520457442445107e-07, "loss": 14.0078, "step": 22063 }, { "epoch": 1.4653649465364946, "grad_norm": 409.55462646484375, "learning_rate": 3.5196382789839474e-07, "loss": 17.9844, "step": 22064 }, { "epoch": 1.4654313608288505, "grad_norm": 185.15447998046875, "learning_rate": 3.5188191904838795e-07, "loss": 13.5469, "step": 22065 }, { "epoch": 1.4654977751212062, "grad_norm": 139.37437438964844, "learning_rate": 3.518000176954367e-07, "loss": 17.0, "step": 22066 }, { "epoch": 1.4655641894135618, "grad_norm": 600.0084838867188, "learning_rate": 3.517181238404897e-07, "loss": 20.7812, "step": 22067 }, { "epoch": 1.4656306037059175, "grad_norm": 158.6448516845703, "learning_rate": 3.516362374844933e-07, "loss": 13.2031, "step": 22068 }, { "epoch": 1.4656970179982731, "grad_norm": 172.4556427001953, "learning_rate": 3.515543586283951e-07, "loss": 16.6094, "step": 22069 }, { "epoch": 1.465763432290629, "grad_norm": 208.41403198242188, "learning_rate": 3.5147248727314216e-07, "loss": 16.2656, "step": 22070 }, { "epoch": 1.4658298465829847, "grad_norm": 183.53964233398438, "learning_rate": 3.513906234196814e-07, "loss": 17.3125, "step": 22071 }, { "epoch": 1.4658962608753403, "grad_norm": 130.05560302734375, "learning_rate": 3.513087670689601e-07, "loss": 18.6562, "step": 22072 }, { "epoch": 1.4659626751676962, "grad_norm": 136.80650329589844, "learning_rate": 3.5122691822192506e-07, "loss": 11.8359, "step": 22073 }, { "epoch": 1.4660290894600518, "grad_norm": 167.4248809814453, "learning_rate": 3.511450768795223e-07, "loss": 15.0625, "step": 22074 }, { "epoch": 1.4660955037524075, "grad_norm": 281.0376281738281, "learning_rate": 3.5106324304269986e-07, "loss": 17.7188, "step": 22075 }, { "epoch": 1.4661619180447634, "grad_norm": 327.29241943359375, "learning_rate": 3.5098141671240335e-07, "loss": 15.8281, "step": 22076 }, { "epoch": 1.466228332337119, "grad_norm": 275.0758972167969, "learning_rate": 3.5089959788957943e-07, "loss": 11.375, "step": 22077 }, { "epoch": 1.4662947466294747, "grad_norm": 129.08885192871094, "learning_rate": 3.508177865751748e-07, "loss": 13.5625, "step": 22078 }, { "epoch": 1.4663611609218303, "grad_norm": 375.1065673828125, "learning_rate": 3.507359827701356e-07, "loss": 22.4688, "step": 22079 }, { "epoch": 1.466427575214186, "grad_norm": 182.0212860107422, "learning_rate": 3.506541864754081e-07, "loss": 16.5469, "step": 22080 }, { "epoch": 1.4664939895065419, "grad_norm": 200.8153533935547, "learning_rate": 3.50572397691939e-07, "loss": 13.8281, "step": 22081 }, { "epoch": 1.4665604037988975, "grad_norm": 149.54119873046875, "learning_rate": 3.504906164206732e-07, "loss": 17.3281, "step": 22082 }, { "epoch": 1.4666268180912532, "grad_norm": 518.1724853515625, "learning_rate": 3.504088426625581e-07, "loss": 20.1562, "step": 22083 }, { "epoch": 1.466693232383609, "grad_norm": 182.59857177734375, "learning_rate": 3.5032707641853854e-07, "loss": 14.7969, "step": 22084 }, { "epoch": 1.4667596466759647, "grad_norm": 195.76390075683594, "learning_rate": 3.502453176895608e-07, "loss": 21.3125, "step": 22085 }, { "epoch": 1.4668260609683204, "grad_norm": 185.03964233398438, "learning_rate": 3.501635664765705e-07, "loss": 17.0781, "step": 22086 }, { "epoch": 1.4668924752606762, "grad_norm": 236.1755828857422, "learning_rate": 3.5008182278051377e-07, "loss": 16.9531, "step": 22087 }, { "epoch": 1.4669588895530319, "grad_norm": 255.6009979248047, "learning_rate": 3.5000008660233506e-07, "loss": 12.6406, "step": 22088 }, { "epoch": 1.4670253038453875, "grad_norm": 98.00531768798828, "learning_rate": 3.499183579429812e-07, "loss": 12.3281, "step": 22089 }, { "epoch": 1.4670917181377432, "grad_norm": 174.21156311035156, "learning_rate": 3.498366368033965e-07, "loss": 16.0, "step": 22090 }, { "epoch": 1.4671581324300988, "grad_norm": 119.98007202148438, "learning_rate": 3.497549231845268e-07, "loss": 12.3672, "step": 22091 }, { "epoch": 1.4672245467224547, "grad_norm": 463.0685729980469, "learning_rate": 3.4967321708731714e-07, "loss": 15.4062, "step": 22092 }, { "epoch": 1.4672909610148104, "grad_norm": 182.32406616210938, "learning_rate": 3.4959151851271264e-07, "loss": 15.5938, "step": 22093 }, { "epoch": 1.467357375307166, "grad_norm": 117.90129089355469, "learning_rate": 3.495098274616585e-07, "loss": 13.0625, "step": 22094 }, { "epoch": 1.467423789599522, "grad_norm": 924.6625366210938, "learning_rate": 3.4942814393509956e-07, "loss": 10.7031, "step": 22095 }, { "epoch": 1.4674902038918776, "grad_norm": 115.36307525634766, "learning_rate": 3.493464679339807e-07, "loss": 11.1406, "step": 22096 }, { "epoch": 1.4675566181842332, "grad_norm": 257.7181396484375, "learning_rate": 3.492647994592467e-07, "loss": 16.3594, "step": 22097 }, { "epoch": 1.467623032476589, "grad_norm": 113.13890075683594, "learning_rate": 3.4918313851184254e-07, "loss": 14.5938, "step": 22098 }, { "epoch": 1.4676894467689447, "grad_norm": 396.63262939453125, "learning_rate": 3.49101485092712e-07, "loss": 12.125, "step": 22099 }, { "epoch": 1.4677558610613004, "grad_norm": 145.22621154785156, "learning_rate": 3.4901983920280074e-07, "loss": 13.1094, "step": 22100 }, { "epoch": 1.467822275353656, "grad_norm": 612.030517578125, "learning_rate": 3.4893820084305235e-07, "loss": 18.4375, "step": 22101 }, { "epoch": 1.4678886896460117, "grad_norm": 245.46607971191406, "learning_rate": 3.488565700144114e-07, "loss": 17.4844, "step": 22102 }, { "epoch": 1.4679551039383676, "grad_norm": 129.73147583007812, "learning_rate": 3.4877494671782216e-07, "loss": 15.0547, "step": 22103 }, { "epoch": 1.4680215182307232, "grad_norm": 235.2498779296875, "learning_rate": 3.486933309542288e-07, "loss": 21.5938, "step": 22104 }, { "epoch": 1.468087932523079, "grad_norm": 214.0892333984375, "learning_rate": 3.486117227245754e-07, "loss": 18.2344, "step": 22105 }, { "epoch": 1.4681543468154348, "grad_norm": 130.9552459716797, "learning_rate": 3.485301220298064e-07, "loss": 14.8125, "step": 22106 }, { "epoch": 1.4682207611077904, "grad_norm": 224.20458984375, "learning_rate": 3.4844852887086484e-07, "loss": 12.3828, "step": 22107 }, { "epoch": 1.468287175400146, "grad_norm": 215.27432250976562, "learning_rate": 3.483669432486951e-07, "loss": 15.9688, "step": 22108 }, { "epoch": 1.468353589692502, "grad_norm": 224.2691192626953, "learning_rate": 3.4828536516424077e-07, "loss": 13.8594, "step": 22109 }, { "epoch": 1.4684200039848576, "grad_norm": 186.9796905517578, "learning_rate": 3.482037946184455e-07, "loss": 11.7656, "step": 22110 }, { "epoch": 1.4684864182772133, "grad_norm": 338.549560546875, "learning_rate": 3.4812223161225295e-07, "loss": 17.7969, "step": 22111 }, { "epoch": 1.468552832569569, "grad_norm": 156.73080444335938, "learning_rate": 3.480406761466068e-07, "loss": 15.8125, "step": 22112 }, { "epoch": 1.4686192468619246, "grad_norm": 239.181396484375, "learning_rate": 3.4795912822244955e-07, "loss": 19.5312, "step": 22113 }, { "epoch": 1.4686856611542805, "grad_norm": 1916.458984375, "learning_rate": 3.478775878407257e-07, "loss": 12.7812, "step": 22114 }, { "epoch": 1.468752075446636, "grad_norm": 155.89541625976562, "learning_rate": 3.4779605500237764e-07, "loss": 14.4531, "step": 22115 }, { "epoch": 1.4688184897389918, "grad_norm": 133.166015625, "learning_rate": 3.4771452970834867e-07, "loss": 16.1719, "step": 22116 }, { "epoch": 1.4688849040313476, "grad_norm": 198.88763427734375, "learning_rate": 3.4763301195958193e-07, "loss": 11.2812, "step": 22117 }, { "epoch": 1.4689513183237033, "grad_norm": 215.95870971679688, "learning_rate": 3.475515017570203e-07, "loss": 15.625, "step": 22118 }, { "epoch": 1.469017732616059, "grad_norm": 224.3029327392578, "learning_rate": 3.474699991016068e-07, "loss": 15.4375, "step": 22119 }, { "epoch": 1.4690841469084148, "grad_norm": 641.2318725585938, "learning_rate": 3.473885039942842e-07, "loss": 18.0625, "step": 22120 }, { "epoch": 1.4691505612007705, "grad_norm": 208.44552612304688, "learning_rate": 3.4730701643599457e-07, "loss": 17.25, "step": 22121 }, { "epoch": 1.4692169754931261, "grad_norm": 231.22071838378906, "learning_rate": 3.4722553642768157e-07, "loss": 24.0312, "step": 22122 }, { "epoch": 1.4692833897854818, "grad_norm": 140.77255249023438, "learning_rate": 3.4714406397028693e-07, "loss": 11.25, "step": 22123 }, { "epoch": 1.4693498040778374, "grad_norm": 282.3497619628906, "learning_rate": 3.470625990647532e-07, "loss": 16.3594, "step": 22124 }, { "epoch": 1.4694162183701933, "grad_norm": 224.85903930664062, "learning_rate": 3.4698114171202284e-07, "loss": 18.3281, "step": 22125 }, { "epoch": 1.469482632662549, "grad_norm": 129.2307586669922, "learning_rate": 3.4689969191303847e-07, "loss": 13.25, "step": 22126 }, { "epoch": 1.4695490469549046, "grad_norm": 143.62796020507812, "learning_rate": 3.4681824966874117e-07, "loss": 12.5547, "step": 22127 }, { "epoch": 1.4696154612472605, "grad_norm": 145.25123596191406, "learning_rate": 3.467368149800743e-07, "loss": 10.8828, "step": 22128 }, { "epoch": 1.4696818755396162, "grad_norm": 167.91078186035156, "learning_rate": 3.466553878479791e-07, "loss": 16.4375, "step": 22129 }, { "epoch": 1.4697482898319718, "grad_norm": 342.6893310546875, "learning_rate": 3.4657396827339747e-07, "loss": 15.8594, "step": 22130 }, { "epoch": 1.4698147041243277, "grad_norm": 413.99072265625, "learning_rate": 3.4649255625727147e-07, "loss": 19.0, "step": 22131 }, { "epoch": 1.4698811184166833, "grad_norm": 292.9705505371094, "learning_rate": 3.464111518005427e-07, "loss": 15.0938, "step": 22132 }, { "epoch": 1.469947532709039, "grad_norm": 1661.5008544921875, "learning_rate": 3.4632975490415284e-07, "loss": 17.0469, "step": 22133 }, { "epoch": 1.4700139470013946, "grad_norm": 162.78250122070312, "learning_rate": 3.462483655690435e-07, "loss": 16.5078, "step": 22134 }, { "epoch": 1.4700803612937503, "grad_norm": 1335.453857421875, "learning_rate": 3.4616698379615607e-07, "loss": 21.9844, "step": 22135 }, { "epoch": 1.4701467755861062, "grad_norm": 320.59368896484375, "learning_rate": 3.4608560958643194e-07, "loss": 12.2344, "step": 22136 }, { "epoch": 1.4702131898784618, "grad_norm": 93.78897094726562, "learning_rate": 3.4600424294081277e-07, "loss": 14.1719, "step": 22137 }, { "epoch": 1.4702796041708175, "grad_norm": 214.2742462158203, "learning_rate": 3.459228838602387e-07, "loss": 17.8906, "step": 22138 }, { "epoch": 1.4703460184631734, "grad_norm": 234.0912628173828, "learning_rate": 3.4584153234565226e-07, "loss": 19.0, "step": 22139 }, { "epoch": 1.470412432755529, "grad_norm": 409.3255920410156, "learning_rate": 3.4576018839799335e-07, "loss": 17.7812, "step": 22140 }, { "epoch": 1.4704788470478847, "grad_norm": 195.8568878173828, "learning_rate": 3.4567885201820345e-07, "loss": 15.2891, "step": 22141 }, { "epoch": 1.4705452613402406, "grad_norm": 202.92568969726562, "learning_rate": 3.4559752320722316e-07, "loss": 19.9062, "step": 22142 }, { "epoch": 1.4706116756325962, "grad_norm": 225.77130126953125, "learning_rate": 3.455162019659934e-07, "loss": 12.8125, "step": 22143 }, { "epoch": 1.4706780899249519, "grad_norm": 227.5556182861328, "learning_rate": 3.4543488829545487e-07, "loss": 24.1875, "step": 22144 }, { "epoch": 1.4707445042173075, "grad_norm": 121.67140197753906, "learning_rate": 3.453535821965484e-07, "loss": 15.7656, "step": 22145 }, { "epoch": 1.4708109185096632, "grad_norm": 177.67222595214844, "learning_rate": 3.4527228367021354e-07, "loss": 13.8438, "step": 22146 }, { "epoch": 1.470877332802019, "grad_norm": 251.39149475097656, "learning_rate": 3.4519099271739205e-07, "loss": 14.4219, "step": 22147 }, { "epoch": 1.4709437470943747, "grad_norm": 443.0941162109375, "learning_rate": 3.451097093390233e-07, "loss": 19.1719, "step": 22148 }, { "epoch": 1.4710101613867304, "grad_norm": 135.33673095703125, "learning_rate": 3.450284335360477e-07, "loss": 18.6406, "step": 22149 }, { "epoch": 1.4710765756790862, "grad_norm": 234.6529541015625, "learning_rate": 3.449471653094055e-07, "loss": 29.5938, "step": 22150 }, { "epoch": 1.4711429899714419, "grad_norm": 152.92674255371094, "learning_rate": 3.448659046600372e-07, "loss": 15.6172, "step": 22151 }, { "epoch": 1.4712094042637975, "grad_norm": 271.9801940917969, "learning_rate": 3.447846515888815e-07, "loss": 21.0078, "step": 22152 }, { "epoch": 1.4712758185561534, "grad_norm": 152.8321075439453, "learning_rate": 3.4470340609687995e-07, "loss": 13.3594, "step": 22153 }, { "epoch": 1.471342232848509, "grad_norm": 251.0580596923828, "learning_rate": 3.4462216818497114e-07, "loss": 17.7969, "step": 22154 }, { "epoch": 1.4714086471408647, "grad_norm": 121.17353057861328, "learning_rate": 3.4454093785409524e-07, "loss": 10.7891, "step": 22155 }, { "epoch": 1.4714750614332204, "grad_norm": 1394.580078125, "learning_rate": 3.4445971510519167e-07, "loss": 19.4062, "step": 22156 }, { "epoch": 1.471541475725576, "grad_norm": 159.6183624267578, "learning_rate": 3.443784999392002e-07, "loss": 16.5781, "step": 22157 }, { "epoch": 1.471607890017932, "grad_norm": 396.1187744140625, "learning_rate": 3.442972923570602e-07, "loss": 16.6562, "step": 22158 }, { "epoch": 1.4716743043102876, "grad_norm": 105.677978515625, "learning_rate": 3.442160923597114e-07, "loss": 15.2344, "step": 22159 }, { "epoch": 1.4717407186026432, "grad_norm": 173.75445556640625, "learning_rate": 3.4413489994809195e-07, "loss": 16.0469, "step": 22160 }, { "epoch": 1.471807132894999, "grad_norm": 199.30267333984375, "learning_rate": 3.4405371512314254e-07, "loss": 17.0, "step": 22161 }, { "epoch": 1.4718735471873547, "grad_norm": 1482.4207763671875, "learning_rate": 3.4397253788580103e-07, "loss": 21.3594, "step": 22162 }, { "epoch": 1.4719399614797104, "grad_norm": 210.67706298828125, "learning_rate": 3.4389136823700705e-07, "loss": 14.7031, "step": 22163 }, { "epoch": 1.4720063757720663, "grad_norm": 258.2305603027344, "learning_rate": 3.4381020617769927e-07, "loss": 17.375, "step": 22164 }, { "epoch": 1.472072790064422, "grad_norm": 149.1395263671875, "learning_rate": 3.437290517088167e-07, "loss": 24.4375, "step": 22165 }, { "epoch": 1.4721392043567776, "grad_norm": 226.79827880859375, "learning_rate": 3.4364790483129815e-07, "loss": 13.2188, "step": 22166 }, { "epoch": 1.4722056186491332, "grad_norm": 232.3924560546875, "learning_rate": 3.435667655460823e-07, "loss": 18.5156, "step": 22167 }, { "epoch": 1.472272032941489, "grad_norm": 796.7545776367188, "learning_rate": 3.43485633854107e-07, "loss": 15.7109, "step": 22168 }, { "epoch": 1.4723384472338448, "grad_norm": 150.2854461669922, "learning_rate": 3.4340450975631176e-07, "loss": 12.4062, "step": 22169 }, { "epoch": 1.4724048615262004, "grad_norm": 392.5920715332031, "learning_rate": 3.433233932536347e-07, "loss": 15.3438, "step": 22170 }, { "epoch": 1.472471275818556, "grad_norm": 107.64443969726562, "learning_rate": 3.4324228434701375e-07, "loss": 13.0078, "step": 22171 }, { "epoch": 1.472537690110912, "grad_norm": 192.7268524169922, "learning_rate": 3.431611830373873e-07, "loss": 13.7969, "step": 22172 }, { "epoch": 1.4726041044032676, "grad_norm": 279.2419738769531, "learning_rate": 3.4308008932569357e-07, "loss": 18.2656, "step": 22173 }, { "epoch": 1.4726705186956233, "grad_norm": 373.1527404785156, "learning_rate": 3.4299900321287047e-07, "loss": 14.2344, "step": 22174 }, { "epoch": 1.4727369329879791, "grad_norm": 699.11181640625, "learning_rate": 3.4291792469985614e-07, "loss": 16.0625, "step": 22175 }, { "epoch": 1.4728033472803348, "grad_norm": 246.70452880859375, "learning_rate": 3.4283685378758864e-07, "loss": 14.5469, "step": 22176 }, { "epoch": 1.4728697615726905, "grad_norm": 1110.1220703125, "learning_rate": 3.4275579047700476e-07, "loss": 14.9531, "step": 22177 }, { "epoch": 1.472936175865046, "grad_norm": 316.5657958984375, "learning_rate": 3.426747347690437e-07, "loss": 15.7344, "step": 22178 }, { "epoch": 1.4730025901574018, "grad_norm": 386.8160095214844, "learning_rate": 3.4259368666464185e-07, "loss": 15.9375, "step": 22179 }, { "epoch": 1.4730690044497576, "grad_norm": 286.1014099121094, "learning_rate": 3.425126461647372e-07, "loss": 17.0156, "step": 22180 }, { "epoch": 1.4731354187421133, "grad_norm": 209.37257385253906, "learning_rate": 3.4243161327026715e-07, "loss": 19.4219, "step": 22181 }, { "epoch": 1.473201833034469, "grad_norm": 196.568359375, "learning_rate": 3.4235058798216896e-07, "loss": 13.3203, "step": 22182 }, { "epoch": 1.4732682473268248, "grad_norm": 281.45501708984375, "learning_rate": 3.422695703013799e-07, "loss": 16.9219, "step": 22183 }, { "epoch": 1.4733346616191805, "grad_norm": 215.3248748779297, "learning_rate": 3.421885602288377e-07, "loss": 15.6562, "step": 22184 }, { "epoch": 1.4734010759115361, "grad_norm": 706.7736206054688, "learning_rate": 3.4210755776547804e-07, "loss": 24.5, "step": 22185 }, { "epoch": 1.473467490203892, "grad_norm": 140.80715942382812, "learning_rate": 3.4202656291223963e-07, "loss": 13.1875, "step": 22186 }, { "epoch": 1.4735339044962477, "grad_norm": 182.140625, "learning_rate": 3.419455756700581e-07, "loss": 12.875, "step": 22187 }, { "epoch": 1.4736003187886033, "grad_norm": 157.37734985351562, "learning_rate": 3.418645960398707e-07, "loss": 16.2656, "step": 22188 }, { "epoch": 1.473666733080959, "grad_norm": 141.6917266845703, "learning_rate": 3.417836240226142e-07, "loss": 11.5469, "step": 22189 }, { "epoch": 1.4737331473733146, "grad_norm": 556.886962890625, "learning_rate": 3.417026596192255e-07, "loss": 15.1719, "step": 22190 }, { "epoch": 1.4737995616656705, "grad_norm": 85.11902618408203, "learning_rate": 3.416217028306402e-07, "loss": 10.5156, "step": 22191 }, { "epoch": 1.4738659759580262, "grad_norm": 265.0437316894531, "learning_rate": 3.41540753657796e-07, "loss": 18.8594, "step": 22192 }, { "epoch": 1.4739323902503818, "grad_norm": 355.73974609375, "learning_rate": 3.4145981210162833e-07, "loss": 18.2344, "step": 22193 }, { "epoch": 1.4739988045427377, "grad_norm": 177.1019287109375, "learning_rate": 3.4137887816307386e-07, "loss": 17.8438, "step": 22194 }, { "epoch": 1.4740652188350933, "grad_norm": 373.5611877441406, "learning_rate": 3.412979518430688e-07, "loss": 21.1562, "step": 22195 }, { "epoch": 1.474131633127449, "grad_norm": 123.45695495605469, "learning_rate": 3.4121703314254915e-07, "loss": 16.2188, "step": 22196 }, { "epoch": 1.4741980474198049, "grad_norm": 117.87134552001953, "learning_rate": 3.411361220624509e-07, "loss": 12.5469, "step": 22197 }, { "epoch": 1.4742644617121605, "grad_norm": 210.13111877441406, "learning_rate": 3.4105521860371043e-07, "loss": 22.5312, "step": 22198 }, { "epoch": 1.4743308760045162, "grad_norm": 490.5074157714844, "learning_rate": 3.409743227672627e-07, "loss": 20.0, "step": 22199 }, { "epoch": 1.4743972902968718, "grad_norm": 455.6742858886719, "learning_rate": 3.408934345540445e-07, "loss": 18.7031, "step": 22200 }, { "epoch": 1.4744637045892275, "grad_norm": 187.37538146972656, "learning_rate": 3.4081255396499074e-07, "loss": 13.7188, "step": 22201 }, { "epoch": 1.4745301188815834, "grad_norm": 328.20587158203125, "learning_rate": 3.407316810010372e-07, "loss": 29.2969, "step": 22202 }, { "epoch": 1.474596533173939, "grad_norm": 159.9087371826172, "learning_rate": 3.406508156631194e-07, "loss": 15.5938, "step": 22203 }, { "epoch": 1.4746629474662947, "grad_norm": 341.4810791015625, "learning_rate": 3.405699579521728e-07, "loss": 16.1406, "step": 22204 }, { "epoch": 1.4747293617586505, "grad_norm": 200.57330322265625, "learning_rate": 3.4048910786913266e-07, "loss": 18.0156, "step": 22205 }, { "epoch": 1.4747957760510062, "grad_norm": 262.56658935546875, "learning_rate": 3.404082654149343e-07, "loss": 14.7188, "step": 22206 }, { "epoch": 1.4748621903433619, "grad_norm": 143.82781982421875, "learning_rate": 3.403274305905127e-07, "loss": 17.2656, "step": 22207 }, { "epoch": 1.4749286046357177, "grad_norm": 299.23944091796875, "learning_rate": 3.40246603396803e-07, "loss": 18.8438, "step": 22208 }, { "epoch": 1.4749950189280734, "grad_norm": 443.707763671875, "learning_rate": 3.401657838347405e-07, "loss": 16.4531, "step": 22209 }, { "epoch": 1.475061433220429, "grad_norm": 425.37603759765625, "learning_rate": 3.400849719052591e-07, "loss": 17.1094, "step": 22210 }, { "epoch": 1.4751278475127847, "grad_norm": 134.81817626953125, "learning_rate": 3.4000416760929484e-07, "loss": 12.2891, "step": 22211 }, { "epoch": 1.4751942618051403, "grad_norm": 227.21987915039062, "learning_rate": 3.3992337094778147e-07, "loss": 15.0156, "step": 22212 }, { "epoch": 1.4752606760974962, "grad_norm": 127.5744857788086, "learning_rate": 3.3984258192165395e-07, "loss": 13.1406, "step": 22213 }, { "epoch": 1.4753270903898519, "grad_norm": 284.121337890625, "learning_rate": 3.397618005318467e-07, "loss": 17.7031, "step": 22214 }, { "epoch": 1.4753935046822075, "grad_norm": 117.1756362915039, "learning_rate": 3.3968102677929456e-07, "loss": 18.4062, "step": 22215 }, { "epoch": 1.4754599189745634, "grad_norm": 246.98577880859375, "learning_rate": 3.3960026066493096e-07, "loss": 25.9688, "step": 22216 }, { "epoch": 1.475526333266919, "grad_norm": 470.32421875, "learning_rate": 3.395195021896914e-07, "loss": 15.5, "step": 22217 }, { "epoch": 1.4755927475592747, "grad_norm": 274.6370849609375, "learning_rate": 3.3943875135450896e-07, "loss": 16.2188, "step": 22218 }, { "epoch": 1.4756591618516306, "grad_norm": 128.73460388183594, "learning_rate": 3.393580081603182e-07, "loss": 12.5625, "step": 22219 }, { "epoch": 1.4757255761439863, "grad_norm": 353.134765625, "learning_rate": 3.3927727260805304e-07, "loss": 9.3281, "step": 22220 }, { "epoch": 1.475791990436342, "grad_norm": 171.91720581054688, "learning_rate": 3.3919654469864747e-07, "loss": 16.6562, "step": 22221 }, { "epoch": 1.4758584047286976, "grad_norm": 211.08572387695312, "learning_rate": 3.391158244330351e-07, "loss": 16.2812, "step": 22222 }, { "epoch": 1.4759248190210532, "grad_norm": 317.82537841796875, "learning_rate": 3.390351118121503e-07, "loss": 17.875, "step": 22223 }, { "epoch": 1.475991233313409, "grad_norm": 418.93670654296875, "learning_rate": 3.3895440683692535e-07, "loss": 21.7969, "step": 22224 }, { "epoch": 1.4760576476057647, "grad_norm": 202.27423095703125, "learning_rate": 3.388737095082953e-07, "loss": 16.9531, "step": 22225 }, { "epoch": 1.4761240618981204, "grad_norm": 271.40118408203125, "learning_rate": 3.3879301982719276e-07, "loss": 14.8594, "step": 22226 }, { "epoch": 1.4761904761904763, "grad_norm": 293.7660217285156, "learning_rate": 3.387123377945512e-07, "loss": 22.9688, "step": 22227 }, { "epoch": 1.476256890482832, "grad_norm": 514.4735107421875, "learning_rate": 3.38631663411304e-07, "loss": 18.1875, "step": 22228 }, { "epoch": 1.4763233047751876, "grad_norm": 102.2912826538086, "learning_rate": 3.385509966783844e-07, "loss": 14.25, "step": 22229 }, { "epoch": 1.4763897190675435, "grad_norm": 192.75254821777344, "learning_rate": 3.384703375967254e-07, "loss": 15.5625, "step": 22230 }, { "epoch": 1.4764561333598991, "grad_norm": 164.33961486816406, "learning_rate": 3.383896861672604e-07, "loss": 16.4375, "step": 22231 }, { "epoch": 1.4765225476522548, "grad_norm": 189.65785217285156, "learning_rate": 3.383090423909214e-07, "loss": 17.4375, "step": 22232 }, { "epoch": 1.4765889619446104, "grad_norm": 96.95596313476562, "learning_rate": 3.3822840626864245e-07, "loss": 12.0703, "step": 22233 }, { "epoch": 1.476655376236966, "grad_norm": 221.7115478515625, "learning_rate": 3.381477778013554e-07, "loss": 12.6484, "step": 22234 }, { "epoch": 1.476721790529322, "grad_norm": 208.5291290283203, "learning_rate": 3.380671569899932e-07, "loss": 14.8906, "step": 22235 }, { "epoch": 1.4767882048216776, "grad_norm": 193.8956756591797, "learning_rate": 3.379865438354885e-07, "loss": 19.125, "step": 22236 }, { "epoch": 1.4768546191140333, "grad_norm": 276.08807373046875, "learning_rate": 3.37905938338774e-07, "loss": 16.1094, "step": 22237 }, { "epoch": 1.4769210334063891, "grad_norm": 320.09918212890625, "learning_rate": 3.3782534050078116e-07, "loss": 18.2344, "step": 22238 }, { "epoch": 1.4769874476987448, "grad_norm": 166.59219360351562, "learning_rate": 3.3774475032244356e-07, "loss": 17.25, "step": 22239 }, { "epoch": 1.4770538619911004, "grad_norm": 177.0565948486328, "learning_rate": 3.376641678046925e-07, "loss": 17.0312, "step": 22240 }, { "epoch": 1.4771202762834563, "grad_norm": 237.79843139648438, "learning_rate": 3.3758359294846053e-07, "loss": 12.0938, "step": 22241 }, { "epoch": 1.477186690575812, "grad_norm": 221.87445068359375, "learning_rate": 3.375030257546795e-07, "loss": 16.7812, "step": 22242 }, { "epoch": 1.4772531048681676, "grad_norm": 161.7899932861328, "learning_rate": 3.3742246622428137e-07, "loss": 10.8906, "step": 22243 }, { "epoch": 1.4773195191605233, "grad_norm": 433.5834655761719, "learning_rate": 3.3734191435819815e-07, "loss": 19.6094, "step": 22244 }, { "epoch": 1.477385933452879, "grad_norm": 173.24099731445312, "learning_rate": 3.3726137015736155e-07, "loss": 16.25, "step": 22245 }, { "epoch": 1.4774523477452348, "grad_norm": 158.77786254882812, "learning_rate": 3.371808336227033e-07, "loss": 16.625, "step": 22246 }, { "epoch": 1.4775187620375905, "grad_norm": 218.35525512695312, "learning_rate": 3.371003047551549e-07, "loss": 18.0156, "step": 22247 }, { "epoch": 1.4775851763299461, "grad_norm": 173.8592529296875, "learning_rate": 3.370197835556484e-07, "loss": 11.5703, "step": 22248 }, { "epoch": 1.477651590622302, "grad_norm": 161.15577697753906, "learning_rate": 3.369392700251139e-07, "loss": 15.9688, "step": 22249 }, { "epoch": 1.4777180049146577, "grad_norm": 146.3040771484375, "learning_rate": 3.3685876416448446e-07, "loss": 12.7031, "step": 22250 }, { "epoch": 1.4777844192070133, "grad_norm": 316.6368408203125, "learning_rate": 3.3677826597468995e-07, "loss": 16.125, "step": 22251 }, { "epoch": 1.4778508334993692, "grad_norm": 171.66360473632812, "learning_rate": 3.36697775456662e-07, "loss": 13.9688, "step": 22252 }, { "epoch": 1.4779172477917248, "grad_norm": 211.31158447265625, "learning_rate": 3.3661729261133177e-07, "loss": 15.3438, "step": 22253 }, { "epoch": 1.4779836620840805, "grad_norm": 181.6887664794922, "learning_rate": 3.365368174396305e-07, "loss": 15.7031, "step": 22254 }, { "epoch": 1.4780500763764362, "grad_norm": 202.90513610839844, "learning_rate": 3.364563499424881e-07, "loss": 15.375, "step": 22255 }, { "epoch": 1.4781164906687918, "grad_norm": 131.77032470703125, "learning_rate": 3.363758901208367e-07, "loss": 19.1875, "step": 22256 }, { "epoch": 1.4781829049611477, "grad_norm": 6192.03466796875, "learning_rate": 3.362954379756059e-07, "loss": 14.7969, "step": 22257 }, { "epoch": 1.4782493192535033, "grad_norm": 206.13633728027344, "learning_rate": 3.3621499350772687e-07, "loss": 16.5, "step": 22258 }, { "epoch": 1.478315733545859, "grad_norm": 1088.38916015625, "learning_rate": 3.3613455671813e-07, "loss": 14.7969, "step": 22259 }, { "epoch": 1.4783821478382149, "grad_norm": 720.1486206054688, "learning_rate": 3.3605412760774575e-07, "loss": 19.6406, "step": 22260 }, { "epoch": 1.4784485621305705, "grad_norm": 105.08514404296875, "learning_rate": 3.3597370617750455e-07, "loss": 10.2344, "step": 22261 }, { "epoch": 1.4785149764229262, "grad_norm": 130.4398193359375, "learning_rate": 3.3589329242833683e-07, "loss": 17.1406, "step": 22262 }, { "epoch": 1.478581390715282, "grad_norm": 304.0274658203125, "learning_rate": 3.35812886361172e-07, "loss": 23.3594, "step": 22263 }, { "epoch": 1.4786478050076377, "grad_norm": 224.2580108642578, "learning_rate": 3.357324879769414e-07, "loss": 15.9531, "step": 22264 }, { "epoch": 1.4787142192999934, "grad_norm": 182.59254455566406, "learning_rate": 3.3565209727657394e-07, "loss": 14.9531, "step": 22265 }, { "epoch": 1.478780633592349, "grad_norm": 372.8981628417969, "learning_rate": 3.3557171426099997e-07, "loss": 16.1094, "step": 22266 }, { "epoch": 1.4788470478847047, "grad_norm": 324.42181396484375, "learning_rate": 3.3549133893114923e-07, "loss": 16.8594, "step": 22267 }, { "epoch": 1.4789134621770605, "grad_norm": 193.90982055664062, "learning_rate": 3.354109712879516e-07, "loss": 14.5156, "step": 22268 }, { "epoch": 1.4789798764694162, "grad_norm": 291.5446472167969, "learning_rate": 3.353306113323365e-07, "loss": 17.8594, "step": 22269 }, { "epoch": 1.4790462907617719, "grad_norm": 197.481689453125, "learning_rate": 3.3525025906523406e-07, "loss": 15.3906, "step": 22270 }, { "epoch": 1.4791127050541277, "grad_norm": 170.75807189941406, "learning_rate": 3.351699144875726e-07, "loss": 18.1406, "step": 22271 }, { "epoch": 1.4791791193464834, "grad_norm": 327.7322998046875, "learning_rate": 3.350895776002829e-07, "loss": 14.6562, "step": 22272 }, { "epoch": 1.479245533638839, "grad_norm": 224.2110595703125, "learning_rate": 3.350092484042931e-07, "loss": 20.5156, "step": 22273 }, { "epoch": 1.479311947931195, "grad_norm": 206.33897399902344, "learning_rate": 3.34928926900533e-07, "loss": 13.1875, "step": 22274 }, { "epoch": 1.4793783622235506, "grad_norm": 268.49169921875, "learning_rate": 3.348486130899315e-07, "loss": 16.9062, "step": 22275 }, { "epoch": 1.4794447765159062, "grad_norm": 258.3076171875, "learning_rate": 3.3476830697341807e-07, "loss": 15.0156, "step": 22276 }, { "epoch": 1.4795111908082619, "grad_norm": 89.02777099609375, "learning_rate": 3.346880085519206e-07, "loss": 12.25, "step": 22277 }, { "epoch": 1.4795776051006175, "grad_norm": 174.77061462402344, "learning_rate": 3.346077178263692e-07, "loss": 15.6719, "step": 22278 }, { "epoch": 1.4796440193929734, "grad_norm": 221.98219299316406, "learning_rate": 3.345274347976919e-07, "loss": 24.2969, "step": 22279 }, { "epoch": 1.479710433685329, "grad_norm": 363.6737976074219, "learning_rate": 3.344471594668169e-07, "loss": 18.25, "step": 22280 }, { "epoch": 1.4797768479776847, "grad_norm": 152.65127563476562, "learning_rate": 3.3436689183467415e-07, "loss": 13.5312, "step": 22281 }, { "epoch": 1.4798432622700406, "grad_norm": 178.81790161132812, "learning_rate": 3.3428663190219106e-07, "loss": 17.625, "step": 22282 }, { "epoch": 1.4799096765623962, "grad_norm": 240.57456970214844, "learning_rate": 3.342063796702964e-07, "loss": 17.2656, "step": 22283 }, { "epoch": 1.479976090854752, "grad_norm": 545.2958984375, "learning_rate": 3.341261351399184e-07, "loss": 13.6719, "step": 22284 }, { "epoch": 1.4800425051471078, "grad_norm": 178.666259765625, "learning_rate": 3.3404589831198516e-07, "loss": 18.6094, "step": 22285 }, { "epoch": 1.4801089194394634, "grad_norm": 155.74391174316406, "learning_rate": 3.339656691874251e-07, "loss": 15.1172, "step": 22286 }, { "epoch": 1.480175333731819, "grad_norm": 241.6599884033203, "learning_rate": 3.338854477671664e-07, "loss": 20.6875, "step": 22287 }, { "epoch": 1.4802417480241747, "grad_norm": 139.74285888671875, "learning_rate": 3.338052340521361e-07, "loss": 13.4531, "step": 22288 }, { "epoch": 1.4803081623165304, "grad_norm": 197.22598266601562, "learning_rate": 3.337250280432633e-07, "loss": 16.6406, "step": 22289 }, { "epoch": 1.4803745766088863, "grad_norm": 277.2047424316406, "learning_rate": 3.33644829741475e-07, "loss": 16.9375, "step": 22290 }, { "epoch": 1.480440990901242, "grad_norm": 804.404296875, "learning_rate": 3.3356463914769896e-07, "loss": 14.7109, "step": 22291 }, { "epoch": 1.4805074051935976, "grad_norm": 202.07261657714844, "learning_rate": 3.334844562628628e-07, "loss": 17.3438, "step": 22292 }, { "epoch": 1.4805738194859535, "grad_norm": 272.05230712890625, "learning_rate": 3.334042810878943e-07, "loss": 12.7969, "step": 22293 }, { "epoch": 1.4806402337783091, "grad_norm": 265.9971923828125, "learning_rate": 3.333241136237206e-07, "loss": 27.4531, "step": 22294 }, { "epoch": 1.4807066480706648, "grad_norm": 225.85162353515625, "learning_rate": 3.3324395387126945e-07, "loss": 16.9531, "step": 22295 }, { "epoch": 1.4807730623630206, "grad_norm": 159.82275390625, "learning_rate": 3.331638018314674e-07, "loss": 16.5938, "step": 22296 }, { "epoch": 1.4808394766553763, "grad_norm": 176.26495361328125, "learning_rate": 3.33083657505242e-07, "loss": 17.5156, "step": 22297 }, { "epoch": 1.480905890947732, "grad_norm": 122.8740005493164, "learning_rate": 3.3300352089352033e-07, "loss": 14.9375, "step": 22298 }, { "epoch": 1.4809723052400876, "grad_norm": 137.2828369140625, "learning_rate": 3.329233919972293e-07, "loss": 12.2344, "step": 22299 }, { "epoch": 1.4810387195324433, "grad_norm": 135.95912170410156, "learning_rate": 3.328432708172958e-07, "loss": 11.5156, "step": 22300 }, { "epoch": 1.4811051338247991, "grad_norm": 541.456787109375, "learning_rate": 3.32763157354647e-07, "loss": 17.625, "step": 22301 }, { "epoch": 1.4811715481171548, "grad_norm": 540.0122680664062, "learning_rate": 3.326830516102085e-07, "loss": 15.1719, "step": 22302 }, { "epoch": 1.4812379624095104, "grad_norm": 106.45784759521484, "learning_rate": 3.326029535849084e-07, "loss": 11.7812, "step": 22303 }, { "epoch": 1.4813043767018663, "grad_norm": 235.1210479736328, "learning_rate": 3.3252286327967215e-07, "loss": 12.3594, "step": 22304 }, { "epoch": 1.481370790994222, "grad_norm": 160.32151794433594, "learning_rate": 3.3244278069542655e-07, "loss": 15.9062, "step": 22305 }, { "epoch": 1.4814372052865776, "grad_norm": 243.86959838867188, "learning_rate": 3.323627058330979e-07, "loss": 15.4219, "step": 22306 }, { "epoch": 1.4815036195789335, "grad_norm": 386.75726318359375, "learning_rate": 3.322826386936124e-07, "loss": 14.2188, "step": 22307 }, { "epoch": 1.4815700338712892, "grad_norm": 106.86051177978516, "learning_rate": 3.3220257927789643e-07, "loss": 14.375, "step": 22308 }, { "epoch": 1.4816364481636448, "grad_norm": 272.36614990234375, "learning_rate": 3.321225275868762e-07, "loss": 17.5, "step": 22309 }, { "epoch": 1.4817028624560005, "grad_norm": 143.99351501464844, "learning_rate": 3.320424836214769e-07, "loss": 15.2188, "step": 22310 }, { "epoch": 1.4817692767483561, "grad_norm": 338.8668212890625, "learning_rate": 3.319624473826256e-07, "loss": 15.5312, "step": 22311 }, { "epoch": 1.481835691040712, "grad_norm": 98.33407592773438, "learning_rate": 3.3188241887124715e-07, "loss": 12.5547, "step": 22312 }, { "epoch": 1.4819021053330677, "grad_norm": 174.53871154785156, "learning_rate": 3.318023980882675e-07, "loss": 10.4688, "step": 22313 }, { "epoch": 1.4819685196254233, "grad_norm": 168.83941650390625, "learning_rate": 3.317223850346126e-07, "loss": 18.0312, "step": 22314 }, { "epoch": 1.4820349339177792, "grad_norm": 128.84364318847656, "learning_rate": 3.316423797112077e-07, "loss": 15.7188, "step": 22315 }, { "epoch": 1.4821013482101348, "grad_norm": 156.7943878173828, "learning_rate": 3.315623821189784e-07, "loss": 13.9219, "step": 22316 }, { "epoch": 1.4821677625024905, "grad_norm": 761.4345703125, "learning_rate": 3.314823922588499e-07, "loss": 18.125, "step": 22317 }, { "epoch": 1.4822341767948464, "grad_norm": 188.65203857421875, "learning_rate": 3.3140241013174795e-07, "loss": 14.8125, "step": 22318 }, { "epoch": 1.482300591087202, "grad_norm": 160.8428497314453, "learning_rate": 3.3132243573859687e-07, "loss": 14.125, "step": 22319 }, { "epoch": 1.4823670053795577, "grad_norm": 172.94302368164062, "learning_rate": 3.312424690803228e-07, "loss": 19.7656, "step": 22320 }, { "epoch": 1.4824334196719133, "grad_norm": 194.95834350585938, "learning_rate": 3.3116251015785e-07, "loss": 15.1719, "step": 22321 }, { "epoch": 1.482499833964269, "grad_norm": 141.1513671875, "learning_rate": 3.310825589721035e-07, "loss": 11.3047, "step": 22322 }, { "epoch": 1.4825662482566249, "grad_norm": 140.45848083496094, "learning_rate": 3.310026155240083e-07, "loss": 11.7812, "step": 22323 }, { "epoch": 1.4826326625489805, "grad_norm": 116.71501922607422, "learning_rate": 3.3092267981448907e-07, "loss": 15.9531, "step": 22324 }, { "epoch": 1.4826990768413362, "grad_norm": 136.52728271484375, "learning_rate": 3.308427518444704e-07, "loss": 12.125, "step": 22325 }, { "epoch": 1.482765491133692, "grad_norm": 207.90802001953125, "learning_rate": 3.307628316148774e-07, "loss": 14.9219, "step": 22326 }, { "epoch": 1.4828319054260477, "grad_norm": 640.7192993164062, "learning_rate": 3.3068291912663327e-07, "loss": 14.7031, "step": 22327 }, { "epoch": 1.4828983197184034, "grad_norm": 173.91358947753906, "learning_rate": 3.3060301438066384e-07, "loss": 12.7422, "step": 22328 }, { "epoch": 1.4829647340107592, "grad_norm": 242.35418701171875, "learning_rate": 3.3052311737789253e-07, "loss": 16.4844, "step": 22329 }, { "epoch": 1.4830311483031149, "grad_norm": 216.91192626953125, "learning_rate": 3.3044322811924365e-07, "loss": 19.5625, "step": 22330 }, { "epoch": 1.4830975625954705, "grad_norm": 332.3952331542969, "learning_rate": 3.303633466056415e-07, "loss": 18.9844, "step": 22331 }, { "epoch": 1.4831639768878262, "grad_norm": 210.65542602539062, "learning_rate": 3.3028347283800994e-07, "loss": 19.8594, "step": 22332 }, { "epoch": 1.4832303911801819, "grad_norm": 131.44039916992188, "learning_rate": 3.30203606817273e-07, "loss": 16.5312, "step": 22333 }, { "epoch": 1.4832968054725377, "grad_norm": 268.19085693359375, "learning_rate": 3.3012374854435487e-07, "loss": 18.7031, "step": 22334 }, { "epoch": 1.4833632197648934, "grad_norm": 113.18264770507812, "learning_rate": 3.300438980201782e-07, "loss": 17.1094, "step": 22335 }, { "epoch": 1.483429634057249, "grad_norm": 102.65452575683594, "learning_rate": 3.2996405524566816e-07, "loss": 11.2734, "step": 22336 }, { "epoch": 1.483496048349605, "grad_norm": 159.02862548828125, "learning_rate": 3.2988422022174724e-07, "loss": 13.2188, "step": 22337 }, { "epoch": 1.4835624626419606, "grad_norm": 173.49807739257812, "learning_rate": 3.298043929493392e-07, "loss": 18.3281, "step": 22338 }, { "epoch": 1.4836288769343162, "grad_norm": 428.85064697265625, "learning_rate": 3.2972457342936744e-07, "loss": 17.2344, "step": 22339 }, { "epoch": 1.483695291226672, "grad_norm": 378.1103210449219, "learning_rate": 3.296447616627557e-07, "loss": 10.5156, "step": 22340 }, { "epoch": 1.4837617055190278, "grad_norm": 111.08903503417969, "learning_rate": 3.2956495765042615e-07, "loss": 12.4531, "step": 22341 }, { "epoch": 1.4838281198113834, "grad_norm": 193.63682556152344, "learning_rate": 3.2948516139330317e-07, "loss": 22.0469, "step": 22342 }, { "epoch": 1.483894534103739, "grad_norm": 195.053955078125, "learning_rate": 3.2940537289230895e-07, "loss": 17.8906, "step": 22343 }, { "epoch": 1.4839609483960947, "grad_norm": 174.2820281982422, "learning_rate": 3.2932559214836676e-07, "loss": 13.7188, "step": 22344 }, { "epoch": 1.4840273626884506, "grad_norm": 226.66421508789062, "learning_rate": 3.2924581916239923e-07, "loss": 22.1562, "step": 22345 }, { "epoch": 1.4840937769808062, "grad_norm": 426.68096923828125, "learning_rate": 3.2916605393532936e-07, "loss": 19.0469, "step": 22346 }, { "epoch": 1.484160191273162, "grad_norm": 143.3343048095703, "learning_rate": 3.290862964680797e-07, "loss": 15.75, "step": 22347 }, { "epoch": 1.4842266055655178, "grad_norm": 122.3046875, "learning_rate": 3.290065467615734e-07, "loss": 15.4531, "step": 22348 }, { "epoch": 1.4842930198578734, "grad_norm": 166.14581298828125, "learning_rate": 3.2892680481673164e-07, "loss": 13.125, "step": 22349 }, { "epoch": 1.484359434150229, "grad_norm": 153.72206115722656, "learning_rate": 3.2884707063447823e-07, "loss": 11.6797, "step": 22350 }, { "epoch": 1.484425848442585, "grad_norm": 179.82620239257812, "learning_rate": 3.287673442157347e-07, "loss": 17.5156, "step": 22351 }, { "epoch": 1.4844922627349406, "grad_norm": 97.45958709716797, "learning_rate": 3.2868762556142305e-07, "loss": 15.2188, "step": 22352 }, { "epoch": 1.4845586770272963, "grad_norm": 205.10302734375, "learning_rate": 3.2860791467246664e-07, "loss": 13.6875, "step": 22353 }, { "epoch": 1.484625091319652, "grad_norm": 162.9778289794922, "learning_rate": 3.285282115497863e-07, "loss": 16.375, "step": 22354 }, { "epoch": 1.4846915056120076, "grad_norm": 234.96145629882812, "learning_rate": 3.2844851619430435e-07, "loss": 13.0781, "step": 22355 }, { "epoch": 1.4847579199043635, "grad_norm": 198.54925537109375, "learning_rate": 3.2836882860694284e-07, "loss": 13.0938, "step": 22356 }, { "epoch": 1.484824334196719, "grad_norm": 236.28944396972656, "learning_rate": 3.282891487886235e-07, "loss": 10.9531, "step": 22357 }, { "epoch": 1.4848907484890748, "grad_norm": 234.1046905517578, "learning_rate": 3.282094767402678e-07, "loss": 14.6094, "step": 22358 }, { "epoch": 1.4849571627814306, "grad_norm": 401.3809814453125, "learning_rate": 3.281298124627979e-07, "loss": 21.2188, "step": 22359 }, { "epoch": 1.4850235770737863, "grad_norm": 215.76318359375, "learning_rate": 3.280501559571345e-07, "loss": 20.7344, "step": 22360 }, { "epoch": 1.485089991366142, "grad_norm": 184.09194946289062, "learning_rate": 3.279705072241995e-07, "loss": 16.0156, "step": 22361 }, { "epoch": 1.4851564056584978, "grad_norm": 560.969482421875, "learning_rate": 3.2789086626491413e-07, "loss": 14.6562, "step": 22362 }, { "epoch": 1.4852228199508535, "grad_norm": 153.51556396484375, "learning_rate": 3.278112330801998e-07, "loss": 17.6094, "step": 22363 }, { "epoch": 1.4852892342432091, "grad_norm": 207.83265686035156, "learning_rate": 3.277316076709773e-07, "loss": 19.2969, "step": 22364 }, { "epoch": 1.4853556485355648, "grad_norm": 598.4883422851562, "learning_rate": 3.2765199003816833e-07, "loss": 20.4219, "step": 22365 }, { "epoch": 1.4854220628279204, "grad_norm": 148.33302307128906, "learning_rate": 3.2757238018269285e-07, "loss": 16.4844, "step": 22366 }, { "epoch": 1.4854884771202763, "grad_norm": 153.99154663085938, "learning_rate": 3.274927781054728e-07, "loss": 17.1719, "step": 22367 }, { "epoch": 1.485554891412632, "grad_norm": 411.62310791015625, "learning_rate": 3.2741318380742833e-07, "loss": 16.4844, "step": 22368 }, { "epoch": 1.4856213057049876, "grad_norm": 507.2167053222656, "learning_rate": 3.2733359728948026e-07, "loss": 15.625, "step": 22369 }, { "epoch": 1.4856877199973435, "grad_norm": 140.10093688964844, "learning_rate": 3.2725401855254933e-07, "loss": 14.2344, "step": 22370 }, { "epoch": 1.4857541342896992, "grad_norm": 237.3584747314453, "learning_rate": 3.27174447597556e-07, "loss": 22.1719, "step": 22371 }, { "epoch": 1.4858205485820548, "grad_norm": 378.3587646484375, "learning_rate": 3.2709488442542053e-07, "loss": 12.9531, "step": 22372 }, { "epoch": 1.4858869628744107, "grad_norm": 2451.342041015625, "learning_rate": 3.2701532903706385e-07, "loss": 14.8125, "step": 22373 }, { "epoch": 1.4859533771667663, "grad_norm": 404.4264831542969, "learning_rate": 3.2693578143340514e-07, "loss": 16.4688, "step": 22374 }, { "epoch": 1.486019791459122, "grad_norm": 245.55125427246094, "learning_rate": 3.2685624161536586e-07, "loss": 17.6328, "step": 22375 }, { "epoch": 1.4860862057514777, "grad_norm": 319.35394287109375, "learning_rate": 3.2677670958386504e-07, "loss": 15.4688, "step": 22376 }, { "epoch": 1.4861526200438333, "grad_norm": 152.86341857910156, "learning_rate": 3.2669718533982304e-07, "loss": 14.6406, "step": 22377 }, { "epoch": 1.4862190343361892, "grad_norm": 155.82327270507812, "learning_rate": 3.266176688841598e-07, "loss": 17.125, "step": 22378 }, { "epoch": 1.4862854486285448, "grad_norm": 278.2803039550781, "learning_rate": 3.2653816021779537e-07, "loss": 21.1094, "step": 22379 }, { "epoch": 1.4863518629209005, "grad_norm": 653.1658935546875, "learning_rate": 3.264586593416485e-07, "loss": 15.5469, "step": 22380 }, { "epoch": 1.4864182772132564, "grad_norm": 154.3872833251953, "learning_rate": 3.2637916625664006e-07, "loss": 15.0469, "step": 22381 }, { "epoch": 1.486484691505612, "grad_norm": 160.77536010742188, "learning_rate": 3.2629968096368877e-07, "loss": 12.5, "step": 22382 }, { "epoch": 1.4865511057979677, "grad_norm": 238.71868896484375, "learning_rate": 3.2622020346371424e-07, "loss": 21.5, "step": 22383 }, { "epoch": 1.4866175200903236, "grad_norm": 357.78704833984375, "learning_rate": 3.2614073375763597e-07, "loss": 23.1094, "step": 22384 }, { "epoch": 1.4866839343826792, "grad_norm": 175.72103881835938, "learning_rate": 3.26061271846373e-07, "loss": 14.4844, "step": 22385 }, { "epoch": 1.4867503486750349, "grad_norm": 329.6047058105469, "learning_rate": 3.2598181773084463e-07, "loss": 15.3594, "step": 22386 }, { "epoch": 1.4868167629673905, "grad_norm": 247.61534118652344, "learning_rate": 3.259023714119704e-07, "loss": 18.6172, "step": 22387 }, { "epoch": 1.4868831772597462, "grad_norm": 323.5601806640625, "learning_rate": 3.258229328906681e-07, "loss": 15.125, "step": 22388 }, { "epoch": 1.486949591552102, "grad_norm": 169.05072021484375, "learning_rate": 3.2574350216785763e-07, "loss": 14.1562, "step": 22389 }, { "epoch": 1.4870160058444577, "grad_norm": 186.3292236328125, "learning_rate": 3.2566407924445783e-07, "loss": 14.7031, "step": 22390 }, { "epoch": 1.4870824201368134, "grad_norm": 141.56170654296875, "learning_rate": 3.255846641213865e-07, "loss": 11.8594, "step": 22391 }, { "epoch": 1.4871488344291692, "grad_norm": 727.5198364257812, "learning_rate": 3.2550525679956364e-07, "loss": 21.6875, "step": 22392 }, { "epoch": 1.4872152487215249, "grad_norm": 146.73187255859375, "learning_rate": 3.2542585727990657e-07, "loss": 21.5156, "step": 22393 }, { "epoch": 1.4872816630138805, "grad_norm": 188.3743438720703, "learning_rate": 3.253464655633343e-07, "loss": 12.6562, "step": 22394 }, { "epoch": 1.4873480773062364, "grad_norm": 721.5494995117188, "learning_rate": 3.25267081650765e-07, "loss": 23.2344, "step": 22395 }, { "epoch": 1.487414491598592, "grad_norm": 166.24964904785156, "learning_rate": 3.2518770554311716e-07, "loss": 10.4844, "step": 22396 }, { "epoch": 1.4874809058909477, "grad_norm": 239.76380920410156, "learning_rate": 3.251083372413087e-07, "loss": 16.5625, "step": 22397 }, { "epoch": 1.4875473201833034, "grad_norm": 287.5068054199219, "learning_rate": 3.250289767462582e-07, "loss": 15.3906, "step": 22398 }, { "epoch": 1.487613734475659, "grad_norm": 134.39154052734375, "learning_rate": 3.249496240588826e-07, "loss": 15.3125, "step": 22399 }, { "epoch": 1.487680148768015, "grad_norm": 89.21994018554688, "learning_rate": 3.2487027918010115e-07, "loss": 15.0938, "step": 22400 }, { "epoch": 1.4877465630603706, "grad_norm": 239.934326171875, "learning_rate": 3.247909421108307e-07, "loss": 15.8906, "step": 22401 }, { "epoch": 1.4878129773527262, "grad_norm": 173.01304626464844, "learning_rate": 3.2471161285198933e-07, "loss": 15.375, "step": 22402 }, { "epoch": 1.487879391645082, "grad_norm": 354.317138671875, "learning_rate": 3.2463229140449456e-07, "loss": 18.3281, "step": 22403 }, { "epoch": 1.4879458059374377, "grad_norm": 176.6456298828125, "learning_rate": 3.245529777692644e-07, "loss": 15.75, "step": 22404 }, { "epoch": 1.4880122202297934, "grad_norm": 112.39839935302734, "learning_rate": 3.244736719472152e-07, "loss": 14.7812, "step": 22405 }, { "epoch": 1.4880786345221493, "grad_norm": 521.998046875, "learning_rate": 3.243943739392656e-07, "loss": 19.7188, "step": 22406 }, { "epoch": 1.488145048814505, "grad_norm": 169.2814483642578, "learning_rate": 3.243150837463321e-07, "loss": 12.625, "step": 22407 }, { "epoch": 1.4882114631068606, "grad_norm": 203.91989135742188, "learning_rate": 3.2423580136933205e-07, "loss": 16.7031, "step": 22408 }, { "epoch": 1.4882778773992162, "grad_norm": 250.5791015625, "learning_rate": 3.241565268091826e-07, "loss": 16.2344, "step": 22409 }, { "epoch": 1.488344291691572, "grad_norm": 166.6164093017578, "learning_rate": 3.240772600668008e-07, "loss": 18.5156, "step": 22410 }, { "epoch": 1.4884107059839278, "grad_norm": 432.19598388671875, "learning_rate": 3.239980011431034e-07, "loss": 14.375, "step": 22411 }, { "epoch": 1.4884771202762834, "grad_norm": 376.6510314941406, "learning_rate": 3.239187500390077e-07, "loss": 14.0, "step": 22412 }, { "epoch": 1.488543534568639, "grad_norm": 186.91783142089844, "learning_rate": 3.238395067554294e-07, "loss": 14.8438, "step": 22413 }, { "epoch": 1.488609948860995, "grad_norm": 711.28173828125, "learning_rate": 3.2376027129328633e-07, "loss": 13.5781, "step": 22414 }, { "epoch": 1.4886763631533506, "grad_norm": 320.56549072265625, "learning_rate": 3.2368104365349425e-07, "loss": 23.1562, "step": 22415 }, { "epoch": 1.4887427774457063, "grad_norm": 141.33262634277344, "learning_rate": 3.236018238369699e-07, "loss": 13.4688, "step": 22416 }, { "epoch": 1.4888091917380621, "grad_norm": 264.16943359375, "learning_rate": 3.2352261184462957e-07, "loss": 20.7656, "step": 22417 }, { "epoch": 1.4888756060304178, "grad_norm": 129.9163055419922, "learning_rate": 3.234434076773896e-07, "loss": 12.8125, "step": 22418 }, { "epoch": 1.4889420203227735, "grad_norm": 99.2353515625, "learning_rate": 3.233642113361662e-07, "loss": 13.2031, "step": 22419 }, { "epoch": 1.489008434615129, "grad_norm": 145.9412841796875, "learning_rate": 3.232850228218756e-07, "loss": 13.2891, "step": 22420 }, { "epoch": 1.4890748489074848, "grad_norm": 236.2855224609375, "learning_rate": 3.232058421354332e-07, "loss": 18.3594, "step": 22421 }, { "epoch": 1.4891412631998406, "grad_norm": 206.8596954345703, "learning_rate": 3.231266692777559e-07, "loss": 21.8438, "step": 22422 }, { "epoch": 1.4892076774921963, "grad_norm": 326.464599609375, "learning_rate": 3.230475042497587e-07, "loss": 15.9062, "step": 22423 }, { "epoch": 1.489274091784552, "grad_norm": 111.31723022460938, "learning_rate": 3.2296834705235767e-07, "loss": 13.1875, "step": 22424 }, { "epoch": 1.4893405060769078, "grad_norm": 805.9498901367188, "learning_rate": 3.2288919768646827e-07, "loss": 15.25, "step": 22425 }, { "epoch": 1.4894069203692635, "grad_norm": 275.73736572265625, "learning_rate": 3.228100561530063e-07, "loss": 29.2344, "step": 22426 }, { "epoch": 1.4894733346616191, "grad_norm": 408.4990234375, "learning_rate": 3.22730922452887e-07, "loss": 31.125, "step": 22427 }, { "epoch": 1.489539748953975, "grad_norm": 376.0862121582031, "learning_rate": 3.226517965870259e-07, "loss": 13.7188, "step": 22428 }, { "epoch": 1.4896061632463307, "grad_norm": 188.2574005126953, "learning_rate": 3.2257267855633863e-07, "loss": 15.75, "step": 22429 }, { "epoch": 1.4896725775386863, "grad_norm": 1570.264892578125, "learning_rate": 3.2249356836173923e-07, "loss": 17.6094, "step": 22430 }, { "epoch": 1.489738991831042, "grad_norm": 163.83700561523438, "learning_rate": 3.2241446600414425e-07, "loss": 18.2969, "step": 22431 }, { "epoch": 1.4898054061233976, "grad_norm": 334.3017883300781, "learning_rate": 3.2233537148446775e-07, "loss": 17.2031, "step": 22432 }, { "epoch": 1.4898718204157535, "grad_norm": 117.99991607666016, "learning_rate": 3.222562848036248e-07, "loss": 12.1094, "step": 22433 }, { "epoch": 1.4899382347081092, "grad_norm": 252.81724548339844, "learning_rate": 3.221772059625304e-07, "loss": 11.5469, "step": 22434 }, { "epoch": 1.4900046490004648, "grad_norm": 171.950439453125, "learning_rate": 3.2209813496209926e-07, "loss": 16.9531, "step": 22435 }, { "epoch": 1.4900710632928207, "grad_norm": 227.53378295898438, "learning_rate": 3.2201907180324594e-07, "loss": 15.3125, "step": 22436 }, { "epoch": 1.4901374775851763, "grad_norm": 253.83021545410156, "learning_rate": 3.219400164868854e-07, "loss": 14.5312, "step": 22437 }, { "epoch": 1.490203891877532, "grad_norm": 180.4065704345703, "learning_rate": 3.21860969013931e-07, "loss": 16.9062, "step": 22438 }, { "epoch": 1.4902703061698879, "grad_norm": 271.6761474609375, "learning_rate": 3.217819293852987e-07, "loss": 16.8906, "step": 22439 }, { "epoch": 1.4903367204622435, "grad_norm": 195.67625427246094, "learning_rate": 3.217028976019015e-07, "loss": 15.8125, "step": 22440 }, { "epoch": 1.4904031347545992, "grad_norm": 254.06861877441406, "learning_rate": 3.2162387366465417e-07, "loss": 19.5156, "step": 22441 }, { "epoch": 1.4904695490469548, "grad_norm": 128.70098876953125, "learning_rate": 3.215448575744706e-07, "loss": 14.5469, "step": 22442 }, { "epoch": 1.4905359633393105, "grad_norm": 151.88909912109375, "learning_rate": 3.2146584933226515e-07, "loss": 12.3438, "step": 22443 }, { "epoch": 1.4906023776316664, "grad_norm": 190.52191162109375, "learning_rate": 3.2138684893895095e-07, "loss": 11.2188, "step": 22444 }, { "epoch": 1.490668791924022, "grad_norm": 144.0481719970703, "learning_rate": 3.213078563954431e-07, "loss": 15.1562, "step": 22445 }, { "epoch": 1.4907352062163777, "grad_norm": 283.1783142089844, "learning_rate": 3.212288717026541e-07, "loss": 22.9531, "step": 22446 }, { "epoch": 1.4908016205087335, "grad_norm": 287.9593200683594, "learning_rate": 3.2114989486149823e-07, "loss": 14.1094, "step": 22447 }, { "epoch": 1.4908680348010892, "grad_norm": 416.4032287597656, "learning_rate": 3.2107092587288897e-07, "loss": 18.7344, "step": 22448 }, { "epoch": 1.4909344490934449, "grad_norm": 160.13526916503906, "learning_rate": 3.209919647377397e-07, "loss": 17.5469, "step": 22449 }, { "epoch": 1.4910008633858007, "grad_norm": 427.73614501953125, "learning_rate": 3.209130114569638e-07, "loss": 17.5625, "step": 22450 }, { "epoch": 1.4910672776781564, "grad_norm": 316.5875244140625, "learning_rate": 3.2083406603147487e-07, "loss": 17.4375, "step": 22451 }, { "epoch": 1.491133691970512, "grad_norm": 179.26280212402344, "learning_rate": 3.2075512846218536e-07, "loss": 11.4375, "step": 22452 }, { "epoch": 1.4912001062628677, "grad_norm": 175.36129760742188, "learning_rate": 3.206761987500094e-07, "loss": 14.3281, "step": 22453 }, { "epoch": 1.4912665205552234, "grad_norm": 161.7646484375, "learning_rate": 3.205972768958591e-07, "loss": 16.2344, "step": 22454 }, { "epoch": 1.4913329348475792, "grad_norm": 253.93630981445312, "learning_rate": 3.205183629006477e-07, "loss": 21.4688, "step": 22455 }, { "epoch": 1.4913993491399349, "grad_norm": 660.0170288085938, "learning_rate": 3.2043945676528814e-07, "loss": 14.4688, "step": 22456 }, { "epoch": 1.4914657634322905, "grad_norm": 364.67803955078125, "learning_rate": 3.203605584906931e-07, "loss": 13.1562, "step": 22457 }, { "epoch": 1.4915321777246464, "grad_norm": 223.02513122558594, "learning_rate": 3.202816680777752e-07, "loss": 13.0625, "step": 22458 }, { "epoch": 1.491598592017002, "grad_norm": 164.4327850341797, "learning_rate": 3.202027855274474e-07, "loss": 14.5469, "step": 22459 }, { "epoch": 1.4916650063093577, "grad_norm": 153.86929321289062, "learning_rate": 3.2012391084062105e-07, "loss": 15.5625, "step": 22460 }, { "epoch": 1.4917314206017136, "grad_norm": 213.3905029296875, "learning_rate": 3.2004504401820986e-07, "loss": 17.0312, "step": 22461 }, { "epoch": 1.4917978348940693, "grad_norm": 184.56390380859375, "learning_rate": 3.199661850611253e-07, "loss": 13.7812, "step": 22462 }, { "epoch": 1.491864249186425, "grad_norm": 308.135009765625, "learning_rate": 3.1988733397027947e-07, "loss": 21.8438, "step": 22463 }, { "epoch": 1.4919306634787806, "grad_norm": 607.4027099609375, "learning_rate": 3.1980849074658535e-07, "loss": 13.375, "step": 22464 }, { "epoch": 1.4919970777711362, "grad_norm": 117.60838317871094, "learning_rate": 3.197296553909541e-07, "loss": 16.9531, "step": 22465 }, { "epoch": 1.492063492063492, "grad_norm": 907.5428466796875, "learning_rate": 3.196508279042979e-07, "loss": 20.9219, "step": 22466 }, { "epoch": 1.4921299063558477, "grad_norm": 102.55631256103516, "learning_rate": 3.195720082875286e-07, "loss": 14.6094, "step": 22467 }, { "epoch": 1.4921963206482034, "grad_norm": 106.2734603881836, "learning_rate": 3.1949319654155836e-07, "loss": 12.8125, "step": 22468 }, { "epoch": 1.4922627349405593, "grad_norm": 107.91313171386719, "learning_rate": 3.1941439266729777e-07, "loss": 15.5625, "step": 22469 }, { "epoch": 1.492329149232915, "grad_norm": 482.53082275390625, "learning_rate": 3.193355966656596e-07, "loss": 19.8203, "step": 22470 }, { "epoch": 1.4923955635252706, "grad_norm": 196.6681671142578, "learning_rate": 3.1925680853755445e-07, "loss": 15.0469, "step": 22471 }, { "epoch": 1.4924619778176265, "grad_norm": 217.1092529296875, "learning_rate": 3.191780282838941e-07, "loss": 20.3594, "step": 22472 }, { "epoch": 1.4925283921099821, "grad_norm": 420.9608154296875, "learning_rate": 3.190992559055896e-07, "loss": 17.9219, "step": 22473 }, { "epoch": 1.4925948064023378, "grad_norm": 149.46568298339844, "learning_rate": 3.1902049140355237e-07, "loss": 13.1406, "step": 22474 }, { "epoch": 1.4926612206946934, "grad_norm": 285.1187438964844, "learning_rate": 3.1894173477869334e-07, "loss": 17.7812, "step": 22475 }, { "epoch": 1.492727634987049, "grad_norm": 279.8356018066406, "learning_rate": 3.1886298603192395e-07, "loss": 17.1719, "step": 22476 }, { "epoch": 1.492794049279405, "grad_norm": 242.77772521972656, "learning_rate": 3.1878424516415413e-07, "loss": 20.6719, "step": 22477 }, { "epoch": 1.4928604635717606, "grad_norm": 170.4669952392578, "learning_rate": 3.1870551217629604e-07, "loss": 12.1875, "step": 22478 }, { "epoch": 1.4929268778641163, "grad_norm": 380.6982421875, "learning_rate": 3.186267870692594e-07, "loss": 15.8125, "step": 22479 }, { "epoch": 1.4929932921564721, "grad_norm": 166.77305603027344, "learning_rate": 3.185480698439551e-07, "loss": 18.875, "step": 22480 }, { "epoch": 1.4930597064488278, "grad_norm": 350.6295166015625, "learning_rate": 3.184693605012938e-07, "loss": 21.4375, "step": 22481 }, { "epoch": 1.4931261207411834, "grad_norm": 326.6513977050781, "learning_rate": 3.18390659042186e-07, "loss": 15.5156, "step": 22482 }, { "epoch": 1.4931925350335393, "grad_norm": 389.6302795410156, "learning_rate": 3.18311965467542e-07, "loss": 20.8281, "step": 22483 }, { "epoch": 1.493258949325895, "grad_norm": 199.46475219726562, "learning_rate": 3.182332797782724e-07, "loss": 18.2188, "step": 22484 }, { "epoch": 1.4933253636182506, "grad_norm": 179.25865173339844, "learning_rate": 3.1815460197528653e-07, "loss": 17.6562, "step": 22485 }, { "epoch": 1.4933917779106063, "grad_norm": 387.69482421875, "learning_rate": 3.1807593205949566e-07, "loss": 14.7344, "step": 22486 }, { "epoch": 1.493458192202962, "grad_norm": 143.11068725585938, "learning_rate": 3.1799727003180875e-07, "loss": 16.4219, "step": 22487 }, { "epoch": 1.4935246064953178, "grad_norm": 149.19754028320312, "learning_rate": 3.179186158931363e-07, "loss": 14.8125, "step": 22488 }, { "epoch": 1.4935910207876735, "grad_norm": 189.31631469726562, "learning_rate": 3.17839969644388e-07, "loss": 16.6719, "step": 22489 }, { "epoch": 1.4936574350800291, "grad_norm": 861.3494873046875, "learning_rate": 3.1776133128647376e-07, "loss": 14.0625, "step": 22490 }, { "epoch": 1.493723849372385, "grad_norm": 168.91566467285156, "learning_rate": 3.1768270082030246e-07, "loss": 17.7031, "step": 22491 }, { "epoch": 1.4937902636647407, "grad_norm": 325.9373474121094, "learning_rate": 3.176040782467849e-07, "loss": 16.5312, "step": 22492 }, { "epoch": 1.4938566779570963, "grad_norm": 83.70702362060547, "learning_rate": 3.1752546356682954e-07, "loss": 15.6875, "step": 22493 }, { "epoch": 1.4939230922494522, "grad_norm": 159.6207733154297, "learning_rate": 3.174468567813461e-07, "loss": 12.3594, "step": 22494 }, { "epoch": 1.4939895065418078, "grad_norm": 262.0916442871094, "learning_rate": 3.173682578912438e-07, "loss": 29.6094, "step": 22495 }, { "epoch": 1.4940559208341635, "grad_norm": 228.27467346191406, "learning_rate": 3.1728966689743184e-07, "loss": 19.25, "step": 22496 }, { "epoch": 1.4941223351265192, "grad_norm": 357.1210632324219, "learning_rate": 3.172110838008194e-07, "loss": 18.0312, "step": 22497 }, { "epoch": 1.4941887494188748, "grad_norm": 158.94760131835938, "learning_rate": 3.171325086023157e-07, "loss": 18.7812, "step": 22498 }, { "epoch": 1.4942551637112307, "grad_norm": 131.9424591064453, "learning_rate": 3.170539413028287e-07, "loss": 15.9766, "step": 22499 }, { "epoch": 1.4943215780035863, "grad_norm": 117.99446105957031, "learning_rate": 3.169753819032682e-07, "loss": 14.0469, "step": 22500 }, { "epoch": 1.494387992295942, "grad_norm": 240.91920471191406, "learning_rate": 3.1689683040454307e-07, "loss": 22.1406, "step": 22501 }, { "epoch": 1.4944544065882979, "grad_norm": 286.5151062011719, "learning_rate": 3.168182868075608e-07, "loss": 14.5938, "step": 22502 }, { "epoch": 1.4945208208806535, "grad_norm": 137.525390625, "learning_rate": 3.167397511132314e-07, "loss": 12.1641, "step": 22503 }, { "epoch": 1.4945872351730092, "grad_norm": 129.6981964111328, "learning_rate": 3.16661223322462e-07, "loss": 15.5469, "step": 22504 }, { "epoch": 1.494653649465365, "grad_norm": 209.2642364501953, "learning_rate": 3.165827034361618e-07, "loss": 16.9375, "step": 22505 }, { "epoch": 1.4947200637577207, "grad_norm": 186.11563110351562, "learning_rate": 3.1650419145523867e-07, "loss": 15.0625, "step": 22506 }, { "epoch": 1.4947864780500764, "grad_norm": 226.1914825439453, "learning_rate": 3.164256873806013e-07, "loss": 14.5625, "step": 22507 }, { "epoch": 1.494852892342432, "grad_norm": 313.90924072265625, "learning_rate": 3.1634719121315666e-07, "loss": 14.9531, "step": 22508 }, { "epoch": 1.4949193066347877, "grad_norm": 239.75596618652344, "learning_rate": 3.162687029538142e-07, "loss": 10.3906, "step": 22509 }, { "epoch": 1.4949857209271435, "grad_norm": 303.9537658691406, "learning_rate": 3.1619022260348084e-07, "loss": 24.25, "step": 22510 }, { "epoch": 1.4950521352194992, "grad_norm": 230.66859436035156, "learning_rate": 3.161117501630646e-07, "loss": 11.6562, "step": 22511 }, { "epoch": 1.4951185495118549, "grad_norm": 111.96222686767578, "learning_rate": 3.160332856334733e-07, "loss": 17.4219, "step": 22512 }, { "epoch": 1.4951849638042107, "grad_norm": 378.663330078125, "learning_rate": 3.1595482901561464e-07, "loss": 16.2188, "step": 22513 }, { "epoch": 1.4952513780965664, "grad_norm": 300.32940673828125, "learning_rate": 3.15876380310396e-07, "loss": 15.2812, "step": 22514 }, { "epoch": 1.495317792388922, "grad_norm": 132.89881896972656, "learning_rate": 3.1579793951872525e-07, "loss": 14.6484, "step": 22515 }, { "epoch": 1.495384206681278, "grad_norm": 308.77716064453125, "learning_rate": 3.157195066415087e-07, "loss": 16.3516, "step": 22516 }, { "epoch": 1.4954506209736336, "grad_norm": 144.24400329589844, "learning_rate": 3.15641081679655e-07, "loss": 10.9062, "step": 22517 }, { "epoch": 1.4955170352659892, "grad_norm": 471.3356018066406, "learning_rate": 3.155626646340703e-07, "loss": 15.125, "step": 22518 }, { "epoch": 1.4955834495583449, "grad_norm": 258.86199951171875, "learning_rate": 3.1548425550566215e-07, "loss": 18.5469, "step": 22519 }, { "epoch": 1.4956498638507005, "grad_norm": 259.30322265625, "learning_rate": 3.154058542953373e-07, "loss": 12.0703, "step": 22520 }, { "epoch": 1.4957162781430564, "grad_norm": 241.30032348632812, "learning_rate": 3.1532746100400277e-07, "loss": 15.1406, "step": 22521 }, { "epoch": 1.495782692435412, "grad_norm": 126.96634674072266, "learning_rate": 3.152490756325654e-07, "loss": 16.6406, "step": 22522 }, { "epoch": 1.4958491067277677, "grad_norm": 249.10702514648438, "learning_rate": 3.151706981819322e-07, "loss": 19.4219, "step": 22523 }, { "epoch": 1.4959155210201236, "grad_norm": 641.0690307617188, "learning_rate": 3.1509232865300885e-07, "loss": 21.9766, "step": 22524 }, { "epoch": 1.4959819353124792, "grad_norm": 438.4209899902344, "learning_rate": 3.15013967046703e-07, "loss": 13.875, "step": 22525 }, { "epoch": 1.496048349604835, "grad_norm": 274.3375549316406, "learning_rate": 3.149356133639204e-07, "loss": 16.0625, "step": 22526 }, { "epoch": 1.4961147638971908, "grad_norm": 156.42630004882812, "learning_rate": 3.148572676055676e-07, "loss": 15.7188, "step": 22527 }, { "epoch": 1.4961811781895464, "grad_norm": 299.4467468261719, "learning_rate": 3.1477892977255085e-07, "loss": 21.9062, "step": 22528 }, { "epoch": 1.496247592481902, "grad_norm": 111.93527221679688, "learning_rate": 3.1470059986577655e-07, "loss": 14.1562, "step": 22529 }, { "epoch": 1.4963140067742577, "grad_norm": 294.437744140625, "learning_rate": 3.1462227788614993e-07, "loss": 20.1562, "step": 22530 }, { "epoch": 1.4963804210666134, "grad_norm": 214.2799072265625, "learning_rate": 3.1454396383457827e-07, "loss": 23.5625, "step": 22531 }, { "epoch": 1.4964468353589693, "grad_norm": 167.56076049804688, "learning_rate": 3.1446565771196656e-07, "loss": 28.1875, "step": 22532 }, { "epoch": 1.496513249651325, "grad_norm": 99.2471694946289, "learning_rate": 3.1438735951922066e-07, "loss": 13.375, "step": 22533 }, { "epoch": 1.4965796639436806, "grad_norm": 130.84957885742188, "learning_rate": 3.143090692572464e-07, "loss": 15.8906, "step": 22534 }, { "epoch": 1.4966460782360365, "grad_norm": 419.55682373046875, "learning_rate": 3.142307869269496e-07, "loss": 17.5469, "step": 22535 }, { "epoch": 1.4967124925283921, "grad_norm": 487.6752014160156, "learning_rate": 3.141525125292356e-07, "loss": 12.4688, "step": 22536 }, { "epoch": 1.4967789068207478, "grad_norm": 148.2556610107422, "learning_rate": 3.140742460650098e-07, "loss": 18.2812, "step": 22537 }, { "epoch": 1.4968453211131036, "grad_norm": 299.9461364746094, "learning_rate": 3.139959875351775e-07, "loss": 13.4219, "step": 22538 }, { "epoch": 1.4969117354054593, "grad_norm": 153.66061401367188, "learning_rate": 3.1391773694064416e-07, "loss": 15.8438, "step": 22539 }, { "epoch": 1.496978149697815, "grad_norm": 183.9467010498047, "learning_rate": 3.1383949428231516e-07, "loss": 16.25, "step": 22540 }, { "epoch": 1.4970445639901706, "grad_norm": 163.94883728027344, "learning_rate": 3.1376125956109457e-07, "loss": 15.375, "step": 22541 }, { "epoch": 1.4971109782825263, "grad_norm": 329.3146667480469, "learning_rate": 3.1368303277788856e-07, "loss": 15.4844, "step": 22542 }, { "epoch": 1.4971773925748821, "grad_norm": 159.8507537841797, "learning_rate": 3.136048139336013e-07, "loss": 29.3125, "step": 22543 }, { "epoch": 1.4972438068672378, "grad_norm": 137.65792846679688, "learning_rate": 3.135266030291376e-07, "loss": 17.5156, "step": 22544 }, { "epoch": 1.4973102211595934, "grad_norm": 106.93216705322266, "learning_rate": 3.134484000654024e-07, "loss": 14.3125, "step": 22545 }, { "epoch": 1.4973766354519493, "grad_norm": 216.81704711914062, "learning_rate": 3.1337020504330023e-07, "loss": 16.1406, "step": 22546 }, { "epoch": 1.497443049744305, "grad_norm": 497.860595703125, "learning_rate": 3.132920179637356e-07, "loss": 12.125, "step": 22547 }, { "epoch": 1.4975094640366606, "grad_norm": 543.9835815429688, "learning_rate": 3.132138388276132e-07, "loss": 20.2031, "step": 22548 }, { "epoch": 1.4975758783290165, "grad_norm": 149.3777313232422, "learning_rate": 3.131356676358365e-07, "loss": 10.7969, "step": 22549 }, { "epoch": 1.4976422926213722, "grad_norm": 135.70872497558594, "learning_rate": 3.13057504389311e-07, "loss": 13.8906, "step": 22550 }, { "epoch": 1.4977087069137278, "grad_norm": 417.5964660644531, "learning_rate": 3.1297934908893974e-07, "loss": 18.4844, "step": 22551 }, { "epoch": 1.4977751212060835, "grad_norm": 328.87322998046875, "learning_rate": 3.129012017356273e-07, "loss": 16.875, "step": 22552 }, { "epoch": 1.4978415354984393, "grad_norm": 395.65203857421875, "learning_rate": 3.1282306233027753e-07, "loss": 15.0938, "step": 22553 }, { "epoch": 1.497907949790795, "grad_norm": 295.2959899902344, "learning_rate": 3.127449308737946e-07, "loss": 15.2344, "step": 22554 }, { "epoch": 1.4979743640831507, "grad_norm": 149.2600860595703, "learning_rate": 3.126668073670814e-07, "loss": 11.7109, "step": 22555 }, { "epoch": 1.4980407783755063, "grad_norm": 243.5778045654297, "learning_rate": 3.12588691811043e-07, "loss": 15.3438, "step": 22556 }, { "epoch": 1.4981071926678622, "grad_norm": 257.9432373046875, "learning_rate": 3.1251058420658174e-07, "loss": 13.7656, "step": 22557 }, { "epoch": 1.4981736069602178, "grad_norm": 173.08416748046875, "learning_rate": 3.124324845546017e-07, "loss": 18.1406, "step": 22558 }, { "epoch": 1.4982400212525735, "grad_norm": 131.6563262939453, "learning_rate": 3.1235439285600616e-07, "loss": 15.1719, "step": 22559 }, { "epoch": 1.4983064355449294, "grad_norm": 116.37760162353516, "learning_rate": 3.1227630911169844e-07, "loss": 18.0469, "step": 22560 }, { "epoch": 1.498372849837285, "grad_norm": 308.3498229980469, "learning_rate": 3.121982333225819e-07, "loss": 16.0938, "step": 22561 }, { "epoch": 1.4984392641296407, "grad_norm": 509.38458251953125, "learning_rate": 3.121201654895599e-07, "loss": 17.5938, "step": 22562 }, { "epoch": 1.4985056784219963, "grad_norm": 249.15402221679688, "learning_rate": 3.120421056135345e-07, "loss": 15.8281, "step": 22563 }, { "epoch": 1.4985720927143522, "grad_norm": 167.0317840576172, "learning_rate": 3.1196405369541e-07, "loss": 13.2656, "step": 22564 }, { "epoch": 1.4986385070067079, "grad_norm": 393.8874816894531, "learning_rate": 3.1188600973608825e-07, "loss": 15.75, "step": 22565 }, { "epoch": 1.4987049212990635, "grad_norm": 248.56417846679688, "learning_rate": 3.1180797373647227e-07, "loss": 17.4219, "step": 22566 }, { "epoch": 1.4987713355914192, "grad_norm": 176.20123291015625, "learning_rate": 3.117299456974649e-07, "loss": 23.8125, "step": 22567 }, { "epoch": 1.498837749883775, "grad_norm": 344.5430603027344, "learning_rate": 3.11651925619969e-07, "loss": 18.8281, "step": 22568 }, { "epoch": 1.4989041641761307, "grad_norm": 194.8001708984375, "learning_rate": 3.1157391350488594e-07, "loss": 12.8281, "step": 22569 }, { "epoch": 1.4989705784684864, "grad_norm": 312.5047302246094, "learning_rate": 3.1149590935311966e-07, "loss": 19.5625, "step": 22570 }, { "epoch": 1.4990369927608422, "grad_norm": 112.64906311035156, "learning_rate": 3.114179131655712e-07, "loss": 12.5625, "step": 22571 }, { "epoch": 1.499103407053198, "grad_norm": 216.7451934814453, "learning_rate": 3.113399249431434e-07, "loss": 13.1406, "step": 22572 }, { "epoch": 1.4991698213455535, "grad_norm": 250.81614685058594, "learning_rate": 3.1126194468673816e-07, "loss": 18.6094, "step": 22573 }, { "epoch": 1.4992362356379092, "grad_norm": 1174.7271728515625, "learning_rate": 3.111839723972577e-07, "loss": 19.1094, "step": 22574 }, { "epoch": 1.499302649930265, "grad_norm": 166.19529724121094, "learning_rate": 3.1110600807560373e-07, "loss": 16.3594, "step": 22575 }, { "epoch": 1.4993690642226207, "grad_norm": 212.47671508789062, "learning_rate": 3.110280517226783e-07, "loss": 19.0625, "step": 22576 }, { "epoch": 1.4994354785149764, "grad_norm": 286.2152404785156, "learning_rate": 3.10950103339383e-07, "loss": 17.5938, "step": 22577 }, { "epoch": 1.499501892807332, "grad_norm": 164.88955688476562, "learning_rate": 3.1087216292661967e-07, "loss": 17.8125, "step": 22578 }, { "epoch": 1.499568307099688, "grad_norm": 523.0776977539062, "learning_rate": 3.107942304852901e-07, "loss": 21.125, "step": 22579 }, { "epoch": 1.4996347213920436, "grad_norm": 252.09530639648438, "learning_rate": 3.107163060162947e-07, "loss": 12.2812, "step": 22580 }, { "epoch": 1.4997011356843992, "grad_norm": 209.83740234375, "learning_rate": 3.1063838952053633e-07, "loss": 19.0469, "step": 22581 }, { "epoch": 1.499767549976755, "grad_norm": 253.9088897705078, "learning_rate": 3.105604809989153e-07, "loss": 16.125, "step": 22582 }, { "epoch": 1.4998339642691108, "grad_norm": 160.35792541503906, "learning_rate": 3.10482580452333e-07, "loss": 14.1406, "step": 22583 }, { "epoch": 1.4999003785614664, "grad_norm": 116.58368682861328, "learning_rate": 3.104046878816906e-07, "loss": 13.3281, "step": 22584 }, { "epoch": 1.499966792853822, "grad_norm": 144.04330444335938, "learning_rate": 3.103268032878892e-07, "loss": 13.6875, "step": 22585 }, { "epoch": 1.5000332071461777, "grad_norm": 302.11163330078125, "learning_rate": 3.1024892667182957e-07, "loss": 15.4531, "step": 22586 }, { "epoch": 1.5000996214385336, "grad_norm": 250.86695861816406, "learning_rate": 3.10171058034413e-07, "loss": 15.125, "step": 22587 }, { "epoch": 1.5001660357308892, "grad_norm": 206.06712341308594, "learning_rate": 3.100931973765392e-07, "loss": 18.6562, "step": 22588 }, { "epoch": 1.500232450023245, "grad_norm": 143.0775604248047, "learning_rate": 3.1001534469911004e-07, "loss": 14.625, "step": 22589 }, { "epoch": 1.5002988643156008, "grad_norm": 251.13890075683594, "learning_rate": 3.099375000030252e-07, "loss": 16.6094, "step": 22590 }, { "epoch": 1.5003652786079564, "grad_norm": 109.4703598022461, "learning_rate": 3.0985966328918556e-07, "loss": 10.4922, "step": 22591 }, { "epoch": 1.500431692900312, "grad_norm": 185.77383422851562, "learning_rate": 3.097818345584913e-07, "loss": 22.25, "step": 22592 }, { "epoch": 1.500498107192668, "grad_norm": 268.69873046875, "learning_rate": 3.097040138118431e-07, "loss": 22.5, "step": 22593 }, { "epoch": 1.5005645214850236, "grad_norm": 245.90652465820312, "learning_rate": 3.0962620105014015e-07, "loss": 20.3125, "step": 22594 }, { "epoch": 1.5006309357773793, "grad_norm": 270.9316101074219, "learning_rate": 3.0954839627428385e-07, "loss": 18.5625, "step": 22595 }, { "epoch": 1.5006973500697351, "grad_norm": 158.46006774902344, "learning_rate": 3.094705994851733e-07, "loss": 20.9219, "step": 22596 }, { "epoch": 1.5007637643620906, "grad_norm": 233.36825561523438, "learning_rate": 3.093928106837087e-07, "loss": 18.1875, "step": 22597 }, { "epoch": 1.5008301786544465, "grad_norm": 75.13827514648438, "learning_rate": 3.093150298707897e-07, "loss": 12.3203, "step": 22598 }, { "epoch": 1.500896592946802, "grad_norm": 387.717041015625, "learning_rate": 3.092372570473163e-07, "loss": 15.7031, "step": 22599 }, { "epoch": 1.5009630072391578, "grad_norm": 174.11907958984375, "learning_rate": 3.091594922141879e-07, "loss": 17.0938, "step": 22600 }, { "epoch": 1.5010294215315136, "grad_norm": 161.781005859375, "learning_rate": 3.0908173537230455e-07, "loss": 13.7422, "step": 22601 }, { "epoch": 1.5010958358238693, "grad_norm": 204.12586975097656, "learning_rate": 3.090039865225647e-07, "loss": 18.9375, "step": 22602 }, { "epoch": 1.501162250116225, "grad_norm": 330.76324462890625, "learning_rate": 3.089262456658687e-07, "loss": 20.0, "step": 22603 }, { "epoch": 1.5012286644085808, "grad_norm": 218.02259826660156, "learning_rate": 3.088485128031152e-07, "loss": 19.8594, "step": 22604 }, { "epoch": 1.5012950787009365, "grad_norm": 156.8428192138672, "learning_rate": 3.087707879352035e-07, "loss": 16.4531, "step": 22605 }, { "epoch": 1.5013614929932921, "grad_norm": 248.3299560546875, "learning_rate": 3.0869307106303287e-07, "loss": 19.8281, "step": 22606 }, { "epoch": 1.501427907285648, "grad_norm": 261.10986328125, "learning_rate": 3.0861536218750205e-07, "loss": 14.9688, "step": 22607 }, { "epoch": 1.5014943215780034, "grad_norm": 494.9154052734375, "learning_rate": 3.085376613095101e-07, "loss": 22.3281, "step": 22608 }, { "epoch": 1.5015607358703593, "grad_norm": 333.56298828125, "learning_rate": 3.0845996842995603e-07, "loss": 31.4531, "step": 22609 }, { "epoch": 1.501627150162715, "grad_norm": 122.3709945678711, "learning_rate": 3.083822835497377e-07, "loss": 13.2188, "step": 22610 }, { "epoch": 1.5016935644550706, "grad_norm": 387.88372802734375, "learning_rate": 3.0830460666975457e-07, "loss": 15.1875, "step": 22611 }, { "epoch": 1.5017599787474265, "grad_norm": 123.86279296875, "learning_rate": 3.0822693779090527e-07, "loss": 16.6094, "step": 22612 }, { "epoch": 1.5018263930397822, "grad_norm": 163.52035522460938, "learning_rate": 3.0814927691408745e-07, "loss": 20.6562, "step": 22613 }, { "epoch": 1.5018928073321378, "grad_norm": 168.7258758544922, "learning_rate": 3.080716240401999e-07, "loss": 13.6953, "step": 22614 }, { "epoch": 1.5019592216244937, "grad_norm": 206.2408447265625, "learning_rate": 3.0799397917014094e-07, "loss": 15.75, "step": 22615 }, { "epoch": 1.5020256359168493, "grad_norm": 174.699462890625, "learning_rate": 3.0791634230480844e-07, "loss": 18.3125, "step": 22616 }, { "epoch": 1.502092050209205, "grad_norm": 99.98578643798828, "learning_rate": 3.078387134451006e-07, "loss": 11.6562, "step": 22617 }, { "epoch": 1.5021584645015609, "grad_norm": 176.53475952148438, "learning_rate": 3.077610925919157e-07, "loss": 14.8281, "step": 22618 }, { "epoch": 1.5022248787939163, "grad_norm": 184.82540893554688, "learning_rate": 3.0768347974615083e-07, "loss": 13.5469, "step": 22619 }, { "epoch": 1.5022912930862722, "grad_norm": 119.76773071289062, "learning_rate": 3.0760587490870484e-07, "loss": 13.5547, "step": 22620 }, { "epoch": 1.5023577073786278, "grad_norm": 638.6458129882812, "learning_rate": 3.0752827808047443e-07, "loss": 23.9062, "step": 22621 }, { "epoch": 1.5024241216709835, "grad_norm": 178.3330078125, "learning_rate": 3.0745068926235763e-07, "loss": 17.1719, "step": 22622 }, { "epoch": 1.5024905359633394, "grad_norm": 261.10565185546875, "learning_rate": 3.0737310845525197e-07, "loss": 12.0938, "step": 22623 }, { "epoch": 1.502556950255695, "grad_norm": 237.82859802246094, "learning_rate": 3.072955356600547e-07, "loss": 14.8125, "step": 22624 }, { "epoch": 1.5026233645480507, "grad_norm": 194.7387237548828, "learning_rate": 3.072179708776633e-07, "loss": 15.2656, "step": 22625 }, { "epoch": 1.5026897788404066, "grad_norm": 238.15621948242188, "learning_rate": 3.071404141089753e-07, "loss": 15.5781, "step": 22626 }, { "epoch": 1.5027561931327622, "grad_norm": 176.6140899658203, "learning_rate": 3.070628653548867e-07, "loss": 15.2812, "step": 22627 }, { "epoch": 1.5028226074251179, "grad_norm": 136.64747619628906, "learning_rate": 3.06985324616296e-07, "loss": 15.3906, "step": 22628 }, { "epoch": 1.5028890217174737, "grad_norm": 172.934326171875, "learning_rate": 3.069077918940992e-07, "loss": 21.0, "step": 22629 }, { "epoch": 1.5029554360098292, "grad_norm": 303.2418518066406, "learning_rate": 3.0683026718919335e-07, "loss": 14.2891, "step": 22630 }, { "epoch": 1.503021850302185, "grad_norm": 330.6659851074219, "learning_rate": 3.067527505024753e-07, "loss": 22.0859, "step": 22631 }, { "epoch": 1.5030882645945407, "grad_norm": 269.5997619628906, "learning_rate": 3.0667524183484196e-07, "loss": 21.7812, "step": 22632 }, { "epoch": 1.5031546788868964, "grad_norm": 172.2926025390625, "learning_rate": 3.0659774118718895e-07, "loss": 13.5, "step": 22633 }, { "epoch": 1.5032210931792522, "grad_norm": 245.61203002929688, "learning_rate": 3.065202485604141e-07, "loss": 19.3125, "step": 22634 }, { "epoch": 1.5032875074716079, "grad_norm": 250.994873046875, "learning_rate": 3.0644276395541277e-07, "loss": 18.875, "step": 22635 }, { "epoch": 1.5033539217639635, "grad_norm": 100.80814361572266, "learning_rate": 3.0636528737308176e-07, "loss": 15.3281, "step": 22636 }, { "epoch": 1.5034203360563194, "grad_norm": 359.7665100097656, "learning_rate": 3.06287818814317e-07, "loss": 23.5312, "step": 22637 }, { "epoch": 1.503486750348675, "grad_norm": 180.75527954101562, "learning_rate": 3.0621035828001476e-07, "loss": 18.5625, "step": 22638 }, { "epoch": 1.5035531646410307, "grad_norm": 457.0525817871094, "learning_rate": 3.061329057710711e-07, "loss": 15.6094, "step": 22639 }, { "epoch": 1.5036195789333866, "grad_norm": 189.07803344726562, "learning_rate": 3.0605546128838214e-07, "loss": 16.9922, "step": 22640 }, { "epoch": 1.503685993225742, "grad_norm": 247.80349731445312, "learning_rate": 3.0597802483284284e-07, "loss": 17.9844, "step": 22641 }, { "epoch": 1.503752407518098, "grad_norm": 106.72161102294922, "learning_rate": 3.0590059640535016e-07, "loss": 14.625, "step": 22642 }, { "epoch": 1.5038188218104536, "grad_norm": 103.49919128417969, "learning_rate": 3.0582317600679887e-07, "loss": 14.1875, "step": 22643 }, { "epoch": 1.5038852361028092, "grad_norm": 462.7326354980469, "learning_rate": 3.0574576363808467e-07, "loss": 24.4688, "step": 22644 }, { "epoch": 1.503951650395165, "grad_norm": 100.35504913330078, "learning_rate": 3.056683593001033e-07, "loss": 12.9219, "step": 22645 }, { "epoch": 1.5040180646875208, "grad_norm": 138.99281311035156, "learning_rate": 3.055909629937499e-07, "loss": 14.5781, "step": 22646 }, { "epoch": 1.5040844789798764, "grad_norm": 309.0203552246094, "learning_rate": 3.055135747199198e-07, "loss": 16.7031, "step": 22647 }, { "epoch": 1.5041508932722323, "grad_norm": 197.66455078125, "learning_rate": 3.054361944795083e-07, "loss": 12.7969, "step": 22648 }, { "epoch": 1.504217307564588, "grad_norm": 426.1669006347656, "learning_rate": 3.0535882227341026e-07, "loss": 18.0781, "step": 22649 }, { "epoch": 1.5042837218569436, "grad_norm": 149.2816619873047, "learning_rate": 3.0528145810252094e-07, "loss": 17.8438, "step": 22650 }, { "epoch": 1.5043501361492995, "grad_norm": 174.2887725830078, "learning_rate": 3.0520410196773536e-07, "loss": 9.8281, "step": 22651 }, { "epoch": 1.504416550441655, "grad_norm": 149.92800903320312, "learning_rate": 3.0512675386994747e-07, "loss": 15.3906, "step": 22652 }, { "epoch": 1.5044829647340108, "grad_norm": 359.1302185058594, "learning_rate": 3.050494138100532e-07, "loss": 25.75, "step": 22653 }, { "epoch": 1.5045493790263664, "grad_norm": 267.9012756347656, "learning_rate": 3.049720817889463e-07, "loss": 15.1719, "step": 22654 }, { "epoch": 1.504615793318722, "grad_norm": 200.7416534423828, "learning_rate": 3.048947578075216e-07, "loss": 17.875, "step": 22655 }, { "epoch": 1.504682207611078, "grad_norm": 326.6504821777344, "learning_rate": 3.0481744186667345e-07, "loss": 16.2969, "step": 22656 }, { "epoch": 1.5047486219034336, "grad_norm": 293.48150634765625, "learning_rate": 3.047401339672967e-07, "loss": 17.1719, "step": 22657 }, { "epoch": 1.5048150361957893, "grad_norm": 277.1900329589844, "learning_rate": 3.0466283411028447e-07, "loss": 20.5312, "step": 22658 }, { "epoch": 1.5048814504881451, "grad_norm": 153.4755096435547, "learning_rate": 3.0458554229653224e-07, "loss": 14.625, "step": 22659 }, { "epoch": 1.5049478647805008, "grad_norm": 153.32923889160156, "learning_rate": 3.0450825852693316e-07, "loss": 20.0625, "step": 22660 }, { "epoch": 1.5050142790728565, "grad_norm": 182.7186279296875, "learning_rate": 3.0443098280238144e-07, "loss": 19.875, "step": 22661 }, { "epoch": 1.5050806933652123, "grad_norm": 143.73001098632812, "learning_rate": 3.043537151237712e-07, "loss": 15.6406, "step": 22662 }, { "epoch": 1.5051471076575678, "grad_norm": 210.51596069335938, "learning_rate": 3.042764554919959e-07, "loss": 18.7344, "step": 22663 }, { "epoch": 1.5052135219499236, "grad_norm": 122.36582946777344, "learning_rate": 3.041992039079494e-07, "loss": 8.3672, "step": 22664 }, { "epoch": 1.5052799362422793, "grad_norm": 153.17881774902344, "learning_rate": 3.041219603725256e-07, "loss": 11.1094, "step": 22665 }, { "epoch": 1.505346350534635, "grad_norm": 95.44937133789062, "learning_rate": 3.040447248866171e-07, "loss": 10.8828, "step": 22666 }, { "epoch": 1.5054127648269908, "grad_norm": 165.14202880859375, "learning_rate": 3.039674974511185e-07, "loss": 16.2812, "step": 22667 }, { "epoch": 1.5054791791193465, "grad_norm": 186.2201690673828, "learning_rate": 3.038902780669222e-07, "loss": 12.3594, "step": 22668 }, { "epoch": 1.5055455934117021, "grad_norm": 1230.147705078125, "learning_rate": 3.038130667349219e-07, "loss": 17.5469, "step": 22669 }, { "epoch": 1.505612007704058, "grad_norm": 241.79290771484375, "learning_rate": 3.037358634560104e-07, "loss": 14.875, "step": 22670 }, { "epoch": 1.5056784219964137, "grad_norm": 181.49749755859375, "learning_rate": 3.036586682310811e-07, "loss": 12.2812, "step": 22671 }, { "epoch": 1.5057448362887693, "grad_norm": 388.6385498046875, "learning_rate": 3.0358148106102676e-07, "loss": 20.0781, "step": 22672 }, { "epoch": 1.5058112505811252, "grad_norm": 155.35093688964844, "learning_rate": 3.035043019467406e-07, "loss": 14.3281, "step": 22673 }, { "epoch": 1.5058776648734806, "grad_norm": 1144.617431640625, "learning_rate": 3.034271308891144e-07, "loss": 14.5781, "step": 22674 }, { "epoch": 1.5059440791658365, "grad_norm": 304.0683288574219, "learning_rate": 3.033499678890421e-07, "loss": 22.5625, "step": 22675 }, { "epoch": 1.5060104934581922, "grad_norm": 165.65765380859375, "learning_rate": 3.032728129474154e-07, "loss": 15.9062, "step": 22676 }, { "epoch": 1.5060769077505478, "grad_norm": 268.6431579589844, "learning_rate": 3.031956660651269e-07, "loss": 20.1094, "step": 22677 }, { "epoch": 1.5061433220429037, "grad_norm": 126.54315185546875, "learning_rate": 3.031185272430693e-07, "loss": 11.2656, "step": 22678 }, { "epoch": 1.5062097363352593, "grad_norm": 164.12864685058594, "learning_rate": 3.030413964821349e-07, "loss": 12.0, "step": 22679 }, { "epoch": 1.506276150627615, "grad_norm": 573.7720336914062, "learning_rate": 3.029642737832152e-07, "loss": 19.3594, "step": 22680 }, { "epoch": 1.5063425649199709, "grad_norm": 200.7476348876953, "learning_rate": 3.028871591472035e-07, "loss": 17.6094, "step": 22681 }, { "epoch": 1.5064089792123265, "grad_norm": 182.22879028320312, "learning_rate": 3.028100525749908e-07, "loss": 15.3438, "step": 22682 }, { "epoch": 1.5064753935046822, "grad_norm": 162.0090789794922, "learning_rate": 3.02732954067469e-07, "loss": 15.375, "step": 22683 }, { "epoch": 1.506541807797038, "grad_norm": 243.37152099609375, "learning_rate": 3.026558636255311e-07, "loss": 12.7969, "step": 22684 }, { "epoch": 1.5066082220893935, "grad_norm": 199.048583984375, "learning_rate": 3.0257878125006766e-07, "loss": 15.3125, "step": 22685 }, { "epoch": 1.5066746363817494, "grad_norm": 130.33250427246094, "learning_rate": 3.0250170694197074e-07, "loss": 13.75, "step": 22686 }, { "epoch": 1.506741050674105, "grad_norm": 290.8138732910156, "learning_rate": 3.024246407021319e-07, "loss": 23.7188, "step": 22687 }, { "epoch": 1.5068074649664607, "grad_norm": 213.31617736816406, "learning_rate": 3.023475825314425e-07, "loss": 24.5625, "step": 22688 }, { "epoch": 1.5068738792588166, "grad_norm": 215.08865356445312, "learning_rate": 3.02270532430794e-07, "loss": 21.9531, "step": 22689 }, { "epoch": 1.5069402935511722, "grad_norm": 134.5599822998047, "learning_rate": 3.02193490401078e-07, "loss": 13.4844, "step": 22690 }, { "epoch": 1.5070067078435279, "grad_norm": 275.0478210449219, "learning_rate": 3.0211645644318475e-07, "loss": 15.9062, "step": 22691 }, { "epoch": 1.5070731221358837, "grad_norm": 817.291015625, "learning_rate": 3.020394305580064e-07, "loss": 19.1094, "step": 22692 }, { "epoch": 1.5071395364282394, "grad_norm": 160.6695556640625, "learning_rate": 3.0196241274643317e-07, "loss": 13.8594, "step": 22693 }, { "epoch": 1.507205950720595, "grad_norm": 202.16549682617188, "learning_rate": 3.018854030093563e-07, "loss": 11.9688, "step": 22694 }, { "epoch": 1.507272365012951, "grad_norm": 99.6938247680664, "learning_rate": 3.018084013476665e-07, "loss": 17.3125, "step": 22695 }, { "epoch": 1.5073387793053064, "grad_norm": 467.1496276855469, "learning_rate": 3.017314077622548e-07, "loss": 17.3438, "step": 22696 }, { "epoch": 1.5074051935976622, "grad_norm": 209.14378356933594, "learning_rate": 3.0165442225401094e-07, "loss": 13.4062, "step": 22697 }, { "epoch": 1.5074716078900179, "grad_norm": 414.8608703613281, "learning_rate": 3.0157744482382665e-07, "loss": 19.5312, "step": 22698 }, { "epoch": 1.5075380221823735, "grad_norm": 150.15225219726562, "learning_rate": 3.015004754725915e-07, "loss": 19.0625, "step": 22699 }, { "epoch": 1.5076044364747294, "grad_norm": 423.5149841308594, "learning_rate": 3.0142351420119594e-07, "loss": 18.9531, "step": 22700 }, { "epoch": 1.507670850767085, "grad_norm": 182.99356079101562, "learning_rate": 3.0134656101053047e-07, "loss": 14.7969, "step": 22701 }, { "epoch": 1.5077372650594407, "grad_norm": 139.64700317382812, "learning_rate": 3.01269615901485e-07, "loss": 13.6562, "step": 22702 }, { "epoch": 1.5078036793517966, "grad_norm": 237.63404846191406, "learning_rate": 3.011926788749498e-07, "loss": 14.7344, "step": 22703 }, { "epoch": 1.5078700936441523, "grad_norm": 156.57571411132812, "learning_rate": 3.01115749931815e-07, "loss": 14.9844, "step": 22704 }, { "epoch": 1.507936507936508, "grad_norm": 180.1397705078125, "learning_rate": 3.010388290729695e-07, "loss": 15.2031, "step": 22705 }, { "epoch": 1.5080029222288638, "grad_norm": 239.18272399902344, "learning_rate": 3.009619162993046e-07, "loss": 18.6562, "step": 22706 }, { "epoch": 1.5080693365212192, "grad_norm": 512.7820434570312, "learning_rate": 3.0088501161170866e-07, "loss": 16.2656, "step": 22707 }, { "epoch": 1.508135750813575, "grad_norm": 283.12030029296875, "learning_rate": 3.0080811501107183e-07, "loss": 26.7188, "step": 22708 }, { "epoch": 1.5082021651059307, "grad_norm": 244.92474365234375, "learning_rate": 3.007312264982836e-07, "loss": 13.5156, "step": 22709 }, { "epoch": 1.5082685793982864, "grad_norm": 214.9237060546875, "learning_rate": 3.0065434607423336e-07, "loss": 13.375, "step": 22710 }, { "epoch": 1.5083349936906423, "grad_norm": 382.7807312011719, "learning_rate": 3.005774737398102e-07, "loss": 18.2656, "step": 22711 }, { "epoch": 1.508401407982998, "grad_norm": 246.85482788085938, "learning_rate": 3.005006094959041e-07, "loss": 16.1562, "step": 22712 }, { "epoch": 1.5084678222753536, "grad_norm": 572.6580810546875, "learning_rate": 3.0042375334340277e-07, "loss": 13.4219, "step": 22713 }, { "epoch": 1.5085342365677095, "grad_norm": 103.93492889404297, "learning_rate": 3.0034690528319674e-07, "loss": 13.5469, "step": 22714 }, { "epoch": 1.5086006508600651, "grad_norm": 206.8740234375, "learning_rate": 3.0027006531617384e-07, "loss": 21.1562, "step": 22715 }, { "epoch": 1.5086670651524208, "grad_norm": 183.7975311279297, "learning_rate": 3.0019323344322345e-07, "loss": 10.125, "step": 22716 }, { "epoch": 1.5087334794447766, "grad_norm": 229.99839782714844, "learning_rate": 3.001164096652342e-07, "loss": 17.7031, "step": 22717 }, { "epoch": 1.508799893737132, "grad_norm": 243.05686950683594, "learning_rate": 3.0003959398309506e-07, "loss": 22.7266, "step": 22718 }, { "epoch": 1.508866308029488, "grad_norm": 487.7875671386719, "learning_rate": 2.9996278639769366e-07, "loss": 19.125, "step": 22719 }, { "epoch": 1.5089327223218436, "grad_norm": 186.21885681152344, "learning_rate": 2.998859869099194e-07, "loss": 20.75, "step": 22720 }, { "epoch": 1.5089991366141993, "grad_norm": 512.8792724609375, "learning_rate": 2.9980919552066064e-07, "loss": 13.8594, "step": 22721 }, { "epoch": 1.5090655509065551, "grad_norm": 358.58428955078125, "learning_rate": 2.997324122308047e-07, "loss": 22.5625, "step": 22722 }, { "epoch": 1.5091319651989108, "grad_norm": 112.0015640258789, "learning_rate": 2.9965563704124095e-07, "loss": 9.9375, "step": 22723 }, { "epoch": 1.5091983794912665, "grad_norm": 231.26272583007812, "learning_rate": 2.9957886995285676e-07, "loss": 17.5078, "step": 22724 }, { "epoch": 1.5092647937836223, "grad_norm": 207.28440856933594, "learning_rate": 2.9950211096654033e-07, "loss": 19.0, "step": 22725 }, { "epoch": 1.509331208075978, "grad_norm": 213.92550659179688, "learning_rate": 2.994253600831795e-07, "loss": 24.3594, "step": 22726 }, { "epoch": 1.5093976223683336, "grad_norm": 164.91275024414062, "learning_rate": 2.993486173036621e-07, "loss": 14.0469, "step": 22727 }, { "epoch": 1.5094640366606895, "grad_norm": 217.94210815429688, "learning_rate": 2.99271882628876e-07, "loss": 21.3125, "step": 22728 }, { "epoch": 1.509530450953045, "grad_norm": 355.1308898925781, "learning_rate": 2.991951560597089e-07, "loss": 21.25, "step": 22729 }, { "epoch": 1.5095968652454008, "grad_norm": 185.10133361816406, "learning_rate": 2.9911843759704757e-07, "loss": 18.5156, "step": 22730 }, { "epoch": 1.5096632795377565, "grad_norm": 139.47535705566406, "learning_rate": 2.9904172724178055e-07, "loss": 14.3672, "step": 22731 }, { "epoch": 1.5097296938301121, "grad_norm": 340.8703918457031, "learning_rate": 2.989650249947944e-07, "loss": 18.5156, "step": 22732 }, { "epoch": 1.509796108122468, "grad_norm": 420.6253967285156, "learning_rate": 2.988883308569766e-07, "loss": 21.6875, "step": 22733 }, { "epoch": 1.5098625224148237, "grad_norm": 132.0035858154297, "learning_rate": 2.988116448292143e-07, "loss": 13.7109, "step": 22734 }, { "epoch": 1.5099289367071793, "grad_norm": 154.75079345703125, "learning_rate": 2.987349669123945e-07, "loss": 17.6953, "step": 22735 }, { "epoch": 1.5099953509995352, "grad_norm": 121.96736145019531, "learning_rate": 2.986582971074042e-07, "loss": 14.5781, "step": 22736 }, { "epoch": 1.5100617652918908, "grad_norm": 261.1473388671875, "learning_rate": 2.985816354151307e-07, "loss": 14.9219, "step": 22737 }, { "epoch": 1.5101281795842465, "grad_norm": 493.9690246582031, "learning_rate": 2.985049818364598e-07, "loss": 11.9609, "step": 22738 }, { "epoch": 1.5101945938766024, "grad_norm": 166.21835327148438, "learning_rate": 2.9842833637227924e-07, "loss": 14.6875, "step": 22739 }, { "epoch": 1.5102610081689578, "grad_norm": 106.025634765625, "learning_rate": 2.9835169902347503e-07, "loss": 11.3984, "step": 22740 }, { "epoch": 1.5103274224613137, "grad_norm": 115.22266387939453, "learning_rate": 2.9827506979093353e-07, "loss": 16.75, "step": 22741 }, { "epoch": 1.5103938367536693, "grad_norm": 115.78571319580078, "learning_rate": 2.9819844867554154e-07, "loss": 14.8125, "step": 22742 }, { "epoch": 1.510460251046025, "grad_norm": 151.107421875, "learning_rate": 2.981218356781855e-07, "loss": 12.0938, "step": 22743 }, { "epoch": 1.5105266653383809, "grad_norm": 94.89066314697266, "learning_rate": 2.9804523079975073e-07, "loss": 13.6719, "step": 22744 }, { "epoch": 1.5105930796307365, "grad_norm": 343.0114440917969, "learning_rate": 2.979686340411246e-07, "loss": 18.0469, "step": 22745 }, { "epoch": 1.5106594939230922, "grad_norm": 167.2403106689453, "learning_rate": 2.978920454031921e-07, "loss": 15.75, "step": 22746 }, { "epoch": 1.510725908215448, "grad_norm": 243.1412811279297, "learning_rate": 2.9781546488683964e-07, "loss": 17.5625, "step": 22747 }, { "epoch": 1.5107923225078037, "grad_norm": 311.01611328125, "learning_rate": 2.977388924929529e-07, "loss": 18.7188, "step": 22748 }, { "epoch": 1.5108587368001594, "grad_norm": 274.55712890625, "learning_rate": 2.976623282224178e-07, "loss": 13.8125, "step": 22749 }, { "epoch": 1.5109251510925152, "grad_norm": 503.68145751953125, "learning_rate": 2.975857720761198e-07, "loss": 13.0312, "step": 22750 }, { "epoch": 1.5109915653848707, "grad_norm": 112.318115234375, "learning_rate": 2.975092240549449e-07, "loss": 16.0625, "step": 22751 }, { "epoch": 1.5110579796772265, "grad_norm": 464.24835205078125, "learning_rate": 2.974326841597776e-07, "loss": 15.6641, "step": 22752 }, { "epoch": 1.5111243939695822, "grad_norm": 2439.124267578125, "learning_rate": 2.973561523915046e-07, "loss": 18.9219, "step": 22753 }, { "epoch": 1.5111908082619379, "grad_norm": 219.80287170410156, "learning_rate": 2.9727962875100996e-07, "loss": 23.6406, "step": 22754 }, { "epoch": 1.5112572225542937, "grad_norm": 432.2144775390625, "learning_rate": 2.9720311323917945e-07, "loss": 12.7188, "step": 22755 }, { "epoch": 1.5113236368466494, "grad_norm": 183.3400115966797, "learning_rate": 2.9712660585689806e-07, "loss": 15.3906, "step": 22756 }, { "epoch": 1.511390051139005, "grad_norm": 163.62060546875, "learning_rate": 2.970501066050509e-07, "loss": 21.0938, "step": 22757 }, { "epoch": 1.511456465431361, "grad_norm": 360.6778564453125, "learning_rate": 2.9697361548452273e-07, "loss": 20.625, "step": 22758 }, { "epoch": 1.5115228797237166, "grad_norm": 138.825439453125, "learning_rate": 2.968971324961983e-07, "loss": 15.625, "step": 22759 }, { "epoch": 1.5115892940160722, "grad_norm": 214.451904296875, "learning_rate": 2.968206576409628e-07, "loss": 16.1953, "step": 22760 }, { "epoch": 1.511655708308428, "grad_norm": 138.82949829101562, "learning_rate": 2.967441909196997e-07, "loss": 18.6562, "step": 22761 }, { "epoch": 1.5117221226007835, "grad_norm": 249.989501953125, "learning_rate": 2.96667732333295e-07, "loss": 11.5469, "step": 22762 }, { "epoch": 1.5117885368931394, "grad_norm": 251.0377197265625, "learning_rate": 2.965912818826322e-07, "loss": 19.9531, "step": 22763 }, { "epoch": 1.511854951185495, "grad_norm": 261.201416015625, "learning_rate": 2.9651483956859567e-07, "loss": 21.1719, "step": 22764 }, { "epoch": 1.5119213654778507, "grad_norm": 250.5653533935547, "learning_rate": 2.9643840539206986e-07, "loss": 17.0938, "step": 22765 }, { "epoch": 1.5119877797702066, "grad_norm": 102.0944595336914, "learning_rate": 2.9636197935393893e-07, "loss": 12.1406, "step": 22766 }, { "epoch": 1.5120541940625623, "grad_norm": 149.59866333007812, "learning_rate": 2.962855614550869e-07, "loss": 13.4688, "step": 22767 }, { "epoch": 1.512120608354918, "grad_norm": 159.39344787597656, "learning_rate": 2.9620915169639795e-07, "loss": 13.7188, "step": 22768 }, { "epoch": 1.5121870226472738, "grad_norm": 102.94668579101562, "learning_rate": 2.96132750078755e-07, "loss": 14.8594, "step": 22769 }, { "epoch": 1.5122534369396294, "grad_norm": 168.91905212402344, "learning_rate": 2.960563566030434e-07, "loss": 14.125, "step": 22770 }, { "epoch": 1.512319851231985, "grad_norm": 118.52600860595703, "learning_rate": 2.959799712701454e-07, "loss": 12.4219, "step": 22771 }, { "epoch": 1.512386265524341, "grad_norm": 82.8035659790039, "learning_rate": 2.9590359408094523e-07, "loss": 13.4062, "step": 22772 }, { "epoch": 1.5124526798166964, "grad_norm": 99.0355224609375, "learning_rate": 2.958272250363263e-07, "loss": 15.2812, "step": 22773 }, { "epoch": 1.5125190941090523, "grad_norm": 167.68765258789062, "learning_rate": 2.95750864137172e-07, "loss": 19.9531, "step": 22774 }, { "epoch": 1.512585508401408, "grad_norm": 309.861083984375, "learning_rate": 2.956745113843655e-07, "loss": 16.1094, "step": 22775 }, { "epoch": 1.5126519226937636, "grad_norm": 137.87318420410156, "learning_rate": 2.9559816677879056e-07, "loss": 20.9375, "step": 22776 }, { "epoch": 1.5127183369861195, "grad_norm": 274.17578125, "learning_rate": 2.9552183032132916e-07, "loss": 20.6875, "step": 22777 }, { "epoch": 1.5127847512784751, "grad_norm": 246.4930877685547, "learning_rate": 2.9544550201286553e-07, "loss": 21.2812, "step": 22778 }, { "epoch": 1.5128511655708308, "grad_norm": 221.62351989746094, "learning_rate": 2.9536918185428193e-07, "loss": 17.4531, "step": 22779 }, { "epoch": 1.5129175798631866, "grad_norm": 213.99757385253906, "learning_rate": 2.9529286984646117e-07, "loss": 16.4531, "step": 22780 }, { "epoch": 1.5129839941555423, "grad_norm": 186.06500244140625, "learning_rate": 2.9521656599028623e-07, "loss": 13.4531, "step": 22781 }, { "epoch": 1.513050408447898, "grad_norm": 134.65185546875, "learning_rate": 2.9514027028663993e-07, "loss": 13.8984, "step": 22782 }, { "epoch": 1.5131168227402538, "grad_norm": 303.2271728515625, "learning_rate": 2.9506398273640387e-07, "loss": 19.4219, "step": 22783 }, { "epoch": 1.5131832370326093, "grad_norm": 188.19091796875, "learning_rate": 2.949877033404619e-07, "loss": 17.8438, "step": 22784 }, { "epoch": 1.5132496513249651, "grad_norm": 300.19171142578125, "learning_rate": 2.949114320996953e-07, "loss": 16.8438, "step": 22785 }, { "epoch": 1.5133160656173208, "grad_norm": 271.09210205078125, "learning_rate": 2.948351690149866e-07, "loss": 14.875, "step": 22786 }, { "epoch": 1.5133824799096764, "grad_norm": 414.0079040527344, "learning_rate": 2.947589140872181e-07, "loss": 20.2188, "step": 22787 }, { "epoch": 1.5134488942020323, "grad_norm": 102.69692993164062, "learning_rate": 2.946826673172718e-07, "loss": 12.0, "step": 22788 }, { "epoch": 1.513515308494388, "grad_norm": 756.42529296875, "learning_rate": 2.946064287060297e-07, "loss": 17.1406, "step": 22789 }, { "epoch": 1.5135817227867436, "grad_norm": 165.56398010253906, "learning_rate": 2.9453019825437395e-07, "loss": 16.3906, "step": 22790 }, { "epoch": 1.5136481370790995, "grad_norm": 171.6280059814453, "learning_rate": 2.9445397596318553e-07, "loss": 19.625, "step": 22791 }, { "epoch": 1.5137145513714552, "grad_norm": 214.7016143798828, "learning_rate": 2.943777618333473e-07, "loss": 14.2812, "step": 22792 }, { "epoch": 1.5137809656638108, "grad_norm": 175.5981903076172, "learning_rate": 2.943015558657399e-07, "loss": 11.3828, "step": 22793 }, { "epoch": 1.5138473799561667, "grad_norm": 245.96343994140625, "learning_rate": 2.9422535806124483e-07, "loss": 16.1094, "step": 22794 }, { "epoch": 1.5139137942485221, "grad_norm": 115.29898071289062, "learning_rate": 2.941491684207444e-07, "loss": 18.7188, "step": 22795 }, { "epoch": 1.513980208540878, "grad_norm": 199.6256103515625, "learning_rate": 2.9407298694511917e-07, "loss": 14.25, "step": 22796 }, { "epoch": 1.5140466228332337, "grad_norm": 171.32557678222656, "learning_rate": 2.939968136352505e-07, "loss": 13.8906, "step": 22797 }, { "epoch": 1.5141130371255893, "grad_norm": 149.62684631347656, "learning_rate": 2.939206484920197e-07, "loss": 11.25, "step": 22798 }, { "epoch": 1.5141794514179452, "grad_norm": 146.56468200683594, "learning_rate": 2.9384449151630764e-07, "loss": 14.9375, "step": 22799 }, { "epoch": 1.5142458657103008, "grad_norm": 213.62588500976562, "learning_rate": 2.937683427089952e-07, "loss": 17.4219, "step": 22800 }, { "epoch": 1.5143122800026565, "grad_norm": 254.8958282470703, "learning_rate": 2.936922020709638e-07, "loss": 24.1562, "step": 22801 }, { "epoch": 1.5143786942950124, "grad_norm": 279.5064697265625, "learning_rate": 2.9361606960309306e-07, "loss": 16.6875, "step": 22802 }, { "epoch": 1.514445108587368, "grad_norm": 668.3667602539062, "learning_rate": 2.935399453062648e-07, "loss": 16.0312, "step": 22803 }, { "epoch": 1.5145115228797237, "grad_norm": 188.57647705078125, "learning_rate": 2.934638291813589e-07, "loss": 12.8125, "step": 22804 }, { "epoch": 1.5145779371720796, "grad_norm": 414.9094543457031, "learning_rate": 2.9338772122925603e-07, "loss": 23.5312, "step": 22805 }, { "epoch": 1.514644351464435, "grad_norm": 119.25035095214844, "learning_rate": 2.9331162145083644e-07, "loss": 15.7812, "step": 22806 }, { "epoch": 1.5147107657567909, "grad_norm": 200.33233642578125, "learning_rate": 2.9323552984698083e-07, "loss": 13.1562, "step": 22807 }, { "epoch": 1.5147771800491465, "grad_norm": 243.6735382080078, "learning_rate": 2.931594464185686e-07, "loss": 21.0312, "step": 22808 }, { "epoch": 1.5148435943415022, "grad_norm": 252.89501953125, "learning_rate": 2.9308337116648075e-07, "loss": 13.1406, "step": 22809 }, { "epoch": 1.514910008633858, "grad_norm": 462.9317932128906, "learning_rate": 2.930073040915967e-07, "loss": 20.5469, "step": 22810 }, { "epoch": 1.5149764229262137, "grad_norm": 139.14990234375, "learning_rate": 2.929312451947963e-07, "loss": 20.9375, "step": 22811 }, { "epoch": 1.5150428372185694, "grad_norm": 366.8787841796875, "learning_rate": 2.928551944769596e-07, "loss": 18.5, "step": 22812 }, { "epoch": 1.5151092515109252, "grad_norm": 164.39749145507812, "learning_rate": 2.927791519389663e-07, "loss": 18.4375, "step": 22813 }, { "epoch": 1.515175665803281, "grad_norm": 122.17561340332031, "learning_rate": 2.9270311758169585e-07, "loss": 13.625, "step": 22814 }, { "epoch": 1.5152420800956365, "grad_norm": 205.78138732910156, "learning_rate": 2.926270914060284e-07, "loss": 20.2969, "step": 22815 }, { "epoch": 1.5153084943879924, "grad_norm": 251.90162658691406, "learning_rate": 2.9255107341284204e-07, "loss": 20.0938, "step": 22816 }, { "epoch": 1.5153749086803479, "grad_norm": 207.352294921875, "learning_rate": 2.9247506360301765e-07, "loss": 15.375, "step": 22817 }, { "epoch": 1.5154413229727037, "grad_norm": 237.6932373046875, "learning_rate": 2.923990619774334e-07, "loss": 12.1094, "step": 22818 }, { "epoch": 1.5155077372650596, "grad_norm": 274.4563903808594, "learning_rate": 2.923230685369688e-07, "loss": 18.8906, "step": 22819 }, { "epoch": 1.515574151557415, "grad_norm": 274.15667724609375, "learning_rate": 2.922470832825028e-07, "loss": 19.4062, "step": 22820 }, { "epoch": 1.515640565849771, "grad_norm": 153.421142578125, "learning_rate": 2.921711062149148e-07, "loss": 16.4844, "step": 22821 }, { "epoch": 1.5157069801421266, "grad_norm": 177.40234375, "learning_rate": 2.920951373350826e-07, "loss": 19.1875, "step": 22822 }, { "epoch": 1.5157733944344822, "grad_norm": 400.2754211425781, "learning_rate": 2.920191766438863e-07, "loss": 23.2031, "step": 22823 }, { "epoch": 1.515839808726838, "grad_norm": 192.34255981445312, "learning_rate": 2.9194322414220365e-07, "loss": 15.7812, "step": 22824 }, { "epoch": 1.5159062230191938, "grad_norm": 731.2211303710938, "learning_rate": 2.918672798309134e-07, "loss": 17.9844, "step": 22825 }, { "epoch": 1.5159726373115494, "grad_norm": 143.877685546875, "learning_rate": 2.917913437108943e-07, "loss": 17.4219, "step": 22826 }, { "epoch": 1.5160390516039053, "grad_norm": 157.08425903320312, "learning_rate": 2.9171541578302437e-07, "loss": 16.8438, "step": 22827 }, { "epoch": 1.5161054658962607, "grad_norm": 228.1759490966797, "learning_rate": 2.9163949604818215e-07, "loss": 13.5312, "step": 22828 }, { "epoch": 1.5161718801886166, "grad_norm": 149.54794311523438, "learning_rate": 2.915635845072462e-07, "loss": 14.5312, "step": 22829 }, { "epoch": 1.5162382944809725, "grad_norm": 397.2081604003906, "learning_rate": 2.9148768116109347e-07, "loss": 15.8906, "step": 22830 }, { "epoch": 1.516304708773328, "grad_norm": 152.0081024169922, "learning_rate": 2.91411786010603e-07, "loss": 9.4219, "step": 22831 }, { "epoch": 1.5163711230656838, "grad_norm": 290.5277099609375, "learning_rate": 2.913358990566528e-07, "loss": 19.2969, "step": 22832 }, { "epoch": 1.5164375373580394, "grad_norm": 103.44792175292969, "learning_rate": 2.9126002030011954e-07, "loss": 16.6406, "step": 22833 }, { "epoch": 1.516503951650395, "grad_norm": 104.5666275024414, "learning_rate": 2.9118414974188234e-07, "loss": 13.9766, "step": 22834 }, { "epoch": 1.516570365942751, "grad_norm": 137.39125061035156, "learning_rate": 2.911082873828179e-07, "loss": 16.9062, "step": 22835 }, { "epoch": 1.5166367802351066, "grad_norm": 137.14527893066406, "learning_rate": 2.910324332238039e-07, "loss": 16.7188, "step": 22836 }, { "epoch": 1.5167031945274623, "grad_norm": 358.2042541503906, "learning_rate": 2.9095658726571806e-07, "loss": 17.7656, "step": 22837 }, { "epoch": 1.5167696088198181, "grad_norm": 321.45458984375, "learning_rate": 2.908807495094374e-07, "loss": 13.4219, "step": 22838 }, { "epoch": 1.5168360231121736, "grad_norm": 527.9052124023438, "learning_rate": 2.9080491995583945e-07, "loss": 25.8906, "step": 22839 }, { "epoch": 1.5169024374045295, "grad_norm": 153.5592498779297, "learning_rate": 2.9072909860580153e-07, "loss": 16.4219, "step": 22840 }, { "epoch": 1.5169688516968853, "grad_norm": 131.5016326904297, "learning_rate": 2.9065328546019975e-07, "loss": 13.3906, "step": 22841 }, { "epoch": 1.5170352659892408, "grad_norm": 166.6610107421875, "learning_rate": 2.905774805199124e-07, "loss": 19.2031, "step": 22842 }, { "epoch": 1.5171016802815966, "grad_norm": 181.4853973388672, "learning_rate": 2.905016837858153e-07, "loss": 16.75, "step": 22843 }, { "epoch": 1.5171680945739523, "grad_norm": 613.316162109375, "learning_rate": 2.904258952587857e-07, "loss": 14.9688, "step": 22844 }, { "epoch": 1.517234508866308, "grad_norm": 227.74595642089844, "learning_rate": 2.903501149397001e-07, "loss": 17.7188, "step": 22845 }, { "epoch": 1.5173009231586638, "grad_norm": 83.10706329345703, "learning_rate": 2.902743428294355e-07, "loss": 11.2969, "step": 22846 }, { "epoch": 1.5173673374510195, "grad_norm": 197.6430206298828, "learning_rate": 2.9019857892886747e-07, "loss": 17.6094, "step": 22847 }, { "epoch": 1.5174337517433751, "grad_norm": 176.8138427734375, "learning_rate": 2.901228232388735e-07, "loss": 15.2891, "step": 22848 }, { "epoch": 1.517500166035731, "grad_norm": 143.761474609375, "learning_rate": 2.900470757603292e-07, "loss": 14.0312, "step": 22849 }, { "epoch": 1.5175665803280864, "grad_norm": 408.5088806152344, "learning_rate": 2.8997133649411076e-07, "loss": 21.7812, "step": 22850 }, { "epoch": 1.5176329946204423, "grad_norm": 548.3395385742188, "learning_rate": 2.898956054410946e-07, "loss": 15.1719, "step": 22851 }, { "epoch": 1.5176994089127982, "grad_norm": 357.14044189453125, "learning_rate": 2.898198826021566e-07, "loss": 15.1328, "step": 22852 }, { "epoch": 1.5177658232051536, "grad_norm": 735.9628295898438, "learning_rate": 2.897441679781727e-07, "loss": 26.1719, "step": 22853 }, { "epoch": 1.5178322374975095, "grad_norm": 440.00067138671875, "learning_rate": 2.8966846157001903e-07, "loss": 19.4922, "step": 22854 }, { "epoch": 1.5178986517898652, "grad_norm": 345.1712341308594, "learning_rate": 2.895927633785703e-07, "loss": 24.5312, "step": 22855 }, { "epoch": 1.5179650660822208, "grad_norm": 157.76974487304688, "learning_rate": 2.8951707340470343e-07, "loss": 18.8281, "step": 22856 }, { "epoch": 1.5180314803745767, "grad_norm": 195.74058532714844, "learning_rate": 2.8944139164929315e-07, "loss": 14.2188, "step": 22857 }, { "epoch": 1.5180978946669323, "grad_norm": 221.25975036621094, "learning_rate": 2.893657181132151e-07, "loss": 16.1875, "step": 22858 }, { "epoch": 1.518164308959288, "grad_norm": 152.5084991455078, "learning_rate": 2.8929005279734467e-07, "loss": 18.2031, "step": 22859 }, { "epoch": 1.5182307232516439, "grad_norm": 224.8785400390625, "learning_rate": 2.89214395702557e-07, "loss": 17.9844, "step": 22860 }, { "epoch": 1.5182971375439993, "grad_norm": 374.1247253417969, "learning_rate": 2.891387468297275e-07, "loss": 17.5469, "step": 22861 }, { "epoch": 1.5183635518363552, "grad_norm": 193.37599182128906, "learning_rate": 2.890631061797312e-07, "loss": 10.625, "step": 22862 }, { "epoch": 1.518429966128711, "grad_norm": 129.41102600097656, "learning_rate": 2.8898747375344246e-07, "loss": 9.9688, "step": 22863 }, { "epoch": 1.5184963804210665, "grad_norm": 421.5179748535156, "learning_rate": 2.889118495517372e-07, "loss": 19.5625, "step": 22864 }, { "epoch": 1.5185627947134224, "grad_norm": 219.769775390625, "learning_rate": 2.8883623357548925e-07, "loss": 15.6094, "step": 22865 }, { "epoch": 1.518629209005778, "grad_norm": 214.35589599609375, "learning_rate": 2.887606258255737e-07, "loss": 24.9375, "step": 22866 }, { "epoch": 1.5186956232981337, "grad_norm": 210.34323120117188, "learning_rate": 2.886850263028652e-07, "loss": 15.6562, "step": 22867 }, { "epoch": 1.5187620375904896, "grad_norm": 191.62783813476562, "learning_rate": 2.886094350082381e-07, "loss": 14.7188, "step": 22868 }, { "epoch": 1.5188284518828452, "grad_norm": 318.5830383300781, "learning_rate": 2.885338519425667e-07, "loss": 20.3594, "step": 22869 }, { "epoch": 1.5188948661752009, "grad_norm": 180.0153045654297, "learning_rate": 2.8845827710672565e-07, "loss": 14.6406, "step": 22870 }, { "epoch": 1.5189612804675567, "grad_norm": 360.31597900390625, "learning_rate": 2.883827105015891e-07, "loss": 19.3594, "step": 22871 }, { "epoch": 1.5190276947599122, "grad_norm": 398.38397216796875, "learning_rate": 2.883071521280306e-07, "loss": 10.625, "step": 22872 }, { "epoch": 1.519094109052268, "grad_norm": 144.25987243652344, "learning_rate": 2.8823160198692496e-07, "loss": 12.375, "step": 22873 }, { "epoch": 1.519160523344624, "grad_norm": 137.4496612548828, "learning_rate": 2.881560600791456e-07, "loss": 10.5156, "step": 22874 }, { "epoch": 1.5192269376369794, "grad_norm": 615.7218627929688, "learning_rate": 2.8808052640556634e-07, "loss": 26.125, "step": 22875 }, { "epoch": 1.5192933519293352, "grad_norm": 285.5889892578125, "learning_rate": 2.8800500096706113e-07, "loss": 15.7969, "step": 22876 }, { "epoch": 1.5193597662216909, "grad_norm": 227.09274291992188, "learning_rate": 2.8792948376450345e-07, "loss": 16.875, "step": 22877 }, { "epoch": 1.5194261805140465, "grad_norm": 234.96791076660156, "learning_rate": 2.8785397479876693e-07, "loss": 19.5156, "step": 22878 }, { "epoch": 1.5194925948064024, "grad_norm": 166.63446044921875, "learning_rate": 2.877784740707252e-07, "loss": 11.6797, "step": 22879 }, { "epoch": 1.519559009098758, "grad_norm": 187.71282958984375, "learning_rate": 2.877029815812508e-07, "loss": 15.6094, "step": 22880 }, { "epoch": 1.5196254233911137, "grad_norm": 109.67939758300781, "learning_rate": 2.8762749733121827e-07, "loss": 13.5625, "step": 22881 }, { "epoch": 1.5196918376834696, "grad_norm": 163.66624450683594, "learning_rate": 2.875520213214998e-07, "loss": 18.5781, "step": 22882 }, { "epoch": 1.519758251975825, "grad_norm": 205.08689880371094, "learning_rate": 2.8747655355296865e-07, "loss": 14.5469, "step": 22883 }, { "epoch": 1.519824666268181, "grad_norm": 378.8811950683594, "learning_rate": 2.8740109402649783e-07, "loss": 14.6562, "step": 22884 }, { "epoch": 1.5198910805605368, "grad_norm": 218.30645751953125, "learning_rate": 2.8732564274296054e-07, "loss": 14.6562, "step": 22885 }, { "epoch": 1.5199574948528922, "grad_norm": 273.63671875, "learning_rate": 2.8725019970322874e-07, "loss": 17.9219, "step": 22886 }, { "epoch": 1.520023909145248, "grad_norm": 138.9904022216797, "learning_rate": 2.8717476490817625e-07, "loss": 13.5781, "step": 22887 }, { "epoch": 1.5200903234376038, "grad_norm": 187.86610412597656, "learning_rate": 2.8709933835867473e-07, "loss": 17.2656, "step": 22888 }, { "epoch": 1.5201567377299594, "grad_norm": 123.72067260742188, "learning_rate": 2.8702392005559693e-07, "loss": 15.2656, "step": 22889 }, { "epoch": 1.5202231520223153, "grad_norm": 212.2388458251953, "learning_rate": 2.8694850999981525e-07, "loss": 19.625, "step": 22890 }, { "epoch": 1.520289566314671, "grad_norm": 119.9774398803711, "learning_rate": 2.868731081922021e-07, "loss": 13.4375, "step": 22891 }, { "epoch": 1.5203559806070266, "grad_norm": 300.3565979003906, "learning_rate": 2.867977146336297e-07, "loss": 14.75, "step": 22892 }, { "epoch": 1.5204223948993825, "grad_norm": 241.07009887695312, "learning_rate": 2.867223293249703e-07, "loss": 18.2188, "step": 22893 }, { "epoch": 1.520488809191738, "grad_norm": 118.92101287841797, "learning_rate": 2.866469522670951e-07, "loss": 12.2031, "step": 22894 }, { "epoch": 1.5205552234840938, "grad_norm": 378.84686279296875, "learning_rate": 2.8657158346087717e-07, "loss": 21.1562, "step": 22895 }, { "epoch": 1.5206216377764497, "grad_norm": 316.1156005859375, "learning_rate": 2.8649622290718757e-07, "loss": 18.875, "step": 22896 }, { "epoch": 1.520688052068805, "grad_norm": 230.68893432617188, "learning_rate": 2.8642087060689813e-07, "loss": 19.875, "step": 22897 }, { "epoch": 1.520754466361161, "grad_norm": 418.6114807128906, "learning_rate": 2.8634552656088064e-07, "loss": 17.4688, "step": 22898 }, { "epoch": 1.5208208806535166, "grad_norm": 173.22532653808594, "learning_rate": 2.8627019077000656e-07, "loss": 15.5938, "step": 22899 }, { "epoch": 1.5208872949458723, "grad_norm": 102.61685180664062, "learning_rate": 2.861948632351474e-07, "loss": 11.2656, "step": 22900 }, { "epoch": 1.5209537092382281, "grad_norm": 352.9230651855469, "learning_rate": 2.8611954395717477e-07, "loss": 18.0625, "step": 22901 }, { "epoch": 1.5210201235305838, "grad_norm": 160.8855438232422, "learning_rate": 2.8604423293695887e-07, "loss": 19.8438, "step": 22902 }, { "epoch": 1.5210865378229395, "grad_norm": 125.25967407226562, "learning_rate": 2.859689301753724e-07, "loss": 13.3438, "step": 22903 }, { "epoch": 1.5211529521152953, "grad_norm": 192.15939331054688, "learning_rate": 2.858936356732852e-07, "loss": 19.8438, "step": 22904 }, { "epoch": 1.5212193664076508, "grad_norm": 168.56259155273438, "learning_rate": 2.8581834943156836e-07, "loss": 11.8594, "step": 22905 }, { "epoch": 1.5212857807000066, "grad_norm": 375.2601013183594, "learning_rate": 2.857430714510936e-07, "loss": 20.1406, "step": 22906 }, { "epoch": 1.5213521949923625, "grad_norm": 470.4678955078125, "learning_rate": 2.856678017327309e-07, "loss": 21.1875, "step": 22907 }, { "epoch": 1.521418609284718, "grad_norm": 247.8498992919922, "learning_rate": 2.8559254027735105e-07, "loss": 25.0938, "step": 22908 }, { "epoch": 1.5214850235770738, "grad_norm": 177.0836639404297, "learning_rate": 2.855172870858248e-07, "loss": 13.7656, "step": 22909 }, { "epoch": 1.5215514378694295, "grad_norm": 334.2471923828125, "learning_rate": 2.854420421590229e-07, "loss": 19.0156, "step": 22910 }, { "epoch": 1.5216178521617851, "grad_norm": 152.1102294921875, "learning_rate": 2.853668054978148e-07, "loss": 15.3438, "step": 22911 }, { "epoch": 1.521684266454141, "grad_norm": 161.64617919921875, "learning_rate": 2.8529157710307204e-07, "loss": 16.8281, "step": 22912 }, { "epoch": 1.5217506807464967, "grad_norm": 149.0135955810547, "learning_rate": 2.852163569756638e-07, "loss": 15.2812, "step": 22913 }, { "epoch": 1.5218170950388523, "grad_norm": 205.59478759765625, "learning_rate": 2.851411451164607e-07, "loss": 21.5469, "step": 22914 }, { "epoch": 1.5218835093312082, "grad_norm": 130.267333984375, "learning_rate": 2.8506594152633255e-07, "loss": 12.7422, "step": 22915 }, { "epoch": 1.5219499236235636, "grad_norm": 247.76803588867188, "learning_rate": 2.8499074620614936e-07, "loss": 16.7969, "step": 22916 }, { "epoch": 1.5220163379159195, "grad_norm": 418.0822448730469, "learning_rate": 2.849155591567809e-07, "loss": 22.8281, "step": 22917 }, { "epoch": 1.5220827522082754, "grad_norm": 185.6614532470703, "learning_rate": 2.8484038037909717e-07, "loss": 14.9688, "step": 22918 }, { "epoch": 1.5221491665006308, "grad_norm": 328.7279968261719, "learning_rate": 2.84765209873967e-07, "loss": 20.6562, "step": 22919 }, { "epoch": 1.5222155807929867, "grad_norm": 267.298583984375, "learning_rate": 2.84690047642261e-07, "loss": 17.6094, "step": 22920 }, { "epoch": 1.5222819950853423, "grad_norm": 138.76808166503906, "learning_rate": 2.846148936848478e-07, "loss": 16.5156, "step": 22921 }, { "epoch": 1.522348409377698, "grad_norm": 162.2678985595703, "learning_rate": 2.845397480025971e-07, "loss": 12.6328, "step": 22922 }, { "epoch": 1.5224148236700539, "grad_norm": 214.4925537109375, "learning_rate": 2.8446461059637783e-07, "loss": 14.1953, "step": 22923 }, { "epoch": 1.5224812379624095, "grad_norm": 205.86231994628906, "learning_rate": 2.843894814670594e-07, "loss": 13.1562, "step": 22924 }, { "epoch": 1.5225476522547652, "grad_norm": 260.4260559082031, "learning_rate": 2.8431436061551086e-07, "loss": 15.7812, "step": 22925 }, { "epoch": 1.522614066547121, "grad_norm": 1207.7803955078125, "learning_rate": 2.8423924804260145e-07, "loss": 20.7031, "step": 22926 }, { "epoch": 1.5226804808394765, "grad_norm": 108.64952087402344, "learning_rate": 2.8416414374919897e-07, "loss": 22.1094, "step": 22927 }, { "epoch": 1.5227468951318324, "grad_norm": 135.95301818847656, "learning_rate": 2.840890477361736e-07, "loss": 12.2344, "step": 22928 }, { "epoch": 1.5228133094241882, "grad_norm": 1323.4281005859375, "learning_rate": 2.840139600043929e-07, "loss": 19.2812, "step": 22929 }, { "epoch": 1.5228797237165437, "grad_norm": 395.2240905761719, "learning_rate": 2.839388805547259e-07, "loss": 20.9375, "step": 22930 }, { "epoch": 1.5229461380088996, "grad_norm": 178.51382446289062, "learning_rate": 2.838638093880411e-07, "loss": 14.0625, "step": 22931 }, { "epoch": 1.5230125523012552, "grad_norm": 509.02294921875, "learning_rate": 2.837887465052071e-07, "loss": 19.2188, "step": 22932 }, { "epoch": 1.5230789665936109, "grad_norm": 186.96417236328125, "learning_rate": 2.8371369190709117e-07, "loss": 18.875, "step": 22933 }, { "epoch": 1.5231453808859667, "grad_norm": 168.7860107421875, "learning_rate": 2.8363864559456294e-07, "loss": 14.7031, "step": 22934 }, { "epoch": 1.5232117951783224, "grad_norm": 407.099609375, "learning_rate": 2.835636075684894e-07, "loss": 20.0938, "step": 22935 }, { "epoch": 1.523278209470678, "grad_norm": 164.99842834472656, "learning_rate": 2.834885778297389e-07, "loss": 23.6406, "step": 22936 }, { "epoch": 1.523344623763034, "grad_norm": 450.3429260253906, "learning_rate": 2.8341355637917954e-07, "loss": 16.0625, "step": 22937 }, { "epoch": 1.5234110380553894, "grad_norm": 86.4780502319336, "learning_rate": 2.833385432176788e-07, "loss": 9.7969, "step": 22938 }, { "epoch": 1.5234774523477452, "grad_norm": 148.21881103515625, "learning_rate": 2.832635383461045e-07, "loss": 16.0, "step": 22939 }, { "epoch": 1.523543866640101, "grad_norm": 199.4481658935547, "learning_rate": 2.831885417653247e-07, "loss": 14.7344, "step": 22940 }, { "epoch": 1.5236102809324565, "grad_norm": 129.365966796875, "learning_rate": 2.831135534762059e-07, "loss": 16.0312, "step": 22941 }, { "epoch": 1.5236766952248124, "grad_norm": 301.65313720703125, "learning_rate": 2.8303857347961635e-07, "loss": 18.2188, "step": 22942 }, { "epoch": 1.523743109517168, "grad_norm": 175.23342895507812, "learning_rate": 2.829636017764235e-07, "loss": 16.25, "step": 22943 }, { "epoch": 1.5238095238095237, "grad_norm": 194.84825134277344, "learning_rate": 2.8288863836749366e-07, "loss": 10.2266, "step": 22944 }, { "epoch": 1.5238759381018796, "grad_norm": 265.72314453125, "learning_rate": 2.8281368325369515e-07, "loss": 15.7656, "step": 22945 }, { "epoch": 1.5239423523942353, "grad_norm": 366.14569091796875, "learning_rate": 2.827387364358941e-07, "loss": 19.5469, "step": 22946 }, { "epoch": 1.524008766686591, "grad_norm": 384.26324462890625, "learning_rate": 2.8266379791495776e-07, "loss": 23.7969, "step": 22947 }, { "epoch": 1.5240751809789468, "grad_norm": 229.0656280517578, "learning_rate": 2.825888676917529e-07, "loss": 16.8281, "step": 22948 }, { "epoch": 1.5241415952713022, "grad_norm": 362.76904296875, "learning_rate": 2.825139457671467e-07, "loss": 14.375, "step": 22949 }, { "epoch": 1.524208009563658, "grad_norm": 217.11611938476562, "learning_rate": 2.824390321420048e-07, "loss": 23.4375, "step": 22950 }, { "epoch": 1.524274423856014, "grad_norm": 303.9809875488281, "learning_rate": 2.8236412681719513e-07, "loss": 17.1719, "step": 22951 }, { "epoch": 1.5243408381483694, "grad_norm": 178.28370666503906, "learning_rate": 2.822892297935829e-07, "loss": 13.375, "step": 22952 }, { "epoch": 1.5244072524407253, "grad_norm": 203.38998413085938, "learning_rate": 2.8221434107203524e-07, "loss": 15.75, "step": 22953 }, { "epoch": 1.524473666733081, "grad_norm": 185.44093322753906, "learning_rate": 2.821394606534181e-07, "loss": 17.9375, "step": 22954 }, { "epoch": 1.5245400810254366, "grad_norm": 399.4268798828125, "learning_rate": 2.820645885385977e-07, "loss": 14.0312, "step": 22955 }, { "epoch": 1.5246064953177925, "grad_norm": 340.22357177734375, "learning_rate": 2.819897247284402e-07, "loss": 14.8281, "step": 22956 }, { "epoch": 1.5246729096101481, "grad_norm": 163.3406524658203, "learning_rate": 2.8191486922381183e-07, "loss": 12.3672, "step": 22957 }, { "epoch": 1.5247393239025038, "grad_norm": 253.764404296875, "learning_rate": 2.8184002202557745e-07, "loss": 17.9531, "step": 22958 }, { "epoch": 1.5248057381948597, "grad_norm": 166.4215850830078, "learning_rate": 2.817651831346043e-07, "loss": 15.125, "step": 22959 }, { "epoch": 1.524872152487215, "grad_norm": 275.09320068359375, "learning_rate": 2.816903525517571e-07, "loss": 20.2344, "step": 22960 }, { "epoch": 1.524938566779571, "grad_norm": 187.6321258544922, "learning_rate": 2.8161553027790165e-07, "loss": 23.5781, "step": 22961 }, { "epoch": 1.5250049810719268, "grad_norm": 270.04180908203125, "learning_rate": 2.815407163139034e-07, "loss": 14.5156, "step": 22962 }, { "epoch": 1.5250713953642823, "grad_norm": 298.5167236328125, "learning_rate": 2.8146591066062797e-07, "loss": 19.3438, "step": 22963 }, { "epoch": 1.5251378096566381, "grad_norm": 190.6116943359375, "learning_rate": 2.813911133189406e-07, "loss": 14.4688, "step": 22964 }, { "epoch": 1.5252042239489938, "grad_norm": 252.39918518066406, "learning_rate": 2.8131632428970664e-07, "loss": 16.3906, "step": 22965 }, { "epoch": 1.5252706382413495, "grad_norm": 154.0569610595703, "learning_rate": 2.812415435737905e-07, "loss": 18.5156, "step": 22966 }, { "epoch": 1.5253370525337053, "grad_norm": 295.906005859375, "learning_rate": 2.8116677117205834e-07, "loss": 10.8594, "step": 22967 }, { "epoch": 1.525403466826061, "grad_norm": 270.9278564453125, "learning_rate": 2.8109200708537405e-07, "loss": 14.7344, "step": 22968 }, { "epoch": 1.5254698811184166, "grad_norm": 258.03955078125, "learning_rate": 2.8101725131460296e-07, "loss": 23.5781, "step": 22969 }, { "epoch": 1.5255362954107725, "grad_norm": 169.7890167236328, "learning_rate": 2.8094250386060983e-07, "loss": 13.4531, "step": 22970 }, { "epoch": 1.5256027097031282, "grad_norm": 299.6573181152344, "learning_rate": 2.808677647242593e-07, "loss": 17.3281, "step": 22971 }, { "epoch": 1.5256691239954838, "grad_norm": 533.0524291992188, "learning_rate": 2.807930339064153e-07, "loss": 25.2188, "step": 22972 }, { "epoch": 1.5257355382878397, "grad_norm": 86.34981536865234, "learning_rate": 2.8071831140794335e-07, "loss": 14.0469, "step": 22973 }, { "epoch": 1.5258019525801951, "grad_norm": 104.30357360839844, "learning_rate": 2.806435972297069e-07, "loss": 14.0312, "step": 22974 }, { "epoch": 1.525868366872551, "grad_norm": 478.265625, "learning_rate": 2.8056889137257044e-07, "loss": 16.7812, "step": 22975 }, { "epoch": 1.5259347811649067, "grad_norm": 185.5021209716797, "learning_rate": 2.804941938373983e-07, "loss": 13.4844, "step": 22976 }, { "epoch": 1.5260011954572623, "grad_norm": 350.7178039550781, "learning_rate": 2.804195046250544e-07, "loss": 14.2188, "step": 22977 }, { "epoch": 1.5260676097496182, "grad_norm": 230.97625732421875, "learning_rate": 2.8034482373640267e-07, "loss": 12.9219, "step": 22978 }, { "epoch": 1.5261340240419738, "grad_norm": 429.47149658203125, "learning_rate": 2.802701511723071e-07, "loss": 14.8125, "step": 22979 }, { "epoch": 1.5262004383343295, "grad_norm": 318.93096923828125, "learning_rate": 2.801954869336314e-07, "loss": 20.2188, "step": 22980 }, { "epoch": 1.5262668526266854, "grad_norm": 128.7494354248047, "learning_rate": 2.801208310212392e-07, "loss": 15.5312, "step": 22981 }, { "epoch": 1.526333266919041, "grad_norm": 317.3594970703125, "learning_rate": 2.8004618343599453e-07, "loss": 15.5156, "step": 22982 }, { "epoch": 1.5263996812113967, "grad_norm": 129.40689086914062, "learning_rate": 2.7997154417875977e-07, "loss": 13.6406, "step": 22983 }, { "epoch": 1.5264660955037526, "grad_norm": 144.68572998046875, "learning_rate": 2.7989691325039965e-07, "loss": 17.125, "step": 22984 }, { "epoch": 1.526532509796108, "grad_norm": 171.0375518798828, "learning_rate": 2.798222906517764e-07, "loss": 14.1406, "step": 22985 }, { "epoch": 1.5265989240884639, "grad_norm": 143.4574432373047, "learning_rate": 2.797476763837537e-07, "loss": 17.1094, "step": 22986 }, { "epoch": 1.5266653383808195, "grad_norm": 394.0094909667969, "learning_rate": 2.7967307044719457e-07, "loss": 13.9844, "step": 22987 }, { "epoch": 1.5267317526731752, "grad_norm": 131.7366943359375, "learning_rate": 2.79598472842962e-07, "loss": 14.0469, "step": 22988 }, { "epoch": 1.526798166965531, "grad_norm": 201.40667724609375, "learning_rate": 2.795238835719188e-07, "loss": 17.7188, "step": 22989 }, { "epoch": 1.5268645812578867, "grad_norm": 125.1875228881836, "learning_rate": 2.7944930263492826e-07, "loss": 12.2891, "step": 22990 }, { "epoch": 1.5269309955502424, "grad_norm": 79.51782989501953, "learning_rate": 2.7937473003285207e-07, "loss": 15.5312, "step": 22991 }, { "epoch": 1.5269974098425982, "grad_norm": 135.68212890625, "learning_rate": 2.79300165766554e-07, "loss": 12.9688, "step": 22992 }, { "epoch": 1.527063824134954, "grad_norm": 353.3013610839844, "learning_rate": 2.792256098368958e-07, "loss": 16.2812, "step": 22993 }, { "epoch": 1.5271302384273095, "grad_norm": 1001.2753295898438, "learning_rate": 2.7915106224474016e-07, "loss": 17.8672, "step": 22994 }, { "epoch": 1.5271966527196654, "grad_norm": 377.30511474609375, "learning_rate": 2.790765229909493e-07, "loss": 26.5781, "step": 22995 }, { "epoch": 1.5272630670120209, "grad_norm": 226.1078338623047, "learning_rate": 2.790019920763859e-07, "loss": 23.2812, "step": 22996 }, { "epoch": 1.5273294813043767, "grad_norm": 156.4580535888672, "learning_rate": 2.789274695019109e-07, "loss": 21.1406, "step": 22997 }, { "epoch": 1.5273958955967324, "grad_norm": 193.26333618164062, "learning_rate": 2.788529552683879e-07, "loss": 15.5, "step": 22998 }, { "epoch": 1.527462309889088, "grad_norm": 330.4853515625, "learning_rate": 2.787784493766778e-07, "loss": 14.5781, "step": 22999 }, { "epoch": 1.527528724181444, "grad_norm": 130.81668090820312, "learning_rate": 2.7870395182764263e-07, "loss": 13.8438, "step": 23000 }, { "epoch": 1.5275951384737996, "grad_norm": 411.4234313964844, "learning_rate": 2.786294626221443e-07, "loss": 23.0469, "step": 23001 }, { "epoch": 1.5276615527661552, "grad_norm": 226.7710723876953, "learning_rate": 2.785549817610443e-07, "loss": 13.3281, "step": 23002 }, { "epoch": 1.527727967058511, "grad_norm": 506.7585754394531, "learning_rate": 2.7848050924520427e-07, "loss": 12.7188, "step": 23003 }, { "epoch": 1.5277943813508668, "grad_norm": 381.8178405761719, "learning_rate": 2.7840604507548606e-07, "loss": 24.2812, "step": 23004 }, { "epoch": 1.5278607956432224, "grad_norm": 102.98394775390625, "learning_rate": 2.783315892527499e-07, "loss": 15.9688, "step": 23005 }, { "epoch": 1.5279272099355783, "grad_norm": 167.2576904296875, "learning_rate": 2.782571417778584e-07, "loss": 15.0, "step": 23006 }, { "epoch": 1.5279936242279337, "grad_norm": 408.5660095214844, "learning_rate": 2.7818270265167187e-07, "loss": 20.4062, "step": 23007 }, { "epoch": 1.5280600385202896, "grad_norm": 127.85030364990234, "learning_rate": 2.781082718750516e-07, "loss": 11.0312, "step": 23008 }, { "epoch": 1.5281264528126453, "grad_norm": 309.6311340332031, "learning_rate": 2.7803384944885853e-07, "loss": 16.7656, "step": 23009 }, { "epoch": 1.528192867105001, "grad_norm": 107.18732452392578, "learning_rate": 2.779594353739536e-07, "loss": 14.1719, "step": 23010 }, { "epoch": 1.5282592813973568, "grad_norm": 298.2312927246094, "learning_rate": 2.7788502965119764e-07, "loss": 15.4375, "step": 23011 }, { "epoch": 1.5283256956897124, "grad_norm": 257.87603759765625, "learning_rate": 2.778106322814514e-07, "loss": 19.7031, "step": 23012 }, { "epoch": 1.528392109982068, "grad_norm": 379.7052307128906, "learning_rate": 2.777362432655752e-07, "loss": 14.25, "step": 23013 }, { "epoch": 1.528458524274424, "grad_norm": 167.51931762695312, "learning_rate": 2.7766186260442926e-07, "loss": 14.625, "step": 23014 }, { "epoch": 1.5285249385667796, "grad_norm": 255.69395446777344, "learning_rate": 2.77587490298875e-07, "loss": 16.6094, "step": 23015 }, { "epoch": 1.5285913528591353, "grad_norm": 397.6329650878906, "learning_rate": 2.7751312634977173e-07, "loss": 16.8906, "step": 23016 }, { "epoch": 1.5286577671514912, "grad_norm": 115.23895263671875, "learning_rate": 2.7743877075798005e-07, "loss": 16.25, "step": 23017 }, { "epoch": 1.5287241814438466, "grad_norm": 123.212158203125, "learning_rate": 2.773644235243601e-07, "loss": 12.4453, "step": 23018 }, { "epoch": 1.5287905957362025, "grad_norm": 132.83145141601562, "learning_rate": 2.772900846497717e-07, "loss": 15.0312, "step": 23019 }, { "epoch": 1.5288570100285581, "grad_norm": 343.7013854980469, "learning_rate": 2.772157541350748e-07, "loss": 13.0781, "step": 23020 }, { "epoch": 1.5289234243209138, "grad_norm": 87.34749603271484, "learning_rate": 2.771414319811297e-07, "loss": 14.625, "step": 23021 }, { "epoch": 1.5289898386132696, "grad_norm": 151.21107482910156, "learning_rate": 2.77067118188795e-07, "loss": 17.875, "step": 23022 }, { "epoch": 1.5290562529056253, "grad_norm": 166.92271423339844, "learning_rate": 2.769928127589317e-07, "loss": 10.6094, "step": 23023 }, { "epoch": 1.529122667197981, "grad_norm": 256.7167053222656, "learning_rate": 2.769185156923982e-07, "loss": 15.2656, "step": 23024 }, { "epoch": 1.5291890814903368, "grad_norm": 195.81935119628906, "learning_rate": 2.7684422699005447e-07, "loss": 17.8906, "step": 23025 }, { "epoch": 1.5292554957826925, "grad_norm": 369.2843322753906, "learning_rate": 2.7676994665275977e-07, "loss": 20.25, "step": 23026 }, { "epoch": 1.5293219100750481, "grad_norm": 1695.517333984375, "learning_rate": 2.766956746813732e-07, "loss": 15.5, "step": 23027 }, { "epoch": 1.529388324367404, "grad_norm": 130.81887817382812, "learning_rate": 2.76621411076754e-07, "loss": 14.5938, "step": 23028 }, { "epoch": 1.5294547386597594, "grad_norm": 179.5145263671875, "learning_rate": 2.765471558397615e-07, "loss": 21.8594, "step": 23029 }, { "epoch": 1.5295211529521153, "grad_norm": 247.56033325195312, "learning_rate": 2.7647290897125377e-07, "loss": 21.6562, "step": 23030 }, { "epoch": 1.529587567244471, "grad_norm": 127.07461547851562, "learning_rate": 2.7639867047209075e-07, "loss": 14.2812, "step": 23031 }, { "epoch": 1.5296539815368266, "grad_norm": 136.859375, "learning_rate": 2.763244403431304e-07, "loss": 12.6094, "step": 23032 }, { "epoch": 1.5297203958291825, "grad_norm": 245.99490356445312, "learning_rate": 2.762502185852316e-07, "loss": 17.5469, "step": 23033 }, { "epoch": 1.5297868101215382, "grad_norm": 307.2513732910156, "learning_rate": 2.761760051992529e-07, "loss": 18.7812, "step": 23034 }, { "epoch": 1.5298532244138938, "grad_norm": 238.15679931640625, "learning_rate": 2.7610180018605323e-07, "loss": 20.8906, "step": 23035 }, { "epoch": 1.5299196387062497, "grad_norm": 294.4223327636719, "learning_rate": 2.760276035464898e-07, "loss": 19.6719, "step": 23036 }, { "epoch": 1.5299860529986054, "grad_norm": 231.8448944091797, "learning_rate": 2.759534152814221e-07, "loss": 20.8125, "step": 23037 }, { "epoch": 1.530052467290961, "grad_norm": 218.28915405273438, "learning_rate": 2.7587923539170767e-07, "loss": 16.4375, "step": 23038 }, { "epoch": 1.5301188815833169, "grad_norm": 85.2760009765625, "learning_rate": 2.7580506387820457e-07, "loss": 13.0703, "step": 23039 }, { "epoch": 1.5301852958756723, "grad_norm": 308.40228271484375, "learning_rate": 2.75730900741771e-07, "loss": 23.7031, "step": 23040 }, { "epoch": 1.5302517101680282, "grad_norm": 550.2453002929688, "learning_rate": 2.756567459832646e-07, "loss": 17.0, "step": 23041 }, { "epoch": 1.5303181244603838, "grad_norm": 310.9509582519531, "learning_rate": 2.7558259960354336e-07, "loss": 19.875, "step": 23042 }, { "epoch": 1.5303845387527395, "grad_norm": 1247.1981201171875, "learning_rate": 2.7550846160346517e-07, "loss": 19.6406, "step": 23043 }, { "epoch": 1.5304509530450954, "grad_norm": 411.2265319824219, "learning_rate": 2.7543433198388675e-07, "loss": 15.7031, "step": 23044 }, { "epoch": 1.530517367337451, "grad_norm": 161.20237731933594, "learning_rate": 2.7536021074566684e-07, "loss": 17.9844, "step": 23045 }, { "epoch": 1.5305837816298067, "grad_norm": 694.0651245117188, "learning_rate": 2.7528609788966174e-07, "loss": 16.9375, "step": 23046 }, { "epoch": 1.5306501959221626, "grad_norm": 272.7291259765625, "learning_rate": 2.7521199341672916e-07, "loss": 15.8125, "step": 23047 }, { "epoch": 1.5307166102145182, "grad_norm": 151.3957061767578, "learning_rate": 2.751378973277264e-07, "loss": 14.4844, "step": 23048 }, { "epoch": 1.5307830245068739, "grad_norm": 142.42701721191406, "learning_rate": 2.750638096235104e-07, "loss": 16.2344, "step": 23049 }, { "epoch": 1.5308494387992297, "grad_norm": 117.48681640625, "learning_rate": 2.7498973030493824e-07, "loss": 12.4688, "step": 23050 }, { "epoch": 1.5309158530915852, "grad_norm": 174.03997802734375, "learning_rate": 2.7491565937286687e-07, "loss": 20.4531, "step": 23051 }, { "epoch": 1.530982267383941, "grad_norm": 291.7275085449219, "learning_rate": 2.74841596828153e-07, "loss": 19.1719, "step": 23052 }, { "epoch": 1.5310486816762967, "grad_norm": 1472.7344970703125, "learning_rate": 2.7476754267165334e-07, "loss": 14.9062, "step": 23053 }, { "epoch": 1.5311150959686524, "grad_norm": 227.82200622558594, "learning_rate": 2.7469349690422494e-07, "loss": 19.125, "step": 23054 }, { "epoch": 1.5311815102610082, "grad_norm": 182.1538543701172, "learning_rate": 2.7461945952672326e-07, "loss": 14.4531, "step": 23055 }, { "epoch": 1.531247924553364, "grad_norm": 333.9213562011719, "learning_rate": 2.7454543054000603e-07, "loss": 18.8594, "step": 23056 }, { "epoch": 1.5313143388457195, "grad_norm": 236.3969268798828, "learning_rate": 2.744714099449286e-07, "loss": 23.1562, "step": 23057 }, { "epoch": 1.5313807531380754, "grad_norm": 332.3197937011719, "learning_rate": 2.743973977423476e-07, "loss": 16.8203, "step": 23058 }, { "epoch": 1.531447167430431, "grad_norm": 245.1046600341797, "learning_rate": 2.74323393933119e-07, "loss": 17.4375, "step": 23059 }, { "epoch": 1.5315135817227867, "grad_norm": 163.93260192871094, "learning_rate": 2.7424939851809924e-07, "loss": 17.1875, "step": 23060 }, { "epoch": 1.5315799960151426, "grad_norm": 139.0386505126953, "learning_rate": 2.741754114981435e-07, "loss": 14.8125, "step": 23061 }, { "epoch": 1.531646410307498, "grad_norm": 229.72262573242188, "learning_rate": 2.7410143287410847e-07, "loss": 14.5, "step": 23062 }, { "epoch": 1.531712824599854, "grad_norm": 273.11541748046875, "learning_rate": 2.740274626468493e-07, "loss": 14.5625, "step": 23063 }, { "epoch": 1.5317792388922096, "grad_norm": 190.80935668945312, "learning_rate": 2.739535008172218e-07, "loss": 16.2031, "step": 23064 }, { "epoch": 1.5318456531845652, "grad_norm": 171.0314178466797, "learning_rate": 2.7387954738608155e-07, "loss": 17.625, "step": 23065 }, { "epoch": 1.531912067476921, "grad_norm": 184.7675018310547, "learning_rate": 2.7380560235428394e-07, "loss": 13.5, "step": 23066 }, { "epoch": 1.5319784817692768, "grad_norm": 218.74844360351562, "learning_rate": 2.7373166572268444e-07, "loss": 16.9219, "step": 23067 }, { "epoch": 1.5320448960616324, "grad_norm": 152.78907775878906, "learning_rate": 2.736577374921385e-07, "loss": 14.0156, "step": 23068 }, { "epoch": 1.5321113103539883, "grad_norm": 222.8134307861328, "learning_rate": 2.7358381766350046e-07, "loss": 26.6719, "step": 23069 }, { "epoch": 1.532177724646344, "grad_norm": 206.74661254882812, "learning_rate": 2.735099062376266e-07, "loss": 16.1562, "step": 23070 }, { "epoch": 1.5322441389386996, "grad_norm": 252.44769287109375, "learning_rate": 2.734360032153707e-07, "loss": 16.2344, "step": 23071 }, { "epoch": 1.5323105532310555, "grad_norm": 225.03143310546875, "learning_rate": 2.7336210859758834e-07, "loss": 18.5938, "step": 23072 }, { "epoch": 1.532376967523411, "grad_norm": 170.49578857421875, "learning_rate": 2.7328822238513417e-07, "loss": 21.4688, "step": 23073 }, { "epoch": 1.5324433818157668, "grad_norm": 229.48138427734375, "learning_rate": 2.7321434457886293e-07, "loss": 18.125, "step": 23074 }, { "epoch": 1.5325097961081224, "grad_norm": 136.5048370361328, "learning_rate": 2.731404751796286e-07, "loss": 12.7656, "step": 23075 }, { "epoch": 1.532576210400478, "grad_norm": 1078.1488037109375, "learning_rate": 2.730666141882866e-07, "loss": 14.5312, "step": 23076 }, { "epoch": 1.532642624692834, "grad_norm": 196.67393493652344, "learning_rate": 2.729927616056905e-07, "loss": 14.5781, "step": 23077 }, { "epoch": 1.5327090389851896, "grad_norm": 306.15081787109375, "learning_rate": 2.72918917432695e-07, "loss": 15.8281, "step": 23078 }, { "epoch": 1.5327754532775453, "grad_norm": 817.374755859375, "learning_rate": 2.728450816701542e-07, "loss": 26.1562, "step": 23079 }, { "epoch": 1.5328418675699012, "grad_norm": 209.03627014160156, "learning_rate": 2.727712543189222e-07, "loss": 21.0469, "step": 23080 }, { "epoch": 1.5329082818622568, "grad_norm": 483.8130798339844, "learning_rate": 2.72697435379853e-07, "loss": 20.5781, "step": 23081 }, { "epoch": 1.5329746961546125, "grad_norm": 151.97225952148438, "learning_rate": 2.7262362485380077e-07, "loss": 14.2188, "step": 23082 }, { "epoch": 1.5330411104469683, "grad_norm": 149.81280517578125, "learning_rate": 2.725498227416184e-07, "loss": 14.1562, "step": 23083 }, { "epoch": 1.5331075247393238, "grad_norm": 552.5743408203125, "learning_rate": 2.7247602904416076e-07, "loss": 21.375, "step": 23084 }, { "epoch": 1.5331739390316796, "grad_norm": 158.6114501953125, "learning_rate": 2.724022437622807e-07, "loss": 14.3906, "step": 23085 }, { "epoch": 1.5332403533240353, "grad_norm": 193.42677307128906, "learning_rate": 2.7232846689683187e-07, "loss": 14.1562, "step": 23086 }, { "epoch": 1.533306767616391, "grad_norm": 258.9638977050781, "learning_rate": 2.7225469844866776e-07, "loss": 17.9844, "step": 23087 }, { "epoch": 1.5333731819087468, "grad_norm": 170.42080688476562, "learning_rate": 2.7218093841864165e-07, "loss": 18.9531, "step": 23088 }, { "epoch": 1.5334395962011025, "grad_norm": 628.8593139648438, "learning_rate": 2.721071868076068e-07, "loss": 18.3438, "step": 23089 }, { "epoch": 1.5335060104934581, "grad_norm": 147.1826629638672, "learning_rate": 2.7203344361641624e-07, "loss": 17.3594, "step": 23090 }, { "epoch": 1.533572424785814, "grad_norm": 216.5854034423828, "learning_rate": 2.71959708845923e-07, "loss": 16.8438, "step": 23091 }, { "epoch": 1.5336388390781697, "grad_norm": 149.45301818847656, "learning_rate": 2.7188598249698014e-07, "loss": 11.7188, "step": 23092 }, { "epoch": 1.5337052533705253, "grad_norm": 201.95523071289062, "learning_rate": 2.718122645704406e-07, "loss": 14.6094, "step": 23093 }, { "epoch": 1.5337716676628812, "grad_norm": 1127.0244140625, "learning_rate": 2.7173855506715624e-07, "loss": 19.8438, "step": 23094 }, { "epoch": 1.5338380819552366, "grad_norm": 198.90365600585938, "learning_rate": 2.71664853987981e-07, "loss": 16.125, "step": 23095 }, { "epoch": 1.5339044962475925, "grad_norm": 251.96035766601562, "learning_rate": 2.715911613337665e-07, "loss": 14.4688, "step": 23096 }, { "epoch": 1.5339709105399482, "grad_norm": 159.2774658203125, "learning_rate": 2.7151747710536533e-07, "loss": 13.3281, "step": 23097 }, { "epoch": 1.5340373248323038, "grad_norm": 256.7648010253906, "learning_rate": 2.7144380130363e-07, "loss": 15.9844, "step": 23098 }, { "epoch": 1.5341037391246597, "grad_norm": 188.79173278808594, "learning_rate": 2.7137013392941286e-07, "loss": 18.7812, "step": 23099 }, { "epoch": 1.5341701534170153, "grad_norm": 257.49658203125, "learning_rate": 2.7129647498356536e-07, "loss": 17.9531, "step": 23100 }, { "epoch": 1.534236567709371, "grad_norm": 191.9415740966797, "learning_rate": 2.7122282446694056e-07, "loss": 11.8594, "step": 23101 }, { "epoch": 1.5343029820017269, "grad_norm": 116.90397644042969, "learning_rate": 2.711491823803896e-07, "loss": 11.875, "step": 23102 }, { "epoch": 1.5343693962940825, "grad_norm": 276.34832763671875, "learning_rate": 2.7107554872476465e-07, "loss": 19.125, "step": 23103 }, { "epoch": 1.5344358105864382, "grad_norm": 86.12362670898438, "learning_rate": 2.7100192350091746e-07, "loss": 12.2188, "step": 23104 }, { "epoch": 1.534502224878794, "grad_norm": 292.5953063964844, "learning_rate": 2.709283067096997e-07, "loss": 14.2344, "step": 23105 }, { "epoch": 1.5345686391711495, "grad_norm": 231.88507080078125, "learning_rate": 2.7085469835196275e-07, "loss": 14.5469, "step": 23106 }, { "epoch": 1.5346350534635054, "grad_norm": 135.72547912597656, "learning_rate": 2.7078109842855855e-07, "loss": 16.9688, "step": 23107 }, { "epoch": 1.534701467755861, "grad_norm": 420.35723876953125, "learning_rate": 2.707075069403375e-07, "loss": 16.1094, "step": 23108 }, { "epoch": 1.5347678820482167, "grad_norm": 342.91162109375, "learning_rate": 2.706339238881521e-07, "loss": 18.7812, "step": 23109 }, { "epoch": 1.5348342963405726, "grad_norm": 166.3538360595703, "learning_rate": 2.7056034927285256e-07, "loss": 18.5625, "step": 23110 }, { "epoch": 1.5349007106329282, "grad_norm": 199.9921417236328, "learning_rate": 2.7048678309529037e-07, "loss": 19.5781, "step": 23111 }, { "epoch": 1.5349671249252839, "grad_norm": 116.9873046875, "learning_rate": 2.7041322535631626e-07, "loss": 15.0938, "step": 23112 }, { "epoch": 1.5350335392176397, "grad_norm": 149.6031036376953, "learning_rate": 2.7033967605678145e-07, "loss": 16.0625, "step": 23113 }, { "epoch": 1.5350999535099954, "grad_norm": 239.19989013671875, "learning_rate": 2.702661351975364e-07, "loss": 20.4844, "step": 23114 }, { "epoch": 1.535166367802351, "grad_norm": 133.4707794189453, "learning_rate": 2.7019260277943223e-07, "loss": 12.1953, "step": 23115 }, { "epoch": 1.535232782094707, "grad_norm": 263.0833435058594, "learning_rate": 2.701190788033185e-07, "loss": 18.5625, "step": 23116 }, { "epoch": 1.5352991963870624, "grad_norm": 292.9923095703125, "learning_rate": 2.7004556327004714e-07, "loss": 16.9531, "step": 23117 }, { "epoch": 1.5353656106794182, "grad_norm": 319.9383850097656, "learning_rate": 2.699720561804674e-07, "loss": 16.0469, "step": 23118 }, { "epoch": 1.535432024971774, "grad_norm": 388.49945068359375, "learning_rate": 2.6989855753543e-07, "loss": 25.2656, "step": 23119 }, { "epoch": 1.5354984392641295, "grad_norm": 275.3183898925781, "learning_rate": 2.69825067335785e-07, "loss": 17.2969, "step": 23120 }, { "epoch": 1.5355648535564854, "grad_norm": 198.93417358398438, "learning_rate": 2.69751585582383e-07, "loss": 13.0781, "step": 23121 }, { "epoch": 1.535631267848841, "grad_norm": 136.72283935546875, "learning_rate": 2.6967811227607297e-07, "loss": 15.2188, "step": 23122 }, { "epoch": 1.5356976821411967, "grad_norm": 148.0360107421875, "learning_rate": 2.6960464741770593e-07, "loss": 14.2656, "step": 23123 }, { "epoch": 1.5357640964335526, "grad_norm": 403.3703308105469, "learning_rate": 2.695311910081309e-07, "loss": 18.5938, "step": 23124 }, { "epoch": 1.5358305107259083, "grad_norm": 132.8262939453125, "learning_rate": 2.6945774304819755e-07, "loss": 16.9844, "step": 23125 }, { "epoch": 1.535896925018264, "grad_norm": 530.171142578125, "learning_rate": 2.693843035387564e-07, "loss": 15.5156, "step": 23126 }, { "epoch": 1.5359633393106198, "grad_norm": 550.1852416992188, "learning_rate": 2.69310872480656e-07, "loss": 17.3594, "step": 23127 }, { "epoch": 1.5360297536029752, "grad_norm": 152.40199279785156, "learning_rate": 2.6923744987474607e-07, "loss": 15.6641, "step": 23128 }, { "epoch": 1.536096167895331, "grad_norm": 156.33761596679688, "learning_rate": 2.6916403572187587e-07, "loss": 13.5391, "step": 23129 }, { "epoch": 1.5361625821876868, "grad_norm": 140.78201293945312, "learning_rate": 2.6909063002289476e-07, "loss": 10.9531, "step": 23130 }, { "epoch": 1.5362289964800424, "grad_norm": 191.991455078125, "learning_rate": 2.6901723277865174e-07, "loss": 19.0469, "step": 23131 }, { "epoch": 1.5362954107723983, "grad_norm": 226.04110717773438, "learning_rate": 2.6894384398999627e-07, "loss": 18.5, "step": 23132 }, { "epoch": 1.536361825064754, "grad_norm": 101.8636474609375, "learning_rate": 2.688704636577761e-07, "loss": 18.0469, "step": 23133 }, { "epoch": 1.5364282393571096, "grad_norm": 228.00782775878906, "learning_rate": 2.687970917828415e-07, "loss": 25.6406, "step": 23134 }, { "epoch": 1.5364946536494655, "grad_norm": 455.4670104980469, "learning_rate": 2.6872372836604016e-07, "loss": 23.8906, "step": 23135 }, { "epoch": 1.5365610679418211, "grad_norm": 221.11099243164062, "learning_rate": 2.6865037340822117e-07, "loss": 20.4688, "step": 23136 }, { "epoch": 1.5366274822341768, "grad_norm": 289.553955078125, "learning_rate": 2.685770269102329e-07, "loss": 14.2344, "step": 23137 }, { "epoch": 1.5366938965265327, "grad_norm": 202.91444396972656, "learning_rate": 2.685036888729241e-07, "loss": 14.1875, "step": 23138 }, { "epoch": 1.536760310818888, "grad_norm": 179.97560119628906, "learning_rate": 2.684303592971422e-07, "loss": 13.3125, "step": 23139 }, { "epoch": 1.536826725111244, "grad_norm": 244.02178955078125, "learning_rate": 2.6835703818373654e-07, "loss": 12.7578, "step": 23140 }, { "epoch": 1.5368931394035996, "grad_norm": 181.24034118652344, "learning_rate": 2.682837255335547e-07, "loss": 16.5156, "step": 23141 }, { "epoch": 1.5369595536959553, "grad_norm": 411.66778564453125, "learning_rate": 2.6821042134744464e-07, "loss": 17.6875, "step": 23142 }, { "epoch": 1.5370259679883111, "grad_norm": 207.6551513671875, "learning_rate": 2.6813712562625446e-07, "loss": 19.2344, "step": 23143 }, { "epoch": 1.5370923822806668, "grad_norm": 244.20904541015625, "learning_rate": 2.680638383708321e-07, "loss": 13.7188, "step": 23144 }, { "epoch": 1.5371587965730225, "grad_norm": 147.79635620117188, "learning_rate": 2.679905595820251e-07, "loss": 13.8672, "step": 23145 }, { "epoch": 1.5372252108653783, "grad_norm": 233.79893493652344, "learning_rate": 2.679172892606816e-07, "loss": 18.25, "step": 23146 }, { "epoch": 1.537291625157734, "grad_norm": 458.9700012207031, "learning_rate": 2.67844027407648e-07, "loss": 26.7969, "step": 23147 }, { "epoch": 1.5373580394500896, "grad_norm": 204.2777099609375, "learning_rate": 2.677707740237732e-07, "loss": 17.4375, "step": 23148 }, { "epoch": 1.5374244537424455, "grad_norm": 253.43080139160156, "learning_rate": 2.676975291099036e-07, "loss": 22.2812, "step": 23149 }, { "epoch": 1.537490868034801, "grad_norm": 154.2772674560547, "learning_rate": 2.676242926668867e-07, "loss": 13.5312, "step": 23150 }, { "epoch": 1.5375572823271568, "grad_norm": 175.46676635742188, "learning_rate": 2.6755106469556967e-07, "loss": 13.9844, "step": 23151 }, { "epoch": 1.5376236966195125, "grad_norm": 100.5093002319336, "learning_rate": 2.674778451967995e-07, "loss": 14.2812, "step": 23152 }, { "epoch": 1.5376901109118681, "grad_norm": 222.701171875, "learning_rate": 2.674046341714232e-07, "loss": 15.8438, "step": 23153 }, { "epoch": 1.537756525204224, "grad_norm": 240.2196502685547, "learning_rate": 2.6733143162028805e-07, "loss": 23.875, "step": 23154 }, { "epoch": 1.5378229394965797, "grad_norm": 129.19190979003906, "learning_rate": 2.6725823754423983e-07, "loss": 19.2031, "step": 23155 }, { "epoch": 1.5378893537889353, "grad_norm": 363.7494812011719, "learning_rate": 2.671850519441263e-07, "loss": 24.8125, "step": 23156 }, { "epoch": 1.5379557680812912, "grad_norm": 292.4183654785156, "learning_rate": 2.671118748207932e-07, "loss": 16.3281, "step": 23157 }, { "epoch": 1.5380221823736469, "grad_norm": 422.0189208984375, "learning_rate": 2.670387061750875e-07, "loss": 26.9844, "step": 23158 }, { "epoch": 1.5380885966660025, "grad_norm": 162.37533569335938, "learning_rate": 2.669655460078553e-07, "loss": 16.1406, "step": 23159 }, { "epoch": 1.5381550109583584, "grad_norm": 92.10070037841797, "learning_rate": 2.668923943199433e-07, "loss": 14.2344, "step": 23160 }, { "epoch": 1.5382214252507138, "grad_norm": 124.98763275146484, "learning_rate": 2.668192511121966e-07, "loss": 13.8594, "step": 23161 }, { "epoch": 1.5382878395430697, "grad_norm": 115.89219665527344, "learning_rate": 2.6674611638546253e-07, "loss": 13.0469, "step": 23162 }, { "epoch": 1.5383542538354253, "grad_norm": 341.854736328125, "learning_rate": 2.6667299014058676e-07, "loss": 14.8359, "step": 23163 }, { "epoch": 1.538420668127781, "grad_norm": 179.03414916992188, "learning_rate": 2.6659987237841433e-07, "loss": 17.75, "step": 23164 }, { "epoch": 1.5384870824201369, "grad_norm": 178.63735961914062, "learning_rate": 2.665267630997924e-07, "loss": 15.7344, "step": 23165 }, { "epoch": 1.5385534967124925, "grad_norm": 113.95482635498047, "learning_rate": 2.6645366230556555e-07, "loss": 15.6406, "step": 23166 }, { "epoch": 1.5386199110048482, "grad_norm": 142.29185485839844, "learning_rate": 2.6638056999657977e-07, "loss": 13.7969, "step": 23167 }, { "epoch": 1.538686325297204, "grad_norm": 339.047119140625, "learning_rate": 2.663074861736805e-07, "loss": 17.4062, "step": 23168 }, { "epoch": 1.5387527395895597, "grad_norm": 93.47703552246094, "learning_rate": 2.6623441083771313e-07, "loss": 10.4688, "step": 23169 }, { "epoch": 1.5388191538819154, "grad_norm": 203.0446319580078, "learning_rate": 2.6616134398952305e-07, "loss": 15.3594, "step": 23170 }, { "epoch": 1.5388855681742712, "grad_norm": 146.68438720703125, "learning_rate": 2.6608828562995566e-07, "loss": 14.2344, "step": 23171 }, { "epoch": 1.5389519824666267, "grad_norm": 363.24871826171875, "learning_rate": 2.660152357598552e-07, "loss": 21.7188, "step": 23172 }, { "epoch": 1.5390183967589826, "grad_norm": 178.1854248046875, "learning_rate": 2.6594219438006795e-07, "loss": 15.375, "step": 23173 }, { "epoch": 1.5390848110513382, "grad_norm": 134.23043823242188, "learning_rate": 2.658691614914377e-07, "loss": 13.5, "step": 23174 }, { "epoch": 1.5391512253436939, "grad_norm": 222.28704833984375, "learning_rate": 2.6579613709480964e-07, "loss": 16.5469, "step": 23175 }, { "epoch": 1.5392176396360497, "grad_norm": 179.949951171875, "learning_rate": 2.6572312119102845e-07, "loss": 14.0156, "step": 23176 }, { "epoch": 1.5392840539284054, "grad_norm": 171.35263061523438, "learning_rate": 2.6565011378093895e-07, "loss": 17.7812, "step": 23177 }, { "epoch": 1.539350468220761, "grad_norm": 204.9718475341797, "learning_rate": 2.655771148653854e-07, "loss": 20.9688, "step": 23178 }, { "epoch": 1.539416882513117, "grad_norm": 188.9126434326172, "learning_rate": 2.6550412444521266e-07, "loss": 18.6875, "step": 23179 }, { "epoch": 1.5394832968054726, "grad_norm": 328.65570068359375, "learning_rate": 2.65431142521264e-07, "loss": 15.9219, "step": 23180 }, { "epoch": 1.5395497110978282, "grad_norm": 276.4205017089844, "learning_rate": 2.653581690943849e-07, "loss": 16.5781, "step": 23181 }, { "epoch": 1.539616125390184, "grad_norm": 203.82098388671875, "learning_rate": 2.6528520416541853e-07, "loss": 13.4688, "step": 23182 }, { "epoch": 1.5396825396825395, "grad_norm": 194.58941650390625, "learning_rate": 2.6521224773520933e-07, "loss": 11.8828, "step": 23183 }, { "epoch": 1.5397489539748954, "grad_norm": 424.4406433105469, "learning_rate": 2.6513929980460115e-07, "loss": 18.1406, "step": 23184 }, { "epoch": 1.539815368267251, "grad_norm": 484.16326904296875, "learning_rate": 2.6506636037443807e-07, "loss": 13.6562, "step": 23185 }, { "epoch": 1.5398817825596067, "grad_norm": 203.28672790527344, "learning_rate": 2.6499342944556304e-07, "loss": 15.2656, "step": 23186 }, { "epoch": 1.5399481968519626, "grad_norm": 892.9902954101562, "learning_rate": 2.6492050701882076e-07, "loss": 20.25, "step": 23187 }, { "epoch": 1.5400146111443183, "grad_norm": 142.32769775390625, "learning_rate": 2.6484759309505377e-07, "loss": 13.7266, "step": 23188 }, { "epoch": 1.540081025436674, "grad_norm": 198.56698608398438, "learning_rate": 2.6477468767510603e-07, "loss": 16.1719, "step": 23189 }, { "epoch": 1.5401474397290298, "grad_norm": 222.651123046875, "learning_rate": 2.647017907598207e-07, "loss": 18.4531, "step": 23190 }, { "epoch": 1.5402138540213854, "grad_norm": 263.0067138671875, "learning_rate": 2.6462890235004097e-07, "loss": 15.9062, "step": 23191 }, { "epoch": 1.540280268313741, "grad_norm": 701.667724609375, "learning_rate": 2.6455602244661025e-07, "loss": 17.2031, "step": 23192 }, { "epoch": 1.540346682606097, "grad_norm": 433.305419921875, "learning_rate": 2.6448315105037156e-07, "loss": 20.9688, "step": 23193 }, { "epoch": 1.5404130968984524, "grad_norm": 142.78054809570312, "learning_rate": 2.6441028816216706e-07, "loss": 15.7188, "step": 23194 }, { "epoch": 1.5404795111908083, "grad_norm": 388.4940490722656, "learning_rate": 2.6433743378284077e-07, "loss": 18.0469, "step": 23195 }, { "epoch": 1.540545925483164, "grad_norm": 247.4846954345703, "learning_rate": 2.642645879132346e-07, "loss": 11.7656, "step": 23196 }, { "epoch": 1.5406123397755196, "grad_norm": 586.1412353515625, "learning_rate": 2.641917505541914e-07, "loss": 17.25, "step": 23197 }, { "epoch": 1.5406787540678755, "grad_norm": 206.89227294921875, "learning_rate": 2.641189217065537e-07, "loss": 17.5, "step": 23198 }, { "epoch": 1.5407451683602311, "grad_norm": 270.75390625, "learning_rate": 2.64046101371164e-07, "loss": 16.2656, "step": 23199 }, { "epoch": 1.5408115826525868, "grad_norm": 170.5272979736328, "learning_rate": 2.6397328954886466e-07, "loss": 17.875, "step": 23200 }, { "epoch": 1.5408779969449427, "grad_norm": 213.78268432617188, "learning_rate": 2.639004862404979e-07, "loss": 17.0312, "step": 23201 }, { "epoch": 1.5409444112372983, "grad_norm": 182.89109802246094, "learning_rate": 2.638276914469061e-07, "loss": 13.0625, "step": 23202 }, { "epoch": 1.541010825529654, "grad_norm": 133.32150268554688, "learning_rate": 2.637549051689305e-07, "loss": 22.5781, "step": 23203 }, { "epoch": 1.5410772398220098, "grad_norm": 241.383056640625, "learning_rate": 2.6368212740741414e-07, "loss": 17.6875, "step": 23204 }, { "epoch": 1.5411436541143653, "grad_norm": 372.5812072753906, "learning_rate": 2.6360935816319817e-07, "loss": 19.1562, "step": 23205 }, { "epoch": 1.5412100684067211, "grad_norm": 801.4498291015625, "learning_rate": 2.635365974371244e-07, "loss": 15.0625, "step": 23206 }, { "epoch": 1.5412764826990768, "grad_norm": 213.78875732421875, "learning_rate": 2.6346384523003475e-07, "loss": 20.8438, "step": 23207 }, { "epoch": 1.5413428969914325, "grad_norm": 484.6083984375, "learning_rate": 2.6339110154277046e-07, "loss": 12.2812, "step": 23208 }, { "epoch": 1.5414093112837883, "grad_norm": 325.81890869140625, "learning_rate": 2.633183663761732e-07, "loss": 19.0781, "step": 23209 }, { "epoch": 1.541475725576144, "grad_norm": 181.76292419433594, "learning_rate": 2.632456397310846e-07, "loss": 16.7344, "step": 23210 }, { "epoch": 1.5415421398684996, "grad_norm": 120.13494873046875, "learning_rate": 2.6317292160834505e-07, "loss": 16.5938, "step": 23211 }, { "epoch": 1.5416085541608555, "grad_norm": 152.6403045654297, "learning_rate": 2.6310021200879686e-07, "loss": 18.0312, "step": 23212 }, { "epoch": 1.5416749684532112, "grad_norm": 280.0244140625, "learning_rate": 2.6302751093328013e-07, "loss": 12.1406, "step": 23213 }, { "epoch": 1.5417413827455668, "grad_norm": 320.95611572265625, "learning_rate": 2.6295481838263624e-07, "loss": 26.5312, "step": 23214 }, { "epoch": 1.5418077970379227, "grad_norm": 313.3428955078125, "learning_rate": 2.6288213435770587e-07, "loss": 30.5, "step": 23215 }, { "epoch": 1.5418742113302781, "grad_norm": 142.62393188476562, "learning_rate": 2.628094588593301e-07, "loss": 19.625, "step": 23216 }, { "epoch": 1.541940625622634, "grad_norm": 130.1892852783203, "learning_rate": 2.627367918883494e-07, "loss": 14.0781, "step": 23217 }, { "epoch": 1.5420070399149897, "grad_norm": 160.58705139160156, "learning_rate": 2.6266413344560456e-07, "loss": 14.1562, "step": 23218 }, { "epoch": 1.5420734542073453, "grad_norm": 180.81808471679688, "learning_rate": 2.6259148353193537e-07, "loss": 19.375, "step": 23219 }, { "epoch": 1.5421398684997012, "grad_norm": 310.576904296875, "learning_rate": 2.625188421481832e-07, "loss": 13.2812, "step": 23220 }, { "epoch": 1.5422062827920568, "grad_norm": 144.13600158691406, "learning_rate": 2.624462092951876e-07, "loss": 15.0938, "step": 23221 }, { "epoch": 1.5422726970844125, "grad_norm": 234.14413452148438, "learning_rate": 2.623735849737888e-07, "loss": 14.1562, "step": 23222 }, { "epoch": 1.5423391113767684, "grad_norm": 204.73390197753906, "learning_rate": 2.623009691848271e-07, "loss": 15.2812, "step": 23223 }, { "epoch": 1.542405525669124, "grad_norm": 196.3575897216797, "learning_rate": 2.6222836192914276e-07, "loss": 20.6719, "step": 23224 }, { "epoch": 1.5424719399614797, "grad_norm": 426.932861328125, "learning_rate": 2.621557632075747e-07, "loss": 16.0156, "step": 23225 }, { "epoch": 1.5425383542538356, "grad_norm": 145.75502014160156, "learning_rate": 2.6208317302096396e-07, "loss": 12.6875, "step": 23226 }, { "epoch": 1.542604768546191, "grad_norm": 118.93405151367188, "learning_rate": 2.6201059137014914e-07, "loss": 16.7188, "step": 23227 }, { "epoch": 1.5426711828385469, "grad_norm": 192.7015838623047, "learning_rate": 2.6193801825597027e-07, "loss": 15.0156, "step": 23228 }, { "epoch": 1.5427375971309025, "grad_norm": 148.5836944580078, "learning_rate": 2.618654536792668e-07, "loss": 14.6953, "step": 23229 }, { "epoch": 1.5428040114232582, "grad_norm": 149.38546752929688, "learning_rate": 2.617928976408782e-07, "loss": 12.4844, "step": 23230 }, { "epoch": 1.542870425715614, "grad_norm": 430.3947448730469, "learning_rate": 2.6172035014164364e-07, "loss": 16.7031, "step": 23231 }, { "epoch": 1.5429368400079697, "grad_norm": 361.9637756347656, "learning_rate": 2.6164781118240263e-07, "loss": 19.0781, "step": 23232 }, { "epoch": 1.5430032543003254, "grad_norm": 227.64239501953125, "learning_rate": 2.615752807639935e-07, "loss": 24.2656, "step": 23233 }, { "epoch": 1.5430696685926812, "grad_norm": 433.3985900878906, "learning_rate": 2.6150275888725627e-07, "loss": 15.8438, "step": 23234 }, { "epoch": 1.543136082885037, "grad_norm": 220.36790466308594, "learning_rate": 2.6143024555302894e-07, "loss": 14.6406, "step": 23235 }, { "epoch": 1.5432024971773926, "grad_norm": 167.6643524169922, "learning_rate": 2.6135774076215036e-07, "loss": 16.9531, "step": 23236 }, { "epoch": 1.5432689114697484, "grad_norm": 711.7451782226562, "learning_rate": 2.612852445154601e-07, "loss": 12.7969, "step": 23237 }, { "epoch": 1.5433353257621039, "grad_norm": 348.5670166015625, "learning_rate": 2.6121275681379584e-07, "loss": 18.1719, "step": 23238 }, { "epoch": 1.5434017400544597, "grad_norm": 167.66400146484375, "learning_rate": 2.611402776579964e-07, "loss": 16.25, "step": 23239 }, { "epoch": 1.5434681543468154, "grad_norm": 208.8854522705078, "learning_rate": 2.610678070489003e-07, "loss": 15.2344, "step": 23240 }, { "epoch": 1.543534568639171, "grad_norm": 279.69451904296875, "learning_rate": 2.6099534498734556e-07, "loss": 21.7812, "step": 23241 }, { "epoch": 1.543600982931527, "grad_norm": 247.0557403564453, "learning_rate": 2.6092289147417057e-07, "loss": 18.8906, "step": 23242 }, { "epoch": 1.5436673972238826, "grad_norm": 311.99609375, "learning_rate": 2.6085044651021383e-07, "loss": 14.0156, "step": 23243 }, { "epoch": 1.5437338115162382, "grad_norm": 154.34326171875, "learning_rate": 2.6077801009631216e-07, "loss": 18.5938, "step": 23244 }, { "epoch": 1.543800225808594, "grad_norm": 182.35760498046875, "learning_rate": 2.6070558223330487e-07, "loss": 14.6406, "step": 23245 }, { "epoch": 1.5438666401009498, "grad_norm": 275.7531433105469, "learning_rate": 2.606331629220287e-07, "loss": 21.875, "step": 23246 }, { "epoch": 1.5439330543933054, "grad_norm": 260.5250549316406, "learning_rate": 2.6056075216332185e-07, "loss": 18.4531, "step": 23247 }, { "epoch": 1.5439994686856613, "grad_norm": 183.05035400390625, "learning_rate": 2.604883499580218e-07, "loss": 18.8594, "step": 23248 }, { "epoch": 1.5440658829780167, "grad_norm": 194.7342529296875, "learning_rate": 2.6041595630696643e-07, "loss": 15.6406, "step": 23249 }, { "epoch": 1.5441322972703726, "grad_norm": 199.9652557373047, "learning_rate": 2.6034357121099225e-07, "loss": 21.3125, "step": 23250 }, { "epoch": 1.5441987115627283, "grad_norm": 163.34234619140625, "learning_rate": 2.6027119467093774e-07, "loss": 17.4062, "step": 23251 }, { "epoch": 1.544265125855084, "grad_norm": 161.68905639648438, "learning_rate": 2.60198826687639e-07, "loss": 19.5156, "step": 23252 }, { "epoch": 1.5443315401474398, "grad_norm": 152.143798828125, "learning_rate": 2.6012646726193386e-07, "loss": 18.6875, "step": 23253 }, { "epoch": 1.5443979544397954, "grad_norm": 2957.304931640625, "learning_rate": 2.60054116394659e-07, "loss": 21.6797, "step": 23254 }, { "epoch": 1.544464368732151, "grad_norm": 180.51808166503906, "learning_rate": 2.599817740866515e-07, "loss": 13.4062, "step": 23255 }, { "epoch": 1.544530783024507, "grad_norm": 199.62754821777344, "learning_rate": 2.5990944033874805e-07, "loss": 15.1094, "step": 23256 }, { "epoch": 1.5445971973168626, "grad_norm": 202.96690368652344, "learning_rate": 2.5983711515178576e-07, "loss": 17.7344, "step": 23257 }, { "epoch": 1.5446636116092183, "grad_norm": 309.32440185546875, "learning_rate": 2.5976479852660036e-07, "loss": 19.1875, "step": 23258 }, { "epoch": 1.5447300259015742, "grad_norm": 120.01348876953125, "learning_rate": 2.5969249046402953e-07, "loss": 11.7188, "step": 23259 }, { "epoch": 1.5447964401939296, "grad_norm": 173.97596740722656, "learning_rate": 2.5962019096490874e-07, "loss": 14.625, "step": 23260 }, { "epoch": 1.5448628544862855, "grad_norm": 97.85708618164062, "learning_rate": 2.5954790003007467e-07, "loss": 15.3125, "step": 23261 }, { "epoch": 1.5449292687786411, "grad_norm": 148.41128540039062, "learning_rate": 2.5947561766036354e-07, "loss": 16.25, "step": 23262 }, { "epoch": 1.5449956830709968, "grad_norm": 115.61711883544922, "learning_rate": 2.594033438566113e-07, "loss": 15.9219, "step": 23263 }, { "epoch": 1.5450620973633526, "grad_norm": 450.10394287109375, "learning_rate": 2.593310786196543e-07, "loss": 15.8438, "step": 23264 }, { "epoch": 1.5451285116557083, "grad_norm": 281.8332214355469, "learning_rate": 2.592588219503286e-07, "loss": 15.125, "step": 23265 }, { "epoch": 1.545194925948064, "grad_norm": 329.3066101074219, "learning_rate": 2.5918657384946927e-07, "loss": 14.6406, "step": 23266 }, { "epoch": 1.5452613402404198, "grad_norm": 371.4208984375, "learning_rate": 2.5911433431791263e-07, "loss": 25.1562, "step": 23267 }, { "epoch": 1.5453277545327755, "grad_norm": 462.05218505859375, "learning_rate": 2.59042103356494e-07, "loss": 16.5703, "step": 23268 }, { "epoch": 1.5453941688251311, "grad_norm": 88.11038970947266, "learning_rate": 2.589698809660492e-07, "loss": 13.2344, "step": 23269 }, { "epoch": 1.545460583117487, "grad_norm": 196.67662048339844, "learning_rate": 2.5889766714741346e-07, "loss": 13.0, "step": 23270 }, { "epoch": 1.5455269974098425, "grad_norm": 263.5964660644531, "learning_rate": 2.588254619014225e-07, "loss": 14.5156, "step": 23271 }, { "epoch": 1.5455934117021983, "grad_norm": 333.32635498046875, "learning_rate": 2.587532652289105e-07, "loss": 14.0938, "step": 23272 }, { "epoch": 1.545659825994554, "grad_norm": 260.9994201660156, "learning_rate": 2.5868107713071373e-07, "loss": 14.5156, "step": 23273 }, { "epoch": 1.5457262402869096, "grad_norm": 275.7750244140625, "learning_rate": 2.5860889760766703e-07, "loss": 22.3125, "step": 23274 }, { "epoch": 1.5457926545792655, "grad_norm": 203.87228393554688, "learning_rate": 2.5853672666060446e-07, "loss": 17.875, "step": 23275 }, { "epoch": 1.5458590688716212, "grad_norm": 203.19033813476562, "learning_rate": 2.5846456429036214e-07, "loss": 13.0781, "step": 23276 }, { "epoch": 1.5459254831639768, "grad_norm": 170.97177124023438, "learning_rate": 2.583924104977738e-07, "loss": 12.2188, "step": 23277 }, { "epoch": 1.5459918974563327, "grad_norm": 270.7346496582031, "learning_rate": 2.5832026528367447e-07, "loss": 15.5781, "step": 23278 }, { "epoch": 1.5460583117486884, "grad_norm": 236.18838500976562, "learning_rate": 2.5824812864889856e-07, "loss": 14.2812, "step": 23279 }, { "epoch": 1.546124726041044, "grad_norm": 137.26393127441406, "learning_rate": 2.581760005942807e-07, "loss": 15.1562, "step": 23280 }, { "epoch": 1.5461911403333999, "grad_norm": 175.18760681152344, "learning_rate": 2.5810388112065493e-07, "loss": 10.9531, "step": 23281 }, { "epoch": 1.5462575546257553, "grad_norm": 356.5532531738281, "learning_rate": 2.5803177022885613e-07, "loss": 22.75, "step": 23282 }, { "epoch": 1.5463239689181112, "grad_norm": 406.87200927734375, "learning_rate": 2.5795966791971724e-07, "loss": 21.4375, "step": 23283 }, { "epoch": 1.5463903832104668, "grad_norm": 191.26231384277344, "learning_rate": 2.578875741940737e-07, "loss": 14.8438, "step": 23284 }, { "epoch": 1.5464567975028225, "grad_norm": 313.5531311035156, "learning_rate": 2.578154890527585e-07, "loss": 16.3125, "step": 23285 }, { "epoch": 1.5465232117951784, "grad_norm": 208.07606506347656, "learning_rate": 2.577434124966057e-07, "loss": 16.5156, "step": 23286 }, { "epoch": 1.546589626087534, "grad_norm": 215.96961975097656, "learning_rate": 2.5767134452644914e-07, "loss": 20.4531, "step": 23287 }, { "epoch": 1.5466560403798897, "grad_norm": 212.90966796875, "learning_rate": 2.5759928514312266e-07, "loss": 19.6562, "step": 23288 }, { "epoch": 1.5467224546722456, "grad_norm": 135.9345245361328, "learning_rate": 2.575272343474589e-07, "loss": 14.5781, "step": 23289 }, { "epoch": 1.5467888689646012, "grad_norm": 157.3470916748047, "learning_rate": 2.5745519214029275e-07, "loss": 17.4688, "step": 23290 }, { "epoch": 1.5468552832569569, "grad_norm": 299.2257080078125, "learning_rate": 2.573831585224563e-07, "loss": 24.3438, "step": 23291 }, { "epoch": 1.5469216975493127, "grad_norm": 701.3591918945312, "learning_rate": 2.573111334947833e-07, "loss": 19.2031, "step": 23292 }, { "epoch": 1.5469881118416682, "grad_norm": 1139.1666259765625, "learning_rate": 2.5723911705810685e-07, "loss": 14.2969, "step": 23293 }, { "epoch": 1.547054526134024, "grad_norm": 298.4377136230469, "learning_rate": 2.5716710921326004e-07, "loss": 26.625, "step": 23294 }, { "epoch": 1.5471209404263797, "grad_norm": 772.899169921875, "learning_rate": 2.5709510996107575e-07, "loss": 18.1094, "step": 23295 }, { "epoch": 1.5471873547187354, "grad_norm": 793.3009643554688, "learning_rate": 2.5702311930238705e-07, "loss": 10.8906, "step": 23296 }, { "epoch": 1.5472537690110912, "grad_norm": 236.8256072998047, "learning_rate": 2.56951137238026e-07, "loss": 16.1094, "step": 23297 }, { "epoch": 1.547320183303447, "grad_norm": 238.99647521972656, "learning_rate": 2.5687916376882633e-07, "loss": 16.8906, "step": 23298 }, { "epoch": 1.5473865975958025, "grad_norm": 119.06565856933594, "learning_rate": 2.5680719889561964e-07, "loss": 16.0, "step": 23299 }, { "epoch": 1.5474530118881584, "grad_norm": 310.5144958496094, "learning_rate": 2.5673524261923874e-07, "loss": 18.9844, "step": 23300 }, { "epoch": 1.547519426180514, "grad_norm": 169.55580139160156, "learning_rate": 2.5666329494051585e-07, "loss": 13.9844, "step": 23301 }, { "epoch": 1.5475858404728697, "grad_norm": 171.58775329589844, "learning_rate": 2.5659135586028344e-07, "loss": 16.0625, "step": 23302 }, { "epoch": 1.5476522547652256, "grad_norm": 134.38589477539062, "learning_rate": 2.5651942537937357e-07, "loss": 14.2344, "step": 23303 }, { "epoch": 1.547718669057581, "grad_norm": 504.8057861328125, "learning_rate": 2.5644750349861854e-07, "loss": 17.5156, "step": 23304 }, { "epoch": 1.547785083349937, "grad_norm": 127.55846405029297, "learning_rate": 2.5637559021884945e-07, "loss": 29.25, "step": 23305 }, { "epoch": 1.5478514976422926, "grad_norm": 327.0053405761719, "learning_rate": 2.563036855408993e-07, "loss": 27.5312, "step": 23306 }, { "epoch": 1.5479179119346482, "grad_norm": 264.3553161621094, "learning_rate": 2.5623178946559897e-07, "loss": 12.5938, "step": 23307 }, { "epoch": 1.547984326227004, "grad_norm": 216.5001220703125, "learning_rate": 2.5615990199378045e-07, "loss": 16.6875, "step": 23308 }, { "epoch": 1.5480507405193598, "grad_norm": 285.1044006347656, "learning_rate": 2.560880231262752e-07, "loss": 18.875, "step": 23309 }, { "epoch": 1.5481171548117154, "grad_norm": 166.7958984375, "learning_rate": 2.5601615286391485e-07, "loss": 13.5312, "step": 23310 }, { "epoch": 1.5481835691040713, "grad_norm": 301.3864440917969, "learning_rate": 2.559442912075306e-07, "loss": 13.9688, "step": 23311 }, { "epoch": 1.548249983396427, "grad_norm": 201.0245819091797, "learning_rate": 2.558724381579537e-07, "loss": 16.1875, "step": 23312 }, { "epoch": 1.5483163976887826, "grad_norm": 150.33187866210938, "learning_rate": 2.5580059371601557e-07, "loss": 11.9219, "step": 23313 }, { "epoch": 1.5483828119811385, "grad_norm": 155.77015686035156, "learning_rate": 2.5572875788254657e-07, "loss": 20.2812, "step": 23314 }, { "epoch": 1.548449226273494, "grad_norm": 137.05612182617188, "learning_rate": 2.556569306583787e-07, "loss": 11.1094, "step": 23315 }, { "epoch": 1.5485156405658498, "grad_norm": 189.996337890625, "learning_rate": 2.5558511204434194e-07, "loss": 20.9531, "step": 23316 }, { "epoch": 1.5485820548582054, "grad_norm": 264.3968811035156, "learning_rate": 2.555133020412673e-07, "loss": 14.0781, "step": 23317 }, { "epoch": 1.548648469150561, "grad_norm": 230.71514892578125, "learning_rate": 2.554415006499856e-07, "loss": 17.4375, "step": 23318 }, { "epoch": 1.548714883442917, "grad_norm": 312.8804931640625, "learning_rate": 2.5536970787132716e-07, "loss": 15.7969, "step": 23319 }, { "epoch": 1.5487812977352726, "grad_norm": 132.0717315673828, "learning_rate": 2.5529792370612266e-07, "loss": 14.2031, "step": 23320 }, { "epoch": 1.5488477120276283, "grad_norm": 380.1004943847656, "learning_rate": 2.552261481552026e-07, "loss": 15.3672, "step": 23321 }, { "epoch": 1.5489141263199842, "grad_norm": 163.8212432861328, "learning_rate": 2.5515438121939637e-07, "loss": 16.0781, "step": 23322 }, { "epoch": 1.5489805406123398, "grad_norm": 181.07955932617188, "learning_rate": 2.550826228995354e-07, "loss": 14.8203, "step": 23323 }, { "epoch": 1.5490469549046955, "grad_norm": 820.2169189453125, "learning_rate": 2.5501087319644885e-07, "loss": 21.625, "step": 23324 }, { "epoch": 1.5491133691970513, "grad_norm": 181.0193634033203, "learning_rate": 2.5493913211096683e-07, "loss": 12.4297, "step": 23325 }, { "epoch": 1.5491797834894068, "grad_norm": 179.3736572265625, "learning_rate": 2.5486739964391936e-07, "loss": 13.9531, "step": 23326 }, { "epoch": 1.5492461977817626, "grad_norm": 360.7938537597656, "learning_rate": 2.5479567579613613e-07, "loss": 17.4375, "step": 23327 }, { "epoch": 1.5493126120741183, "grad_norm": 147.78372192382812, "learning_rate": 2.547239605684468e-07, "loss": 13.8125, "step": 23328 }, { "epoch": 1.549379026366474, "grad_norm": 131.84608459472656, "learning_rate": 2.5465225396168133e-07, "loss": 13.6875, "step": 23329 }, { "epoch": 1.5494454406588298, "grad_norm": 140.4446563720703, "learning_rate": 2.5458055597666837e-07, "loss": 19.5781, "step": 23330 }, { "epoch": 1.5495118549511855, "grad_norm": 161.57070922851562, "learning_rate": 2.545088666142378e-07, "loss": 14.7969, "step": 23331 }, { "epoch": 1.5495782692435411, "grad_norm": 316.3581237792969, "learning_rate": 2.5443718587521876e-07, "loss": 19.3438, "step": 23332 }, { "epoch": 1.549644683535897, "grad_norm": 381.4172668457031, "learning_rate": 2.5436551376044045e-07, "loss": 14.3828, "step": 23333 }, { "epoch": 1.5497110978282527, "grad_norm": 123.45417785644531, "learning_rate": 2.542938502707319e-07, "loss": 16.9844, "step": 23334 }, { "epoch": 1.5497775121206083, "grad_norm": 540.2908325195312, "learning_rate": 2.5422219540692233e-07, "loss": 21.7188, "step": 23335 }, { "epoch": 1.5498439264129642, "grad_norm": 267.4307861328125, "learning_rate": 2.541505491698398e-07, "loss": 16.4375, "step": 23336 }, { "epoch": 1.5499103407053196, "grad_norm": 351.2560729980469, "learning_rate": 2.5407891156031434e-07, "loss": 17.3594, "step": 23337 }, { "epoch": 1.5499767549976755, "grad_norm": 216.8450469970703, "learning_rate": 2.5400728257917347e-07, "loss": 13.8594, "step": 23338 }, { "epoch": 1.5500431692900312, "grad_norm": 237.57998657226562, "learning_rate": 2.539356622272463e-07, "loss": 12.3125, "step": 23339 }, { "epoch": 1.5501095835823868, "grad_norm": 430.93084716796875, "learning_rate": 2.538640505053611e-07, "loss": 16.3438, "step": 23340 }, { "epoch": 1.5501759978747427, "grad_norm": 184.51026916503906, "learning_rate": 2.5379244741434637e-07, "loss": 16.2031, "step": 23341 }, { "epoch": 1.5502424121670983, "grad_norm": 128.70285034179688, "learning_rate": 2.5372085295503033e-07, "loss": 11.7344, "step": 23342 }, { "epoch": 1.550308826459454, "grad_norm": 419.17724609375, "learning_rate": 2.536492671282413e-07, "loss": 14.8281, "step": 23343 }, { "epoch": 1.5503752407518099, "grad_norm": 200.00457763671875, "learning_rate": 2.535776899348068e-07, "loss": 15.8438, "step": 23344 }, { "epoch": 1.5504416550441655, "grad_norm": 225.11585998535156, "learning_rate": 2.5350612137555525e-07, "loss": 22.1719, "step": 23345 }, { "epoch": 1.5505080693365212, "grad_norm": 452.9988708496094, "learning_rate": 2.534345614513148e-07, "loss": 20.3125, "step": 23346 }, { "epoch": 1.550574483628877, "grad_norm": 169.81919860839844, "learning_rate": 2.533630101629124e-07, "loss": 14.5938, "step": 23347 }, { "epoch": 1.5506408979212325, "grad_norm": 151.62657165527344, "learning_rate": 2.532914675111766e-07, "loss": 18.2656, "step": 23348 }, { "epoch": 1.5507073122135884, "grad_norm": 137.0535888671875, "learning_rate": 2.532199334969343e-07, "loss": 16.7969, "step": 23349 }, { "epoch": 1.550773726505944, "grad_norm": 398.7848205566406, "learning_rate": 2.531484081210131e-07, "loss": 15.8906, "step": 23350 }, { "epoch": 1.5508401407982997, "grad_norm": 122.09571838378906, "learning_rate": 2.530768913842406e-07, "loss": 20.7031, "step": 23351 }, { "epoch": 1.5509065550906556, "grad_norm": 141.98715209960938, "learning_rate": 2.530053832874441e-07, "loss": 15.1875, "step": 23352 }, { "epoch": 1.5509729693830112, "grad_norm": 109.45059204101562, "learning_rate": 2.5293388383144996e-07, "loss": 14.0, "step": 23353 }, { "epoch": 1.5510393836753669, "grad_norm": 207.66978454589844, "learning_rate": 2.5286239301708656e-07, "loss": 14.5625, "step": 23354 }, { "epoch": 1.5511057979677227, "grad_norm": 168.31654357910156, "learning_rate": 2.527909108451799e-07, "loss": 13.8438, "step": 23355 }, { "epoch": 1.5511722122600784, "grad_norm": 206.32887268066406, "learning_rate": 2.527194373165571e-07, "loss": 12.4688, "step": 23356 }, { "epoch": 1.551238626552434, "grad_norm": 152.8919219970703, "learning_rate": 2.5264797243204486e-07, "loss": 16.75, "step": 23357 }, { "epoch": 1.55130504084479, "grad_norm": 215.1162109375, "learning_rate": 2.5257651619247e-07, "loss": 11.6406, "step": 23358 }, { "epoch": 1.5513714551371454, "grad_norm": 250.92637634277344, "learning_rate": 2.5250506859865894e-07, "loss": 14.3906, "step": 23359 }, { "epoch": 1.5514378694295012, "grad_norm": 235.9237823486328, "learning_rate": 2.5243362965143856e-07, "loss": 15.4219, "step": 23360 }, { "epoch": 1.551504283721857, "grad_norm": 144.21475219726562, "learning_rate": 2.523621993516343e-07, "loss": 14.5312, "step": 23361 }, { "epoch": 1.5515706980142125, "grad_norm": 146.78990173339844, "learning_rate": 2.522907777000736e-07, "loss": 16.25, "step": 23362 }, { "epoch": 1.5516371123065684, "grad_norm": 166.24267578125, "learning_rate": 2.5221936469758165e-07, "loss": 17.875, "step": 23363 }, { "epoch": 1.551703526598924, "grad_norm": 221.38919067382812, "learning_rate": 2.52147960344985e-07, "loss": 12.25, "step": 23364 }, { "epoch": 1.5517699408912797, "grad_norm": 374.7984924316406, "learning_rate": 2.5207656464310944e-07, "loss": 18.3594, "step": 23365 }, { "epoch": 1.5518363551836356, "grad_norm": 259.1956481933594, "learning_rate": 2.5200517759278094e-07, "loss": 13.7188, "step": 23366 }, { "epoch": 1.5519027694759913, "grad_norm": 193.87852478027344, "learning_rate": 2.519337991948253e-07, "loss": 13.7812, "step": 23367 }, { "epoch": 1.551969183768347, "grad_norm": 382.00299072265625, "learning_rate": 2.518624294500683e-07, "loss": 16.1562, "step": 23368 }, { "epoch": 1.5520355980607028, "grad_norm": 134.1416778564453, "learning_rate": 2.5179106835933474e-07, "loss": 12.3594, "step": 23369 }, { "epoch": 1.5521020123530582, "grad_norm": 169.62060546875, "learning_rate": 2.517197159234513e-07, "loss": 10.7656, "step": 23370 }, { "epoch": 1.552168426645414, "grad_norm": 264.1801452636719, "learning_rate": 2.516483721432423e-07, "loss": 12.0781, "step": 23371 }, { "epoch": 1.5522348409377698, "grad_norm": 141.47274780273438, "learning_rate": 2.5157703701953346e-07, "loss": 14.3438, "step": 23372 }, { "epoch": 1.5523012552301254, "grad_norm": 341.9131164550781, "learning_rate": 2.5150571055314995e-07, "loss": 20.3438, "step": 23373 }, { "epoch": 1.5523676695224813, "grad_norm": 302.03125, "learning_rate": 2.5143439274491706e-07, "loss": 19.3594, "step": 23374 }, { "epoch": 1.552434083814837, "grad_norm": 462.5177917480469, "learning_rate": 2.5136308359565883e-07, "loss": 18.1562, "step": 23375 }, { "epoch": 1.5525004981071926, "grad_norm": 331.4667663574219, "learning_rate": 2.5129178310620134e-07, "loss": 21.3906, "step": 23376 }, { "epoch": 1.5525669123995485, "grad_norm": 224.86192321777344, "learning_rate": 2.5122049127736854e-07, "loss": 15.6875, "step": 23377 }, { "epoch": 1.5526333266919041, "grad_norm": 201.80091857910156, "learning_rate": 2.511492081099853e-07, "loss": 13.4531, "step": 23378 }, { "epoch": 1.5526997409842598, "grad_norm": 128.53543090820312, "learning_rate": 2.5107793360487614e-07, "loss": 13.25, "step": 23379 }, { "epoch": 1.5527661552766157, "grad_norm": 164.96702575683594, "learning_rate": 2.510066677628656e-07, "loss": 12.0938, "step": 23380 }, { "epoch": 1.552832569568971, "grad_norm": 120.72587585449219, "learning_rate": 2.509354105847782e-07, "loss": 15.2656, "step": 23381 }, { "epoch": 1.552898983861327, "grad_norm": 238.29273986816406, "learning_rate": 2.508641620714379e-07, "loss": 14.7031, "step": 23382 }, { "epoch": 1.5529653981536826, "grad_norm": 444.074462890625, "learning_rate": 2.50792922223669e-07, "loss": 13.6953, "step": 23383 }, { "epoch": 1.5530318124460383, "grad_norm": 112.80363464355469, "learning_rate": 2.507216910422956e-07, "loss": 14.0547, "step": 23384 }, { "epoch": 1.5530982267383941, "grad_norm": 152.60623168945312, "learning_rate": 2.5065046852814187e-07, "loss": 15.1406, "step": 23385 }, { "epoch": 1.5531646410307498, "grad_norm": 197.76437377929688, "learning_rate": 2.505792546820308e-07, "loss": 21.3594, "step": 23386 }, { "epoch": 1.5532310553231055, "grad_norm": 273.06219482421875, "learning_rate": 2.5050804950478743e-07, "loss": 15.2656, "step": 23387 }, { "epoch": 1.5532974696154613, "grad_norm": 424.1233825683594, "learning_rate": 2.5043685299723436e-07, "loss": 21.75, "step": 23388 }, { "epoch": 1.553363883907817, "grad_norm": 190.747314453125, "learning_rate": 2.503656651601956e-07, "loss": 15.375, "step": 23389 }, { "epoch": 1.5534302982001726, "grad_norm": 247.75662231445312, "learning_rate": 2.502944859944947e-07, "loss": 19.5625, "step": 23390 }, { "epoch": 1.5534967124925285, "grad_norm": 267.7061767578125, "learning_rate": 2.50223315500955e-07, "loss": 18.3281, "step": 23391 }, { "epoch": 1.553563126784884, "grad_norm": 169.0352783203125, "learning_rate": 2.501521536803992e-07, "loss": 13.6719, "step": 23392 }, { "epoch": 1.5536295410772398, "grad_norm": 88.25611877441406, "learning_rate": 2.5008100053365135e-07, "loss": 16.9531, "step": 23393 }, { "epoch": 1.5536959553695955, "grad_norm": 173.22750854492188, "learning_rate": 2.500098560615338e-07, "loss": 14.3906, "step": 23394 }, { "epoch": 1.5537623696619511, "grad_norm": 188.63751220703125, "learning_rate": 2.4993872026486984e-07, "loss": 15.5312, "step": 23395 }, { "epoch": 1.553828783954307, "grad_norm": 151.81455993652344, "learning_rate": 2.498675931444822e-07, "loss": 12.0938, "step": 23396 }, { "epoch": 1.5538951982466627, "grad_norm": 219.45652770996094, "learning_rate": 2.4979647470119367e-07, "loss": 17.3281, "step": 23397 }, { "epoch": 1.5539616125390183, "grad_norm": 174.57101440429688, "learning_rate": 2.4972536493582695e-07, "loss": 16.7031, "step": 23398 }, { "epoch": 1.5540280268313742, "grad_norm": 195.56655883789062, "learning_rate": 2.4965426384920487e-07, "loss": 18.5312, "step": 23399 }, { "epoch": 1.5540944411237299, "grad_norm": 131.87347412109375, "learning_rate": 2.4958317144214903e-07, "loss": 12.8516, "step": 23400 }, { "epoch": 1.5541608554160855, "grad_norm": 209.4414520263672, "learning_rate": 2.495120877154829e-07, "loss": 16.8438, "step": 23401 }, { "epoch": 1.5542272697084414, "grad_norm": 300.19183349609375, "learning_rate": 2.4944101267002795e-07, "loss": 12.4375, "step": 23402 }, { "epoch": 1.5542936840007968, "grad_norm": 317.2207336425781, "learning_rate": 2.493699463066065e-07, "loss": 18.3906, "step": 23403 }, { "epoch": 1.5543600982931527, "grad_norm": 337.7477111816406, "learning_rate": 2.4929888862604065e-07, "loss": 19.0156, "step": 23404 }, { "epoch": 1.5544265125855083, "grad_norm": 283.2257995605469, "learning_rate": 2.4922783962915237e-07, "loss": 27.0938, "step": 23405 }, { "epoch": 1.554492926877864, "grad_norm": 192.98538208007812, "learning_rate": 2.491567993167636e-07, "loss": 19.4219, "step": 23406 }, { "epoch": 1.5545593411702199, "grad_norm": 141.5717315673828, "learning_rate": 2.490857676896964e-07, "loss": 15.7344, "step": 23407 }, { "epoch": 1.5546257554625755, "grad_norm": 166.60560607910156, "learning_rate": 2.490147447487713e-07, "loss": 13.9219, "step": 23408 }, { "epoch": 1.5546921697549312, "grad_norm": 253.75315856933594, "learning_rate": 2.489437304948111e-07, "loss": 18.875, "step": 23409 }, { "epoch": 1.554758584047287, "grad_norm": 204.37489318847656, "learning_rate": 2.4887272492863664e-07, "loss": 16.1406, "step": 23410 }, { "epoch": 1.5548249983396427, "grad_norm": 89.14826202392578, "learning_rate": 2.4880172805106923e-07, "loss": 15.25, "step": 23411 }, { "epoch": 1.5548914126319984, "grad_norm": 126.79607391357422, "learning_rate": 2.4873073986293034e-07, "loss": 15.6562, "step": 23412 }, { "epoch": 1.5549578269243542, "grad_norm": 357.86590576171875, "learning_rate": 2.486597603650413e-07, "loss": 22.6562, "step": 23413 }, { "epoch": 1.5550242412167097, "grad_norm": 198.24850463867188, "learning_rate": 2.485887895582224e-07, "loss": 16.8281, "step": 23414 }, { "epoch": 1.5550906555090656, "grad_norm": 310.8877258300781, "learning_rate": 2.4851782744329555e-07, "loss": 18.1094, "step": 23415 }, { "epoch": 1.5551570698014214, "grad_norm": 349.9886169433594, "learning_rate": 2.4844687402108087e-07, "loss": 14.7031, "step": 23416 }, { "epoch": 1.5552234840937769, "grad_norm": 223.82302856445312, "learning_rate": 2.483759292923995e-07, "loss": 13.9062, "step": 23417 }, { "epoch": 1.5552898983861327, "grad_norm": 592.5806884765625, "learning_rate": 2.4830499325807185e-07, "loss": 23.0156, "step": 23418 }, { "epoch": 1.5553563126784884, "grad_norm": 144.1533203125, "learning_rate": 2.482340659189187e-07, "loss": 15.7188, "step": 23419 }, { "epoch": 1.555422726970844, "grad_norm": 244.0785675048828, "learning_rate": 2.4816314727576026e-07, "loss": 19.2656, "step": 23420 }, { "epoch": 1.5554891412632, "grad_norm": 202.974853515625, "learning_rate": 2.480922373294171e-07, "loss": 16.625, "step": 23421 }, { "epoch": 1.5555555555555556, "grad_norm": 535.0183715820312, "learning_rate": 2.480213360807093e-07, "loss": 19.7188, "step": 23422 }, { "epoch": 1.5556219698479112, "grad_norm": 155.2626190185547, "learning_rate": 2.4795044353045704e-07, "loss": 15.8125, "step": 23423 }, { "epoch": 1.555688384140267, "grad_norm": 161.75433349609375, "learning_rate": 2.4787955967948073e-07, "loss": 28.625, "step": 23424 }, { "epoch": 1.5557547984326225, "grad_norm": 548.6630859375, "learning_rate": 2.4780868452859937e-07, "loss": 17.7344, "step": 23425 }, { "epoch": 1.5558212127249784, "grad_norm": 237.77073669433594, "learning_rate": 2.4773781807863403e-07, "loss": 21.6562, "step": 23426 }, { "epoch": 1.5558876270173343, "grad_norm": 114.10893249511719, "learning_rate": 2.4766696033040355e-07, "loss": 9.2188, "step": 23427 }, { "epoch": 1.5559540413096897, "grad_norm": 195.42726135253906, "learning_rate": 2.475961112847278e-07, "loss": 12.875, "step": 23428 }, { "epoch": 1.5560204556020456, "grad_norm": 218.01991271972656, "learning_rate": 2.475252709424265e-07, "loss": 21.4062, "step": 23429 }, { "epoch": 1.5560868698944013, "grad_norm": 236.08059692382812, "learning_rate": 2.4745443930431873e-07, "loss": 21.2031, "step": 23430 }, { "epoch": 1.556153284186757, "grad_norm": 125.9408187866211, "learning_rate": 2.473836163712243e-07, "loss": 14.8125, "step": 23431 }, { "epoch": 1.5562196984791128, "grad_norm": 163.8932647705078, "learning_rate": 2.473128021439623e-07, "loss": 14.7656, "step": 23432 }, { "epoch": 1.5562861127714684, "grad_norm": 142.39366149902344, "learning_rate": 2.472419966233513e-07, "loss": 12.5156, "step": 23433 }, { "epoch": 1.556352527063824, "grad_norm": 217.36032104492188, "learning_rate": 2.471711998102115e-07, "loss": 21.1875, "step": 23434 }, { "epoch": 1.55641894135618, "grad_norm": 305.88702392578125, "learning_rate": 2.471004117053608e-07, "loss": 19.6562, "step": 23435 }, { "epoch": 1.5564853556485354, "grad_norm": 227.84999084472656, "learning_rate": 2.470296323096183e-07, "loss": 13.0469, "step": 23436 }, { "epoch": 1.5565517699408913, "grad_norm": 210.7563018798828, "learning_rate": 2.4695886162380285e-07, "loss": 12.5781, "step": 23437 }, { "epoch": 1.5566181842332472, "grad_norm": 167.3786163330078, "learning_rate": 2.4688809964873337e-07, "loss": 18.625, "step": 23438 }, { "epoch": 1.5566845985256026, "grad_norm": 175.20111083984375, "learning_rate": 2.4681734638522756e-07, "loss": 13.8281, "step": 23439 }, { "epoch": 1.5567510128179585, "grad_norm": 186.43136596679688, "learning_rate": 2.467466018341049e-07, "loss": 15.9297, "step": 23440 }, { "epoch": 1.5568174271103141, "grad_norm": 369.2710876464844, "learning_rate": 2.4667586599618297e-07, "loss": 19.6406, "step": 23441 }, { "epoch": 1.5568838414026698, "grad_norm": 469.4894104003906, "learning_rate": 2.466051388722801e-07, "loss": 14.8281, "step": 23442 }, { "epoch": 1.5569502556950257, "grad_norm": 162.93417358398438, "learning_rate": 2.465344204632146e-07, "loss": 15.3906, "step": 23443 }, { "epoch": 1.5570166699873813, "grad_norm": 535.9021606445312, "learning_rate": 2.4646371076980457e-07, "loss": 14.9688, "step": 23444 }, { "epoch": 1.557083084279737, "grad_norm": 312.8284606933594, "learning_rate": 2.463930097928677e-07, "loss": 11.7969, "step": 23445 }, { "epoch": 1.5571494985720928, "grad_norm": 340.1981201171875, "learning_rate": 2.463223175332223e-07, "loss": 18.8594, "step": 23446 }, { "epoch": 1.5572159128644483, "grad_norm": 325.6769714355469, "learning_rate": 2.46251633991685e-07, "loss": 13.6094, "step": 23447 }, { "epoch": 1.5572823271568041, "grad_norm": 208.1604766845703, "learning_rate": 2.461809591690749e-07, "loss": 17.0938, "step": 23448 }, { "epoch": 1.55734874144916, "grad_norm": 125.58914184570312, "learning_rate": 2.4611029306620846e-07, "loss": 12.5, "step": 23449 }, { "epoch": 1.5574151557415155, "grad_norm": 121.220703125, "learning_rate": 2.460396356839034e-07, "loss": 16.0781, "step": 23450 }, { "epoch": 1.5574815700338713, "grad_norm": 289.10894775390625, "learning_rate": 2.459689870229772e-07, "loss": 16.9062, "step": 23451 }, { "epoch": 1.557547984326227, "grad_norm": 194.0596160888672, "learning_rate": 2.4589834708424683e-07, "loss": 14.7344, "step": 23452 }, { "epoch": 1.5576143986185826, "grad_norm": 361.09368896484375, "learning_rate": 2.458277158685296e-07, "loss": 19.6719, "step": 23453 }, { "epoch": 1.5576808129109385, "grad_norm": 167.81016540527344, "learning_rate": 2.457570933766427e-07, "loss": 16.5469, "step": 23454 }, { "epoch": 1.5577472272032942, "grad_norm": 223.17495727539062, "learning_rate": 2.456864796094026e-07, "loss": 16.6562, "step": 23455 }, { "epoch": 1.5578136414956498, "grad_norm": 443.2021179199219, "learning_rate": 2.45615874567626e-07, "loss": 17.875, "step": 23456 }, { "epoch": 1.5578800557880057, "grad_norm": 198.52639770507812, "learning_rate": 2.455452782521306e-07, "loss": 16.5625, "step": 23457 }, { "epoch": 1.5579464700803611, "grad_norm": 210.68527221679688, "learning_rate": 2.4547469066373195e-07, "loss": 16.8438, "step": 23458 }, { "epoch": 1.558012884372717, "grad_norm": 162.4214630126953, "learning_rate": 2.4540411180324716e-07, "loss": 23.9531, "step": 23459 }, { "epoch": 1.5580792986650729, "grad_norm": 230.7241668701172, "learning_rate": 2.453335416714923e-07, "loss": 14.5625, "step": 23460 }, { "epoch": 1.5581457129574283, "grad_norm": 174.66607666015625, "learning_rate": 2.4526298026928403e-07, "loss": 15.125, "step": 23461 }, { "epoch": 1.5582121272497842, "grad_norm": 1380.881103515625, "learning_rate": 2.451924275974384e-07, "loss": 13.6719, "step": 23462 }, { "epoch": 1.5582785415421399, "grad_norm": 180.81173706054688, "learning_rate": 2.451218836567717e-07, "loss": 20.3438, "step": 23463 }, { "epoch": 1.5583449558344955, "grad_norm": 92.56814575195312, "learning_rate": 2.450513484480993e-07, "loss": 14.0625, "step": 23464 }, { "epoch": 1.5584113701268514, "grad_norm": 151.0079345703125, "learning_rate": 2.44980821972238e-07, "loss": 16.0938, "step": 23465 }, { "epoch": 1.558477784419207, "grad_norm": 140.0427703857422, "learning_rate": 2.4491030423000304e-07, "loss": 18.6562, "step": 23466 }, { "epoch": 1.5585441987115627, "grad_norm": 371.95281982421875, "learning_rate": 2.448397952222102e-07, "loss": 15.9844, "step": 23467 }, { "epoch": 1.5586106130039186, "grad_norm": 272.8038330078125, "learning_rate": 2.4476929494967515e-07, "loss": 14.1406, "step": 23468 }, { "epoch": 1.558677027296274, "grad_norm": 192.9124755859375, "learning_rate": 2.446988034132135e-07, "loss": 18.5938, "step": 23469 }, { "epoch": 1.5587434415886299, "grad_norm": 284.0966491699219, "learning_rate": 2.4462832061364046e-07, "loss": 16.4688, "step": 23470 }, { "epoch": 1.5588098558809858, "grad_norm": 506.70074462890625, "learning_rate": 2.445578465517717e-07, "loss": 20.2344, "step": 23471 }, { "epoch": 1.5588762701733412, "grad_norm": 119.08039093017578, "learning_rate": 2.444873812284216e-07, "loss": 15.2812, "step": 23472 }, { "epoch": 1.558942684465697, "grad_norm": 159.68106079101562, "learning_rate": 2.4441692464440644e-07, "loss": 15.7656, "step": 23473 }, { "epoch": 1.5590090987580527, "grad_norm": 212.39614868164062, "learning_rate": 2.4434647680054033e-07, "loss": 20.625, "step": 23474 }, { "epoch": 1.5590755130504084, "grad_norm": 324.3319396972656, "learning_rate": 2.442760376976384e-07, "loss": 20.7031, "step": 23475 }, { "epoch": 1.5591419273427642, "grad_norm": 171.25253295898438, "learning_rate": 2.4420560733651554e-07, "loss": 12.6719, "step": 23476 }, { "epoch": 1.55920834163512, "grad_norm": 121.68269348144531, "learning_rate": 2.4413518571798665e-07, "loss": 14.8906, "step": 23477 }, { "epoch": 1.5592747559274756, "grad_norm": 627.2376708984375, "learning_rate": 2.440647728428655e-07, "loss": 23.75, "step": 23478 }, { "epoch": 1.5593411702198314, "grad_norm": 426.72418212890625, "learning_rate": 2.439943687119678e-07, "loss": 14.9375, "step": 23479 }, { "epoch": 1.5594075845121869, "grad_norm": 252.59938049316406, "learning_rate": 2.4392397332610693e-07, "loss": 16.7656, "step": 23480 }, { "epoch": 1.5594739988045427, "grad_norm": 171.35574340820312, "learning_rate": 2.438535866860976e-07, "loss": 15.5156, "step": 23481 }, { "epoch": 1.5595404130968986, "grad_norm": 255.1596221923828, "learning_rate": 2.437832087927538e-07, "loss": 26.6094, "step": 23482 }, { "epoch": 1.559606827389254, "grad_norm": 150.36561584472656, "learning_rate": 2.4371283964688995e-07, "loss": 16.3047, "step": 23483 }, { "epoch": 1.55967324168161, "grad_norm": 217.20899963378906, "learning_rate": 2.4364247924931994e-07, "loss": 11.2188, "step": 23484 }, { "epoch": 1.5597396559739656, "grad_norm": 406.56414794921875, "learning_rate": 2.435721276008578e-07, "loss": 21.3594, "step": 23485 }, { "epoch": 1.5598060702663212, "grad_norm": 249.50527954101562, "learning_rate": 2.435017847023164e-07, "loss": 20.4688, "step": 23486 }, { "epoch": 1.559872484558677, "grad_norm": 336.7493896484375, "learning_rate": 2.434314505545109e-07, "loss": 13.2188, "step": 23487 }, { "epoch": 1.5599388988510328, "grad_norm": 143.42901611328125, "learning_rate": 2.433611251582537e-07, "loss": 14.8125, "step": 23488 }, { "epoch": 1.5600053131433884, "grad_norm": 256.12420654296875, "learning_rate": 2.4329080851435877e-07, "loss": 16.1094, "step": 23489 }, { "epoch": 1.5600717274357443, "grad_norm": 202.2959747314453, "learning_rate": 2.4322050062363943e-07, "loss": 17.4062, "step": 23490 }, { "epoch": 1.5601381417280997, "grad_norm": 197.34927368164062, "learning_rate": 2.431502014869089e-07, "loss": 19.8281, "step": 23491 }, { "epoch": 1.5602045560204556, "grad_norm": 424.9704284667969, "learning_rate": 2.430799111049804e-07, "loss": 15.1406, "step": 23492 }, { "epoch": 1.5602709703128115, "grad_norm": 314.9066162109375, "learning_rate": 2.430096294786671e-07, "loss": 18.8906, "step": 23493 }, { "epoch": 1.560337384605167, "grad_norm": 153.1313018798828, "learning_rate": 2.429393566087818e-07, "loss": 15.875, "step": 23494 }, { "epoch": 1.5604037988975228, "grad_norm": 79.77752685546875, "learning_rate": 2.4286909249613763e-07, "loss": 11.7031, "step": 23495 }, { "epoch": 1.5604702131898784, "grad_norm": 163.8094940185547, "learning_rate": 2.4279883714154735e-07, "loss": 12.75, "step": 23496 }, { "epoch": 1.560536627482234, "grad_norm": 370.20159912109375, "learning_rate": 2.4272859054582297e-07, "loss": 14.0469, "step": 23497 }, { "epoch": 1.56060304177459, "grad_norm": 151.66770935058594, "learning_rate": 2.426583527097782e-07, "loss": 11.7656, "step": 23498 }, { "epoch": 1.5606694560669456, "grad_norm": 349.78289794921875, "learning_rate": 2.425881236342245e-07, "loss": 24.8594, "step": 23499 }, { "epoch": 1.5607358703593013, "grad_norm": 405.1143798828125, "learning_rate": 2.425179033199748e-07, "loss": 20.4531, "step": 23500 }, { "epoch": 1.5608022846516572, "grad_norm": 221.01376342773438, "learning_rate": 2.424476917678411e-07, "loss": 20.7812, "step": 23501 }, { "epoch": 1.5608686989440126, "grad_norm": 147.30792236328125, "learning_rate": 2.4237748897863595e-07, "loss": 15.625, "step": 23502 }, { "epoch": 1.5609351132363685, "grad_norm": 449.98358154296875, "learning_rate": 2.423072949531707e-07, "loss": 18.1875, "step": 23503 }, { "epoch": 1.5610015275287243, "grad_norm": 191.08726501464844, "learning_rate": 2.422371096922582e-07, "loss": 16.0, "step": 23504 }, { "epoch": 1.5610679418210798, "grad_norm": 101.90228271484375, "learning_rate": 2.421669331967096e-07, "loss": 12.7812, "step": 23505 }, { "epoch": 1.5611343561134357, "grad_norm": 178.26580810546875, "learning_rate": 2.420967654673369e-07, "loss": 13.9062, "step": 23506 }, { "epoch": 1.5612007704057913, "grad_norm": 247.5484619140625, "learning_rate": 2.420266065049518e-07, "loss": 18.2344, "step": 23507 }, { "epoch": 1.561267184698147, "grad_norm": 167.8391571044922, "learning_rate": 2.419564563103659e-07, "loss": 15.0156, "step": 23508 }, { "epoch": 1.5613335989905028, "grad_norm": 268.2651062011719, "learning_rate": 2.4188631488439046e-07, "loss": 14.0156, "step": 23509 }, { "epoch": 1.5614000132828585, "grad_norm": 310.5074462890625, "learning_rate": 2.418161822278374e-07, "loss": 17.4453, "step": 23510 }, { "epoch": 1.5614664275752141, "grad_norm": 187.11114501953125, "learning_rate": 2.4174605834151696e-07, "loss": 14.125, "step": 23511 }, { "epoch": 1.56153284186757, "grad_norm": 121.44596862792969, "learning_rate": 2.416759432262414e-07, "loss": 11.9844, "step": 23512 }, { "epoch": 1.5615992561599255, "grad_norm": 163.7606658935547, "learning_rate": 2.41605836882821e-07, "loss": 16.0, "step": 23513 }, { "epoch": 1.5616656704522813, "grad_norm": 348.17132568359375, "learning_rate": 2.4153573931206693e-07, "loss": 20.7969, "step": 23514 }, { "epoch": 1.5617320847446372, "grad_norm": 164.78611755371094, "learning_rate": 2.4146565051479015e-07, "loss": 12.6094, "step": 23515 }, { "epoch": 1.5617984990369926, "grad_norm": 196.4260711669922, "learning_rate": 2.4139557049180125e-07, "loss": 15.9688, "step": 23516 }, { "epoch": 1.5618649133293485, "grad_norm": 357.13385009765625, "learning_rate": 2.41325499243911e-07, "loss": 16.1562, "step": 23517 }, { "epoch": 1.5619313276217042, "grad_norm": 184.35568237304688, "learning_rate": 2.412554367719302e-07, "loss": 15.0, "step": 23518 }, { "epoch": 1.5619977419140598, "grad_norm": 213.5752716064453, "learning_rate": 2.411853830766687e-07, "loss": 15.2969, "step": 23519 }, { "epoch": 1.5620641562064157, "grad_norm": 213.021728515625, "learning_rate": 2.4111533815893706e-07, "loss": 19.0312, "step": 23520 }, { "epoch": 1.5621305704987714, "grad_norm": 265.2520446777344, "learning_rate": 2.4104530201954566e-07, "loss": 21.0312, "step": 23521 }, { "epoch": 1.562196984791127, "grad_norm": 263.329345703125, "learning_rate": 2.4097527465930455e-07, "loss": 12.2812, "step": 23522 }, { "epoch": 1.5622633990834829, "grad_norm": 170.7312469482422, "learning_rate": 2.409052560790238e-07, "loss": 15.4844, "step": 23523 }, { "epoch": 1.5623298133758383, "grad_norm": 139.6868896484375, "learning_rate": 2.4083524627951356e-07, "loss": 12.375, "step": 23524 }, { "epoch": 1.5623962276681942, "grad_norm": 597.2617797851562, "learning_rate": 2.4076524526158295e-07, "loss": 14.0938, "step": 23525 }, { "epoch": 1.56246264196055, "grad_norm": 148.02236938476562, "learning_rate": 2.406952530260427e-07, "loss": 15.1094, "step": 23526 }, { "epoch": 1.5625290562529055, "grad_norm": 169.85397338867188, "learning_rate": 2.4062526957370155e-07, "loss": 14.8594, "step": 23527 }, { "epoch": 1.5625954705452614, "grad_norm": 356.963623046875, "learning_rate": 2.4055529490536954e-07, "loss": 16.0625, "step": 23528 }, { "epoch": 1.562661884837617, "grad_norm": 158.08627319335938, "learning_rate": 2.4048532902185583e-07, "loss": 14.9375, "step": 23529 }, { "epoch": 1.5627282991299727, "grad_norm": 325.05340576171875, "learning_rate": 2.4041537192397e-07, "loss": 12.4844, "step": 23530 }, { "epoch": 1.5627947134223286, "grad_norm": 200.7356414794922, "learning_rate": 2.403454236125211e-07, "loss": 11.8438, "step": 23531 }, { "epoch": 1.5628611277146842, "grad_norm": 231.91757202148438, "learning_rate": 2.4027548408831823e-07, "loss": 19.6562, "step": 23532 }, { "epoch": 1.5629275420070399, "grad_norm": 151.08226013183594, "learning_rate": 2.4020555335217054e-07, "loss": 12.1719, "step": 23533 }, { "epoch": 1.5629939562993957, "grad_norm": 326.6002502441406, "learning_rate": 2.401356314048869e-07, "loss": 18.8281, "step": 23534 }, { "epoch": 1.5630603705917512, "grad_norm": 439.6732482910156, "learning_rate": 2.400657182472764e-07, "loss": 11.2344, "step": 23535 }, { "epoch": 1.563126784884107, "grad_norm": 186.79132080078125, "learning_rate": 2.399958138801469e-07, "loss": 14.6875, "step": 23536 }, { "epoch": 1.563193199176463, "grad_norm": 274.7318115234375, "learning_rate": 2.399259183043081e-07, "loss": 16.9531, "step": 23537 }, { "epoch": 1.5632596134688184, "grad_norm": 528.0885620117188, "learning_rate": 2.398560315205678e-07, "loss": 13.5938, "step": 23538 }, { "epoch": 1.5633260277611742, "grad_norm": 96.36701202392578, "learning_rate": 2.3978615352973443e-07, "loss": 11.8594, "step": 23539 }, { "epoch": 1.56339244205353, "grad_norm": 208.14337158203125, "learning_rate": 2.397162843326166e-07, "loss": 17.8125, "step": 23540 }, { "epoch": 1.5634588563458856, "grad_norm": 401.9047546386719, "learning_rate": 2.3964642393002264e-07, "loss": 16.4219, "step": 23541 }, { "epoch": 1.5635252706382414, "grad_norm": 540.963623046875, "learning_rate": 2.395765723227599e-07, "loss": 19.875, "step": 23542 }, { "epoch": 1.563591684930597, "grad_norm": 153.8567657470703, "learning_rate": 2.3950672951163744e-07, "loss": 19.7812, "step": 23543 }, { "epoch": 1.5636580992229527, "grad_norm": 298.2495422363281, "learning_rate": 2.394368954974623e-07, "loss": 15.8438, "step": 23544 }, { "epoch": 1.5637245135153086, "grad_norm": 219.0890350341797, "learning_rate": 2.3936707028104253e-07, "loss": 20.6562, "step": 23545 }, { "epoch": 1.563790927807664, "grad_norm": 283.4087219238281, "learning_rate": 2.392972538631859e-07, "loss": 20.625, "step": 23546 }, { "epoch": 1.56385734210002, "grad_norm": 179.9396514892578, "learning_rate": 2.3922744624470004e-07, "loss": 15.9375, "step": 23547 }, { "epoch": 1.5639237563923758, "grad_norm": 188.33567810058594, "learning_rate": 2.3915764742639233e-07, "loss": 16.9531, "step": 23548 }, { "epoch": 1.5639901706847312, "grad_norm": 168.66671752929688, "learning_rate": 2.3908785740907055e-07, "loss": 12.9219, "step": 23549 }, { "epoch": 1.564056584977087, "grad_norm": 367.677734375, "learning_rate": 2.390180761935412e-07, "loss": 17.7656, "step": 23550 }, { "epoch": 1.5641229992694428, "grad_norm": 403.2633361816406, "learning_rate": 2.3894830378061237e-07, "loss": 19.0469, "step": 23551 }, { "epoch": 1.5641894135617984, "grad_norm": 247.96554565429688, "learning_rate": 2.388785401710904e-07, "loss": 17.8906, "step": 23552 }, { "epoch": 1.5642558278541543, "grad_norm": 272.7626037597656, "learning_rate": 2.3880878536578275e-07, "loss": 13.5781, "step": 23553 }, { "epoch": 1.56432224214651, "grad_norm": 381.8616027832031, "learning_rate": 2.38739039365496e-07, "loss": 12.4219, "step": 23554 }, { "epoch": 1.5643886564388656, "grad_norm": 210.6166534423828, "learning_rate": 2.386693021710372e-07, "loss": 13.0469, "step": 23555 }, { "epoch": 1.5644550707312215, "grad_norm": 128.501953125, "learning_rate": 2.3859957378321284e-07, "loss": 12.2188, "step": 23556 }, { "epoch": 1.564521485023577, "grad_norm": 134.8639373779297, "learning_rate": 2.385298542028298e-07, "loss": 15.6562, "step": 23557 }, { "epoch": 1.5645878993159328, "grad_norm": 267.1771240234375, "learning_rate": 2.384601434306938e-07, "loss": 18.9844, "step": 23558 }, { "epoch": 1.5646543136082887, "grad_norm": 104.58032989501953, "learning_rate": 2.3839044146761223e-07, "loss": 13.2109, "step": 23559 }, { "epoch": 1.564720727900644, "grad_norm": 191.96832275390625, "learning_rate": 2.3832074831439064e-07, "loss": 21.875, "step": 23560 }, { "epoch": 1.564787142193, "grad_norm": 214.10537719726562, "learning_rate": 2.3825106397183547e-07, "loss": 19.5938, "step": 23561 }, { "epoch": 1.5648535564853556, "grad_norm": 145.7895965576172, "learning_rate": 2.3818138844075265e-07, "loss": 13.4219, "step": 23562 }, { "epoch": 1.5649199707777113, "grad_norm": 230.0607147216797, "learning_rate": 2.381117217219486e-07, "loss": 17.1562, "step": 23563 }, { "epoch": 1.5649863850700672, "grad_norm": 128.67445373535156, "learning_rate": 2.3804206381622826e-07, "loss": 12.6094, "step": 23564 }, { "epoch": 1.5650527993624228, "grad_norm": 458.875244140625, "learning_rate": 2.3797241472439843e-07, "loss": 16.7031, "step": 23565 }, { "epoch": 1.5651192136547785, "grad_norm": 417.3927917480469, "learning_rate": 2.3790277444726414e-07, "loss": 15.2656, "step": 23566 }, { "epoch": 1.5651856279471343, "grad_norm": 218.08985900878906, "learning_rate": 2.3783314298563072e-07, "loss": 17.6562, "step": 23567 }, { "epoch": 1.56525204223949, "grad_norm": 135.48277282714844, "learning_rate": 2.3776352034030456e-07, "loss": 14.7812, "step": 23568 }, { "epoch": 1.5653184565318456, "grad_norm": 469.21185302734375, "learning_rate": 2.3769390651209033e-07, "loss": 22.2188, "step": 23569 }, { "epoch": 1.5653848708242015, "grad_norm": 161.56015014648438, "learning_rate": 2.3762430150179324e-07, "loss": 12.5625, "step": 23570 }, { "epoch": 1.565451285116557, "grad_norm": 470.85528564453125, "learning_rate": 2.375547053102187e-07, "loss": 15.0, "step": 23571 }, { "epoch": 1.5655176994089128, "grad_norm": 320.3183288574219, "learning_rate": 2.3748511793817172e-07, "loss": 19.9688, "step": 23572 }, { "epoch": 1.5655841137012685, "grad_norm": 119.41828155517578, "learning_rate": 2.374155393864571e-07, "loss": 12.9766, "step": 23573 }, { "epoch": 1.5656505279936241, "grad_norm": 160.19374084472656, "learning_rate": 2.3734596965588017e-07, "loss": 12.5625, "step": 23574 }, { "epoch": 1.56571694228598, "grad_norm": 145.77407836914062, "learning_rate": 2.3727640874724464e-07, "loss": 14.0, "step": 23575 }, { "epoch": 1.5657833565783357, "grad_norm": 119.53459930419922, "learning_rate": 2.3720685666135644e-07, "loss": 11.9219, "step": 23576 }, { "epoch": 1.5658497708706913, "grad_norm": 166.87599182128906, "learning_rate": 2.371373133990192e-07, "loss": 17.0, "step": 23577 }, { "epoch": 1.5659161851630472, "grad_norm": 280.1982421875, "learning_rate": 2.3706777896103758e-07, "loss": 13.375, "step": 23578 }, { "epoch": 1.5659825994554029, "grad_norm": 472.4396667480469, "learning_rate": 2.36998253348216e-07, "loss": 17.1875, "step": 23579 }, { "epoch": 1.5660490137477585, "grad_norm": 609.6943969726562, "learning_rate": 2.3692873656135858e-07, "loss": 15.4219, "step": 23580 }, { "epoch": 1.5661154280401144, "grad_norm": 99.19123840332031, "learning_rate": 2.3685922860126962e-07, "loss": 11.8906, "step": 23581 }, { "epoch": 1.5661818423324698, "grad_norm": 183.09947204589844, "learning_rate": 2.367897294687532e-07, "loss": 17.2969, "step": 23582 }, { "epoch": 1.5662482566248257, "grad_norm": 212.8253936767578, "learning_rate": 2.3672023916461292e-07, "loss": 14.4688, "step": 23583 }, { "epoch": 1.5663146709171814, "grad_norm": 308.5686950683594, "learning_rate": 2.366507576896528e-07, "loss": 14.2344, "step": 23584 }, { "epoch": 1.566381085209537, "grad_norm": 153.6582794189453, "learning_rate": 2.3658128504467646e-07, "loss": 13.1562, "step": 23585 }, { "epoch": 1.5664474995018929, "grad_norm": 277.4474182128906, "learning_rate": 2.3651182123048762e-07, "loss": 16.9688, "step": 23586 }, { "epoch": 1.5665139137942485, "grad_norm": 208.1175079345703, "learning_rate": 2.3644236624788983e-07, "loss": 19.75, "step": 23587 }, { "epoch": 1.5665803280866042, "grad_norm": 137.8365020751953, "learning_rate": 2.3637292009768672e-07, "loss": 13.0156, "step": 23588 }, { "epoch": 1.56664674237896, "grad_norm": 260.75726318359375, "learning_rate": 2.363034827806808e-07, "loss": 22.1875, "step": 23589 }, { "epoch": 1.5667131566713157, "grad_norm": 474.0906066894531, "learning_rate": 2.362340542976763e-07, "loss": 18.4844, "step": 23590 }, { "epoch": 1.5667795709636714, "grad_norm": 169.00559997558594, "learning_rate": 2.3616463464947566e-07, "loss": 14.9531, "step": 23591 }, { "epoch": 1.5668459852560273, "grad_norm": 275.70758056640625, "learning_rate": 2.3609522383688196e-07, "loss": 15.8594, "step": 23592 }, { "epoch": 1.5669123995483827, "grad_norm": 272.4974060058594, "learning_rate": 2.360258218606983e-07, "loss": 13.9375, "step": 23593 }, { "epoch": 1.5669788138407386, "grad_norm": 227.7200469970703, "learning_rate": 2.3595642872172738e-07, "loss": 23.3438, "step": 23594 }, { "epoch": 1.5670452281330942, "grad_norm": 304.9969787597656, "learning_rate": 2.358870444207719e-07, "loss": 18.7812, "step": 23595 }, { "epoch": 1.5671116424254499, "grad_norm": 84.6417465209961, "learning_rate": 2.3581766895863487e-07, "loss": 11.4844, "step": 23596 }, { "epoch": 1.5671780567178057, "grad_norm": 591.6402587890625, "learning_rate": 2.3574830233611764e-07, "loss": 18.7812, "step": 23597 }, { "epoch": 1.5672444710101614, "grad_norm": 81.68047332763672, "learning_rate": 2.3567894455402404e-07, "loss": 14.5, "step": 23598 }, { "epoch": 1.567310885302517, "grad_norm": 203.8811492919922, "learning_rate": 2.3560959561315541e-07, "loss": 14.1406, "step": 23599 }, { "epoch": 1.567377299594873, "grad_norm": 236.5426483154297, "learning_rate": 2.3554025551431412e-07, "loss": 15.2188, "step": 23600 }, { "epoch": 1.5674437138872286, "grad_norm": 184.1142578125, "learning_rate": 2.354709242583024e-07, "loss": 18.6719, "step": 23601 }, { "epoch": 1.5675101281795842, "grad_norm": 273.71051025390625, "learning_rate": 2.354016018459225e-07, "loss": 14.2344, "step": 23602 }, { "epoch": 1.5675765424719401, "grad_norm": 144.91696166992188, "learning_rate": 2.3533228827797535e-07, "loss": 16.5781, "step": 23603 }, { "epoch": 1.5676429567642955, "grad_norm": 480.3020935058594, "learning_rate": 2.3526298355526363e-07, "loss": 16.0625, "step": 23604 }, { "epoch": 1.5677093710566514, "grad_norm": 260.83544921875, "learning_rate": 2.3519368767858905e-07, "loss": 16.3281, "step": 23605 }, { "epoch": 1.567775785349007, "grad_norm": 289.0360412597656, "learning_rate": 2.3512440064875228e-07, "loss": 16.6094, "step": 23606 }, { "epoch": 1.5678421996413627, "grad_norm": 119.97672271728516, "learning_rate": 2.3505512246655602e-07, "loss": 14.4688, "step": 23607 }, { "epoch": 1.5679086139337186, "grad_norm": 214.62518310546875, "learning_rate": 2.3498585313280072e-07, "loss": 17.0078, "step": 23608 }, { "epoch": 1.5679750282260743, "grad_norm": 185.93185424804688, "learning_rate": 2.349165926482879e-07, "loss": 15.2031, "step": 23609 }, { "epoch": 1.56804144251843, "grad_norm": 175.6411895751953, "learning_rate": 2.3484734101381876e-07, "loss": 23.4688, "step": 23610 }, { "epoch": 1.5681078568107858, "grad_norm": 350.6441345214844, "learning_rate": 2.3477809823019436e-07, "loss": 18.0781, "step": 23611 }, { "epoch": 1.5681742711031414, "grad_norm": 124.15260314941406, "learning_rate": 2.3470886429821568e-07, "loss": 12.9375, "step": 23612 }, { "epoch": 1.568240685395497, "grad_norm": 102.41797637939453, "learning_rate": 2.346396392186839e-07, "loss": 13.7344, "step": 23613 }, { "epoch": 1.568307099687853, "grad_norm": 221.32803344726562, "learning_rate": 2.3457042299239882e-07, "loss": 21.25, "step": 23614 }, { "epoch": 1.5683735139802084, "grad_norm": 122.48954010009766, "learning_rate": 2.3450121562016235e-07, "loss": 15.1875, "step": 23615 }, { "epoch": 1.5684399282725643, "grad_norm": 289.8002014160156, "learning_rate": 2.3443201710277405e-07, "loss": 15.2188, "step": 23616 }, { "epoch": 1.56850634256492, "grad_norm": 755.7072143554688, "learning_rate": 2.3436282744103476e-07, "loss": 20.0781, "step": 23617 }, { "epoch": 1.5685727568572756, "grad_norm": 380.9989013671875, "learning_rate": 2.342936466357447e-07, "loss": 15.8281, "step": 23618 }, { "epoch": 1.5686391711496315, "grad_norm": 352.8124084472656, "learning_rate": 2.3422447468770435e-07, "loss": 20.3906, "step": 23619 }, { "epoch": 1.5687055854419871, "grad_norm": 159.53004455566406, "learning_rate": 2.3415531159771363e-07, "loss": 14.3906, "step": 23620 }, { "epoch": 1.5687719997343428, "grad_norm": 220.2792510986328, "learning_rate": 2.340861573665729e-07, "loss": 15.2188, "step": 23621 }, { "epoch": 1.5688384140266987, "grad_norm": 131.57810974121094, "learning_rate": 2.340170119950813e-07, "loss": 18.6094, "step": 23622 }, { "epoch": 1.5689048283190543, "grad_norm": 452.3955078125, "learning_rate": 2.3394787548403983e-07, "loss": 14.3281, "step": 23623 }, { "epoch": 1.56897124261141, "grad_norm": 124.5335464477539, "learning_rate": 2.3387874783424734e-07, "loss": 13.2969, "step": 23624 }, { "epoch": 1.5690376569037658, "grad_norm": 499.5356140136719, "learning_rate": 2.338096290465036e-07, "loss": 18.3125, "step": 23625 }, { "epoch": 1.5691040711961213, "grad_norm": 167.72506713867188, "learning_rate": 2.337405191216083e-07, "loss": 18.1875, "step": 23626 }, { "epoch": 1.5691704854884772, "grad_norm": 281.1491394042969, "learning_rate": 2.3367141806036117e-07, "loss": 14.7578, "step": 23627 }, { "epoch": 1.5692368997808328, "grad_norm": 215.2576904296875, "learning_rate": 2.3360232586356067e-07, "loss": 15.6875, "step": 23628 }, { "epoch": 1.5693033140731885, "grad_norm": 448.7867736816406, "learning_rate": 2.33533242532007e-07, "loss": 20.9844, "step": 23629 }, { "epoch": 1.5693697283655443, "grad_norm": 112.88592529296875, "learning_rate": 2.334641680664985e-07, "loss": 12.0469, "step": 23630 }, { "epoch": 1.5694361426579, "grad_norm": 480.3657531738281, "learning_rate": 2.3339510246783457e-07, "loss": 20.9219, "step": 23631 }, { "epoch": 1.5695025569502556, "grad_norm": 204.35548400878906, "learning_rate": 2.3332604573681401e-07, "loss": 12.6562, "step": 23632 }, { "epoch": 1.5695689712426115, "grad_norm": 312.7531433105469, "learning_rate": 2.332569978742357e-07, "loss": 16.2031, "step": 23633 }, { "epoch": 1.5696353855349672, "grad_norm": 204.3937530517578, "learning_rate": 2.3318795888089827e-07, "loss": 16.125, "step": 23634 }, { "epoch": 1.5697017998273228, "grad_norm": 272.1838684082031, "learning_rate": 2.3311892875760063e-07, "loss": 16.3125, "step": 23635 }, { "epoch": 1.5697682141196787, "grad_norm": 245.97854614257812, "learning_rate": 2.3304990750514053e-07, "loss": 16.1875, "step": 23636 }, { "epoch": 1.5698346284120341, "grad_norm": 166.99118041992188, "learning_rate": 2.329808951243174e-07, "loss": 11.9375, "step": 23637 }, { "epoch": 1.56990104270439, "grad_norm": 289.8416442871094, "learning_rate": 2.329118916159286e-07, "loss": 25.0938, "step": 23638 }, { "epoch": 1.5699674569967457, "grad_norm": 183.24267578125, "learning_rate": 2.3284289698077287e-07, "loss": 16.1719, "step": 23639 }, { "epoch": 1.5700338712891013, "grad_norm": 118.88326263427734, "learning_rate": 2.3277391121964796e-07, "loss": 19.5156, "step": 23640 }, { "epoch": 1.5701002855814572, "grad_norm": 219.6270751953125, "learning_rate": 2.3270493433335214e-07, "loss": 17.6094, "step": 23641 }, { "epoch": 1.5701666998738129, "grad_norm": 240.079833984375, "learning_rate": 2.3263596632268324e-07, "loss": 16.5156, "step": 23642 }, { "epoch": 1.5702331141661685, "grad_norm": 149.0456085205078, "learning_rate": 2.325670071884389e-07, "loss": 17.1875, "step": 23643 }, { "epoch": 1.5702995284585244, "grad_norm": 154.56106567382812, "learning_rate": 2.3249805693141721e-07, "loss": 13.2969, "step": 23644 }, { "epoch": 1.57036594275088, "grad_norm": 295.1751403808594, "learning_rate": 2.3242911555241484e-07, "loss": 17.2188, "step": 23645 }, { "epoch": 1.5704323570432357, "grad_norm": 224.4332733154297, "learning_rate": 2.3236018305223049e-07, "loss": 15.2344, "step": 23646 }, { "epoch": 1.5704987713355916, "grad_norm": 157.46482849121094, "learning_rate": 2.322912594316605e-07, "loss": 17.2031, "step": 23647 }, { "epoch": 1.570565185627947, "grad_norm": 2725.863037109375, "learning_rate": 2.322223446915026e-07, "loss": 13.2969, "step": 23648 }, { "epoch": 1.5706315999203029, "grad_norm": 168.53453063964844, "learning_rate": 2.321534388325539e-07, "loss": 13.0, "step": 23649 }, { "epoch": 1.5706980142126585, "grad_norm": 354.400390625, "learning_rate": 2.3208454185561144e-07, "loss": 18.375, "step": 23650 }, { "epoch": 1.5707644285050142, "grad_norm": 186.98948669433594, "learning_rate": 2.3201565376147226e-07, "loss": 17.0938, "step": 23651 }, { "epoch": 1.57083084279737, "grad_norm": 248.49278259277344, "learning_rate": 2.3194677455093337e-07, "loss": 13.9375, "step": 23652 }, { "epoch": 1.5708972570897257, "grad_norm": 203.53713989257812, "learning_rate": 2.318779042247907e-07, "loss": 13.9688, "step": 23653 }, { "epoch": 1.5709636713820814, "grad_norm": 160.9849395751953, "learning_rate": 2.3180904278384217e-07, "loss": 12.3672, "step": 23654 }, { "epoch": 1.5710300856744372, "grad_norm": 771.0672607421875, "learning_rate": 2.3174019022888335e-07, "loss": 18.6875, "step": 23655 }, { "epoch": 1.571096499966793, "grad_norm": 252.2324676513672, "learning_rate": 2.3167134656071098e-07, "loss": 15.6562, "step": 23656 }, { "epoch": 1.5711629142591486, "grad_norm": 215.38522338867188, "learning_rate": 2.3160251178012146e-07, "loss": 17.4531, "step": 23657 }, { "epoch": 1.5712293285515044, "grad_norm": 155.53233337402344, "learning_rate": 2.315336858879109e-07, "loss": 19.0156, "step": 23658 }, { "epoch": 1.5712957428438599, "grad_norm": 245.0346221923828, "learning_rate": 2.3146486888487572e-07, "loss": 16.0625, "step": 23659 }, { "epoch": 1.5713621571362157, "grad_norm": 441.89007568359375, "learning_rate": 2.3139606077181184e-07, "loss": 20.8438, "step": 23660 }, { "epoch": 1.5714285714285714, "grad_norm": 157.33322143554688, "learning_rate": 2.3132726154951475e-07, "loss": 13.3594, "step": 23661 }, { "epoch": 1.571494985720927, "grad_norm": 154.90296936035156, "learning_rate": 2.3125847121878118e-07, "loss": 20.8594, "step": 23662 }, { "epoch": 1.571561400013283, "grad_norm": 304.84234619140625, "learning_rate": 2.3118968978040598e-07, "loss": 14.4453, "step": 23663 }, { "epoch": 1.5716278143056386, "grad_norm": 295.9462585449219, "learning_rate": 2.3112091723518512e-07, "loss": 22.9062, "step": 23664 }, { "epoch": 1.5716942285979942, "grad_norm": 118.85042572021484, "learning_rate": 2.3105215358391418e-07, "loss": 12.8594, "step": 23665 }, { "epoch": 1.5717606428903501, "grad_norm": 514.67578125, "learning_rate": 2.309833988273888e-07, "loss": 16.5625, "step": 23666 }, { "epoch": 1.5718270571827058, "grad_norm": 187.89370727539062, "learning_rate": 2.3091465296640356e-07, "loss": 17.125, "step": 23667 }, { "epoch": 1.5718934714750614, "grad_norm": 303.1304016113281, "learning_rate": 2.308459160017545e-07, "loss": 22.8281, "step": 23668 }, { "epoch": 1.5719598857674173, "grad_norm": 195.10752868652344, "learning_rate": 2.3077718793423628e-07, "loss": 14.6406, "step": 23669 }, { "epoch": 1.5720263000597727, "grad_norm": 161.2324676513672, "learning_rate": 2.3070846876464389e-07, "loss": 15.6719, "step": 23670 }, { "epoch": 1.5720927143521286, "grad_norm": 275.88763427734375, "learning_rate": 2.3063975849377227e-07, "loss": 12.6562, "step": 23671 }, { "epoch": 1.5721591286444843, "grad_norm": 114.73561096191406, "learning_rate": 2.3057105712241643e-07, "loss": 13.7188, "step": 23672 }, { "epoch": 1.57222554293684, "grad_norm": 125.25920867919922, "learning_rate": 2.3050236465137075e-07, "loss": 11.4375, "step": 23673 }, { "epoch": 1.5722919572291958, "grad_norm": 198.23602294921875, "learning_rate": 2.3043368108143046e-07, "loss": 19.4844, "step": 23674 }, { "epoch": 1.5723583715215514, "grad_norm": 204.585205078125, "learning_rate": 2.3036500641338885e-07, "loss": 13.4844, "step": 23675 }, { "epoch": 1.572424785813907, "grad_norm": 196.04957580566406, "learning_rate": 2.302963406480417e-07, "loss": 11.5625, "step": 23676 }, { "epoch": 1.572491200106263, "grad_norm": 1070.9571533203125, "learning_rate": 2.302276837861824e-07, "loss": 17.2969, "step": 23677 }, { "epoch": 1.5725576143986186, "grad_norm": 164.06881713867188, "learning_rate": 2.3015903582860507e-07, "loss": 12.7656, "step": 23678 }, { "epoch": 1.5726240286909743, "grad_norm": 528.3779296875, "learning_rate": 2.3009039677610466e-07, "loss": 16.2969, "step": 23679 }, { "epoch": 1.5726904429833302, "grad_norm": 251.1730499267578, "learning_rate": 2.3002176662947425e-07, "loss": 12.0312, "step": 23680 }, { "epoch": 1.5727568572756856, "grad_norm": 171.2615203857422, "learning_rate": 2.2995314538950817e-07, "loss": 12.9062, "step": 23681 }, { "epoch": 1.5728232715680415, "grad_norm": 776.3107299804688, "learning_rate": 2.2988453305699994e-07, "loss": 24.7344, "step": 23682 }, { "epoch": 1.5728896858603971, "grad_norm": 299.3131103515625, "learning_rate": 2.2981592963274344e-07, "loss": 18.9219, "step": 23683 }, { "epoch": 1.5729561001527528, "grad_norm": 458.8689880371094, "learning_rate": 2.297473351175322e-07, "loss": 16.3906, "step": 23684 }, { "epoch": 1.5730225144451087, "grad_norm": 117.1265869140625, "learning_rate": 2.296787495121598e-07, "loss": 12.7031, "step": 23685 }, { "epoch": 1.5730889287374643, "grad_norm": 178.5929412841797, "learning_rate": 2.296101728174189e-07, "loss": 16.3125, "step": 23686 }, { "epoch": 1.57315534302982, "grad_norm": 273.9211120605469, "learning_rate": 2.295416050341039e-07, "loss": 12.4844, "step": 23687 }, { "epoch": 1.5732217573221758, "grad_norm": 754.2321166992188, "learning_rate": 2.29473046163007e-07, "loss": 26.2656, "step": 23688 }, { "epoch": 1.5732881716145315, "grad_norm": 152.3338623046875, "learning_rate": 2.2940449620492163e-07, "loss": 12.9531, "step": 23689 }, { "epoch": 1.5733545859068871, "grad_norm": 661.2794799804688, "learning_rate": 2.2933595516064062e-07, "loss": 10.6094, "step": 23690 }, { "epoch": 1.573421000199243, "grad_norm": 238.2595977783203, "learning_rate": 2.2926742303095726e-07, "loss": 11.6719, "step": 23691 }, { "epoch": 1.5734874144915985, "grad_norm": 286.984619140625, "learning_rate": 2.2919889981666329e-07, "loss": 16.2344, "step": 23692 }, { "epoch": 1.5735538287839543, "grad_norm": 270.67901611328125, "learning_rate": 2.2913038551855268e-07, "loss": 15.1094, "step": 23693 }, { "epoch": 1.57362024307631, "grad_norm": 238.3932342529297, "learning_rate": 2.2906188013741678e-07, "loss": 14.5469, "step": 23694 }, { "epoch": 1.5736866573686656, "grad_norm": 321.80804443359375, "learning_rate": 2.2899338367404863e-07, "loss": 19.8281, "step": 23695 }, { "epoch": 1.5737530716610215, "grad_norm": 246.79434204101562, "learning_rate": 2.2892489612924038e-07, "loss": 19.6406, "step": 23696 }, { "epoch": 1.5738194859533772, "grad_norm": 141.42616271972656, "learning_rate": 2.2885641750378436e-07, "loss": 14.1094, "step": 23697 }, { "epoch": 1.5738859002457328, "grad_norm": 86.55752563476562, "learning_rate": 2.2878794779847267e-07, "loss": 12.5859, "step": 23698 }, { "epoch": 1.5739523145380887, "grad_norm": 330.302490234375, "learning_rate": 2.2871948701409748e-07, "loss": 28.8438, "step": 23699 }, { "epoch": 1.5740187288304444, "grad_norm": 320.292724609375, "learning_rate": 2.286510351514499e-07, "loss": 20.5625, "step": 23700 }, { "epoch": 1.5740851431228, "grad_norm": 217.29278564453125, "learning_rate": 2.2858259221132303e-07, "loss": 15.8594, "step": 23701 }, { "epoch": 1.5741515574151559, "grad_norm": 152.15260314941406, "learning_rate": 2.2851415819450759e-07, "loss": 17.1016, "step": 23702 }, { "epoch": 1.5742179717075113, "grad_norm": 180.0989227294922, "learning_rate": 2.284457331017956e-07, "loss": 16.0781, "step": 23703 }, { "epoch": 1.5742843859998672, "grad_norm": 293.240234375, "learning_rate": 2.2837731693397833e-07, "loss": 12.3359, "step": 23704 }, { "epoch": 1.5743508002922229, "grad_norm": 224.01930236816406, "learning_rate": 2.2830890969184735e-07, "loss": 29.1562, "step": 23705 }, { "epoch": 1.5744172145845785, "grad_norm": 170.9380645751953, "learning_rate": 2.2824051137619395e-07, "loss": 22.0625, "step": 23706 }, { "epoch": 1.5744836288769344, "grad_norm": 222.50523376464844, "learning_rate": 2.2817212198780956e-07, "loss": 17.0469, "step": 23707 }, { "epoch": 1.57455004316929, "grad_norm": 1101.66552734375, "learning_rate": 2.281037415274848e-07, "loss": 31.9531, "step": 23708 }, { "epoch": 1.5746164574616457, "grad_norm": 235.59417724609375, "learning_rate": 2.280353699960107e-07, "loss": 13.6094, "step": 23709 }, { "epoch": 1.5746828717540016, "grad_norm": 233.4458465576172, "learning_rate": 2.279670073941784e-07, "loss": 13.0625, "step": 23710 }, { "epoch": 1.5747492860463572, "grad_norm": 284.29541015625, "learning_rate": 2.278986537227785e-07, "loss": 12.4688, "step": 23711 }, { "epoch": 1.5748157003387129, "grad_norm": 751.8854370117188, "learning_rate": 2.2783030898260192e-07, "loss": 13.8906, "step": 23712 }, { "epoch": 1.5748821146310688, "grad_norm": 345.48992919921875, "learning_rate": 2.277619731744389e-07, "loss": 13.5625, "step": 23713 }, { "epoch": 1.5749485289234242, "grad_norm": 96.21891021728516, "learning_rate": 2.2769364629908017e-07, "loss": 13.8906, "step": 23714 }, { "epoch": 1.57501494321578, "grad_norm": 349.73162841796875, "learning_rate": 2.2762532835731595e-07, "loss": 21.5625, "step": 23715 }, { "epoch": 1.5750813575081357, "grad_norm": 201.04722595214844, "learning_rate": 2.2755701934993689e-07, "loss": 15.8438, "step": 23716 }, { "epoch": 1.5751477718004914, "grad_norm": 145.64566040039062, "learning_rate": 2.2748871927773217e-07, "loss": 15.4688, "step": 23717 }, { "epoch": 1.5752141860928472, "grad_norm": 196.8150634765625, "learning_rate": 2.274204281414931e-07, "loss": 13.4531, "step": 23718 }, { "epoch": 1.575280600385203, "grad_norm": 220.6435089111328, "learning_rate": 2.273521459420087e-07, "loss": 15.7656, "step": 23719 }, { "epoch": 1.5753470146775586, "grad_norm": 1046.4771728515625, "learning_rate": 2.272838726800692e-07, "loss": 16.8125, "step": 23720 }, { "epoch": 1.5754134289699144, "grad_norm": 123.86772155761719, "learning_rate": 2.2721560835646426e-07, "loss": 15.6719, "step": 23721 }, { "epoch": 1.57547984326227, "grad_norm": 89.2955551147461, "learning_rate": 2.2714735297198362e-07, "loss": 11.25, "step": 23722 }, { "epoch": 1.5755462575546257, "grad_norm": 697.1179809570312, "learning_rate": 2.270791065274167e-07, "loss": 11.0938, "step": 23723 }, { "epoch": 1.5756126718469816, "grad_norm": 229.33497619628906, "learning_rate": 2.2701086902355315e-07, "loss": 15.3438, "step": 23724 }, { "epoch": 1.575679086139337, "grad_norm": 173.44247436523438, "learning_rate": 2.269426404611817e-07, "loss": 12.8516, "step": 23725 }, { "epoch": 1.575745500431693, "grad_norm": 139.00831604003906, "learning_rate": 2.2687442084109253e-07, "loss": 17.4844, "step": 23726 }, { "epoch": 1.5758119147240486, "grad_norm": 234.7207489013672, "learning_rate": 2.2680621016407398e-07, "loss": 18.4688, "step": 23727 }, { "epoch": 1.5758783290164042, "grad_norm": 236.07666015625, "learning_rate": 2.267380084309154e-07, "loss": 15.0469, "step": 23728 }, { "epoch": 1.57594474330876, "grad_norm": 189.02911376953125, "learning_rate": 2.266698156424056e-07, "loss": 20.9688, "step": 23729 }, { "epoch": 1.5760111576011158, "grad_norm": 160.56948852539062, "learning_rate": 2.2660163179933368e-07, "loss": 13.1094, "step": 23730 }, { "epoch": 1.5760775718934714, "grad_norm": 447.1555480957031, "learning_rate": 2.265334569024876e-07, "loss": 13.4688, "step": 23731 }, { "epoch": 1.5761439861858273, "grad_norm": 209.82858276367188, "learning_rate": 2.2646529095265709e-07, "loss": 15.5156, "step": 23732 }, { "epoch": 1.576210400478183, "grad_norm": 192.8386993408203, "learning_rate": 2.2639713395062975e-07, "loss": 16.6875, "step": 23733 }, { "epoch": 1.5762768147705386, "grad_norm": 444.7491455078125, "learning_rate": 2.2632898589719417e-07, "loss": 18.0938, "step": 23734 }, { "epoch": 1.5763432290628945, "grad_norm": 164.26016235351562, "learning_rate": 2.2626084679313885e-07, "loss": 13.1406, "step": 23735 }, { "epoch": 1.57640964335525, "grad_norm": 183.3451690673828, "learning_rate": 2.2619271663925187e-07, "loss": 14.75, "step": 23736 }, { "epoch": 1.5764760576476058, "grad_norm": 218.83555603027344, "learning_rate": 2.2612459543632145e-07, "loss": 14.4062, "step": 23737 }, { "epoch": 1.5765424719399614, "grad_norm": 207.33700561523438, "learning_rate": 2.260564831851356e-07, "loss": 14.6719, "step": 23738 }, { "epoch": 1.576608886232317, "grad_norm": 151.91444396972656, "learning_rate": 2.2598837988648157e-07, "loss": 14.9219, "step": 23739 }, { "epoch": 1.576675300524673, "grad_norm": 1219.34521484375, "learning_rate": 2.2592028554114818e-07, "loss": 17.5625, "step": 23740 }, { "epoch": 1.5767417148170286, "grad_norm": 453.8372802734375, "learning_rate": 2.2585220014992236e-07, "loss": 15.1094, "step": 23741 }, { "epoch": 1.5768081291093843, "grad_norm": 202.833984375, "learning_rate": 2.257841237135918e-07, "loss": 15.3125, "step": 23742 }, { "epoch": 1.5768745434017402, "grad_norm": 410.5686340332031, "learning_rate": 2.257160562329441e-07, "loss": 13.1875, "step": 23743 }, { "epoch": 1.5769409576940958, "grad_norm": 94.33028411865234, "learning_rate": 2.2564799770876652e-07, "loss": 11.1406, "step": 23744 }, { "epoch": 1.5770073719864515, "grad_norm": 172.62083435058594, "learning_rate": 2.2557994814184645e-07, "loss": 14.5, "step": 23745 }, { "epoch": 1.5770737862788073, "grad_norm": 327.20159912109375, "learning_rate": 2.2551190753297123e-07, "loss": 21.6094, "step": 23746 }, { "epoch": 1.5771402005711628, "grad_norm": 154.85940551757812, "learning_rate": 2.2544387588292712e-07, "loss": 12.0469, "step": 23747 }, { "epoch": 1.5772066148635187, "grad_norm": 463.37261962890625, "learning_rate": 2.2537585319250218e-07, "loss": 17.9375, "step": 23748 }, { "epoch": 1.5772730291558743, "grad_norm": 352.6995544433594, "learning_rate": 2.2530783946248244e-07, "loss": 17.1406, "step": 23749 }, { "epoch": 1.57733944344823, "grad_norm": 140.54116821289062, "learning_rate": 2.252398346936546e-07, "loss": 10.1875, "step": 23750 }, { "epoch": 1.5774058577405858, "grad_norm": 138.08106994628906, "learning_rate": 2.2517183888680625e-07, "loss": 14.8125, "step": 23751 }, { "epoch": 1.5774722720329415, "grad_norm": 269.1868591308594, "learning_rate": 2.2510385204272285e-07, "loss": 29.8438, "step": 23752 }, { "epoch": 1.5775386863252971, "grad_norm": 358.02032470703125, "learning_rate": 2.2503587416219127e-07, "loss": 21.5625, "step": 23753 }, { "epoch": 1.577605100617653, "grad_norm": 137.40699768066406, "learning_rate": 2.2496790524599796e-07, "loss": 13.6094, "step": 23754 }, { "epoch": 1.5776715149100087, "grad_norm": 124.77225494384766, "learning_rate": 2.2489994529492916e-07, "loss": 14.2188, "step": 23755 }, { "epoch": 1.5777379292023643, "grad_norm": 341.5056457519531, "learning_rate": 2.248319943097704e-07, "loss": 17.3594, "step": 23756 }, { "epoch": 1.5778043434947202, "grad_norm": 137.719970703125, "learning_rate": 2.2476405229130857e-07, "loss": 15.75, "step": 23757 }, { "epoch": 1.5778707577870756, "grad_norm": 141.93763732910156, "learning_rate": 2.2469611924032895e-07, "loss": 11.7422, "step": 23758 }, { "epoch": 1.5779371720794315, "grad_norm": 501.8313903808594, "learning_rate": 2.2462819515761754e-07, "loss": 12.8516, "step": 23759 }, { "epoch": 1.5780035863717872, "grad_norm": 173.02479553222656, "learning_rate": 2.2456028004396011e-07, "loss": 15.2188, "step": 23760 }, { "epoch": 1.5780700006641428, "grad_norm": 168.88685607910156, "learning_rate": 2.244923739001422e-07, "loss": 11.4375, "step": 23761 }, { "epoch": 1.5781364149564987, "grad_norm": 116.7074203491211, "learning_rate": 2.244244767269493e-07, "loss": 15.5781, "step": 23762 }, { "epoch": 1.5782028292488544, "grad_norm": 118.60860443115234, "learning_rate": 2.2435658852516703e-07, "loss": 17.0781, "step": 23763 }, { "epoch": 1.57826924354121, "grad_norm": 187.01229858398438, "learning_rate": 2.2428870929558007e-07, "loss": 14.3438, "step": 23764 }, { "epoch": 1.5783356578335659, "grad_norm": 336.375244140625, "learning_rate": 2.2422083903897448e-07, "loss": 21.0, "step": 23765 }, { "epoch": 1.5784020721259215, "grad_norm": 285.04266357421875, "learning_rate": 2.2415297775613462e-07, "loss": 20.625, "step": 23766 }, { "epoch": 1.5784684864182772, "grad_norm": 81.0229263305664, "learning_rate": 2.2408512544784587e-07, "loss": 15.0469, "step": 23767 }, { "epoch": 1.578534900710633, "grad_norm": 123.47276306152344, "learning_rate": 2.240172821148928e-07, "loss": 9.8047, "step": 23768 }, { "epoch": 1.5786013150029885, "grad_norm": 256.3520202636719, "learning_rate": 2.2394944775806034e-07, "loss": 15.125, "step": 23769 }, { "epoch": 1.5786677292953444, "grad_norm": 583.5184936523438, "learning_rate": 2.2388162237813335e-07, "loss": 10.7578, "step": 23770 }, { "epoch": 1.5787341435877, "grad_norm": 119.84977722167969, "learning_rate": 2.2381380597589639e-07, "loss": 14.7656, "step": 23771 }, { "epoch": 1.5788005578800557, "grad_norm": 145.81727600097656, "learning_rate": 2.237459985521334e-07, "loss": 24.7812, "step": 23772 }, { "epoch": 1.5788669721724116, "grad_norm": 258.3190002441406, "learning_rate": 2.2367820010762918e-07, "loss": 20.6562, "step": 23773 }, { "epoch": 1.5789333864647672, "grad_norm": 243.66770935058594, "learning_rate": 2.2361041064316787e-07, "loss": 15.3594, "step": 23774 }, { "epoch": 1.5789998007571229, "grad_norm": 335.8887023925781, "learning_rate": 2.2354263015953356e-07, "loss": 17.4531, "step": 23775 }, { "epoch": 1.5790662150494787, "grad_norm": 134.63050842285156, "learning_rate": 2.2347485865751049e-07, "loss": 14.7812, "step": 23776 }, { "epoch": 1.5791326293418344, "grad_norm": 130.3378448486328, "learning_rate": 2.2340709613788266e-07, "loss": 14.2344, "step": 23777 }, { "epoch": 1.57919904363419, "grad_norm": 160.1256103515625, "learning_rate": 2.233393426014333e-07, "loss": 16.7812, "step": 23778 }, { "epoch": 1.579265457926546, "grad_norm": 201.53579711914062, "learning_rate": 2.2327159804894703e-07, "loss": 15.4688, "step": 23779 }, { "epoch": 1.5793318722189014, "grad_norm": 205.10415649414062, "learning_rate": 2.2320386248120682e-07, "loss": 17.5625, "step": 23780 }, { "epoch": 1.5793982865112572, "grad_norm": 393.8704528808594, "learning_rate": 2.231361358989964e-07, "loss": 24.125, "step": 23781 }, { "epoch": 1.579464700803613, "grad_norm": 300.3222351074219, "learning_rate": 2.2306841830309907e-07, "loss": 21.4219, "step": 23782 }, { "epoch": 1.5795311150959686, "grad_norm": 415.4768981933594, "learning_rate": 2.230007096942984e-07, "loss": 20.0156, "step": 23783 }, { "epoch": 1.5795975293883244, "grad_norm": 156.8810272216797, "learning_rate": 2.229330100733774e-07, "loss": 14.1094, "step": 23784 }, { "epoch": 1.57966394368068, "grad_norm": 440.33502197265625, "learning_rate": 2.2286531944111964e-07, "loss": 15.5, "step": 23785 }, { "epoch": 1.5797303579730357, "grad_norm": 243.8293914794922, "learning_rate": 2.2279763779830707e-07, "loss": 16.5938, "step": 23786 }, { "epoch": 1.5797967722653916, "grad_norm": 186.41783142089844, "learning_rate": 2.2272996514572362e-07, "loss": 15.7031, "step": 23787 }, { "epoch": 1.5798631865577473, "grad_norm": 168.94419860839844, "learning_rate": 2.2266230148415198e-07, "loss": 13.3594, "step": 23788 }, { "epoch": 1.579929600850103, "grad_norm": 309.12896728515625, "learning_rate": 2.22594646814374e-07, "loss": 19.0312, "step": 23789 }, { "epoch": 1.5799960151424588, "grad_norm": 307.40594482421875, "learning_rate": 2.225270011371735e-07, "loss": 14.2656, "step": 23790 }, { "epoch": 1.5800624294348142, "grad_norm": 245.08395385742188, "learning_rate": 2.2245936445333202e-07, "loss": 16.3906, "step": 23791 }, { "epoch": 1.58012884372717, "grad_norm": 1105.9339599609375, "learning_rate": 2.2239173676363233e-07, "loss": 13.0312, "step": 23792 }, { "epoch": 1.5801952580195258, "grad_norm": 140.1279754638672, "learning_rate": 2.2232411806885653e-07, "loss": 13.5156, "step": 23793 }, { "epoch": 1.5802616723118814, "grad_norm": 210.84304809570312, "learning_rate": 2.2225650836978726e-07, "loss": 15.875, "step": 23794 }, { "epoch": 1.5803280866042373, "grad_norm": 213.23995971679688, "learning_rate": 2.2218890766720556e-07, "loss": 18.375, "step": 23795 }, { "epoch": 1.580394500896593, "grad_norm": 266.6328430175781, "learning_rate": 2.2212131596189476e-07, "loss": 19.0781, "step": 23796 }, { "epoch": 1.5804609151889486, "grad_norm": 118.22064971923828, "learning_rate": 2.2205373325463562e-07, "loss": 17.7031, "step": 23797 }, { "epoch": 1.5805273294813045, "grad_norm": 90.10305786132812, "learning_rate": 2.2198615954621037e-07, "loss": 12.6562, "step": 23798 }, { "epoch": 1.5805937437736601, "grad_norm": 1572.8236083984375, "learning_rate": 2.2191859483740062e-07, "loss": 12.7031, "step": 23799 }, { "epoch": 1.5806601580660158, "grad_norm": 215.11383056640625, "learning_rate": 2.2185103912898785e-07, "loss": 15.3438, "step": 23800 }, { "epoch": 1.5807265723583717, "grad_norm": 184.69244384765625, "learning_rate": 2.2178349242175354e-07, "loss": 17.5469, "step": 23801 }, { "epoch": 1.580792986650727, "grad_norm": 1056.331298828125, "learning_rate": 2.217159547164794e-07, "loss": 19.875, "step": 23802 }, { "epoch": 1.580859400943083, "grad_norm": 150.90773010253906, "learning_rate": 2.2164842601394561e-07, "loss": 14.7578, "step": 23803 }, { "epoch": 1.5809258152354386, "grad_norm": 209.74435424804688, "learning_rate": 2.2158090631493474e-07, "loss": 13.8594, "step": 23804 }, { "epoch": 1.5809922295277943, "grad_norm": 548.0477905273438, "learning_rate": 2.2151339562022676e-07, "loss": 19.0156, "step": 23805 }, { "epoch": 1.5810586438201502, "grad_norm": 176.95887756347656, "learning_rate": 2.2144589393060288e-07, "loss": 13.875, "step": 23806 }, { "epoch": 1.5811250581125058, "grad_norm": 253.85006713867188, "learning_rate": 2.2137840124684391e-07, "loss": 13.6016, "step": 23807 }, { "epoch": 1.5811914724048615, "grad_norm": 297.2096252441406, "learning_rate": 2.2131091756973064e-07, "loss": 17.8438, "step": 23808 }, { "epoch": 1.5812578866972173, "grad_norm": 123.70501708984375, "learning_rate": 2.2124344290004372e-07, "loss": 15.25, "step": 23809 }, { "epoch": 1.581324300989573, "grad_norm": 231.26150512695312, "learning_rate": 2.2117597723856386e-07, "loss": 15.4688, "step": 23810 }, { "epoch": 1.5813907152819286, "grad_norm": 200.868896484375, "learning_rate": 2.2110852058607054e-07, "loss": 13.7344, "step": 23811 }, { "epoch": 1.5814571295742845, "grad_norm": 195.28172302246094, "learning_rate": 2.210410729433454e-07, "loss": 14.4688, "step": 23812 }, { "epoch": 1.58152354386664, "grad_norm": 181.59608459472656, "learning_rate": 2.2097363431116766e-07, "loss": 15.8984, "step": 23813 }, { "epoch": 1.5815899581589958, "grad_norm": 137.5196533203125, "learning_rate": 2.2090620469031762e-07, "loss": 13.9062, "step": 23814 }, { "epoch": 1.5816563724513515, "grad_norm": 268.6241149902344, "learning_rate": 2.208387840815754e-07, "loss": 17.3125, "step": 23815 }, { "epoch": 1.5817227867437071, "grad_norm": 389.28399658203125, "learning_rate": 2.207713724857212e-07, "loss": 16.9531, "step": 23816 }, { "epoch": 1.581789201036063, "grad_norm": 223.77882385253906, "learning_rate": 2.2070396990353379e-07, "loss": 17.6094, "step": 23817 }, { "epoch": 1.5818556153284187, "grad_norm": 621.8208618164062, "learning_rate": 2.2063657633579403e-07, "loss": 17.8125, "step": 23818 }, { "epoch": 1.5819220296207743, "grad_norm": 632.2906494140625, "learning_rate": 2.205691917832807e-07, "loss": 17.9844, "step": 23819 }, { "epoch": 1.5819884439131302, "grad_norm": 148.4225311279297, "learning_rate": 2.2050181624677357e-07, "loss": 13.0469, "step": 23820 }, { "epoch": 1.5820548582054859, "grad_norm": 102.40782928466797, "learning_rate": 2.2043444972705183e-07, "loss": 14.8906, "step": 23821 }, { "epoch": 1.5821212724978415, "grad_norm": 128.5975341796875, "learning_rate": 2.2036709222489492e-07, "loss": 12.0938, "step": 23822 }, { "epoch": 1.5821876867901974, "grad_norm": 157.7295684814453, "learning_rate": 2.2029974374108196e-07, "loss": 9.9141, "step": 23823 }, { "epoch": 1.5822541010825528, "grad_norm": 890.9656372070312, "learning_rate": 2.202324042763919e-07, "loss": 14.2969, "step": 23824 }, { "epoch": 1.5823205153749087, "grad_norm": 144.41912841796875, "learning_rate": 2.2016507383160375e-07, "loss": 11.4062, "step": 23825 }, { "epoch": 1.5823869296672644, "grad_norm": 627.638916015625, "learning_rate": 2.200977524074964e-07, "loss": 19.1406, "step": 23826 }, { "epoch": 1.58245334395962, "grad_norm": 219.4954376220703, "learning_rate": 2.200304400048487e-07, "loss": 15.125, "step": 23827 }, { "epoch": 1.5825197582519759, "grad_norm": 211.2581329345703, "learning_rate": 2.1996313662443866e-07, "loss": 18.8125, "step": 23828 }, { "epoch": 1.5825861725443315, "grad_norm": 269.92205810546875, "learning_rate": 2.1989584226704583e-07, "loss": 13.7344, "step": 23829 }, { "epoch": 1.5826525868366872, "grad_norm": 688.8389892578125, "learning_rate": 2.198285569334477e-07, "loss": 13.75, "step": 23830 }, { "epoch": 1.582719001129043, "grad_norm": 159.98672485351562, "learning_rate": 2.1976128062442302e-07, "loss": 13.4531, "step": 23831 }, { "epoch": 1.5827854154213987, "grad_norm": 250.74310302734375, "learning_rate": 2.196940133407499e-07, "loss": 21.25, "step": 23832 }, { "epoch": 1.5828518297137544, "grad_norm": 202.65618896484375, "learning_rate": 2.196267550832065e-07, "loss": 15.0625, "step": 23833 }, { "epoch": 1.5829182440061103, "grad_norm": 111.68099975585938, "learning_rate": 2.1955950585257076e-07, "loss": 14.8438, "step": 23834 }, { "epoch": 1.5829846582984657, "grad_norm": 228.0337677001953, "learning_rate": 2.1949226564962099e-07, "loss": 12.8125, "step": 23835 }, { "epoch": 1.5830510725908216, "grad_norm": 158.37940979003906, "learning_rate": 2.1942503447513428e-07, "loss": 14.5938, "step": 23836 }, { "epoch": 1.5831174868831772, "grad_norm": 441.529541015625, "learning_rate": 2.193578123298887e-07, "loss": 15.8594, "step": 23837 }, { "epoch": 1.5831839011755329, "grad_norm": 231.84805297851562, "learning_rate": 2.1929059921466176e-07, "loss": 21.125, "step": 23838 }, { "epoch": 1.5832503154678887, "grad_norm": 272.627685546875, "learning_rate": 2.1922339513023103e-07, "loss": 31.5625, "step": 23839 }, { "epoch": 1.5833167297602444, "grad_norm": 125.35543060302734, "learning_rate": 2.1915620007737378e-07, "loss": 13.5156, "step": 23840 }, { "epoch": 1.5833831440526, "grad_norm": 179.22958374023438, "learning_rate": 2.1908901405686776e-07, "loss": 18.7031, "step": 23841 }, { "epoch": 1.583449558344956, "grad_norm": 593.6978759765625, "learning_rate": 2.1902183706948908e-07, "loss": 14.1875, "step": 23842 }, { "epoch": 1.5835159726373116, "grad_norm": 202.9150848388672, "learning_rate": 2.1895466911601602e-07, "loss": 16.5625, "step": 23843 }, { "epoch": 1.5835823869296672, "grad_norm": 121.845703125, "learning_rate": 2.1888751019722462e-07, "loss": 15.3203, "step": 23844 }, { "epoch": 1.5836488012220231, "grad_norm": 263.47967529296875, "learning_rate": 2.1882036031389218e-07, "loss": 15.5781, "step": 23845 }, { "epoch": 1.5837152155143785, "grad_norm": 141.48660278320312, "learning_rate": 2.1875321946679536e-07, "loss": 13.7188, "step": 23846 }, { "epoch": 1.5837816298067344, "grad_norm": 189.99656677246094, "learning_rate": 2.186860876567107e-07, "loss": 17.375, "step": 23847 }, { "epoch": 1.58384804409909, "grad_norm": 173.91195678710938, "learning_rate": 2.18618964884415e-07, "loss": 13.1406, "step": 23848 }, { "epoch": 1.5839144583914457, "grad_norm": 226.40769958496094, "learning_rate": 2.1855185115068475e-07, "loss": 30.9688, "step": 23849 }, { "epoch": 1.5839808726838016, "grad_norm": 96.8872299194336, "learning_rate": 2.1848474645629545e-07, "loss": 12.9219, "step": 23850 }, { "epoch": 1.5840472869761573, "grad_norm": 145.87628173828125, "learning_rate": 2.1841765080202457e-07, "loss": 21.1562, "step": 23851 }, { "epoch": 1.584113701268513, "grad_norm": 194.37518310546875, "learning_rate": 2.183505641886474e-07, "loss": 14.9844, "step": 23852 }, { "epoch": 1.5841801155608688, "grad_norm": 253.88262939453125, "learning_rate": 2.1828348661694006e-07, "loss": 14.4688, "step": 23853 }, { "epoch": 1.5842465298532244, "grad_norm": 92.70547485351562, "learning_rate": 2.1821641808767866e-07, "loss": 11.9219, "step": 23854 }, { "epoch": 1.58431294414558, "grad_norm": 175.6846923828125, "learning_rate": 2.181493586016392e-07, "loss": 19.7031, "step": 23855 }, { "epoch": 1.584379358437936, "grad_norm": 170.4070281982422, "learning_rate": 2.180823081595966e-07, "loss": 14.7656, "step": 23856 }, { "epoch": 1.5844457727302914, "grad_norm": 184.7571258544922, "learning_rate": 2.180152667623275e-07, "loss": 20.4688, "step": 23857 }, { "epoch": 1.5845121870226473, "grad_norm": 653.7254028320312, "learning_rate": 2.1794823441060662e-07, "loss": 14.0938, "step": 23858 }, { "epoch": 1.584578601315003, "grad_norm": 120.10645294189453, "learning_rate": 2.1788121110520953e-07, "loss": 9.4219, "step": 23859 }, { "epoch": 1.5846450156073586, "grad_norm": 379.2919006347656, "learning_rate": 2.178141968469117e-07, "loss": 21.7188, "step": 23860 }, { "epoch": 1.5847114298997145, "grad_norm": 147.79066467285156, "learning_rate": 2.1774719163648814e-07, "loss": 15.6562, "step": 23861 }, { "epoch": 1.5847778441920701, "grad_norm": 193.48272705078125, "learning_rate": 2.1768019547471394e-07, "loss": 14.7031, "step": 23862 }, { "epoch": 1.5848442584844258, "grad_norm": 149.35516357421875, "learning_rate": 2.1761320836236429e-07, "loss": 16.9062, "step": 23863 }, { "epoch": 1.5849106727767817, "grad_norm": 169.45672607421875, "learning_rate": 2.175462303002138e-07, "loss": 17.0625, "step": 23864 }, { "epoch": 1.5849770870691373, "grad_norm": 226.1904754638672, "learning_rate": 2.1747926128903727e-07, "loss": 19.7656, "step": 23865 }, { "epoch": 1.585043501361493, "grad_norm": 300.4510192871094, "learning_rate": 2.1741230132960974e-07, "loss": 21.8594, "step": 23866 }, { "epoch": 1.5851099156538488, "grad_norm": 303.58636474609375, "learning_rate": 2.1734535042270495e-07, "loss": 20.6406, "step": 23867 }, { "epoch": 1.5851763299462043, "grad_norm": 146.11660766601562, "learning_rate": 2.172784085690983e-07, "loss": 13.125, "step": 23868 }, { "epoch": 1.5852427442385602, "grad_norm": 225.6160430908203, "learning_rate": 2.1721147576956345e-07, "loss": 15.5938, "step": 23869 }, { "epoch": 1.5853091585309158, "grad_norm": 435.4757080078125, "learning_rate": 2.1714455202487492e-07, "loss": 21.2344, "step": 23870 }, { "epoch": 1.5853755728232715, "grad_norm": 501.28216552734375, "learning_rate": 2.1707763733580675e-07, "loss": 11.6719, "step": 23871 }, { "epoch": 1.5854419871156273, "grad_norm": 293.68682861328125, "learning_rate": 2.17010731703133e-07, "loss": 14.6875, "step": 23872 }, { "epoch": 1.585508401407983, "grad_norm": 126.66259765625, "learning_rate": 2.1694383512762759e-07, "loss": 11.4375, "step": 23873 }, { "epoch": 1.5855748157003386, "grad_norm": 248.1144256591797, "learning_rate": 2.168769476100647e-07, "loss": 17.6719, "step": 23874 }, { "epoch": 1.5856412299926945, "grad_norm": 125.26079559326172, "learning_rate": 2.16810069151217e-07, "loss": 19.125, "step": 23875 }, { "epoch": 1.5857076442850502, "grad_norm": 203.46798706054688, "learning_rate": 2.167431997518595e-07, "loss": 24.9844, "step": 23876 }, { "epoch": 1.5857740585774058, "grad_norm": 742.939208984375, "learning_rate": 2.1667633941276475e-07, "loss": 23.7188, "step": 23877 }, { "epoch": 1.5858404728697617, "grad_norm": 168.59095764160156, "learning_rate": 2.1660948813470638e-07, "loss": 19.2188, "step": 23878 }, { "epoch": 1.5859068871621171, "grad_norm": 711.2111206054688, "learning_rate": 2.1654264591845772e-07, "loss": 15.3594, "step": 23879 }, { "epoch": 1.585973301454473, "grad_norm": 362.1344909667969, "learning_rate": 2.164758127647922e-07, "loss": 14.1562, "step": 23880 }, { "epoch": 1.5860397157468287, "grad_norm": 226.6272430419922, "learning_rate": 2.164089886744821e-07, "loss": 13.2031, "step": 23881 }, { "epoch": 1.5861061300391843, "grad_norm": 634.0407104492188, "learning_rate": 2.1634217364830164e-07, "loss": 14.0469, "step": 23882 }, { "epoch": 1.5861725443315402, "grad_norm": 121.78284454345703, "learning_rate": 2.162753676870227e-07, "loss": 11.9688, "step": 23883 }, { "epoch": 1.5862389586238959, "grad_norm": 263.8485107421875, "learning_rate": 2.162085707914184e-07, "loss": 10.5547, "step": 23884 }, { "epoch": 1.5863053729162515, "grad_norm": 614.636474609375, "learning_rate": 2.1614178296226138e-07, "loss": 11.0312, "step": 23885 }, { "epoch": 1.5863717872086074, "grad_norm": 155.23257446289062, "learning_rate": 2.1607500420032421e-07, "loss": 14.9375, "step": 23886 }, { "epoch": 1.586438201500963, "grad_norm": 209.88832092285156, "learning_rate": 2.1600823450637928e-07, "loss": 14.6406, "step": 23887 }, { "epoch": 1.5865046157933187, "grad_norm": 145.0170135498047, "learning_rate": 2.1594147388119943e-07, "loss": 11.1484, "step": 23888 }, { "epoch": 1.5865710300856746, "grad_norm": 301.9739074707031, "learning_rate": 2.158747223255558e-07, "loss": 12.2344, "step": 23889 }, { "epoch": 1.58663744437803, "grad_norm": 110.17889404296875, "learning_rate": 2.1580797984022193e-07, "loss": 12.1328, "step": 23890 }, { "epoch": 1.5867038586703859, "grad_norm": 205.28842163085938, "learning_rate": 2.157412464259688e-07, "loss": 13.5938, "step": 23891 }, { "epoch": 1.5867702729627415, "grad_norm": 158.39974975585938, "learning_rate": 2.1567452208356874e-07, "loss": 13.9141, "step": 23892 }, { "epoch": 1.5868366872550972, "grad_norm": 203.87107849121094, "learning_rate": 2.1560780681379354e-07, "loss": 17.6094, "step": 23893 }, { "epoch": 1.586903101547453, "grad_norm": 236.9725799560547, "learning_rate": 2.155411006174148e-07, "loss": 17.0938, "step": 23894 }, { "epoch": 1.5869695158398087, "grad_norm": 169.92572021484375, "learning_rate": 2.154744034952044e-07, "loss": 14.1875, "step": 23895 }, { "epoch": 1.5870359301321644, "grad_norm": 142.6801300048828, "learning_rate": 2.1540771544793402e-07, "loss": 14.75, "step": 23896 }, { "epoch": 1.5871023444245203, "grad_norm": 481.8932800292969, "learning_rate": 2.1534103647637437e-07, "loss": 18.7188, "step": 23897 }, { "epoch": 1.587168758716876, "grad_norm": 126.51422882080078, "learning_rate": 2.1527436658129693e-07, "loss": 19.8125, "step": 23898 }, { "epoch": 1.5872351730092316, "grad_norm": 234.0388641357422, "learning_rate": 2.152077057634737e-07, "loss": 23.1094, "step": 23899 }, { "epoch": 1.5873015873015874, "grad_norm": 170.4435577392578, "learning_rate": 2.1514105402367488e-07, "loss": 20.6875, "step": 23900 }, { "epoch": 1.5873680015939429, "grad_norm": 206.2047882080078, "learning_rate": 2.150744113626718e-07, "loss": 15.8438, "step": 23901 }, { "epoch": 1.5874344158862987, "grad_norm": 175.95596313476562, "learning_rate": 2.1500777778123524e-07, "loss": 14.6406, "step": 23902 }, { "epoch": 1.5875008301786544, "grad_norm": 401.98992919921875, "learning_rate": 2.1494115328013618e-07, "loss": 22.8281, "step": 23903 }, { "epoch": 1.58756724447101, "grad_norm": 237.71914672851562, "learning_rate": 2.1487453786014508e-07, "loss": 17.3125, "step": 23904 }, { "epoch": 1.587633658763366, "grad_norm": 186.14276123046875, "learning_rate": 2.148079315220328e-07, "loss": 23.1719, "step": 23905 }, { "epoch": 1.5877000730557216, "grad_norm": 172.3715362548828, "learning_rate": 2.1474133426656915e-07, "loss": 15.8906, "step": 23906 }, { "epoch": 1.5877664873480772, "grad_norm": 353.9029846191406, "learning_rate": 2.1467474609452553e-07, "loss": 11.4375, "step": 23907 }, { "epoch": 1.5878329016404331, "grad_norm": 137.80877685546875, "learning_rate": 2.146081670066713e-07, "loss": 18.3438, "step": 23908 }, { "epoch": 1.5878993159327888, "grad_norm": 250.9154052734375, "learning_rate": 2.1454159700377682e-07, "loss": 23.8438, "step": 23909 }, { "epoch": 1.5879657302251444, "grad_norm": 236.86268615722656, "learning_rate": 2.1447503608661234e-07, "loss": 14.875, "step": 23910 }, { "epoch": 1.5880321445175003, "grad_norm": 182.91268920898438, "learning_rate": 2.144084842559476e-07, "loss": 16.1094, "step": 23911 }, { "epoch": 1.5880985588098557, "grad_norm": 532.01904296875, "learning_rate": 2.1434194151255258e-07, "loss": 20.3438, "step": 23912 }, { "epoch": 1.5881649731022116, "grad_norm": 156.36129760742188, "learning_rate": 2.142754078571972e-07, "loss": 16.0625, "step": 23913 }, { "epoch": 1.5882313873945673, "grad_norm": 1309.088134765625, "learning_rate": 2.1420888329065023e-07, "loss": 15.9531, "step": 23914 }, { "epoch": 1.588297801686923, "grad_norm": 243.190185546875, "learning_rate": 2.1414236781368245e-07, "loss": 15.25, "step": 23915 }, { "epoch": 1.5883642159792788, "grad_norm": 205.55044555664062, "learning_rate": 2.1407586142706225e-07, "loss": 17.8906, "step": 23916 }, { "epoch": 1.5884306302716344, "grad_norm": 330.2947998046875, "learning_rate": 2.1400936413155924e-07, "loss": 17.7188, "step": 23917 }, { "epoch": 1.58849704456399, "grad_norm": 153.91757202148438, "learning_rate": 2.139428759279428e-07, "loss": 15.875, "step": 23918 }, { "epoch": 1.588563458856346, "grad_norm": 355.4230651855469, "learning_rate": 2.138763968169821e-07, "loss": 19.0703, "step": 23919 }, { "epoch": 1.5886298731487016, "grad_norm": 169.04161071777344, "learning_rate": 2.1380992679944542e-07, "loss": 15.3281, "step": 23920 }, { "epoch": 1.5886962874410573, "grad_norm": 237.4664764404297, "learning_rate": 2.137434658761027e-07, "loss": 16.3906, "step": 23921 }, { "epoch": 1.5887627017334132, "grad_norm": 231.29896545410156, "learning_rate": 2.1367701404772188e-07, "loss": 14.0312, "step": 23922 }, { "epoch": 1.5888291160257686, "grad_norm": 261.3777770996094, "learning_rate": 2.1361057131507198e-07, "loss": 17.7656, "step": 23923 }, { "epoch": 1.5888955303181245, "grad_norm": 221.47325134277344, "learning_rate": 2.1354413767892154e-07, "loss": 22.3906, "step": 23924 }, { "epoch": 1.5889619446104801, "grad_norm": 244.93624877929688, "learning_rate": 2.1347771314003892e-07, "loss": 12.8594, "step": 23925 }, { "epoch": 1.5890283589028358, "grad_norm": 173.9832763671875, "learning_rate": 2.1341129769919274e-07, "loss": 12.1094, "step": 23926 }, { "epoch": 1.5890947731951917, "grad_norm": 1130.14501953125, "learning_rate": 2.1334489135715127e-07, "loss": 14.75, "step": 23927 }, { "epoch": 1.5891611874875473, "grad_norm": 144.4247589111328, "learning_rate": 2.132784941146819e-07, "loss": 15.6562, "step": 23928 }, { "epoch": 1.589227601779903, "grad_norm": 186.73501586914062, "learning_rate": 2.1321210597255391e-07, "loss": 17.3125, "step": 23929 }, { "epoch": 1.5892940160722588, "grad_norm": 554.6891479492188, "learning_rate": 2.131457269315343e-07, "loss": 20.2188, "step": 23930 }, { "epoch": 1.5893604303646145, "grad_norm": 184.28208923339844, "learning_rate": 2.130793569923911e-07, "loss": 16.5156, "step": 23931 }, { "epoch": 1.5894268446569702, "grad_norm": 738.2918090820312, "learning_rate": 2.130129961558922e-07, "loss": 18.8281, "step": 23932 }, { "epoch": 1.589493258949326, "grad_norm": 161.0137939453125, "learning_rate": 2.129466444228052e-07, "loss": 15.4219, "step": 23933 }, { "epoch": 1.5895596732416815, "grad_norm": 231.52386474609375, "learning_rate": 2.1288030179389748e-07, "loss": 12.6719, "step": 23934 }, { "epoch": 1.5896260875340373, "grad_norm": 360.10845947265625, "learning_rate": 2.128139682699367e-07, "loss": 15.2031, "step": 23935 }, { "epoch": 1.589692501826393, "grad_norm": 200.17910766601562, "learning_rate": 2.1274764385168985e-07, "loss": 13.9219, "step": 23936 }, { "epoch": 1.5897589161187486, "grad_norm": 127.73225402832031, "learning_rate": 2.1268132853992437e-07, "loss": 14.5781, "step": 23937 }, { "epoch": 1.5898253304111045, "grad_norm": 439.2103271484375, "learning_rate": 2.1261502233540763e-07, "loss": 20.3281, "step": 23938 }, { "epoch": 1.5898917447034602, "grad_norm": 179.93310546875, "learning_rate": 2.1254872523890566e-07, "loss": 15.0469, "step": 23939 }, { "epoch": 1.5899581589958158, "grad_norm": 270.0433654785156, "learning_rate": 2.1248243725118654e-07, "loss": 15.9375, "step": 23940 }, { "epoch": 1.5900245732881717, "grad_norm": 126.6563949584961, "learning_rate": 2.124161583730162e-07, "loss": 15.1719, "step": 23941 }, { "epoch": 1.5900909875805274, "grad_norm": 379.8267822265625, "learning_rate": 2.1234988860516145e-07, "loss": 18.4688, "step": 23942 }, { "epoch": 1.590157401872883, "grad_norm": 399.8995666503906, "learning_rate": 2.1228362794838917e-07, "loss": 13.9375, "step": 23943 }, { "epoch": 1.590223816165239, "grad_norm": 162.4908447265625, "learning_rate": 2.1221737640346592e-07, "loss": 14.5312, "step": 23944 }, { "epoch": 1.5902902304575943, "grad_norm": 123.22282409667969, "learning_rate": 2.1215113397115725e-07, "loss": 15.4375, "step": 23945 }, { "epoch": 1.5903566447499502, "grad_norm": 144.0041046142578, "learning_rate": 2.120849006522304e-07, "loss": 22.1875, "step": 23946 }, { "epoch": 1.5904230590423059, "grad_norm": 173.45712280273438, "learning_rate": 2.1201867644745098e-07, "loss": 15.3125, "step": 23947 }, { "epoch": 1.5904894733346615, "grad_norm": 197.9266357421875, "learning_rate": 2.1195246135758504e-07, "loss": 19.9844, "step": 23948 }, { "epoch": 1.5905558876270174, "grad_norm": 165.50189208984375, "learning_rate": 2.1188625538339855e-07, "loss": 13.4062, "step": 23949 }, { "epoch": 1.590622301919373, "grad_norm": 172.0261993408203, "learning_rate": 2.1182005852565753e-07, "loss": 10.2188, "step": 23950 }, { "epoch": 1.5906887162117287, "grad_norm": 233.66551208496094, "learning_rate": 2.1175387078512753e-07, "loss": 24.1562, "step": 23951 }, { "epoch": 1.5907551305040846, "grad_norm": 249.80751037597656, "learning_rate": 2.1168769216257443e-07, "loss": 13.7109, "step": 23952 }, { "epoch": 1.5908215447964402, "grad_norm": 165.39227294921875, "learning_rate": 2.116215226587631e-07, "loss": 13.0156, "step": 23953 }, { "epoch": 1.5908879590887959, "grad_norm": 185.66859436035156, "learning_rate": 2.115553622744599e-07, "loss": 21.3281, "step": 23954 }, { "epoch": 1.5909543733811518, "grad_norm": 132.0532684326172, "learning_rate": 2.1148921101042927e-07, "loss": 17.5469, "step": 23955 }, { "epoch": 1.5910207876735072, "grad_norm": 368.6580505371094, "learning_rate": 2.114230688674369e-07, "loss": 27.3125, "step": 23956 }, { "epoch": 1.591087201965863, "grad_norm": 203.55050659179688, "learning_rate": 2.113569358462477e-07, "loss": 18.25, "step": 23957 }, { "epoch": 1.5911536162582187, "grad_norm": 146.6386260986328, "learning_rate": 2.1129081194762664e-07, "loss": 15.3125, "step": 23958 }, { "epoch": 1.5912200305505744, "grad_norm": 451.3976135253906, "learning_rate": 2.112246971723388e-07, "loss": 16.9531, "step": 23959 }, { "epoch": 1.5912864448429302, "grad_norm": 148.9598388671875, "learning_rate": 2.11158591521149e-07, "loss": 11.9531, "step": 23960 }, { "epoch": 1.591352859135286, "grad_norm": 147.65928649902344, "learning_rate": 2.1109249499482152e-07, "loss": 14.75, "step": 23961 }, { "epoch": 1.5914192734276416, "grad_norm": 269.39263916015625, "learning_rate": 2.1102640759412116e-07, "loss": 19.2812, "step": 23962 }, { "epoch": 1.5914856877199974, "grad_norm": 158.04563903808594, "learning_rate": 2.1096032931981233e-07, "loss": 14.4453, "step": 23963 }, { "epoch": 1.591552102012353, "grad_norm": 489.810791015625, "learning_rate": 2.1089426017265943e-07, "loss": 13.8281, "step": 23964 }, { "epoch": 1.5916185163047087, "grad_norm": 275.7451477050781, "learning_rate": 2.1082820015342673e-07, "loss": 17.8125, "step": 23965 }, { "epoch": 1.5916849305970646, "grad_norm": 456.1807556152344, "learning_rate": 2.107621492628785e-07, "loss": 15.0781, "step": 23966 }, { "epoch": 1.59175134488942, "grad_norm": 121.79973602294922, "learning_rate": 2.1069610750177823e-07, "loss": 16.5781, "step": 23967 }, { "epoch": 1.591817759181776, "grad_norm": 122.94807434082031, "learning_rate": 2.106300748708907e-07, "loss": 13.9453, "step": 23968 }, { "epoch": 1.5918841734741316, "grad_norm": 317.79913330078125, "learning_rate": 2.105640513709791e-07, "loss": 22.5938, "step": 23969 }, { "epoch": 1.5919505877664872, "grad_norm": 265.2174987792969, "learning_rate": 2.1049803700280732e-07, "loss": 17.9375, "step": 23970 }, { "epoch": 1.5920170020588431, "grad_norm": 205.2588348388672, "learning_rate": 2.1043203176713897e-07, "loss": 14.3594, "step": 23971 }, { "epoch": 1.5920834163511988, "grad_norm": 264.4272155761719, "learning_rate": 2.1036603566473755e-07, "loss": 15.5938, "step": 23972 }, { "epoch": 1.5921498306435544, "grad_norm": 150.0458984375, "learning_rate": 2.1030004869636652e-07, "loss": 15.2031, "step": 23973 }, { "epoch": 1.5922162449359103, "grad_norm": 117.41974639892578, "learning_rate": 2.102340708627892e-07, "loss": 17.2656, "step": 23974 }, { "epoch": 1.592282659228266, "grad_norm": 240.29168701171875, "learning_rate": 2.1016810216476877e-07, "loss": 14.2188, "step": 23975 }, { "epoch": 1.5923490735206216, "grad_norm": 235.39524841308594, "learning_rate": 2.1010214260306824e-07, "loss": 29.7969, "step": 23976 }, { "epoch": 1.5924154878129775, "grad_norm": 106.73104095458984, "learning_rate": 2.1003619217845091e-07, "loss": 17.3125, "step": 23977 }, { "epoch": 1.592481902105333, "grad_norm": 132.2557373046875, "learning_rate": 2.0997025089167886e-07, "loss": 15.1094, "step": 23978 }, { "epoch": 1.5925483163976888, "grad_norm": 167.97459411621094, "learning_rate": 2.0990431874351587e-07, "loss": 18.4062, "step": 23979 }, { "epoch": 1.5926147306900444, "grad_norm": 359.48699951171875, "learning_rate": 2.0983839573472395e-07, "loss": 14.4688, "step": 23980 }, { "epoch": 1.5926811449824, "grad_norm": 451.41680908203125, "learning_rate": 2.0977248186606578e-07, "loss": 17.0, "step": 23981 }, { "epoch": 1.592747559274756, "grad_norm": 748.4177856445312, "learning_rate": 2.0970657713830387e-07, "loss": 16.9844, "step": 23982 }, { "epoch": 1.5928139735671116, "grad_norm": 143.900634765625, "learning_rate": 2.0964068155220081e-07, "loss": 15.2812, "step": 23983 }, { "epoch": 1.5928803878594673, "grad_norm": 407.2154846191406, "learning_rate": 2.0957479510851818e-07, "loss": 22.9375, "step": 23984 }, { "epoch": 1.5929468021518232, "grad_norm": 229.67787170410156, "learning_rate": 2.0950891780801894e-07, "loss": 14.9375, "step": 23985 }, { "epoch": 1.5930132164441788, "grad_norm": 196.97003173828125, "learning_rate": 2.0944304965146452e-07, "loss": 15.3438, "step": 23986 }, { "epoch": 1.5930796307365345, "grad_norm": 266.1593017578125, "learning_rate": 2.09377190639617e-07, "loss": 21.0469, "step": 23987 }, { "epoch": 1.5931460450288903, "grad_norm": 149.09127807617188, "learning_rate": 2.093113407732383e-07, "loss": 15.3828, "step": 23988 }, { "epoch": 1.5932124593212458, "grad_norm": 1429.2711181640625, "learning_rate": 2.0924550005309005e-07, "loss": 13.7344, "step": 23989 }, { "epoch": 1.5932788736136017, "grad_norm": 159.98008728027344, "learning_rate": 2.0917966847993386e-07, "loss": 14.1094, "step": 23990 }, { "epoch": 1.5933452879059573, "grad_norm": 81.58213806152344, "learning_rate": 2.0911384605453153e-07, "loss": 10.5859, "step": 23991 }, { "epoch": 1.593411702198313, "grad_norm": 106.87255096435547, "learning_rate": 2.090480327776436e-07, "loss": 15.5781, "step": 23992 }, { "epoch": 1.5934781164906688, "grad_norm": 398.2740783691406, "learning_rate": 2.0898222865003245e-07, "loss": 15.2969, "step": 23993 }, { "epoch": 1.5935445307830245, "grad_norm": 180.19406127929688, "learning_rate": 2.0891643367245858e-07, "loss": 18.8594, "step": 23994 }, { "epoch": 1.5936109450753801, "grad_norm": 316.81048583984375, "learning_rate": 2.0885064784568319e-07, "loss": 19.7031, "step": 23995 }, { "epoch": 1.593677359367736, "grad_norm": 276.54705810546875, "learning_rate": 2.0878487117046727e-07, "loss": 15.8906, "step": 23996 }, { "epoch": 1.5937437736600917, "grad_norm": 192.194580078125, "learning_rate": 2.0871910364757184e-07, "loss": 23.2188, "step": 23997 }, { "epoch": 1.5938101879524473, "grad_norm": 399.6455383300781, "learning_rate": 2.0865334527775747e-07, "loss": 12.6406, "step": 23998 }, { "epoch": 1.5938766022448032, "grad_norm": 223.7686309814453, "learning_rate": 2.085875960617851e-07, "loss": 11.8906, "step": 23999 }, { "epoch": 1.5939430165371586, "grad_norm": 358.8773498535156, "learning_rate": 2.085218560004146e-07, "loss": 20.1875, "step": 24000 }, { "epoch": 1.5940094308295145, "grad_norm": 285.5716552734375, "learning_rate": 2.0845612509440735e-07, "loss": 16.1562, "step": 24001 }, { "epoch": 1.5940758451218702, "grad_norm": 150.13986206054688, "learning_rate": 2.0839040334452306e-07, "loss": 13.8906, "step": 24002 }, { "epoch": 1.5941422594142258, "grad_norm": 235.2810821533203, "learning_rate": 2.0832469075152204e-07, "loss": 25.2188, "step": 24003 }, { "epoch": 1.5942086737065817, "grad_norm": 133.63082885742188, "learning_rate": 2.0825898731616444e-07, "loss": 13.5, "step": 24004 }, { "epoch": 1.5942750879989374, "grad_norm": 206.98202514648438, "learning_rate": 2.0819329303921063e-07, "loss": 26.3906, "step": 24005 }, { "epoch": 1.594341502291293, "grad_norm": 114.72232818603516, "learning_rate": 2.081276079214197e-07, "loss": 21.6875, "step": 24006 }, { "epoch": 1.5944079165836489, "grad_norm": 113.82078552246094, "learning_rate": 2.080619319635526e-07, "loss": 17.1719, "step": 24007 }, { "epoch": 1.5944743308760045, "grad_norm": 143.12286376953125, "learning_rate": 2.0799626516636803e-07, "loss": 17.6875, "step": 24008 }, { "epoch": 1.5945407451683602, "grad_norm": 205.76597595214844, "learning_rate": 2.0793060753062574e-07, "loss": 14.25, "step": 24009 }, { "epoch": 1.594607159460716, "grad_norm": 197.7075653076172, "learning_rate": 2.07864959057086e-07, "loss": 17.125, "step": 24010 }, { "epoch": 1.5946735737530715, "grad_norm": 208.2551727294922, "learning_rate": 2.0779931974650733e-07, "loss": 20.2812, "step": 24011 }, { "epoch": 1.5947399880454274, "grad_norm": 422.9627990722656, "learning_rate": 2.0773368959964944e-07, "loss": 18.2969, "step": 24012 }, { "epoch": 1.594806402337783, "grad_norm": 151.36521911621094, "learning_rate": 2.0766806861727128e-07, "loss": 26.0938, "step": 24013 }, { "epoch": 1.5948728166301387, "grad_norm": 252.23106384277344, "learning_rate": 2.0760245680013199e-07, "loss": 14.5703, "step": 24014 }, { "epoch": 1.5949392309224946, "grad_norm": 331.9231872558594, "learning_rate": 2.0753685414899057e-07, "loss": 18.0, "step": 24015 }, { "epoch": 1.5950056452148502, "grad_norm": 319.3315734863281, "learning_rate": 2.0747126066460617e-07, "loss": 16.4844, "step": 24016 }, { "epoch": 1.5950720595072059, "grad_norm": 145.3271026611328, "learning_rate": 2.0740567634773676e-07, "loss": 16.1719, "step": 24017 }, { "epoch": 1.5951384737995618, "grad_norm": 270.1884460449219, "learning_rate": 2.0734010119914192e-07, "loss": 16.9219, "step": 24018 }, { "epoch": 1.5952048880919174, "grad_norm": 183.40155029296875, "learning_rate": 2.0727453521957938e-07, "loss": 19.2031, "step": 24019 }, { "epoch": 1.595271302384273, "grad_norm": 443.8282470703125, "learning_rate": 2.07208978409808e-07, "loss": 20.6719, "step": 24020 }, { "epoch": 1.595337716676629, "grad_norm": 272.9973449707031, "learning_rate": 2.0714343077058594e-07, "loss": 17.625, "step": 24021 }, { "epoch": 1.5954041309689844, "grad_norm": 355.0993347167969, "learning_rate": 2.0707789230267148e-07, "loss": 15.7812, "step": 24022 }, { "epoch": 1.5954705452613402, "grad_norm": 136.09225463867188, "learning_rate": 2.070123630068228e-07, "loss": 17.25, "step": 24023 }, { "epoch": 1.5955369595536961, "grad_norm": 237.94419860839844, "learning_rate": 2.0694684288379806e-07, "loss": 19.3125, "step": 24024 }, { "epoch": 1.5956033738460516, "grad_norm": 222.70309448242188, "learning_rate": 2.0688133193435465e-07, "loss": 15.0469, "step": 24025 }, { "epoch": 1.5956697881384074, "grad_norm": 169.7129669189453, "learning_rate": 2.0681583015925075e-07, "loss": 15.4688, "step": 24026 }, { "epoch": 1.595736202430763, "grad_norm": 168.2763214111328, "learning_rate": 2.0675033755924387e-07, "loss": 18.8594, "step": 24027 }, { "epoch": 1.5958026167231187, "grad_norm": 209.32644653320312, "learning_rate": 2.0668485413509172e-07, "loss": 16.0469, "step": 24028 }, { "epoch": 1.5958690310154746, "grad_norm": 261.6159973144531, "learning_rate": 2.0661937988755173e-07, "loss": 10.0469, "step": 24029 }, { "epoch": 1.5959354453078303, "grad_norm": 201.48318481445312, "learning_rate": 2.065539148173815e-07, "loss": 17.1094, "step": 24030 }, { "epoch": 1.596001859600186, "grad_norm": 150.77273559570312, "learning_rate": 2.064884589253375e-07, "loss": 12.9375, "step": 24031 }, { "epoch": 1.5960682738925418, "grad_norm": 188.05198669433594, "learning_rate": 2.0642301221217795e-07, "loss": 14.1406, "step": 24032 }, { "epoch": 1.5961346881848972, "grad_norm": 181.5935821533203, "learning_rate": 2.0635757467865923e-07, "loss": 16.3906, "step": 24033 }, { "epoch": 1.596201102477253, "grad_norm": 193.5910186767578, "learning_rate": 2.0629214632553838e-07, "loss": 18.4219, "step": 24034 }, { "epoch": 1.596267516769609, "grad_norm": 292.5491943359375, "learning_rate": 2.0622672715357237e-07, "loss": 14.6406, "step": 24035 }, { "epoch": 1.5963339310619644, "grad_norm": 115.67462921142578, "learning_rate": 2.061613171635178e-07, "loss": 13.0469, "step": 24036 }, { "epoch": 1.5964003453543203, "grad_norm": 504.441162109375, "learning_rate": 2.0609591635613133e-07, "loss": 16.4844, "step": 24037 }, { "epoch": 1.596466759646676, "grad_norm": 204.72894287109375, "learning_rate": 2.0603052473216987e-07, "loss": 15.3281, "step": 24038 }, { "epoch": 1.5965331739390316, "grad_norm": 187.4424285888672, "learning_rate": 2.0596514229238883e-07, "loss": 16.4531, "step": 24039 }, { "epoch": 1.5965995882313875, "grad_norm": 159.36224365234375, "learning_rate": 2.0589976903754568e-07, "loss": 15.2969, "step": 24040 }, { "epoch": 1.5966660025237431, "grad_norm": 136.06146240234375, "learning_rate": 2.0583440496839599e-07, "loss": 16.7812, "step": 24041 }, { "epoch": 1.5967324168160988, "grad_norm": 244.75917053222656, "learning_rate": 2.057690500856958e-07, "loss": 17.1094, "step": 24042 }, { "epoch": 1.5967988311084547, "grad_norm": 114.09844970703125, "learning_rate": 2.0570370439020136e-07, "loss": 13.1719, "step": 24043 }, { "epoch": 1.59686524540081, "grad_norm": 230.06988525390625, "learning_rate": 2.0563836788266843e-07, "loss": 15.7656, "step": 24044 }, { "epoch": 1.596931659693166, "grad_norm": 153.7871856689453, "learning_rate": 2.0557304056385273e-07, "loss": 12.6875, "step": 24045 }, { "epoch": 1.5969980739855218, "grad_norm": 224.9716339111328, "learning_rate": 2.055077224345101e-07, "loss": 21.0469, "step": 24046 }, { "epoch": 1.5970644882778773, "grad_norm": 249.20494079589844, "learning_rate": 2.0544241349539626e-07, "loss": 17.5938, "step": 24047 }, { "epoch": 1.5971309025702332, "grad_norm": 247.4522705078125, "learning_rate": 2.0537711374726585e-07, "loss": 17.5469, "step": 24048 }, { "epoch": 1.5971973168625888, "grad_norm": 343.7420959472656, "learning_rate": 2.0531182319087525e-07, "loss": 19.0781, "step": 24049 }, { "epoch": 1.5972637311549445, "grad_norm": 731.0311889648438, "learning_rate": 2.0524654182697909e-07, "loss": 12.75, "step": 24050 }, { "epoch": 1.5973301454473003, "grad_norm": 227.69837951660156, "learning_rate": 2.0518126965633265e-07, "loss": 14.2188, "step": 24051 }, { "epoch": 1.597396559739656, "grad_norm": 364.8465881347656, "learning_rate": 2.0511600667969086e-07, "loss": 12.3047, "step": 24052 }, { "epoch": 1.5974629740320117, "grad_norm": 106.3944091796875, "learning_rate": 2.0505075289780881e-07, "loss": 12.9141, "step": 24053 }, { "epoch": 1.5975293883243675, "grad_norm": 116.74027252197266, "learning_rate": 2.0498550831144123e-07, "loss": 16.5781, "step": 24054 }, { "epoch": 1.597595802616723, "grad_norm": 268.08502197265625, "learning_rate": 2.0492027292134317e-07, "loss": 22.375, "step": 24055 }, { "epoch": 1.5976622169090788, "grad_norm": 144.61521911621094, "learning_rate": 2.0485504672826837e-07, "loss": 12.5156, "step": 24056 }, { "epoch": 1.5977286312014347, "grad_norm": 266.138427734375, "learning_rate": 2.0478982973297243e-07, "loss": 15.7031, "step": 24057 }, { "epoch": 1.5977950454937901, "grad_norm": 149.39797973632812, "learning_rate": 2.0472462193620887e-07, "loss": 22.9688, "step": 24058 }, { "epoch": 1.597861459786146, "grad_norm": 348.1887512207031, "learning_rate": 2.0465942333873232e-07, "loss": 16.0938, "step": 24059 }, { "epoch": 1.5979278740785017, "grad_norm": 193.7306365966797, "learning_rate": 2.0459423394129706e-07, "loss": 12.1875, "step": 24060 }, { "epoch": 1.5979942883708573, "grad_norm": 132.0523681640625, "learning_rate": 2.045290537446569e-07, "loss": 11.5469, "step": 24061 }, { "epoch": 1.5980607026632132, "grad_norm": 149.36151123046875, "learning_rate": 2.0446388274956594e-07, "loss": 15.1875, "step": 24062 }, { "epoch": 1.5981271169555689, "grad_norm": 301.1846618652344, "learning_rate": 2.0439872095677845e-07, "loss": 21.0469, "step": 24063 }, { "epoch": 1.5981935312479245, "grad_norm": 127.4733657836914, "learning_rate": 2.0433356836704717e-07, "loss": 14.0469, "step": 24064 }, { "epoch": 1.5982599455402804, "grad_norm": 220.87403869628906, "learning_rate": 2.0426842498112696e-07, "loss": 18.2969, "step": 24065 }, { "epoch": 1.5983263598326358, "grad_norm": 165.40525817871094, "learning_rate": 2.042032907997704e-07, "loss": 16.1719, "step": 24066 }, { "epoch": 1.5983927741249917, "grad_norm": 661.1073608398438, "learning_rate": 2.0413816582373132e-07, "loss": 18.3125, "step": 24067 }, { "epoch": 1.5984591884173476, "grad_norm": 121.13583374023438, "learning_rate": 2.0407305005376309e-07, "loss": 15.3906, "step": 24068 }, { "epoch": 1.598525602709703, "grad_norm": 1530.33740234375, "learning_rate": 2.04007943490619e-07, "loss": 14.9062, "step": 24069 }, { "epoch": 1.5985920170020589, "grad_norm": 166.90774536132812, "learning_rate": 2.0394284613505153e-07, "loss": 24.4844, "step": 24070 }, { "epoch": 1.5986584312944145, "grad_norm": 383.5350341796875, "learning_rate": 2.038777579878148e-07, "loss": 20.6406, "step": 24071 }, { "epoch": 1.5987248455867702, "grad_norm": 300.8195495605469, "learning_rate": 2.038126790496607e-07, "loss": 14.4531, "step": 24072 }, { "epoch": 1.598791259879126, "grad_norm": 290.7706298828125, "learning_rate": 2.037476093213426e-07, "loss": 16.5781, "step": 24073 }, { "epoch": 1.5988576741714817, "grad_norm": 200.79751586914062, "learning_rate": 2.036825488036129e-07, "loss": 23.5, "step": 24074 }, { "epoch": 1.5989240884638374, "grad_norm": 105.41962432861328, "learning_rate": 2.0361749749722424e-07, "loss": 11.9375, "step": 24075 }, { "epoch": 1.5989905027561933, "grad_norm": 171.4390869140625, "learning_rate": 2.035524554029292e-07, "loss": 15.6875, "step": 24076 }, { "epoch": 1.5990569170485487, "grad_norm": 207.94761657714844, "learning_rate": 2.0348742252148044e-07, "loss": 16.5469, "step": 24077 }, { "epoch": 1.5991233313409046, "grad_norm": 239.26646423339844, "learning_rate": 2.034223988536292e-07, "loss": 19.5625, "step": 24078 }, { "epoch": 1.5991897456332604, "grad_norm": 128.9776153564453, "learning_rate": 2.0335738440012894e-07, "loss": 12.5625, "step": 24079 }, { "epoch": 1.5992561599256159, "grad_norm": 185.5485076904297, "learning_rate": 2.0329237916173082e-07, "loss": 15.0781, "step": 24080 }, { "epoch": 1.5993225742179717, "grad_norm": 190.59225463867188, "learning_rate": 2.0322738313918674e-07, "loss": 18.0938, "step": 24081 }, { "epoch": 1.5993889885103274, "grad_norm": 323.8582763671875, "learning_rate": 2.031623963332495e-07, "loss": 19.125, "step": 24082 }, { "epoch": 1.599455402802683, "grad_norm": 126.99821472167969, "learning_rate": 2.030974187446698e-07, "loss": 14.9062, "step": 24083 }, { "epoch": 1.599521817095039, "grad_norm": 100.90196990966797, "learning_rate": 2.0303245037419957e-07, "loss": 13.9375, "step": 24084 }, { "epoch": 1.5995882313873946, "grad_norm": 117.26607513427734, "learning_rate": 2.029674912225906e-07, "loss": 16.5625, "step": 24085 }, { "epoch": 1.5996546456797502, "grad_norm": 174.35108947753906, "learning_rate": 2.0290254129059393e-07, "loss": 15.5469, "step": 24086 }, { "epoch": 1.5997210599721061, "grad_norm": 160.6224365234375, "learning_rate": 2.0283760057896103e-07, "loss": 12.4375, "step": 24087 }, { "epoch": 1.5997874742644616, "grad_norm": 176.94476318359375, "learning_rate": 2.0277266908844348e-07, "loss": 13.0938, "step": 24088 }, { "epoch": 1.5998538885568174, "grad_norm": 103.88330841064453, "learning_rate": 2.0270774681979165e-07, "loss": 15.7344, "step": 24089 }, { "epoch": 1.5999203028491733, "grad_norm": 195.56430053710938, "learning_rate": 2.026428337737569e-07, "loss": 11.3438, "step": 24090 }, { "epoch": 1.5999867171415287, "grad_norm": 224.77500915527344, "learning_rate": 2.0257792995109003e-07, "loss": 15.5781, "step": 24091 }, { "epoch": 1.6000531314338846, "grad_norm": 204.72645568847656, "learning_rate": 2.0251303535254184e-07, "loss": 12.9688, "step": 24092 }, { "epoch": 1.6001195457262403, "grad_norm": 158.7950897216797, "learning_rate": 2.0244814997886296e-07, "loss": 14.375, "step": 24093 }, { "epoch": 1.600185960018596, "grad_norm": 84.20081329345703, "learning_rate": 2.0238327383080433e-07, "loss": 11.4531, "step": 24094 }, { "epoch": 1.6002523743109518, "grad_norm": 499.93597412109375, "learning_rate": 2.023184069091155e-07, "loss": 19.4844, "step": 24095 }, { "epoch": 1.6003187886033075, "grad_norm": 249.256103515625, "learning_rate": 2.0225354921454784e-07, "loss": 16.5938, "step": 24096 }, { "epoch": 1.600385202895663, "grad_norm": 251.8782501220703, "learning_rate": 2.0218870074785098e-07, "loss": 23.5781, "step": 24097 }, { "epoch": 1.600451617188019, "grad_norm": 295.2638244628906, "learning_rate": 2.0212386150977512e-07, "loss": 22.9531, "step": 24098 }, { "epoch": 1.6005180314803744, "grad_norm": 291.2572937011719, "learning_rate": 2.0205903150107028e-07, "loss": 12.375, "step": 24099 }, { "epoch": 1.6005844457727303, "grad_norm": 174.47901916503906, "learning_rate": 2.0199421072248646e-07, "loss": 14.9062, "step": 24100 }, { "epoch": 1.6006508600650862, "grad_norm": 350.941650390625, "learning_rate": 2.0192939917477357e-07, "loss": 15.0234, "step": 24101 }, { "epoch": 1.6007172743574416, "grad_norm": 129.8564453125, "learning_rate": 2.018645968586814e-07, "loss": 16.8125, "step": 24102 }, { "epoch": 1.6007836886497975, "grad_norm": 213.8397674560547, "learning_rate": 2.0179980377495875e-07, "loss": 23.3906, "step": 24103 }, { "epoch": 1.6008501029421531, "grad_norm": 144.67750549316406, "learning_rate": 2.0173501992435627e-07, "loss": 14.2031, "step": 24104 }, { "epoch": 1.6009165172345088, "grad_norm": 182.25596618652344, "learning_rate": 2.0167024530762256e-07, "loss": 18.3594, "step": 24105 }, { "epoch": 1.6009829315268647, "grad_norm": 127.43309783935547, "learning_rate": 2.0160547992550703e-07, "loss": 17.0, "step": 24106 }, { "epoch": 1.6010493458192203, "grad_norm": 177.7710723876953, "learning_rate": 2.0154072377875896e-07, "loss": 14.6719, "step": 24107 }, { "epoch": 1.601115760111576, "grad_norm": 279.5052795410156, "learning_rate": 2.0147597686812768e-07, "loss": 21.1719, "step": 24108 }, { "epoch": 1.6011821744039318, "grad_norm": 554.2157592773438, "learning_rate": 2.0141123919436131e-07, "loss": 29.4062, "step": 24109 }, { "epoch": 1.6012485886962873, "grad_norm": 204.2564697265625, "learning_rate": 2.0134651075820974e-07, "loss": 16.5, "step": 24110 }, { "epoch": 1.6013150029886432, "grad_norm": 102.78658294677734, "learning_rate": 2.012817915604209e-07, "loss": 14.4531, "step": 24111 }, { "epoch": 1.601381417280999, "grad_norm": 125.18981170654297, "learning_rate": 2.0121708160174377e-07, "loss": 15.7656, "step": 24112 }, { "epoch": 1.6014478315733545, "grad_norm": 180.15640258789062, "learning_rate": 2.0115238088292684e-07, "loss": 13.1875, "step": 24113 }, { "epoch": 1.6015142458657103, "grad_norm": 160.4144287109375, "learning_rate": 2.0108768940471855e-07, "loss": 15.125, "step": 24114 }, { "epoch": 1.601580660158066, "grad_norm": 257.14117431640625, "learning_rate": 2.0102300716786712e-07, "loss": 13.9375, "step": 24115 }, { "epoch": 1.6016470744504216, "grad_norm": 202.7181396484375, "learning_rate": 2.0095833417312113e-07, "loss": 20.5156, "step": 24116 }, { "epoch": 1.6017134887427775, "grad_norm": 127.02386474609375, "learning_rate": 2.0089367042122784e-07, "loss": 12.0547, "step": 24117 }, { "epoch": 1.6017799030351332, "grad_norm": 97.87178802490234, "learning_rate": 2.0082901591293611e-07, "loss": 9.1328, "step": 24118 }, { "epoch": 1.6018463173274888, "grad_norm": 252.6648406982422, "learning_rate": 2.0076437064899366e-07, "loss": 22.9219, "step": 24119 }, { "epoch": 1.6019127316198447, "grad_norm": 222.2761993408203, "learning_rate": 2.0069973463014755e-07, "loss": 20.1875, "step": 24120 }, { "epoch": 1.6019791459122001, "grad_norm": 307.0083923339844, "learning_rate": 2.0063510785714667e-07, "loss": 19.125, "step": 24121 }, { "epoch": 1.602045560204556, "grad_norm": 139.4334716796875, "learning_rate": 2.0057049033073757e-07, "loss": 13.3281, "step": 24122 }, { "epoch": 1.602111974496912, "grad_norm": 283.2104187011719, "learning_rate": 2.0050588205166806e-07, "loss": 17.7188, "step": 24123 }, { "epoch": 1.6021783887892673, "grad_norm": 211.7937469482422, "learning_rate": 2.0044128302068553e-07, "loss": 12.7188, "step": 24124 }, { "epoch": 1.6022448030816232, "grad_norm": 409.8911437988281, "learning_rate": 2.0037669323853712e-07, "loss": 17.3438, "step": 24125 }, { "epoch": 1.6023112173739789, "grad_norm": 284.04022216796875, "learning_rate": 2.0031211270597015e-07, "loss": 14.4375, "step": 24126 }, { "epoch": 1.6023776316663345, "grad_norm": 209.18780517578125, "learning_rate": 2.0024754142373168e-07, "loss": 15.5469, "step": 24127 }, { "epoch": 1.6024440459586904, "grad_norm": 624.4169921875, "learning_rate": 2.00182979392568e-07, "loss": 20.5156, "step": 24128 }, { "epoch": 1.602510460251046, "grad_norm": 142.8333282470703, "learning_rate": 2.0011842661322697e-07, "loss": 15.0938, "step": 24129 }, { "epoch": 1.6025768745434017, "grad_norm": 124.74583435058594, "learning_rate": 2.0005388308645443e-07, "loss": 12.9531, "step": 24130 }, { "epoch": 1.6026432888357576, "grad_norm": 143.0333251953125, "learning_rate": 1.9998934881299733e-07, "loss": 11.8281, "step": 24131 }, { "epoch": 1.602709703128113, "grad_norm": 249.4521026611328, "learning_rate": 1.999248237936021e-07, "loss": 18.1562, "step": 24132 }, { "epoch": 1.6027761174204689, "grad_norm": 155.481689453125, "learning_rate": 1.9986030802901543e-07, "loss": 11.5938, "step": 24133 }, { "epoch": 1.6028425317128248, "grad_norm": 248.80984497070312, "learning_rate": 1.9979580151998288e-07, "loss": 21.9688, "step": 24134 }, { "epoch": 1.6029089460051802, "grad_norm": 431.8709411621094, "learning_rate": 1.997313042672515e-07, "loss": 22.6562, "step": 24135 }, { "epoch": 1.602975360297536, "grad_norm": 619.9674072265625, "learning_rate": 1.9966681627156668e-07, "loss": 16.0625, "step": 24136 }, { "epoch": 1.6030417745898917, "grad_norm": 126.72151947021484, "learning_rate": 1.9960233753367472e-07, "loss": 13.9219, "step": 24137 }, { "epoch": 1.6031081888822474, "grad_norm": 243.33282470703125, "learning_rate": 1.9953786805432128e-07, "loss": 18.5625, "step": 24138 }, { "epoch": 1.6031746031746033, "grad_norm": 281.02874755859375, "learning_rate": 1.9947340783425225e-07, "loss": 20.9531, "step": 24139 }, { "epoch": 1.603241017466959, "grad_norm": 143.82284545898438, "learning_rate": 1.994089568742132e-07, "loss": 15.8906, "step": 24140 }, { "epoch": 1.6033074317593146, "grad_norm": 433.3356628417969, "learning_rate": 1.9934451517495e-07, "loss": 10.8281, "step": 24141 }, { "epoch": 1.6033738460516704, "grad_norm": 147.23529052734375, "learning_rate": 1.992800827372072e-07, "loss": 14.9375, "step": 24142 }, { "epoch": 1.6034402603440259, "grad_norm": 288.95166015625, "learning_rate": 1.9921565956173124e-07, "loss": 14.0625, "step": 24143 }, { "epoch": 1.6035066746363817, "grad_norm": 237.11141967773438, "learning_rate": 1.9915124564926655e-07, "loss": 19.0, "step": 24144 }, { "epoch": 1.6035730889287376, "grad_norm": 222.53919982910156, "learning_rate": 1.990868410005584e-07, "loss": 15.2344, "step": 24145 }, { "epoch": 1.603639503221093, "grad_norm": 362.9611511230469, "learning_rate": 1.9902244561635183e-07, "loss": 14.8906, "step": 24146 }, { "epoch": 1.603705917513449, "grad_norm": 198.69696044921875, "learning_rate": 1.9895805949739175e-07, "loss": 13.1797, "step": 24147 }, { "epoch": 1.6037723318058046, "grad_norm": 190.34458923339844, "learning_rate": 1.9889368264442284e-07, "loss": 16.5938, "step": 24148 }, { "epoch": 1.6038387460981602, "grad_norm": 225.45021057128906, "learning_rate": 1.9882931505819023e-07, "loss": 12.7188, "step": 24149 }, { "epoch": 1.6039051603905161, "grad_norm": 210.1898956298828, "learning_rate": 1.9876495673943784e-07, "loss": 14.2812, "step": 24150 }, { "epoch": 1.6039715746828718, "grad_norm": 176.27053833007812, "learning_rate": 1.987006076889105e-07, "loss": 23.8125, "step": 24151 }, { "epoch": 1.6040379889752274, "grad_norm": 146.3167724609375, "learning_rate": 1.9863626790735233e-07, "loss": 12.7812, "step": 24152 }, { "epoch": 1.6041044032675833, "grad_norm": 186.88369750976562, "learning_rate": 1.9857193739550782e-07, "loss": 15.1562, "step": 24153 }, { "epoch": 1.6041708175599387, "grad_norm": 218.8372802734375, "learning_rate": 1.9850761615412093e-07, "loss": 14.0156, "step": 24154 }, { "epoch": 1.6042372318522946, "grad_norm": 259.3465270996094, "learning_rate": 1.9844330418393584e-07, "loss": 14.1562, "step": 24155 }, { "epoch": 1.6043036461446505, "grad_norm": 265.9437561035156, "learning_rate": 1.983790014856963e-07, "loss": 16.0469, "step": 24156 }, { "epoch": 1.604370060437006, "grad_norm": 228.18748474121094, "learning_rate": 1.983147080601464e-07, "loss": 23.1562, "step": 24157 }, { "epoch": 1.6044364747293618, "grad_norm": 113.9722671508789, "learning_rate": 1.9825042390802982e-07, "loss": 16.3438, "step": 24158 }, { "epoch": 1.6045028890217174, "grad_norm": 166.71856689453125, "learning_rate": 1.9818614903008945e-07, "loss": 14.8125, "step": 24159 }, { "epoch": 1.604569303314073, "grad_norm": 310.8094177246094, "learning_rate": 1.9812188342707005e-07, "loss": 13.3906, "step": 24160 }, { "epoch": 1.604635717606429, "grad_norm": 577.5855102539062, "learning_rate": 1.9805762709971387e-07, "loss": 17.875, "step": 24161 }, { "epoch": 1.6047021318987846, "grad_norm": 228.45603942871094, "learning_rate": 1.9799338004876477e-07, "loss": 19.1875, "step": 24162 }, { "epoch": 1.6047685461911403, "grad_norm": 93.21339416503906, "learning_rate": 1.979291422749657e-07, "loss": 14.4531, "step": 24163 }, { "epoch": 1.6048349604834962, "grad_norm": 254.90516662597656, "learning_rate": 1.9786491377905978e-07, "loss": 18.6406, "step": 24164 }, { "epoch": 1.6049013747758516, "grad_norm": 218.15997314453125, "learning_rate": 1.9780069456179017e-07, "loss": 19.3438, "step": 24165 }, { "epoch": 1.6049677890682075, "grad_norm": 116.77364349365234, "learning_rate": 1.977364846238997e-07, "loss": 15.8125, "step": 24166 }, { "epoch": 1.6050342033605633, "grad_norm": 254.94805908203125, "learning_rate": 1.9767228396613032e-07, "loss": 16.8125, "step": 24167 }, { "epoch": 1.6051006176529188, "grad_norm": 208.9205322265625, "learning_rate": 1.9760809258922605e-07, "loss": 18.3125, "step": 24168 }, { "epoch": 1.6051670319452747, "grad_norm": 727.2009887695312, "learning_rate": 1.9754391049392826e-07, "loss": 22.3594, "step": 24169 }, { "epoch": 1.6052334462376303, "grad_norm": 127.51490020751953, "learning_rate": 1.974797376809799e-07, "loss": 14.3594, "step": 24170 }, { "epoch": 1.605299860529986, "grad_norm": 166.64993286132812, "learning_rate": 1.9741557415112308e-07, "loss": 17.6875, "step": 24171 }, { "epoch": 1.6053662748223418, "grad_norm": 153.611572265625, "learning_rate": 1.973514199051004e-07, "loss": 12.0312, "step": 24172 }, { "epoch": 1.6054326891146975, "grad_norm": 204.1696014404297, "learning_rate": 1.9728727494365315e-07, "loss": 21.7188, "step": 24173 }, { "epoch": 1.6054991034070532, "grad_norm": 119.72293090820312, "learning_rate": 1.9722313926752442e-07, "loss": 12.5938, "step": 24174 }, { "epoch": 1.605565517699409, "grad_norm": 104.62505340576172, "learning_rate": 1.9715901287745518e-07, "loss": 12.4062, "step": 24175 }, { "epoch": 1.6056319319917647, "grad_norm": 197.14990234375, "learning_rate": 1.9709489577418747e-07, "loss": 17.6562, "step": 24176 }, { "epoch": 1.6056983462841203, "grad_norm": 294.58502197265625, "learning_rate": 1.9703078795846318e-07, "loss": 15.1406, "step": 24177 }, { "epoch": 1.6057647605764762, "grad_norm": 624.8800048828125, "learning_rate": 1.9696668943102357e-07, "loss": 11.8438, "step": 24178 }, { "epoch": 1.6058311748688316, "grad_norm": 510.3709716796875, "learning_rate": 1.969026001926103e-07, "loss": 20.5703, "step": 24179 }, { "epoch": 1.6058975891611875, "grad_norm": 589.6547241210938, "learning_rate": 1.9683852024396496e-07, "loss": 16.4297, "step": 24180 }, { "epoch": 1.6059640034535432, "grad_norm": 287.345458984375, "learning_rate": 1.967744495858279e-07, "loss": 20.2109, "step": 24181 }, { "epoch": 1.6060304177458988, "grad_norm": 111.15242004394531, "learning_rate": 1.9671038821894136e-07, "loss": 11.9844, "step": 24182 }, { "epoch": 1.6060968320382547, "grad_norm": 174.7766571044922, "learning_rate": 1.9664633614404568e-07, "loss": 15.2344, "step": 24183 }, { "epoch": 1.6061632463306104, "grad_norm": 295.4425048828125, "learning_rate": 1.9658229336188181e-07, "loss": 16.5312, "step": 24184 }, { "epoch": 1.606229660622966, "grad_norm": 122.94051361083984, "learning_rate": 1.965182598731908e-07, "loss": 15.4062, "step": 24185 }, { "epoch": 1.606296074915322, "grad_norm": 125.3529052734375, "learning_rate": 1.9645423567871323e-07, "loss": 14.75, "step": 24186 }, { "epoch": 1.6063624892076775, "grad_norm": 352.9573974609375, "learning_rate": 1.9639022077918966e-07, "loss": 17.25, "step": 24187 }, { "epoch": 1.6064289035000332, "grad_norm": 905.1383056640625, "learning_rate": 1.9632621517536085e-07, "loss": 21.5312, "step": 24188 }, { "epoch": 1.606495317792389, "grad_norm": 515.4339599609375, "learning_rate": 1.9626221886796656e-07, "loss": 22.3281, "step": 24189 }, { "epoch": 1.6065617320847445, "grad_norm": 179.52047729492188, "learning_rate": 1.9619823185774787e-07, "loss": 14.9531, "step": 24190 }, { "epoch": 1.6066281463771004, "grad_norm": 504.694091796875, "learning_rate": 1.9613425414544437e-07, "loss": 12.8281, "step": 24191 }, { "epoch": 1.606694560669456, "grad_norm": 159.40003967285156, "learning_rate": 1.9607028573179595e-07, "loss": 11.8125, "step": 24192 }, { "epoch": 1.6067609749618117, "grad_norm": 293.911865234375, "learning_rate": 1.9600632661754346e-07, "loss": 18.6094, "step": 24193 }, { "epoch": 1.6068273892541676, "grad_norm": 536.8692626953125, "learning_rate": 1.9594237680342585e-07, "loss": 15.3281, "step": 24194 }, { "epoch": 1.6068938035465232, "grad_norm": 180.85255432128906, "learning_rate": 1.958784362901833e-07, "loss": 15.3281, "step": 24195 }, { "epoch": 1.6069602178388789, "grad_norm": 176.51268005371094, "learning_rate": 1.9581450507855523e-07, "loss": 13.4453, "step": 24196 }, { "epoch": 1.6070266321312348, "grad_norm": 82.69964599609375, "learning_rate": 1.957505831692815e-07, "loss": 12.5, "step": 24197 }, { "epoch": 1.6070930464235904, "grad_norm": 393.6443786621094, "learning_rate": 1.9568667056310072e-07, "loss": 21.4375, "step": 24198 }, { "epoch": 1.607159460715946, "grad_norm": 266.123291015625, "learning_rate": 1.9562276726075333e-07, "loss": 13.7188, "step": 24199 }, { "epoch": 1.607225875008302, "grad_norm": 301.1888122558594, "learning_rate": 1.955588732629777e-07, "loss": 15.7734, "step": 24200 }, { "epoch": 1.6072922893006574, "grad_norm": 165.1996612548828, "learning_rate": 1.9549498857051306e-07, "loss": 15.6406, "step": 24201 }, { "epoch": 1.6073587035930132, "grad_norm": 183.08729553222656, "learning_rate": 1.9543111318409855e-07, "loss": 16.3281, "step": 24202 }, { "epoch": 1.607425117885369, "grad_norm": 291.09619140625, "learning_rate": 1.9536724710447294e-07, "loss": 16.6875, "step": 24203 }, { "epoch": 1.6074915321777246, "grad_norm": 136.76431274414062, "learning_rate": 1.953033903323751e-07, "loss": 15.125, "step": 24204 }, { "epoch": 1.6075579464700804, "grad_norm": 124.82550048828125, "learning_rate": 1.9523954286854382e-07, "loss": 16.6016, "step": 24205 }, { "epoch": 1.607624360762436, "grad_norm": 181.868408203125, "learning_rate": 1.9517570471371701e-07, "loss": 14.0469, "step": 24206 }, { "epoch": 1.6076907750547917, "grad_norm": 140.37132263183594, "learning_rate": 1.9511187586863399e-07, "loss": 14.625, "step": 24207 }, { "epoch": 1.6077571893471476, "grad_norm": 568.5858764648438, "learning_rate": 1.9504805633403242e-07, "loss": 16.5781, "step": 24208 }, { "epoch": 1.6078236036395033, "grad_norm": 185.83302307128906, "learning_rate": 1.949842461106509e-07, "loss": 10.4531, "step": 24209 }, { "epoch": 1.607890017931859, "grad_norm": 404.814453125, "learning_rate": 1.9492044519922723e-07, "loss": 19.875, "step": 24210 }, { "epoch": 1.6079564322242148, "grad_norm": 186.1185302734375, "learning_rate": 1.9485665360049974e-07, "loss": 16.8594, "step": 24211 }, { "epoch": 1.6080228465165702, "grad_norm": 200.19241333007812, "learning_rate": 1.9479287131520616e-07, "loss": 20.4688, "step": 24212 }, { "epoch": 1.6080892608089261, "grad_norm": 224.2446746826172, "learning_rate": 1.947290983440847e-07, "loss": 19.2188, "step": 24213 }, { "epoch": 1.6081556751012818, "grad_norm": 101.793701171875, "learning_rate": 1.9466533468787228e-07, "loss": 12.4375, "step": 24214 }, { "epoch": 1.6082220893936374, "grad_norm": 241.69094848632812, "learning_rate": 1.94601580347307e-07, "loss": 19.5703, "step": 24215 }, { "epoch": 1.6082885036859933, "grad_norm": 156.39573669433594, "learning_rate": 1.9453783532312618e-07, "loss": 15.1406, "step": 24216 }, { "epoch": 1.608354917978349, "grad_norm": 210.74600219726562, "learning_rate": 1.944740996160672e-07, "loss": 16.2188, "step": 24217 }, { "epoch": 1.6084213322707046, "grad_norm": 324.56884765625, "learning_rate": 1.9441037322686727e-07, "loss": 20.2344, "step": 24218 }, { "epoch": 1.6084877465630605, "grad_norm": 163.63058471679688, "learning_rate": 1.94346656156264e-07, "loss": 15.8906, "step": 24219 }, { "epoch": 1.6085541608554161, "grad_norm": 168.60166931152344, "learning_rate": 1.942829484049935e-07, "loss": 15.6094, "step": 24220 }, { "epoch": 1.6086205751477718, "grad_norm": 700.6458129882812, "learning_rate": 1.9421924997379368e-07, "loss": 14.6406, "step": 24221 }, { "epoch": 1.6086869894401277, "grad_norm": 511.75750732421875, "learning_rate": 1.941555608634008e-07, "loss": 24.0, "step": 24222 }, { "epoch": 1.608753403732483, "grad_norm": 128.20596313476562, "learning_rate": 1.9409188107455166e-07, "loss": 13.3125, "step": 24223 }, { "epoch": 1.608819818024839, "grad_norm": 204.82693481445312, "learning_rate": 1.940282106079829e-07, "loss": 17.8281, "step": 24224 }, { "epoch": 1.6088862323171946, "grad_norm": 401.03521728515625, "learning_rate": 1.9396454946443109e-07, "loss": 13.9531, "step": 24225 }, { "epoch": 1.6089526466095503, "grad_norm": 118.54322814941406, "learning_rate": 1.9390089764463258e-07, "loss": 15.9375, "step": 24226 }, { "epoch": 1.6090190609019062, "grad_norm": 204.87667846679688, "learning_rate": 1.9383725514932393e-07, "loss": 16.7969, "step": 24227 }, { "epoch": 1.6090854751942618, "grad_norm": 87.78629302978516, "learning_rate": 1.9377362197924052e-07, "loss": 12.5469, "step": 24228 }, { "epoch": 1.6091518894866175, "grad_norm": 530.1930541992188, "learning_rate": 1.9370999813511922e-07, "loss": 27.9844, "step": 24229 }, { "epoch": 1.6092183037789733, "grad_norm": 547.8153686523438, "learning_rate": 1.9364638361769592e-07, "loss": 21.2656, "step": 24230 }, { "epoch": 1.609284718071329, "grad_norm": 222.70025634765625, "learning_rate": 1.9358277842770588e-07, "loss": 15.0312, "step": 24231 }, { "epoch": 1.6093511323636847, "grad_norm": 269.5338134765625, "learning_rate": 1.9351918256588572e-07, "loss": 19.4219, "step": 24232 }, { "epoch": 1.6094175466560405, "grad_norm": 197.58990478515625, "learning_rate": 1.9345559603297036e-07, "loss": 18.0312, "step": 24233 }, { "epoch": 1.609483960948396, "grad_norm": 113.8742446899414, "learning_rate": 1.933920188296956e-07, "loss": 19.5625, "step": 24234 }, { "epoch": 1.6095503752407518, "grad_norm": 173.5343017578125, "learning_rate": 1.9332845095679673e-07, "loss": 18.9688, "step": 24235 }, { "epoch": 1.6096167895331075, "grad_norm": 153.44520568847656, "learning_rate": 1.9326489241500955e-07, "loss": 14.3594, "step": 24236 }, { "epoch": 1.6096832038254631, "grad_norm": 175.5423583984375, "learning_rate": 1.9320134320506843e-07, "loss": 16.125, "step": 24237 }, { "epoch": 1.609749618117819, "grad_norm": 91.37323760986328, "learning_rate": 1.9313780332770934e-07, "loss": 14.6094, "step": 24238 }, { "epoch": 1.6098160324101747, "grad_norm": 384.8098449707031, "learning_rate": 1.9307427278366674e-07, "loss": 16.8906, "step": 24239 }, { "epoch": 1.6098824467025303, "grad_norm": 132.74049377441406, "learning_rate": 1.9301075157367565e-07, "loss": 14.75, "step": 24240 }, { "epoch": 1.6099488609948862, "grad_norm": 256.5799865722656, "learning_rate": 1.929472396984707e-07, "loss": 13.75, "step": 24241 }, { "epoch": 1.6100152752872419, "grad_norm": 276.996826171875, "learning_rate": 1.928837371587868e-07, "loss": 19.6094, "step": 24242 }, { "epoch": 1.6100816895795975, "grad_norm": 239.69190979003906, "learning_rate": 1.9282024395535847e-07, "loss": 24.1562, "step": 24243 }, { "epoch": 1.6101481038719534, "grad_norm": 147.1392059326172, "learning_rate": 1.927567600889204e-07, "loss": 20.1562, "step": 24244 }, { "epoch": 1.6102145181643088, "grad_norm": 342.6092529296875, "learning_rate": 1.9269328556020604e-07, "loss": 23.6875, "step": 24245 }, { "epoch": 1.6102809324566647, "grad_norm": 382.7556457519531, "learning_rate": 1.9262982036995067e-07, "loss": 15.9219, "step": 24246 }, { "epoch": 1.6103473467490204, "grad_norm": 121.64412689208984, "learning_rate": 1.9256636451888786e-07, "loss": 18.4375, "step": 24247 }, { "epoch": 1.610413761041376, "grad_norm": 269.25848388671875, "learning_rate": 1.9250291800775176e-07, "loss": 15.9688, "step": 24248 }, { "epoch": 1.610480175333732, "grad_norm": 261.12664794921875, "learning_rate": 1.9243948083727623e-07, "loss": 13.2656, "step": 24249 }, { "epoch": 1.6105465896260875, "grad_norm": 306.37677001953125, "learning_rate": 1.9237605300819503e-07, "loss": 16.125, "step": 24250 }, { "epoch": 1.6106130039184432, "grad_norm": 159.65940856933594, "learning_rate": 1.9231263452124203e-07, "loss": 18.9531, "step": 24251 }, { "epoch": 1.610679418210799, "grad_norm": 363.99639892578125, "learning_rate": 1.9224922537715104e-07, "loss": 19.0625, "step": 24252 }, { "epoch": 1.6107458325031547, "grad_norm": 147.14796447753906, "learning_rate": 1.9218582557665463e-07, "loss": 23.75, "step": 24253 }, { "epoch": 1.6108122467955104, "grad_norm": 164.47439575195312, "learning_rate": 1.9212243512048741e-07, "loss": 17.5625, "step": 24254 }, { "epoch": 1.6108786610878663, "grad_norm": 177.42532348632812, "learning_rate": 1.920590540093816e-07, "loss": 15.75, "step": 24255 }, { "epoch": 1.6109450753802217, "grad_norm": 161.78765869140625, "learning_rate": 1.9199568224407093e-07, "loss": 14.5156, "step": 24256 }, { "epoch": 1.6110114896725776, "grad_norm": 208.85299682617188, "learning_rate": 1.9193231982528812e-07, "loss": 14.7031, "step": 24257 }, { "epoch": 1.6110779039649332, "grad_norm": 147.85775756835938, "learning_rate": 1.9186896675376662e-07, "loss": 17.3594, "step": 24258 }, { "epoch": 1.6111443182572889, "grad_norm": 138.370361328125, "learning_rate": 1.918056230302384e-07, "loss": 12.5156, "step": 24259 }, { "epoch": 1.6112107325496448, "grad_norm": 445.3015441894531, "learning_rate": 1.9174228865543728e-07, "loss": 16.5938, "step": 24260 }, { "epoch": 1.6112771468420004, "grad_norm": 338.3702087402344, "learning_rate": 1.9167896363009495e-07, "loss": 16.0, "step": 24261 }, { "epoch": 1.611343561134356, "grad_norm": 450.4312744140625, "learning_rate": 1.9161564795494423e-07, "loss": 16.6562, "step": 24262 }, { "epoch": 1.611409975426712, "grad_norm": 268.4900207519531, "learning_rate": 1.9155234163071766e-07, "loss": 14.0, "step": 24263 }, { "epoch": 1.6114763897190676, "grad_norm": 532.0786743164062, "learning_rate": 1.914890446581474e-07, "loss": 22.2344, "step": 24264 }, { "epoch": 1.6115428040114232, "grad_norm": 280.1020812988281, "learning_rate": 1.914257570379656e-07, "loss": 14.8438, "step": 24265 }, { "epoch": 1.6116092183037791, "grad_norm": 1762.121826171875, "learning_rate": 1.9136247877090438e-07, "loss": 15.9219, "step": 24266 }, { "epoch": 1.6116756325961346, "grad_norm": 195.9689483642578, "learning_rate": 1.9129920985769576e-07, "loss": 16.5469, "step": 24267 }, { "epoch": 1.6117420468884904, "grad_norm": 203.5106658935547, "learning_rate": 1.9123595029907157e-07, "loss": 26.6797, "step": 24268 }, { "epoch": 1.611808461180846, "grad_norm": 121.1286849975586, "learning_rate": 1.911727000957638e-07, "loss": 15.7031, "step": 24269 }, { "epoch": 1.6118748754732017, "grad_norm": 171.55059814453125, "learning_rate": 1.9110945924850342e-07, "loss": 24.5469, "step": 24270 }, { "epoch": 1.6119412897655576, "grad_norm": 369.9251708984375, "learning_rate": 1.910462277580227e-07, "loss": 13.7969, "step": 24271 }, { "epoch": 1.6120077040579133, "grad_norm": 161.08258056640625, "learning_rate": 1.9098300562505264e-07, "loss": 13.4297, "step": 24272 }, { "epoch": 1.612074118350269, "grad_norm": 243.66680908203125, "learning_rate": 1.9091979285032455e-07, "loss": 11.8906, "step": 24273 }, { "epoch": 1.6121405326426248, "grad_norm": 424.514404296875, "learning_rate": 1.9085658943456985e-07, "loss": 17.7969, "step": 24274 }, { "epoch": 1.6122069469349805, "grad_norm": 99.28984069824219, "learning_rate": 1.9079339537851958e-07, "loss": 12.8438, "step": 24275 }, { "epoch": 1.612273361227336, "grad_norm": 180.20481872558594, "learning_rate": 1.9073021068290452e-07, "loss": 10.8828, "step": 24276 }, { "epoch": 1.612339775519692, "grad_norm": 130.88064575195312, "learning_rate": 1.906670353484562e-07, "loss": 14.9375, "step": 24277 }, { "epoch": 1.6124061898120474, "grad_norm": 271.94537353515625, "learning_rate": 1.9060386937590455e-07, "loss": 19.5625, "step": 24278 }, { "epoch": 1.6124726041044033, "grad_norm": 232.2460174560547, "learning_rate": 1.905407127659806e-07, "loss": 15.3594, "step": 24279 }, { "epoch": 1.612539018396759, "grad_norm": 216.8424072265625, "learning_rate": 1.9047756551941495e-07, "loss": 21.0625, "step": 24280 }, { "epoch": 1.6126054326891146, "grad_norm": 211.29127502441406, "learning_rate": 1.9041442763693806e-07, "loss": 17.1328, "step": 24281 }, { "epoch": 1.6126718469814705, "grad_norm": 212.4060516357422, "learning_rate": 1.9035129911928017e-07, "loss": 16.4219, "step": 24282 }, { "epoch": 1.6127382612738261, "grad_norm": 214.69371032714844, "learning_rate": 1.9028817996717184e-07, "loss": 11.8516, "step": 24283 }, { "epoch": 1.6128046755661818, "grad_norm": 334.18585205078125, "learning_rate": 1.902250701813425e-07, "loss": 15.6875, "step": 24284 }, { "epoch": 1.6128710898585377, "grad_norm": 112.48411560058594, "learning_rate": 1.9016196976252307e-07, "loss": 19.9219, "step": 24285 }, { "epoch": 1.6129375041508933, "grad_norm": 286.9077453613281, "learning_rate": 1.9009887871144271e-07, "loss": 14.6875, "step": 24286 }, { "epoch": 1.613003918443249, "grad_norm": 294.9967956542969, "learning_rate": 1.9003579702883155e-07, "loss": 12.3672, "step": 24287 }, { "epoch": 1.6130703327356049, "grad_norm": 112.85459899902344, "learning_rate": 1.8997272471541926e-07, "loss": 13.9219, "step": 24288 }, { "epoch": 1.6131367470279603, "grad_norm": 412.8615417480469, "learning_rate": 1.8990966177193535e-07, "loss": 15.5312, "step": 24289 }, { "epoch": 1.6132031613203162, "grad_norm": 231.68333435058594, "learning_rate": 1.8984660819910936e-07, "loss": 16.4844, "step": 24290 }, { "epoch": 1.6132695756126718, "grad_norm": 211.66456604003906, "learning_rate": 1.8978356399767093e-07, "loss": 18.5156, "step": 24291 }, { "epoch": 1.6133359899050275, "grad_norm": 320.4865417480469, "learning_rate": 1.8972052916834845e-07, "loss": 17.0156, "step": 24292 }, { "epoch": 1.6134024041973833, "grad_norm": 384.8470153808594, "learning_rate": 1.8965750371187228e-07, "loss": 20.125, "step": 24293 }, { "epoch": 1.613468818489739, "grad_norm": 158.19085693359375, "learning_rate": 1.8959448762897045e-07, "loss": 12.1094, "step": 24294 }, { "epoch": 1.6135352327820947, "grad_norm": 104.22769165039062, "learning_rate": 1.8953148092037242e-07, "loss": 18.0156, "step": 24295 }, { "epoch": 1.6136016470744505, "grad_norm": 1117.131591796875, "learning_rate": 1.894684835868068e-07, "loss": 16.0938, "step": 24296 }, { "epoch": 1.6136680613668062, "grad_norm": 536.127685546875, "learning_rate": 1.8940549562900266e-07, "loss": 23.1719, "step": 24297 }, { "epoch": 1.6137344756591618, "grad_norm": 674.8292846679688, "learning_rate": 1.8934251704768777e-07, "loss": 30.4062, "step": 24298 }, { "epoch": 1.6138008899515177, "grad_norm": 352.3264465332031, "learning_rate": 1.8927954784359167e-07, "loss": 15.375, "step": 24299 }, { "epoch": 1.6138673042438731, "grad_norm": 337.8533935546875, "learning_rate": 1.892165880174421e-07, "loss": 19.0156, "step": 24300 }, { "epoch": 1.613933718536229, "grad_norm": 277.068603515625, "learning_rate": 1.891536375699675e-07, "loss": 15.8125, "step": 24301 }, { "epoch": 1.6140001328285847, "grad_norm": 282.2752685546875, "learning_rate": 1.8909069650189602e-07, "loss": 14.2188, "step": 24302 }, { "epoch": 1.6140665471209403, "grad_norm": 175.97235107421875, "learning_rate": 1.8902776481395587e-07, "loss": 18.7188, "step": 24303 }, { "epoch": 1.6141329614132962, "grad_norm": 167.63906860351562, "learning_rate": 1.8896484250687482e-07, "loss": 15.5781, "step": 24304 }, { "epoch": 1.6141993757056519, "grad_norm": 131.40811157226562, "learning_rate": 1.8890192958138085e-07, "loss": 15.9219, "step": 24305 }, { "epoch": 1.6142657899980075, "grad_norm": 3281.060302734375, "learning_rate": 1.8883902603820156e-07, "loss": 18.6562, "step": 24306 }, { "epoch": 1.6143322042903634, "grad_norm": 667.447509765625, "learning_rate": 1.8877613187806473e-07, "loss": 17.125, "step": 24307 }, { "epoch": 1.614398618582719, "grad_norm": 174.65077209472656, "learning_rate": 1.887132471016981e-07, "loss": 14.2969, "step": 24308 }, { "epoch": 1.6144650328750747, "grad_norm": 145.2447509765625, "learning_rate": 1.8865037170982833e-07, "loss": 16.8594, "step": 24309 }, { "epoch": 1.6145314471674306, "grad_norm": 265.7748718261719, "learning_rate": 1.885875057031836e-07, "loss": 18.7188, "step": 24310 }, { "epoch": 1.614597861459786, "grad_norm": 161.61361694335938, "learning_rate": 1.8852464908249055e-07, "loss": 13.9688, "step": 24311 }, { "epoch": 1.6146642757521419, "grad_norm": 180.95765686035156, "learning_rate": 1.8846180184847648e-07, "loss": 15.1719, "step": 24312 }, { "epoch": 1.6147306900444975, "grad_norm": 546.3704223632812, "learning_rate": 1.8839896400186828e-07, "loss": 16.9531, "step": 24313 }, { "epoch": 1.6147971043368532, "grad_norm": 319.74981689453125, "learning_rate": 1.883361355433929e-07, "loss": 18.1094, "step": 24314 }, { "epoch": 1.614863518629209, "grad_norm": 214.3769073486328, "learning_rate": 1.88273316473777e-07, "loss": 16.0156, "step": 24315 }, { "epoch": 1.6149299329215647, "grad_norm": 277.55859375, "learning_rate": 1.882105067937476e-07, "loss": 14.8438, "step": 24316 }, { "epoch": 1.6149963472139204, "grad_norm": 153.19256591796875, "learning_rate": 1.8814770650403044e-07, "loss": 14.5781, "step": 24317 }, { "epoch": 1.6150627615062763, "grad_norm": 262.2002258300781, "learning_rate": 1.880849156053529e-07, "loss": 13.7812, "step": 24318 }, { "epoch": 1.615129175798632, "grad_norm": 124.81582641601562, "learning_rate": 1.880221340984407e-07, "loss": 15.4688, "step": 24319 }, { "epoch": 1.6151955900909876, "grad_norm": 271.3636474609375, "learning_rate": 1.8795936198402029e-07, "loss": 11.6562, "step": 24320 }, { "epoch": 1.6152620043833434, "grad_norm": 270.1302185058594, "learning_rate": 1.8789659926281765e-07, "loss": 15.375, "step": 24321 }, { "epoch": 1.6153284186756989, "grad_norm": 96.21168518066406, "learning_rate": 1.8783384593555917e-07, "loss": 11.4062, "step": 24322 }, { "epoch": 1.6153948329680548, "grad_norm": 324.6995849609375, "learning_rate": 1.8777110200296985e-07, "loss": 21.875, "step": 24323 }, { "epoch": 1.6154612472604104, "grad_norm": 121.28689575195312, "learning_rate": 1.8770836746577668e-07, "loss": 16.4219, "step": 24324 }, { "epoch": 1.615527661552766, "grad_norm": 146.33837890625, "learning_rate": 1.8764564232470448e-07, "loss": 15.8438, "step": 24325 }, { "epoch": 1.615594075845122, "grad_norm": 334.98681640625, "learning_rate": 1.8758292658047892e-07, "loss": 13.8125, "step": 24326 }, { "epoch": 1.6156604901374776, "grad_norm": 307.47015380859375, "learning_rate": 1.8752022023382575e-07, "loss": 15.7656, "step": 24327 }, { "epoch": 1.6157269044298332, "grad_norm": 212.8126678466797, "learning_rate": 1.8745752328547004e-07, "loss": 18.0781, "step": 24328 }, { "epoch": 1.6157933187221891, "grad_norm": 240.85430908203125, "learning_rate": 1.8739483573613723e-07, "loss": 10.9922, "step": 24329 }, { "epoch": 1.6158597330145448, "grad_norm": 143.65447998046875, "learning_rate": 1.8733215758655262e-07, "loss": 15.125, "step": 24330 }, { "epoch": 1.6159261473069004, "grad_norm": 191.99542236328125, "learning_rate": 1.8726948883744054e-07, "loss": 16.9688, "step": 24331 }, { "epoch": 1.6159925615992563, "grad_norm": 165.14796447753906, "learning_rate": 1.8720682948952682e-07, "loss": 17.4375, "step": 24332 }, { "epoch": 1.6160589758916117, "grad_norm": 170.98936462402344, "learning_rate": 1.8714417954353558e-07, "loss": 18.1172, "step": 24333 }, { "epoch": 1.6161253901839676, "grad_norm": 281.7154235839844, "learning_rate": 1.8708153900019175e-07, "loss": 17.3438, "step": 24334 }, { "epoch": 1.6161918044763233, "grad_norm": 152.96669006347656, "learning_rate": 1.870189078602199e-07, "loss": 17.2812, "step": 24335 }, { "epoch": 1.616258218768679, "grad_norm": 124.44818878173828, "learning_rate": 1.8695628612434454e-07, "loss": 19.7031, "step": 24336 }, { "epoch": 1.6163246330610348, "grad_norm": 300.2735595703125, "learning_rate": 1.8689367379329001e-07, "loss": 20.7969, "step": 24337 }, { "epoch": 1.6163910473533905, "grad_norm": 363.4173583984375, "learning_rate": 1.8683107086778094e-07, "loss": 18.625, "step": 24338 }, { "epoch": 1.616457461645746, "grad_norm": 176.0763397216797, "learning_rate": 1.867684773485405e-07, "loss": 20.625, "step": 24339 }, { "epoch": 1.616523875938102, "grad_norm": 134.88780212402344, "learning_rate": 1.8670589323629383e-07, "loss": 15.25, "step": 24340 }, { "epoch": 1.6165902902304576, "grad_norm": 270.4128112792969, "learning_rate": 1.866433185317645e-07, "loss": 14.75, "step": 24341 }, { "epoch": 1.6166567045228133, "grad_norm": 148.76487731933594, "learning_rate": 1.86580753235676e-07, "loss": 18.6562, "step": 24342 }, { "epoch": 1.6167231188151692, "grad_norm": 119.49678039550781, "learning_rate": 1.865181973487524e-07, "loss": 16.5781, "step": 24343 }, { "epoch": 1.6167895331075246, "grad_norm": 174.56556701660156, "learning_rate": 1.8645565087171711e-07, "loss": 18.9219, "step": 24344 }, { "epoch": 1.6168559473998805, "grad_norm": 454.8573303222656, "learning_rate": 1.8639311380529387e-07, "loss": 23.6719, "step": 24345 }, { "epoch": 1.6169223616922361, "grad_norm": 92.6939697265625, "learning_rate": 1.863305861502058e-07, "loss": 9.5312, "step": 24346 }, { "epoch": 1.6169887759845918, "grad_norm": 193.5975799560547, "learning_rate": 1.8626806790717664e-07, "loss": 18.8906, "step": 24347 }, { "epoch": 1.6170551902769477, "grad_norm": 369.9790344238281, "learning_rate": 1.862055590769287e-07, "loss": 23.0312, "step": 24348 }, { "epoch": 1.6171216045693033, "grad_norm": 118.22908782958984, "learning_rate": 1.8614305966018616e-07, "loss": 13.7656, "step": 24349 }, { "epoch": 1.617188018861659, "grad_norm": 251.0910186767578, "learning_rate": 1.8608056965767117e-07, "loss": 21.8906, "step": 24350 }, { "epoch": 1.6172544331540148, "grad_norm": 211.4417266845703, "learning_rate": 1.8601808907010674e-07, "loss": 17.6875, "step": 24351 }, { "epoch": 1.6173208474463705, "grad_norm": 193.6432342529297, "learning_rate": 1.8595561789821567e-07, "loss": 14.2344, "step": 24352 }, { "epoch": 1.6173872617387262, "grad_norm": 204.69406127929688, "learning_rate": 1.858931561427206e-07, "loss": 20.7734, "step": 24353 }, { "epoch": 1.617453676031082, "grad_norm": 204.5458526611328, "learning_rate": 1.8583070380434418e-07, "loss": 14.5703, "step": 24354 }, { "epoch": 1.6175200903234375, "grad_norm": 123.74234008789062, "learning_rate": 1.8576826088380894e-07, "loss": 12.3906, "step": 24355 }, { "epoch": 1.6175865046157933, "grad_norm": 239.45681762695312, "learning_rate": 1.8570582738183628e-07, "loss": 21.125, "step": 24356 }, { "epoch": 1.617652918908149, "grad_norm": 180.64968872070312, "learning_rate": 1.8564340329914975e-07, "loss": 15.625, "step": 24357 }, { "epoch": 1.6177193332005046, "grad_norm": 283.4424743652344, "learning_rate": 1.8558098863647044e-07, "loss": 21.8594, "step": 24358 }, { "epoch": 1.6177857474928605, "grad_norm": 97.38202667236328, "learning_rate": 1.8551858339452053e-07, "loss": 13.0312, "step": 24359 }, { "epoch": 1.6178521617852162, "grad_norm": 137.8828125, "learning_rate": 1.8545618757402203e-07, "loss": 13.7344, "step": 24360 }, { "epoch": 1.6179185760775718, "grad_norm": 145.4356231689453, "learning_rate": 1.8539380117569702e-07, "loss": 17.8906, "step": 24361 }, { "epoch": 1.6179849903699277, "grad_norm": 338.2001037597656, "learning_rate": 1.8533142420026615e-07, "loss": 20.4844, "step": 24362 }, { "epoch": 1.6180514046622834, "grad_norm": 134.46875, "learning_rate": 1.8526905664845217e-07, "loss": 13.75, "step": 24363 }, { "epoch": 1.618117818954639, "grad_norm": 210.3043975830078, "learning_rate": 1.852066985209757e-07, "loss": 15.8125, "step": 24364 }, { "epoch": 1.618184233246995, "grad_norm": 194.9735870361328, "learning_rate": 1.8514434981855832e-07, "loss": 19.25, "step": 24365 }, { "epoch": 1.6182506475393503, "grad_norm": 200.5868682861328, "learning_rate": 1.850820105419212e-07, "loss": 13.2969, "step": 24366 }, { "epoch": 1.6183170618317062, "grad_norm": 121.38838195800781, "learning_rate": 1.8501968069178542e-07, "loss": 17.4062, "step": 24367 }, { "epoch": 1.6183834761240619, "grad_norm": 453.2752990722656, "learning_rate": 1.849573602688721e-07, "loss": 13.0156, "step": 24368 }, { "epoch": 1.6184498904164175, "grad_norm": 297.838134765625, "learning_rate": 1.8489504927390233e-07, "loss": 17.8594, "step": 24369 }, { "epoch": 1.6185163047087734, "grad_norm": 245.65452575683594, "learning_rate": 1.8483274770759606e-07, "loss": 19.0469, "step": 24370 }, { "epoch": 1.618582719001129, "grad_norm": 147.53309631347656, "learning_rate": 1.8477045557067504e-07, "loss": 14.7812, "step": 24371 }, { "epoch": 1.6186491332934847, "grad_norm": 265.4634704589844, "learning_rate": 1.8470817286385897e-07, "loss": 10.2734, "step": 24372 }, { "epoch": 1.6187155475858406, "grad_norm": 341.6004638671875, "learning_rate": 1.846458995878687e-07, "loss": 15.3047, "step": 24373 }, { "epoch": 1.6187819618781962, "grad_norm": 217.7681884765625, "learning_rate": 1.845836357434245e-07, "loss": 12.0781, "step": 24374 }, { "epoch": 1.6188483761705519, "grad_norm": 1141.9158935546875, "learning_rate": 1.8452138133124663e-07, "loss": 12.9219, "step": 24375 }, { "epoch": 1.6189147904629078, "grad_norm": 241.68104553222656, "learning_rate": 1.8445913635205523e-07, "loss": 20.7344, "step": 24376 }, { "epoch": 1.6189812047552632, "grad_norm": 334.7403259277344, "learning_rate": 1.8439690080657012e-07, "loss": 22.125, "step": 24377 }, { "epoch": 1.619047619047619, "grad_norm": 171.15365600585938, "learning_rate": 1.8433467469551146e-07, "loss": 17.0781, "step": 24378 }, { "epoch": 1.6191140333399747, "grad_norm": 352.05267333984375, "learning_rate": 1.842724580195989e-07, "loss": 15.9531, "step": 24379 }, { "epoch": 1.6191804476323304, "grad_norm": 197.08648681640625, "learning_rate": 1.8421025077955255e-07, "loss": 17.4062, "step": 24380 }, { "epoch": 1.6192468619246863, "grad_norm": 508.37689208984375, "learning_rate": 1.8414805297609093e-07, "loss": 16.0312, "step": 24381 }, { "epoch": 1.619313276217042, "grad_norm": 252.1442108154297, "learning_rate": 1.840858646099348e-07, "loss": 19.5, "step": 24382 }, { "epoch": 1.6193796905093976, "grad_norm": 113.3269271850586, "learning_rate": 1.840236856818026e-07, "loss": 12.5156, "step": 24383 }, { "epoch": 1.6194461048017534, "grad_norm": 144.6908721923828, "learning_rate": 1.8396151619241394e-07, "loss": 10.7031, "step": 24384 }, { "epoch": 1.619512519094109, "grad_norm": 195.68458557128906, "learning_rate": 1.8389935614248785e-07, "loss": 14.3281, "step": 24385 }, { "epoch": 1.6195789333864647, "grad_norm": 319.48223876953125, "learning_rate": 1.8383720553274374e-07, "loss": 15.875, "step": 24386 }, { "epoch": 1.6196453476788206, "grad_norm": 429.9460754394531, "learning_rate": 1.8377506436389955e-07, "loss": 21.2578, "step": 24387 }, { "epoch": 1.619711761971176, "grad_norm": 211.10057067871094, "learning_rate": 1.8371293263667543e-07, "loss": 17.4375, "step": 24388 }, { "epoch": 1.619778176263532, "grad_norm": 290.1614990234375, "learning_rate": 1.836508103517891e-07, "loss": 20.3125, "step": 24389 }, { "epoch": 1.6198445905558876, "grad_norm": 300.1753845214844, "learning_rate": 1.8358869750995942e-07, "loss": 12.8594, "step": 24390 }, { "epoch": 1.6199110048482432, "grad_norm": 418.27398681640625, "learning_rate": 1.8352659411190497e-07, "loss": 22.0469, "step": 24391 }, { "epoch": 1.6199774191405991, "grad_norm": 131.5716094970703, "learning_rate": 1.8346450015834402e-07, "loss": 12.5156, "step": 24392 }, { "epoch": 1.6200438334329548, "grad_norm": 149.43115234375, "learning_rate": 1.834024156499949e-07, "loss": 11.8594, "step": 24393 }, { "epoch": 1.6201102477253104, "grad_norm": 273.7439880371094, "learning_rate": 1.8334034058757608e-07, "loss": 12.625, "step": 24394 }, { "epoch": 1.6201766620176663, "grad_norm": 307.9858703613281, "learning_rate": 1.832782749718047e-07, "loss": 16.6406, "step": 24395 }, { "epoch": 1.620243076310022, "grad_norm": 282.6220703125, "learning_rate": 1.8321621880339989e-07, "loss": 11.5781, "step": 24396 }, { "epoch": 1.6203094906023776, "grad_norm": 458.4696960449219, "learning_rate": 1.831541720830785e-07, "loss": 19.4531, "step": 24397 }, { "epoch": 1.6203759048947335, "grad_norm": 226.2425994873047, "learning_rate": 1.8309213481155872e-07, "loss": 17.8438, "step": 24398 }, { "epoch": 1.620442319187089, "grad_norm": 1422.99658203125, "learning_rate": 1.83030106989558e-07, "loss": 18.75, "step": 24399 }, { "epoch": 1.6205087334794448, "grad_norm": 254.60377502441406, "learning_rate": 1.82968088617794e-07, "loss": 21.3594, "step": 24400 }, { "epoch": 1.6205751477718005, "grad_norm": 155.15077209472656, "learning_rate": 1.829060796969839e-07, "loss": 14.7344, "step": 24401 }, { "epoch": 1.620641562064156, "grad_norm": 281.597900390625, "learning_rate": 1.8284408022784548e-07, "loss": 22.5078, "step": 24402 }, { "epoch": 1.620707976356512, "grad_norm": 113.0048599243164, "learning_rate": 1.8278209021109491e-07, "loss": 14.2656, "step": 24403 }, { "epoch": 1.6207743906488676, "grad_norm": 328.7347106933594, "learning_rate": 1.8272010964745055e-07, "loss": 16.2656, "step": 24404 }, { "epoch": 1.6208408049412233, "grad_norm": 180.21954345703125, "learning_rate": 1.8265813853762835e-07, "loss": 16.8438, "step": 24405 }, { "epoch": 1.6209072192335792, "grad_norm": 380.9859924316406, "learning_rate": 1.825961768823454e-07, "loss": 16.5156, "step": 24406 }, { "epoch": 1.6209736335259348, "grad_norm": 164.93002319335938, "learning_rate": 1.825342246823187e-07, "loss": 17.6719, "step": 24407 }, { "epoch": 1.6210400478182905, "grad_norm": 274.8454895019531, "learning_rate": 1.8247228193826492e-07, "loss": 17.4531, "step": 24408 }, { "epoch": 1.6211064621106464, "grad_norm": 141.73243713378906, "learning_rate": 1.8241034865089976e-07, "loss": 16.3281, "step": 24409 }, { "epoch": 1.6211728764030018, "grad_norm": 247.8390655517578, "learning_rate": 1.8234842482094093e-07, "loss": 21.875, "step": 24410 }, { "epoch": 1.6212392906953577, "grad_norm": 199.40237426757812, "learning_rate": 1.8228651044910358e-07, "loss": 22.6094, "step": 24411 }, { "epoch": 1.6213057049877133, "grad_norm": 362.48565673828125, "learning_rate": 1.822246055361042e-07, "loss": 25.5469, "step": 24412 }, { "epoch": 1.621372119280069, "grad_norm": 153.40928649902344, "learning_rate": 1.821627100826596e-07, "loss": 15.3438, "step": 24413 }, { "epoch": 1.6214385335724248, "grad_norm": 389.2369079589844, "learning_rate": 1.821008240894848e-07, "loss": 22.3125, "step": 24414 }, { "epoch": 1.6215049478647805, "grad_norm": 139.9534454345703, "learning_rate": 1.820389475572961e-07, "loss": 12.6406, "step": 24415 }, { "epoch": 1.6215713621571362, "grad_norm": 175.4304962158203, "learning_rate": 1.8197708048680925e-07, "loss": 9.8203, "step": 24416 }, { "epoch": 1.621637776449492, "grad_norm": 117.04803466796875, "learning_rate": 1.8191522287873973e-07, "loss": 13.0625, "step": 24417 }, { "epoch": 1.6217041907418477, "grad_norm": 122.8451919555664, "learning_rate": 1.8185337473380324e-07, "loss": 14.9844, "step": 24418 }, { "epoch": 1.6217706050342033, "grad_norm": 228.692138671875, "learning_rate": 1.8179153605271536e-07, "loss": 16.6719, "step": 24419 }, { "epoch": 1.6218370193265592, "grad_norm": 177.367919921875, "learning_rate": 1.817297068361907e-07, "loss": 17.8125, "step": 24420 }, { "epoch": 1.6219034336189146, "grad_norm": 179.1835174560547, "learning_rate": 1.816678870849454e-07, "loss": 14.4844, "step": 24421 }, { "epoch": 1.6219698479112705, "grad_norm": 302.666015625, "learning_rate": 1.8160607679969397e-07, "loss": 17.5625, "step": 24422 }, { "epoch": 1.6220362622036262, "grad_norm": 403.84857177734375, "learning_rate": 1.815442759811514e-07, "loss": 28.5312, "step": 24423 }, { "epoch": 1.6221026764959818, "grad_norm": 480.14190673828125, "learning_rate": 1.8148248463003279e-07, "loss": 20.0625, "step": 24424 }, { "epoch": 1.6221690907883377, "grad_norm": 137.6014862060547, "learning_rate": 1.8142070274705301e-07, "loss": 14.7812, "step": 24425 }, { "epoch": 1.6222355050806934, "grad_norm": 181.94776916503906, "learning_rate": 1.8135893033292605e-07, "loss": 17.125, "step": 24426 }, { "epoch": 1.622301919373049, "grad_norm": 335.7792053222656, "learning_rate": 1.8129716738836742e-07, "loss": 20.4375, "step": 24427 }, { "epoch": 1.622368333665405, "grad_norm": 222.70957946777344, "learning_rate": 1.812354139140908e-07, "loss": 18.0781, "step": 24428 }, { "epoch": 1.6224347479577605, "grad_norm": 198.9164581298828, "learning_rate": 1.8117366991081084e-07, "loss": 16.1875, "step": 24429 }, { "epoch": 1.6225011622501162, "grad_norm": 164.7115478515625, "learning_rate": 1.8111193537924163e-07, "loss": 15.1406, "step": 24430 }, { "epoch": 1.622567576542472, "grad_norm": 166.47732543945312, "learning_rate": 1.8105021032009747e-07, "loss": 12.1562, "step": 24431 }, { "epoch": 1.6226339908348275, "grad_norm": 196.59005737304688, "learning_rate": 1.8098849473409217e-07, "loss": 17.1719, "step": 24432 }, { "epoch": 1.6227004051271834, "grad_norm": 480.1043701171875, "learning_rate": 1.8092678862193989e-07, "loss": 37.4688, "step": 24433 }, { "epoch": 1.622766819419539, "grad_norm": 256.7744140625, "learning_rate": 1.8086509198435373e-07, "loss": 20.9062, "step": 24434 }, { "epoch": 1.6228332337118947, "grad_norm": 254.64353942871094, "learning_rate": 1.8080340482204838e-07, "loss": 19.7344, "step": 24435 }, { "epoch": 1.6228996480042506, "grad_norm": 153.8464813232422, "learning_rate": 1.8074172713573667e-07, "loss": 15.3594, "step": 24436 }, { "epoch": 1.6229660622966062, "grad_norm": 549.1748046875, "learning_rate": 1.8068005892613214e-07, "loss": 15.25, "step": 24437 }, { "epoch": 1.6230324765889619, "grad_norm": 224.77529907226562, "learning_rate": 1.8061840019394837e-07, "loss": 20.6016, "step": 24438 }, { "epoch": 1.6230988908813178, "grad_norm": 140.89175415039062, "learning_rate": 1.8055675093989832e-07, "loss": 16.8906, "step": 24439 }, { "epoch": 1.6231653051736734, "grad_norm": 150.21322631835938, "learning_rate": 1.804951111646954e-07, "loss": 14.9531, "step": 24440 }, { "epoch": 1.623231719466029, "grad_norm": 156.73033142089844, "learning_rate": 1.8043348086905263e-07, "loss": 15.9531, "step": 24441 }, { "epoch": 1.623298133758385, "grad_norm": 254.56768798828125, "learning_rate": 1.8037186005368232e-07, "loss": 23.25, "step": 24442 }, { "epoch": 1.6233645480507404, "grad_norm": 164.41917419433594, "learning_rate": 1.803102487192981e-07, "loss": 19.8125, "step": 24443 }, { "epoch": 1.6234309623430963, "grad_norm": 214.03155517578125, "learning_rate": 1.8024864686661212e-07, "loss": 17.0, "step": 24444 }, { "epoch": 1.623497376635452, "grad_norm": 227.84336853027344, "learning_rate": 1.801870544963371e-07, "loss": 22.875, "step": 24445 }, { "epoch": 1.6235637909278076, "grad_norm": 314.5282897949219, "learning_rate": 1.8012547160918545e-07, "loss": 14.4062, "step": 24446 }, { "epoch": 1.6236302052201634, "grad_norm": 285.4802551269531, "learning_rate": 1.8006389820586987e-07, "loss": 16.2188, "step": 24447 }, { "epoch": 1.623696619512519, "grad_norm": 121.86910247802734, "learning_rate": 1.8000233428710176e-07, "loss": 13.9219, "step": 24448 }, { "epoch": 1.6237630338048747, "grad_norm": 236.6608428955078, "learning_rate": 1.7994077985359412e-07, "loss": 14.125, "step": 24449 }, { "epoch": 1.6238294480972306, "grad_norm": 433.96234130859375, "learning_rate": 1.7987923490605883e-07, "loss": 14.3906, "step": 24450 }, { "epoch": 1.6238958623895863, "grad_norm": 274.5397644042969, "learning_rate": 1.7981769944520718e-07, "loss": 16.5156, "step": 24451 }, { "epoch": 1.623962276681942, "grad_norm": 285.5708923339844, "learning_rate": 1.7975617347175199e-07, "loss": 18.1562, "step": 24452 }, { "epoch": 1.6240286909742978, "grad_norm": 213.01025390625, "learning_rate": 1.796946569864042e-07, "loss": 15.3906, "step": 24453 }, { "epoch": 1.6240951052666532, "grad_norm": 238.4043731689453, "learning_rate": 1.7963314998987543e-07, "loss": 17.4062, "step": 24454 }, { "epoch": 1.6241615195590091, "grad_norm": 192.3302764892578, "learning_rate": 1.795716524828774e-07, "loss": 11.7656, "step": 24455 }, { "epoch": 1.6242279338513648, "grad_norm": 163.01902770996094, "learning_rate": 1.7951016446612155e-07, "loss": 14.8438, "step": 24456 }, { "epoch": 1.6242943481437204, "grad_norm": 158.40362548828125, "learning_rate": 1.794486859403188e-07, "loss": 18.5625, "step": 24457 }, { "epoch": 1.6243607624360763, "grad_norm": 253.64816284179688, "learning_rate": 1.7938721690618087e-07, "loss": 11.3281, "step": 24458 }, { "epoch": 1.624427176728432, "grad_norm": 206.99807739257812, "learning_rate": 1.7932575736441792e-07, "loss": 17.7188, "step": 24459 }, { "epoch": 1.6244935910207876, "grad_norm": 182.99124145507812, "learning_rate": 1.792643073157418e-07, "loss": 15.9062, "step": 24460 }, { "epoch": 1.6245600053131435, "grad_norm": 136.35556030273438, "learning_rate": 1.792028667608627e-07, "loss": 12.125, "step": 24461 }, { "epoch": 1.6246264196054991, "grad_norm": 583.5072021484375, "learning_rate": 1.7914143570049155e-07, "loss": 18.2812, "step": 24462 }, { "epoch": 1.6246928338978548, "grad_norm": 117.76343536376953, "learning_rate": 1.790800141353389e-07, "loss": 14.7422, "step": 24463 }, { "epoch": 1.6247592481902107, "grad_norm": 108.14259338378906, "learning_rate": 1.790186020661153e-07, "loss": 12.4219, "step": 24464 }, { "epoch": 1.624825662482566, "grad_norm": 141.60687255859375, "learning_rate": 1.7895719949353107e-07, "loss": 13.5469, "step": 24465 }, { "epoch": 1.624892076774922, "grad_norm": 2516.937255859375, "learning_rate": 1.7889580641829682e-07, "loss": 12.7109, "step": 24466 }, { "epoch": 1.6249584910672776, "grad_norm": 161.5772247314453, "learning_rate": 1.7883442284112216e-07, "loss": 18.3281, "step": 24467 }, { "epoch": 1.6250249053596333, "grad_norm": 132.56817626953125, "learning_rate": 1.7877304876271738e-07, "loss": 11.5, "step": 24468 }, { "epoch": 1.6250913196519892, "grad_norm": 333.2025146484375, "learning_rate": 1.7871168418379234e-07, "loss": 21.6406, "step": 24469 }, { "epoch": 1.6251577339443448, "grad_norm": 125.127685546875, "learning_rate": 1.7865032910505705e-07, "loss": 10.3281, "step": 24470 }, { "epoch": 1.6252241482367005, "grad_norm": 180.35946655273438, "learning_rate": 1.7858898352722108e-07, "loss": 16.5938, "step": 24471 }, { "epoch": 1.6252905625290563, "grad_norm": 158.26824951171875, "learning_rate": 1.7852764745099447e-07, "loss": 13.8594, "step": 24472 }, { "epoch": 1.625356976821412, "grad_norm": 1007.8617553710938, "learning_rate": 1.7846632087708568e-07, "loss": 13.875, "step": 24473 }, { "epoch": 1.6254233911137677, "grad_norm": 161.22938537597656, "learning_rate": 1.7840500380620538e-07, "loss": 13.9844, "step": 24474 }, { "epoch": 1.6254898054061235, "grad_norm": 239.41824340820312, "learning_rate": 1.7834369623906188e-07, "loss": 20.8906, "step": 24475 }, { "epoch": 1.625556219698479, "grad_norm": 187.55360412597656, "learning_rate": 1.782823981763648e-07, "loss": 12.3125, "step": 24476 }, { "epoch": 1.6256226339908348, "grad_norm": 140.2659454345703, "learning_rate": 1.782211096188231e-07, "loss": 14.2969, "step": 24477 }, { "epoch": 1.6256890482831905, "grad_norm": 283.40350341796875, "learning_rate": 1.7815983056714567e-07, "loss": 16.3906, "step": 24478 }, { "epoch": 1.6257554625755462, "grad_norm": 960.5669555664062, "learning_rate": 1.7809856102204147e-07, "loss": 15.75, "step": 24479 }, { "epoch": 1.625821876867902, "grad_norm": 112.74671936035156, "learning_rate": 1.7803730098421943e-07, "loss": 15.7344, "step": 24480 }, { "epoch": 1.6258882911602577, "grad_norm": 274.28704833984375, "learning_rate": 1.7797605045438746e-07, "loss": 14.9375, "step": 24481 }, { "epoch": 1.6259547054526133, "grad_norm": 183.49172973632812, "learning_rate": 1.7791480943325498e-07, "loss": 16.5469, "step": 24482 }, { "epoch": 1.6260211197449692, "grad_norm": 131.16673278808594, "learning_rate": 1.7785357792152979e-07, "loss": 15.2656, "step": 24483 }, { "epoch": 1.6260875340373249, "grad_norm": 230.80593872070312, "learning_rate": 1.7779235591992016e-07, "loss": 14.4688, "step": 24484 }, { "epoch": 1.6261539483296805, "grad_norm": 392.0557861328125, "learning_rate": 1.7773114342913453e-07, "loss": 22.5781, "step": 24485 }, { "epoch": 1.6262203626220364, "grad_norm": 228.33441162109375, "learning_rate": 1.7766994044988093e-07, "loss": 13.9531, "step": 24486 }, { "epoch": 1.6262867769143918, "grad_norm": 406.4488220214844, "learning_rate": 1.7760874698286722e-07, "loss": 14.3906, "step": 24487 }, { "epoch": 1.6263531912067477, "grad_norm": 217.15185546875, "learning_rate": 1.7754756302880137e-07, "loss": 15.8125, "step": 24488 }, { "epoch": 1.6264196054991034, "grad_norm": 743.5256958007812, "learning_rate": 1.7748638858839126e-07, "loss": 14.0156, "step": 24489 }, { "epoch": 1.626486019791459, "grad_norm": 147.47564697265625, "learning_rate": 1.774252236623438e-07, "loss": 13.9688, "step": 24490 }, { "epoch": 1.626552434083815, "grad_norm": 2944.801513671875, "learning_rate": 1.7736406825136752e-07, "loss": 21.3906, "step": 24491 }, { "epoch": 1.6266188483761705, "grad_norm": 508.13287353515625, "learning_rate": 1.7730292235616917e-07, "loss": 12.5547, "step": 24492 }, { "epoch": 1.6266852626685262, "grad_norm": 200.51841735839844, "learning_rate": 1.772417859774561e-07, "loss": 13.5625, "step": 24493 }, { "epoch": 1.626751676960882, "grad_norm": 340.9247131347656, "learning_rate": 1.771806591159357e-07, "loss": 20.375, "step": 24494 }, { "epoch": 1.6268180912532377, "grad_norm": 265.21893310546875, "learning_rate": 1.7711954177231502e-07, "loss": 25.2812, "step": 24495 }, { "epoch": 1.6268845055455934, "grad_norm": 593.229736328125, "learning_rate": 1.770584339473009e-07, "loss": 14.8906, "step": 24496 }, { "epoch": 1.6269509198379493, "grad_norm": 360.02215576171875, "learning_rate": 1.7699733564160057e-07, "loss": 17.5312, "step": 24497 }, { "epoch": 1.6270173341303047, "grad_norm": 273.7900695800781, "learning_rate": 1.769362468559199e-07, "loss": 16.8594, "step": 24498 }, { "epoch": 1.6270837484226606, "grad_norm": 2561.314697265625, "learning_rate": 1.7687516759096676e-07, "loss": 23.7656, "step": 24499 }, { "epoch": 1.6271501627150162, "grad_norm": 297.4837951660156, "learning_rate": 1.7681409784744671e-07, "loss": 18.625, "step": 24500 }, { "epoch": 1.6272165770073719, "grad_norm": 446.2608947753906, "learning_rate": 1.7675303762606653e-07, "loss": 18.8438, "step": 24501 }, { "epoch": 1.6272829912997278, "grad_norm": 361.19775390625, "learning_rate": 1.766919869275324e-07, "loss": 15.5469, "step": 24502 }, { "epoch": 1.6273494055920834, "grad_norm": 328.02838134765625, "learning_rate": 1.7663094575255076e-07, "loss": 17.8438, "step": 24503 }, { "epoch": 1.627415819884439, "grad_norm": 211.35552978515625, "learning_rate": 1.7656991410182742e-07, "loss": 16.25, "step": 24504 }, { "epoch": 1.627482234176795, "grad_norm": 440.6945495605469, "learning_rate": 1.765088919760689e-07, "loss": 18.3125, "step": 24505 }, { "epoch": 1.6275486484691506, "grad_norm": 243.9070587158203, "learning_rate": 1.7644787937598004e-07, "loss": 18.2656, "step": 24506 }, { "epoch": 1.6276150627615062, "grad_norm": 390.2516784667969, "learning_rate": 1.763868763022678e-07, "loss": 16.0156, "step": 24507 }, { "epoch": 1.6276814770538621, "grad_norm": 157.49261474609375, "learning_rate": 1.76325882755637e-07, "loss": 15.4219, "step": 24508 }, { "epoch": 1.6277478913462176, "grad_norm": 151.9881591796875, "learning_rate": 1.762648987367934e-07, "loss": 17.0, "step": 24509 }, { "epoch": 1.6278143056385734, "grad_norm": 188.37130737304688, "learning_rate": 1.7620392424644249e-07, "loss": 21.3906, "step": 24510 }, { "epoch": 1.627880719930929, "grad_norm": 112.12657928466797, "learning_rate": 1.7614295928528978e-07, "loss": 19.3906, "step": 24511 }, { "epoch": 1.6279471342232847, "grad_norm": 530.9277954101562, "learning_rate": 1.760820038540397e-07, "loss": 14.0781, "step": 24512 }, { "epoch": 1.6280135485156406, "grad_norm": 379.56170654296875, "learning_rate": 1.760210579533985e-07, "loss": 18.625, "step": 24513 }, { "epoch": 1.6280799628079963, "grad_norm": 187.97579956054688, "learning_rate": 1.7596012158407026e-07, "loss": 16.7344, "step": 24514 }, { "epoch": 1.628146377100352, "grad_norm": 626.5086059570312, "learning_rate": 1.7589919474676018e-07, "loss": 17.875, "step": 24515 }, { "epoch": 1.6282127913927078, "grad_norm": 187.4233856201172, "learning_rate": 1.7583827744217294e-07, "loss": 21.1875, "step": 24516 }, { "epoch": 1.6282792056850635, "grad_norm": 345.7858581542969, "learning_rate": 1.7577736967101343e-07, "loss": 17.4375, "step": 24517 }, { "epoch": 1.6283456199774191, "grad_norm": 299.46990966796875, "learning_rate": 1.7571647143398593e-07, "loss": 17.3438, "step": 24518 }, { "epoch": 1.628412034269775, "grad_norm": 105.70763397216797, "learning_rate": 1.7565558273179527e-07, "loss": 11.6719, "step": 24519 }, { "epoch": 1.6284784485621304, "grad_norm": 210.2173309326172, "learning_rate": 1.7559470356514495e-07, "loss": 17.1406, "step": 24520 }, { "epoch": 1.6285448628544863, "grad_norm": 659.5040283203125, "learning_rate": 1.7553383393474018e-07, "loss": 21.5625, "step": 24521 }, { "epoch": 1.628611277146842, "grad_norm": 205.7495574951172, "learning_rate": 1.7547297384128435e-07, "loss": 14.3906, "step": 24522 }, { "epoch": 1.6286776914391976, "grad_norm": 185.18377685546875, "learning_rate": 1.754121232854815e-07, "loss": 14.4219, "step": 24523 }, { "epoch": 1.6287441057315535, "grad_norm": 244.1100616455078, "learning_rate": 1.753512822680363e-07, "loss": 15.4062, "step": 24524 }, { "epoch": 1.6288105200239091, "grad_norm": 179.49635314941406, "learning_rate": 1.7529045078965165e-07, "loss": 14.5625, "step": 24525 }, { "epoch": 1.6288769343162648, "grad_norm": 207.84202575683594, "learning_rate": 1.7522962885103143e-07, "loss": 19.0938, "step": 24526 }, { "epoch": 1.6289433486086207, "grad_norm": 253.9385223388672, "learning_rate": 1.751688164528794e-07, "loss": 18.7812, "step": 24527 }, { "epoch": 1.6290097629009763, "grad_norm": 150.0181121826172, "learning_rate": 1.7510801359589877e-07, "loss": 14.1406, "step": 24528 }, { "epoch": 1.629076177193332, "grad_norm": 82.22564697265625, "learning_rate": 1.75047220280793e-07, "loss": 17.8125, "step": 24529 }, { "epoch": 1.6291425914856879, "grad_norm": 149.5496368408203, "learning_rate": 1.749864365082656e-07, "loss": 19.6406, "step": 24530 }, { "epoch": 1.6292090057780433, "grad_norm": 104.68927001953125, "learning_rate": 1.7492566227901906e-07, "loss": 16.4844, "step": 24531 }, { "epoch": 1.6292754200703992, "grad_norm": 151.53732299804688, "learning_rate": 1.7486489759375667e-07, "loss": 13.6406, "step": 24532 }, { "epoch": 1.6293418343627548, "grad_norm": 332.7691650390625, "learning_rate": 1.7480414245318141e-07, "loss": 15.0312, "step": 24533 }, { "epoch": 1.6294082486551105, "grad_norm": 202.78323364257812, "learning_rate": 1.74743396857996e-07, "loss": 20.0312, "step": 24534 }, { "epoch": 1.6294746629474663, "grad_norm": 156.43429565429688, "learning_rate": 1.7468266080890303e-07, "loss": 20.8125, "step": 24535 }, { "epoch": 1.629541077239822, "grad_norm": 143.1381072998047, "learning_rate": 1.7462193430660543e-07, "loss": 14.8281, "step": 24536 }, { "epoch": 1.6296074915321777, "grad_norm": 158.1834259033203, "learning_rate": 1.7456121735180485e-07, "loss": 13.7969, "step": 24537 }, { "epoch": 1.6296739058245335, "grad_norm": 248.26327514648438, "learning_rate": 1.745005099452047e-07, "loss": 18.6562, "step": 24538 }, { "epoch": 1.6297403201168892, "grad_norm": 159.84182739257812, "learning_rate": 1.7443981208750636e-07, "loss": 25.0, "step": 24539 }, { "epoch": 1.6298067344092448, "grad_norm": 157.99667358398438, "learning_rate": 1.743791237794121e-07, "loss": 15.2656, "step": 24540 }, { "epoch": 1.6298731487016007, "grad_norm": 238.56295776367188, "learning_rate": 1.7431844502162418e-07, "loss": 15.7188, "step": 24541 }, { "epoch": 1.6299395629939561, "grad_norm": 165.28952026367188, "learning_rate": 1.7425777581484435e-07, "loss": 17.9375, "step": 24542 }, { "epoch": 1.630005977286312, "grad_norm": 170.95664978027344, "learning_rate": 1.741971161597744e-07, "loss": 13.9531, "step": 24543 }, { "epoch": 1.6300723915786677, "grad_norm": 196.77194213867188, "learning_rate": 1.741364660571163e-07, "loss": 15.875, "step": 24544 }, { "epoch": 1.6301388058710233, "grad_norm": 163.66966247558594, "learning_rate": 1.7407582550757084e-07, "loss": 15.1094, "step": 24545 }, { "epoch": 1.6302052201633792, "grad_norm": 175.6586151123047, "learning_rate": 1.7401519451184055e-07, "loss": 13.5312, "step": 24546 }, { "epoch": 1.6302716344557349, "grad_norm": 179.0808563232422, "learning_rate": 1.7395457307062578e-07, "loss": 15.7656, "step": 24547 }, { "epoch": 1.6303380487480905, "grad_norm": 223.17376708984375, "learning_rate": 1.738939611846283e-07, "loss": 17.6875, "step": 24548 }, { "epoch": 1.6304044630404464, "grad_norm": 457.1237487792969, "learning_rate": 1.738333588545491e-07, "loss": 17.625, "step": 24549 }, { "epoch": 1.630470877332802, "grad_norm": 217.0120086669922, "learning_rate": 1.7377276608108948e-07, "loss": 14.8281, "step": 24550 }, { "epoch": 1.6305372916251577, "grad_norm": 325.56781005859375, "learning_rate": 1.7371218286494948e-07, "loss": 21.125, "step": 24551 }, { "epoch": 1.6306037059175136, "grad_norm": 352.97418212890625, "learning_rate": 1.7365160920683108e-07, "loss": 17.9062, "step": 24552 }, { "epoch": 1.630670120209869, "grad_norm": 122.62224578857422, "learning_rate": 1.7359104510743405e-07, "loss": 12.7812, "step": 24553 }, { "epoch": 1.6307365345022249, "grad_norm": 182.1390838623047, "learning_rate": 1.7353049056745926e-07, "loss": 14.4688, "step": 24554 }, { "epoch": 1.6308029487945805, "grad_norm": 138.51443481445312, "learning_rate": 1.734699455876073e-07, "loss": 13.4688, "step": 24555 }, { "epoch": 1.6308693630869362, "grad_norm": 432.8396911621094, "learning_rate": 1.7340941016857834e-07, "loss": 15.4688, "step": 24556 }, { "epoch": 1.630935777379292, "grad_norm": 180.60972595214844, "learning_rate": 1.7334888431107264e-07, "loss": 13.6406, "step": 24557 }, { "epoch": 1.6310021916716477, "grad_norm": 199.64146423339844, "learning_rate": 1.7328836801579073e-07, "loss": 14.5, "step": 24558 }, { "epoch": 1.6310686059640034, "grad_norm": 356.0593566894531, "learning_rate": 1.732278612834317e-07, "loss": 17.2812, "step": 24559 }, { "epoch": 1.6311350202563593, "grad_norm": 478.3100280761719, "learning_rate": 1.7316736411469623e-07, "loss": 16.75, "step": 24560 }, { "epoch": 1.631201434548715, "grad_norm": 169.0133514404297, "learning_rate": 1.7310687651028422e-07, "loss": 15.3594, "step": 24561 }, { "epoch": 1.6312678488410706, "grad_norm": 94.12568664550781, "learning_rate": 1.7304639847089453e-07, "loss": 12.1562, "step": 24562 }, { "epoch": 1.6313342631334264, "grad_norm": 2429.03466796875, "learning_rate": 1.729859299972277e-07, "loss": 11.0781, "step": 24563 }, { "epoch": 1.6314006774257819, "grad_norm": 276.3498840332031, "learning_rate": 1.7292547108998257e-07, "loss": 19.7344, "step": 24564 }, { "epoch": 1.6314670917181378, "grad_norm": 340.9248352050781, "learning_rate": 1.7286502174985874e-07, "loss": 17.7812, "step": 24565 }, { "epoch": 1.6315335060104934, "grad_norm": 182.79429626464844, "learning_rate": 1.7280458197755533e-07, "loss": 14.5938, "step": 24566 }, { "epoch": 1.631599920302849, "grad_norm": 131.41400146484375, "learning_rate": 1.7274415177377145e-07, "loss": 10.8203, "step": 24567 }, { "epoch": 1.631666334595205, "grad_norm": 171.5285186767578, "learning_rate": 1.726837311392062e-07, "loss": 10.5, "step": 24568 }, { "epoch": 1.6317327488875606, "grad_norm": 305.06365966796875, "learning_rate": 1.7262332007455882e-07, "loss": 14.4844, "step": 24569 }, { "epoch": 1.6317991631799162, "grad_norm": 140.02674865722656, "learning_rate": 1.725629185805273e-07, "loss": 15.1719, "step": 24570 }, { "epoch": 1.6318655774722721, "grad_norm": 217.05628967285156, "learning_rate": 1.7250252665781118e-07, "loss": 11.6719, "step": 24571 }, { "epoch": 1.6319319917646278, "grad_norm": 122.16228485107422, "learning_rate": 1.7244214430710857e-07, "loss": 12.6406, "step": 24572 }, { "epoch": 1.6319984060569834, "grad_norm": 321.0921325683594, "learning_rate": 1.7238177152911793e-07, "loss": 18.2031, "step": 24573 }, { "epoch": 1.6320648203493393, "grad_norm": 247.1263427734375, "learning_rate": 1.7232140832453778e-07, "loss": 18.0156, "step": 24574 }, { "epoch": 1.6321312346416947, "grad_norm": 134.0345001220703, "learning_rate": 1.722610546940666e-07, "loss": 17.3125, "step": 24575 }, { "epoch": 1.6321976489340506, "grad_norm": 239.52171325683594, "learning_rate": 1.722007106384017e-07, "loss": 18.9219, "step": 24576 }, { "epoch": 1.6322640632264063, "grad_norm": 252.36851501464844, "learning_rate": 1.7214037615824205e-07, "loss": 16.4297, "step": 24577 }, { "epoch": 1.632330477518762, "grad_norm": 237.39076232910156, "learning_rate": 1.7208005125428505e-07, "loss": 23.5625, "step": 24578 }, { "epoch": 1.6323968918111178, "grad_norm": 131.38134765625, "learning_rate": 1.720197359272285e-07, "loss": 14.0781, "step": 24579 }, { "epoch": 1.6324633061034735, "grad_norm": 353.6466369628906, "learning_rate": 1.7195943017777026e-07, "loss": 13.6562, "step": 24580 }, { "epoch": 1.632529720395829, "grad_norm": 390.8282775878906, "learning_rate": 1.7189913400660782e-07, "loss": 15.2344, "step": 24581 }, { "epoch": 1.632596134688185, "grad_norm": 197.5979766845703, "learning_rate": 1.7183884741443878e-07, "loss": 14.9375, "step": 24582 }, { "epoch": 1.6326625489805406, "grad_norm": 115.12716674804688, "learning_rate": 1.7177857040196054e-07, "loss": 14.2812, "step": 24583 }, { "epoch": 1.6327289632728963, "grad_norm": 218.88706970214844, "learning_rate": 1.7171830296986978e-07, "loss": 14.7891, "step": 24584 }, { "epoch": 1.6327953775652522, "grad_norm": 143.88595581054688, "learning_rate": 1.716580451188645e-07, "loss": 16.0312, "step": 24585 }, { "epoch": 1.6328617918576076, "grad_norm": 206.3456268310547, "learning_rate": 1.7159779684964115e-07, "loss": 20.1875, "step": 24586 }, { "epoch": 1.6329282061499635, "grad_norm": 92.71926879882812, "learning_rate": 1.7153755816289685e-07, "loss": 14.7031, "step": 24587 }, { "epoch": 1.6329946204423191, "grad_norm": 295.75335693359375, "learning_rate": 1.7147732905932822e-07, "loss": 14.1719, "step": 24588 }, { "epoch": 1.6330610347346748, "grad_norm": 324.49591064453125, "learning_rate": 1.7141710953963217e-07, "loss": 17.1406, "step": 24589 }, { "epoch": 1.6331274490270307, "grad_norm": 182.25689697265625, "learning_rate": 1.7135689960450517e-07, "loss": 19.2969, "step": 24590 }, { "epoch": 1.6331938633193863, "grad_norm": 82.87645721435547, "learning_rate": 1.7129669925464407e-07, "loss": 12.9531, "step": 24591 }, { "epoch": 1.633260277611742, "grad_norm": 158.30441284179688, "learning_rate": 1.7123650849074434e-07, "loss": 13.7969, "step": 24592 }, { "epoch": 1.6333266919040978, "grad_norm": 208.2223358154297, "learning_rate": 1.7117632731350316e-07, "loss": 19.3906, "step": 24593 }, { "epoch": 1.6333931061964535, "grad_norm": 183.32672119140625, "learning_rate": 1.7111615572361626e-07, "loss": 12.0938, "step": 24594 }, { "epoch": 1.6334595204888092, "grad_norm": 401.7109069824219, "learning_rate": 1.7105599372177959e-07, "loss": 25.1719, "step": 24595 }, { "epoch": 1.633525934781165, "grad_norm": 208.10824584960938, "learning_rate": 1.7099584130868915e-07, "loss": 16.0625, "step": 24596 }, { "epoch": 1.6335923490735205, "grad_norm": 209.5883331298828, "learning_rate": 1.7093569848504096e-07, "loss": 12.1875, "step": 24597 }, { "epoch": 1.6336587633658763, "grad_norm": 333.7691955566406, "learning_rate": 1.7087556525153036e-07, "loss": 19.8125, "step": 24598 }, { "epoch": 1.633725177658232, "grad_norm": 340.6286315917969, "learning_rate": 1.7081544160885318e-07, "loss": 21.2188, "step": 24599 }, { "epoch": 1.6337915919505877, "grad_norm": 141.7501678466797, "learning_rate": 1.7075532755770517e-07, "loss": 17.3438, "step": 24600 }, { "epoch": 1.6338580062429435, "grad_norm": 529.55859375, "learning_rate": 1.7069522309878082e-07, "loss": 25.625, "step": 24601 }, { "epoch": 1.6339244205352992, "grad_norm": 184.1748809814453, "learning_rate": 1.7063512823277636e-07, "loss": 16.6094, "step": 24602 }, { "epoch": 1.6339908348276548, "grad_norm": 182.36915588378906, "learning_rate": 1.7057504296038628e-07, "loss": 15.4844, "step": 24603 }, { "epoch": 1.6340572491200107, "grad_norm": 303.78851318359375, "learning_rate": 1.7051496728230587e-07, "loss": 15.6562, "step": 24604 }, { "epoch": 1.6341236634123664, "grad_norm": 213.34434509277344, "learning_rate": 1.704549011992299e-07, "loss": 14.8125, "step": 24605 }, { "epoch": 1.634190077704722, "grad_norm": 426.5104675292969, "learning_rate": 1.703948447118534e-07, "loss": 16.2344, "step": 24606 }, { "epoch": 1.634256491997078, "grad_norm": 612.4469604492188, "learning_rate": 1.703347978208709e-07, "loss": 27.8906, "step": 24607 }, { "epoch": 1.6343229062894333, "grad_norm": 218.68531799316406, "learning_rate": 1.7027476052697732e-07, "loss": 13.75, "step": 24608 }, { "epoch": 1.6343893205817892, "grad_norm": 496.1969299316406, "learning_rate": 1.702147328308664e-07, "loss": 16.5312, "step": 24609 }, { "epoch": 1.6344557348741449, "grad_norm": 130.91534423828125, "learning_rate": 1.701547147332334e-07, "loss": 16.4844, "step": 24610 }, { "epoch": 1.6345221491665005, "grad_norm": 146.7718505859375, "learning_rate": 1.7009470623477194e-07, "loss": 15.1875, "step": 24611 }, { "epoch": 1.6345885634588564, "grad_norm": 139.51095581054688, "learning_rate": 1.700347073361763e-07, "loss": 13.9688, "step": 24612 }, { "epoch": 1.634654977751212, "grad_norm": 169.58876037597656, "learning_rate": 1.6997471803814067e-07, "loss": 17.1406, "step": 24613 }, { "epoch": 1.6347213920435677, "grad_norm": 490.0318908691406, "learning_rate": 1.6991473834135904e-07, "loss": 18.1719, "step": 24614 }, { "epoch": 1.6347878063359236, "grad_norm": 330.8063049316406, "learning_rate": 1.6985476824652466e-07, "loss": 26.2812, "step": 24615 }, { "epoch": 1.6348542206282792, "grad_norm": 199.55401611328125, "learning_rate": 1.6979480775433208e-07, "loss": 21.3438, "step": 24616 }, { "epoch": 1.6349206349206349, "grad_norm": 306.6442565917969, "learning_rate": 1.6973485686547416e-07, "loss": 29.4062, "step": 24617 }, { "epoch": 1.6349870492129908, "grad_norm": 144.72683715820312, "learning_rate": 1.6967491558064474e-07, "loss": 12.7344, "step": 24618 }, { "epoch": 1.6350534635053462, "grad_norm": 100.56509399414062, "learning_rate": 1.6961498390053707e-07, "loss": 13.4688, "step": 24619 }, { "epoch": 1.635119877797702, "grad_norm": 308.8023986816406, "learning_rate": 1.6955506182584444e-07, "loss": 15.2344, "step": 24620 }, { "epoch": 1.635186292090058, "grad_norm": 217.13987731933594, "learning_rate": 1.6949514935725995e-07, "loss": 22.4062, "step": 24621 }, { "epoch": 1.6352527063824134, "grad_norm": 373.6960754394531, "learning_rate": 1.6943524649547703e-07, "loss": 15.1562, "step": 24622 }, { "epoch": 1.6353191206747693, "grad_norm": 282.333984375, "learning_rate": 1.6937535324118758e-07, "loss": 25.0781, "step": 24623 }, { "epoch": 1.635385534967125, "grad_norm": 154.15701293945312, "learning_rate": 1.6931546959508568e-07, "loss": 15.8906, "step": 24624 }, { "epoch": 1.6354519492594806, "grad_norm": 110.6510238647461, "learning_rate": 1.6925559555786318e-07, "loss": 10.8984, "step": 24625 }, { "epoch": 1.6355183635518364, "grad_norm": 534.850830078125, "learning_rate": 1.691957311302129e-07, "loss": 20.9531, "step": 24626 }, { "epoch": 1.635584777844192, "grad_norm": 282.7091369628906, "learning_rate": 1.6913587631282733e-07, "loss": 14.5625, "step": 24627 }, { "epoch": 1.6356511921365477, "grad_norm": 177.32276916503906, "learning_rate": 1.690760311063989e-07, "loss": 12.9844, "step": 24628 }, { "epoch": 1.6357176064289036, "grad_norm": 232.16873168945312, "learning_rate": 1.690161955116197e-07, "loss": 14.9688, "step": 24629 }, { "epoch": 1.635784020721259, "grad_norm": 160.8818359375, "learning_rate": 1.6895636952918236e-07, "loss": 17.0156, "step": 24630 }, { "epoch": 1.635850435013615, "grad_norm": 149.95755004882812, "learning_rate": 1.6889655315977792e-07, "loss": 14.0469, "step": 24631 }, { "epoch": 1.6359168493059708, "grad_norm": 118.70962524414062, "learning_rate": 1.688367464040994e-07, "loss": 13.7656, "step": 24632 }, { "epoch": 1.6359832635983262, "grad_norm": 299.8350830078125, "learning_rate": 1.6877694926283793e-07, "loss": 22.0781, "step": 24633 }, { "epoch": 1.6360496778906821, "grad_norm": 318.01129150390625, "learning_rate": 1.6871716173668516e-07, "loss": 19.8125, "step": 24634 }, { "epoch": 1.6361160921830378, "grad_norm": 323.0733642578125, "learning_rate": 1.6865738382633333e-07, "loss": 16.5312, "step": 24635 }, { "epoch": 1.6361825064753934, "grad_norm": 209.7238311767578, "learning_rate": 1.685976155324733e-07, "loss": 16.7031, "step": 24636 }, { "epoch": 1.6362489207677493, "grad_norm": 211.48121643066406, "learning_rate": 1.685378568557967e-07, "loss": 13.7969, "step": 24637 }, { "epoch": 1.636315335060105, "grad_norm": 300.0987243652344, "learning_rate": 1.6847810779699466e-07, "loss": 19.8438, "step": 24638 }, { "epoch": 1.6363817493524606, "grad_norm": 222.28562927246094, "learning_rate": 1.6841836835675871e-07, "loss": 13.9531, "step": 24639 }, { "epoch": 1.6364481636448165, "grad_norm": 539.1681518554688, "learning_rate": 1.6835863853577893e-07, "loss": 19.75, "step": 24640 }, { "epoch": 1.636514577937172, "grad_norm": 229.8817596435547, "learning_rate": 1.682989183347474e-07, "loss": 16.4375, "step": 24641 }, { "epoch": 1.6365809922295278, "grad_norm": 335.4485778808594, "learning_rate": 1.682392077543543e-07, "loss": 20.25, "step": 24642 }, { "epoch": 1.6366474065218837, "grad_norm": 387.48077392578125, "learning_rate": 1.6817950679529024e-07, "loss": 19.7969, "step": 24643 }, { "epoch": 1.636713820814239, "grad_norm": 367.0377197265625, "learning_rate": 1.6811981545824606e-07, "loss": 19.4375, "step": 24644 }, { "epoch": 1.636780235106595, "grad_norm": 109.28414154052734, "learning_rate": 1.6806013374391216e-07, "loss": 12.7266, "step": 24645 }, { "epoch": 1.6368466493989506, "grad_norm": 195.76467895507812, "learning_rate": 1.6800046165297886e-07, "loss": 15.0781, "step": 24646 }, { "epoch": 1.6369130636913063, "grad_norm": 232.8853302001953, "learning_rate": 1.679407991861368e-07, "loss": 12.2031, "step": 24647 }, { "epoch": 1.6369794779836622, "grad_norm": 165.38169860839844, "learning_rate": 1.6788114634407523e-07, "loss": 13.0469, "step": 24648 }, { "epoch": 1.6370458922760178, "grad_norm": 252.09835815429688, "learning_rate": 1.6782150312748534e-07, "loss": 17.6094, "step": 24649 }, { "epoch": 1.6371123065683735, "grad_norm": 188.12936401367188, "learning_rate": 1.6776186953705607e-07, "loss": 14.0625, "step": 24650 }, { "epoch": 1.6371787208607294, "grad_norm": 254.5275115966797, "learning_rate": 1.6770224557347756e-07, "loss": 18.25, "step": 24651 }, { "epoch": 1.6372451351530848, "grad_norm": 178.9197540283203, "learning_rate": 1.6764263123743961e-07, "loss": 11.7812, "step": 24652 }, { "epoch": 1.6373115494454407, "grad_norm": 148.28497314453125, "learning_rate": 1.6758302652963174e-07, "loss": 16.4844, "step": 24653 }, { "epoch": 1.6373779637377965, "grad_norm": 147.28271484375, "learning_rate": 1.6752343145074343e-07, "loss": 18.3594, "step": 24654 }, { "epoch": 1.637444378030152, "grad_norm": 170.28933715820312, "learning_rate": 1.6746384600146425e-07, "loss": 13.3125, "step": 24655 }, { "epoch": 1.6375107923225078, "grad_norm": 260.77581787109375, "learning_rate": 1.6740427018248283e-07, "loss": 19.25, "step": 24656 }, { "epoch": 1.6375772066148635, "grad_norm": 220.6441192626953, "learning_rate": 1.6734470399448908e-07, "loss": 18.3906, "step": 24657 }, { "epoch": 1.6376436209072192, "grad_norm": 163.2638702392578, "learning_rate": 1.6728514743817145e-07, "loss": 17.2656, "step": 24658 }, { "epoch": 1.637710035199575, "grad_norm": 261.59686279296875, "learning_rate": 1.672256005142191e-07, "loss": 18.0938, "step": 24659 }, { "epoch": 1.6377764494919307, "grad_norm": 203.89418029785156, "learning_rate": 1.671660632233207e-07, "loss": 16.8594, "step": 24660 }, { "epoch": 1.6378428637842863, "grad_norm": 144.16664123535156, "learning_rate": 1.671065355661654e-07, "loss": 15.4219, "step": 24661 }, { "epoch": 1.6379092780766422, "grad_norm": 170.6437225341797, "learning_rate": 1.6704701754344086e-07, "loss": 14.2188, "step": 24662 }, { "epoch": 1.6379756923689976, "grad_norm": 172.99472045898438, "learning_rate": 1.669875091558367e-07, "loss": 17.5781, "step": 24663 }, { "epoch": 1.6380421066613535, "grad_norm": 106.39344787597656, "learning_rate": 1.6692801040404036e-07, "loss": 14.0, "step": 24664 }, { "epoch": 1.6381085209537094, "grad_norm": 182.4886932373047, "learning_rate": 1.668685212887404e-07, "loss": 19.7344, "step": 24665 }, { "epoch": 1.6381749352460648, "grad_norm": 220.5208282470703, "learning_rate": 1.6680904181062505e-07, "loss": 25.0469, "step": 24666 }, { "epoch": 1.6382413495384207, "grad_norm": 179.83309936523438, "learning_rate": 1.6674957197038219e-07, "loss": 17.6406, "step": 24667 }, { "epoch": 1.6383077638307764, "grad_norm": 537.4917602539062, "learning_rate": 1.6669011176869984e-07, "loss": 23.0781, "step": 24668 }, { "epoch": 1.638374178123132, "grad_norm": 454.40289306640625, "learning_rate": 1.6663066120626602e-07, "loss": 21.6719, "step": 24669 }, { "epoch": 1.638440592415488, "grad_norm": 88.50511169433594, "learning_rate": 1.6657122028376757e-07, "loss": 12.1875, "step": 24670 }, { "epoch": 1.6385070067078435, "grad_norm": 333.9208068847656, "learning_rate": 1.66511789001893e-07, "loss": 24.0625, "step": 24671 }, { "epoch": 1.6385734210001992, "grad_norm": 123.46733856201172, "learning_rate": 1.6645236736132972e-07, "loss": 14.6094, "step": 24672 }, { "epoch": 1.638639835292555, "grad_norm": 198.29803466796875, "learning_rate": 1.6639295536276422e-07, "loss": 16.4375, "step": 24673 }, { "epoch": 1.6387062495849105, "grad_norm": 142.0751953125, "learning_rate": 1.663335530068849e-07, "loss": 14.5938, "step": 24674 }, { "epoch": 1.6387726638772664, "grad_norm": 198.27142333984375, "learning_rate": 1.66274160294378e-07, "loss": 23.4375, "step": 24675 }, { "epoch": 1.6388390781696223, "grad_norm": 319.56640625, "learning_rate": 1.6621477722593092e-07, "loss": 17.625, "step": 24676 }, { "epoch": 1.6389054924619777, "grad_norm": 124.8163070678711, "learning_rate": 1.6615540380223046e-07, "loss": 13.7812, "step": 24677 }, { "epoch": 1.6389719067543336, "grad_norm": 133.54330444335938, "learning_rate": 1.6609604002396372e-07, "loss": 13.3125, "step": 24678 }, { "epoch": 1.6390383210466892, "grad_norm": 202.3234405517578, "learning_rate": 1.6603668589181663e-07, "loss": 11.8438, "step": 24679 }, { "epoch": 1.6391047353390449, "grad_norm": 190.39492797851562, "learning_rate": 1.659773414064768e-07, "loss": 16.3281, "step": 24680 }, { "epoch": 1.6391711496314008, "grad_norm": 115.32003784179688, "learning_rate": 1.6591800656862999e-07, "loss": 12.6094, "step": 24681 }, { "epoch": 1.6392375639237564, "grad_norm": 244.1309051513672, "learning_rate": 1.6585868137896252e-07, "loss": 18.2031, "step": 24682 }, { "epoch": 1.639303978216112, "grad_norm": 287.42803955078125, "learning_rate": 1.65799365838161e-07, "loss": 11.9375, "step": 24683 }, { "epoch": 1.639370392508468, "grad_norm": 195.2359619140625, "learning_rate": 1.6574005994691132e-07, "loss": 17.4844, "step": 24684 }, { "epoch": 1.6394368068008234, "grad_norm": 220.80474853515625, "learning_rate": 1.6568076370589955e-07, "loss": 12.7344, "step": 24685 }, { "epoch": 1.6395032210931793, "grad_norm": 259.5443420410156, "learning_rate": 1.6562147711581186e-07, "loss": 15.7891, "step": 24686 }, { "epoch": 1.6395696353855351, "grad_norm": 144.77459716796875, "learning_rate": 1.6556220017733336e-07, "loss": 16.875, "step": 24687 }, { "epoch": 1.6396360496778906, "grad_norm": 393.8301086425781, "learning_rate": 1.655029328911506e-07, "loss": 20.3281, "step": 24688 }, { "epoch": 1.6397024639702464, "grad_norm": 117.94479370117188, "learning_rate": 1.6544367525794845e-07, "loss": 12.6094, "step": 24689 }, { "epoch": 1.639768878262602, "grad_norm": 134.1357879638672, "learning_rate": 1.6538442727841272e-07, "loss": 13.2656, "step": 24690 }, { "epoch": 1.6398352925549577, "grad_norm": 383.3901062011719, "learning_rate": 1.6532518895322856e-07, "loss": 16.1719, "step": 24691 }, { "epoch": 1.6399017068473136, "grad_norm": 169.2380828857422, "learning_rate": 1.6526596028308138e-07, "loss": 14.6484, "step": 24692 }, { "epoch": 1.6399681211396693, "grad_norm": 390.8373107910156, "learning_rate": 1.6520674126865618e-07, "loss": 19.5625, "step": 24693 }, { "epoch": 1.640034535432025, "grad_norm": 400.8578796386719, "learning_rate": 1.6514753191063836e-07, "loss": 17.7031, "step": 24694 }, { "epoch": 1.6401009497243808, "grad_norm": 147.20887756347656, "learning_rate": 1.6508833220971198e-07, "loss": 14.1719, "step": 24695 }, { "epoch": 1.6401673640167362, "grad_norm": 419.26214599609375, "learning_rate": 1.6502914216656273e-07, "loss": 21.4219, "step": 24696 }, { "epoch": 1.6402337783090921, "grad_norm": 304.8481140136719, "learning_rate": 1.6496996178187472e-07, "loss": 16.4844, "step": 24697 }, { "epoch": 1.640300192601448, "grad_norm": 179.9791259765625, "learning_rate": 1.6491079105633277e-07, "loss": 15.7188, "step": 24698 }, { "epoch": 1.6403666068938034, "grad_norm": 162.10015869140625, "learning_rate": 1.6485162999062108e-07, "loss": 15.8594, "step": 24699 }, { "epoch": 1.6404330211861593, "grad_norm": 261.35150146484375, "learning_rate": 1.6479247858542454e-07, "loss": 16.5156, "step": 24700 }, { "epoch": 1.640499435478515, "grad_norm": 103.03740692138672, "learning_rate": 1.6473333684142643e-07, "loss": 16.3203, "step": 24701 }, { "epoch": 1.6405658497708706, "grad_norm": 195.2880859375, "learning_rate": 1.6467420475931192e-07, "loss": 15.9688, "step": 24702 }, { "epoch": 1.6406322640632265, "grad_norm": 134.85118103027344, "learning_rate": 1.646150823397643e-07, "loss": 12.3906, "step": 24703 }, { "epoch": 1.6406986783555821, "grad_norm": 343.588134765625, "learning_rate": 1.6455596958346774e-07, "loss": 12.7344, "step": 24704 }, { "epoch": 1.6407650926479378, "grad_norm": 168.08689880371094, "learning_rate": 1.6449686649110595e-07, "loss": 15.8438, "step": 24705 }, { "epoch": 1.6408315069402937, "grad_norm": 190.73455810546875, "learning_rate": 1.644377730633626e-07, "loss": 13.5156, "step": 24706 }, { "epoch": 1.640897921232649, "grad_norm": 629.9671630859375, "learning_rate": 1.6437868930092135e-07, "loss": 19.375, "step": 24707 }, { "epoch": 1.640964335525005, "grad_norm": 237.02798461914062, "learning_rate": 1.6431961520446559e-07, "loss": 17.0469, "step": 24708 }, { "epoch": 1.6410307498173609, "grad_norm": 193.40109252929688, "learning_rate": 1.642605507746786e-07, "loss": 16.4062, "step": 24709 }, { "epoch": 1.6410971641097163, "grad_norm": 167.80563354492188, "learning_rate": 1.6420149601224355e-07, "loss": 12.8125, "step": 24710 }, { "epoch": 1.6411635784020722, "grad_norm": 182.57107543945312, "learning_rate": 1.6414245091784395e-07, "loss": 18.7031, "step": 24711 }, { "epoch": 1.6412299926944278, "grad_norm": 336.7521667480469, "learning_rate": 1.6408341549216209e-07, "loss": 18.375, "step": 24712 }, { "epoch": 1.6412964069867835, "grad_norm": 219.15760803222656, "learning_rate": 1.6402438973588163e-07, "loss": 15.2969, "step": 24713 }, { "epoch": 1.6413628212791394, "grad_norm": 250.0186309814453, "learning_rate": 1.639653736496849e-07, "loss": 14.5781, "step": 24714 }, { "epoch": 1.641429235571495, "grad_norm": 105.21009063720703, "learning_rate": 1.6390636723425445e-07, "loss": 13.8906, "step": 24715 }, { "epoch": 1.6414956498638507, "grad_norm": 349.6557312011719, "learning_rate": 1.638473704902731e-07, "loss": 19.6875, "step": 24716 }, { "epoch": 1.6415620641562065, "grad_norm": 270.30596923828125, "learning_rate": 1.6378838341842326e-07, "loss": 18.1719, "step": 24717 }, { "epoch": 1.641628478448562, "grad_norm": 279.991943359375, "learning_rate": 1.6372940601938712e-07, "loss": 15.3047, "step": 24718 }, { "epoch": 1.6416948927409178, "grad_norm": 228.02325439453125, "learning_rate": 1.636704382938473e-07, "loss": 17.1406, "step": 24719 }, { "epoch": 1.6417613070332737, "grad_norm": 177.0142059326172, "learning_rate": 1.6361148024248538e-07, "loss": 14.625, "step": 24720 }, { "epoch": 1.6418277213256292, "grad_norm": 220.56004333496094, "learning_rate": 1.635525318659835e-07, "loss": 17.1562, "step": 24721 }, { "epoch": 1.641894135617985, "grad_norm": 159.85986328125, "learning_rate": 1.6349359316502364e-07, "loss": 12.9219, "step": 24722 }, { "epoch": 1.6419605499103407, "grad_norm": 203.88296508789062, "learning_rate": 1.634346641402875e-07, "loss": 13.9531, "step": 24723 }, { "epoch": 1.6420269642026963, "grad_norm": 156.2379150390625, "learning_rate": 1.6337574479245686e-07, "loss": 11.5312, "step": 24724 }, { "epoch": 1.6420933784950522, "grad_norm": 189.1305694580078, "learning_rate": 1.633168351222134e-07, "loss": 17.2188, "step": 24725 }, { "epoch": 1.6421597927874079, "grad_norm": 241.7805633544922, "learning_rate": 1.6325793513023777e-07, "loss": 14.7266, "step": 24726 }, { "epoch": 1.6422262070797635, "grad_norm": 175.42913818359375, "learning_rate": 1.6319904481721236e-07, "loss": 17.4531, "step": 24727 }, { "epoch": 1.6422926213721194, "grad_norm": 163.97596740722656, "learning_rate": 1.6314016418381772e-07, "loss": 19.2344, "step": 24728 }, { "epoch": 1.6423590356644748, "grad_norm": 274.3325500488281, "learning_rate": 1.6308129323073504e-07, "loss": 15.9375, "step": 24729 }, { "epoch": 1.6424254499568307, "grad_norm": 485.9000549316406, "learning_rate": 1.6302243195864528e-07, "loss": 20.8594, "step": 24730 }, { "epoch": 1.6424918642491866, "grad_norm": 252.28125, "learning_rate": 1.6296358036822954e-07, "loss": 16.3594, "step": 24731 }, { "epoch": 1.642558278541542, "grad_norm": 256.9784240722656, "learning_rate": 1.629047384601684e-07, "loss": 17.3125, "step": 24732 }, { "epoch": 1.642624692833898, "grad_norm": 133.4009552001953, "learning_rate": 1.628459062351427e-07, "loss": 14.2344, "step": 24733 }, { "epoch": 1.6426911071262535, "grad_norm": 159.39434814453125, "learning_rate": 1.6278708369383242e-07, "loss": 19.1094, "step": 24734 }, { "epoch": 1.6427575214186092, "grad_norm": 364.7819519042969, "learning_rate": 1.6272827083691887e-07, "loss": 14.4766, "step": 24735 }, { "epoch": 1.642823935710965, "grad_norm": 524.5494384765625, "learning_rate": 1.626694676650816e-07, "loss": 23.2969, "step": 24736 }, { "epoch": 1.6428903500033207, "grad_norm": 147.48973083496094, "learning_rate": 1.626106741790012e-07, "loss": 13.9375, "step": 24737 }, { "epoch": 1.6429567642956764, "grad_norm": 208.029541015625, "learning_rate": 1.6255189037935756e-07, "loss": 16.0469, "step": 24738 }, { "epoch": 1.6430231785880323, "grad_norm": 169.15692138671875, "learning_rate": 1.6249311626683104e-07, "loss": 12.9688, "step": 24739 }, { "epoch": 1.6430895928803877, "grad_norm": 278.8143005371094, "learning_rate": 1.6243435184210074e-07, "loss": 19.8203, "step": 24740 }, { "epoch": 1.6431560071727436, "grad_norm": 186.67103576660156, "learning_rate": 1.6237559710584737e-07, "loss": 18.1094, "step": 24741 }, { "epoch": 1.6432224214650994, "grad_norm": 255.00843811035156, "learning_rate": 1.6231685205874978e-07, "loss": 18.125, "step": 24742 }, { "epoch": 1.6432888357574549, "grad_norm": 334.9286804199219, "learning_rate": 1.622581167014876e-07, "loss": 17.1094, "step": 24743 }, { "epoch": 1.6433552500498108, "grad_norm": 203.48687744140625, "learning_rate": 1.62199391034741e-07, "loss": 13.1406, "step": 24744 }, { "epoch": 1.6434216643421664, "grad_norm": 1236.5013427734375, "learning_rate": 1.621406750591884e-07, "loss": 19.375, "step": 24745 }, { "epoch": 1.643488078634522, "grad_norm": 724.1102294921875, "learning_rate": 1.620819687755094e-07, "loss": 18.6719, "step": 24746 }, { "epoch": 1.643554492926878, "grad_norm": 196.97946166992188, "learning_rate": 1.6202327218438306e-07, "loss": 15.3047, "step": 24747 }, { "epoch": 1.6436209072192336, "grad_norm": 343.7306213378906, "learning_rate": 1.6196458528648815e-07, "loss": 13.0, "step": 24748 }, { "epoch": 1.6436873215115892, "grad_norm": 117.83429718017578, "learning_rate": 1.6190590808250382e-07, "loss": 14.1406, "step": 24749 }, { "epoch": 1.6437537358039451, "grad_norm": 260.3740234375, "learning_rate": 1.6184724057310882e-07, "loss": 23.0938, "step": 24750 }, { "epoch": 1.6438201500963006, "grad_norm": 138.6814727783203, "learning_rate": 1.617885827589812e-07, "loss": 16.0469, "step": 24751 }, { "epoch": 1.6438865643886564, "grad_norm": 107.62342834472656, "learning_rate": 1.6172993464080041e-07, "loss": 11.7031, "step": 24752 }, { "epoch": 1.6439529786810123, "grad_norm": 253.92575073242188, "learning_rate": 1.6167129621924414e-07, "loss": 13.9297, "step": 24753 }, { "epoch": 1.6440193929733677, "grad_norm": 307.8912048339844, "learning_rate": 1.6161266749499093e-07, "loss": 16.3125, "step": 24754 }, { "epoch": 1.6440858072657236, "grad_norm": 174.57650756835938, "learning_rate": 1.615540484687188e-07, "loss": 18.9375, "step": 24755 }, { "epoch": 1.6441522215580793, "grad_norm": 450.94580078125, "learning_rate": 1.614954391411061e-07, "loss": 13.2656, "step": 24756 }, { "epoch": 1.644218635850435, "grad_norm": 255.43948364257812, "learning_rate": 1.614368395128306e-07, "loss": 15.5781, "step": 24757 }, { "epoch": 1.6442850501427908, "grad_norm": 192.33322143554688, "learning_rate": 1.6137824958457035e-07, "loss": 13.9688, "step": 24758 }, { "epoch": 1.6443514644351465, "grad_norm": 233.80970764160156, "learning_rate": 1.6131966935700258e-07, "loss": 21.1719, "step": 24759 }, { "epoch": 1.6444178787275021, "grad_norm": 538.3687744140625, "learning_rate": 1.6126109883080574e-07, "loss": 13.6875, "step": 24760 }, { "epoch": 1.644484293019858, "grad_norm": 1495.5078125, "learning_rate": 1.612025380066565e-07, "loss": 18.5, "step": 24761 }, { "epoch": 1.6445507073122134, "grad_norm": 242.43313598632812, "learning_rate": 1.611439868852328e-07, "loss": 16.0938, "step": 24762 }, { "epoch": 1.6446171216045693, "grad_norm": 159.76490783691406, "learning_rate": 1.6108544546721158e-07, "loss": 18.75, "step": 24763 }, { "epoch": 1.6446835358969252, "grad_norm": 370.1626892089844, "learning_rate": 1.6102691375327048e-07, "loss": 14.3359, "step": 24764 }, { "epoch": 1.6447499501892806, "grad_norm": 194.17933654785156, "learning_rate": 1.6096839174408584e-07, "loss": 20.6562, "step": 24765 }, { "epoch": 1.6448163644816365, "grad_norm": 193.65634155273438, "learning_rate": 1.609098794403354e-07, "loss": 17.9219, "step": 24766 }, { "epoch": 1.6448827787739921, "grad_norm": 173.30523681640625, "learning_rate": 1.6085137684269545e-07, "loss": 16.0156, "step": 24767 }, { "epoch": 1.6449491930663478, "grad_norm": 240.52935791015625, "learning_rate": 1.6079288395184298e-07, "loss": 16.5, "step": 24768 }, { "epoch": 1.6450156073587037, "grad_norm": 271.6162414550781, "learning_rate": 1.6073440076845435e-07, "loss": 23.8906, "step": 24769 }, { "epoch": 1.6450820216510593, "grad_norm": 200.1910858154297, "learning_rate": 1.6067592729320633e-07, "loss": 20.9219, "step": 24770 }, { "epoch": 1.645148435943415, "grad_norm": 223.9810333251953, "learning_rate": 1.606174635267752e-07, "loss": 14.2812, "step": 24771 }, { "epoch": 1.6452148502357709, "grad_norm": 178.74539184570312, "learning_rate": 1.6055900946983746e-07, "loss": 14.7969, "step": 24772 }, { "epoch": 1.6452812645281265, "grad_norm": 327.7416076660156, "learning_rate": 1.6050056512306854e-07, "loss": 24.5625, "step": 24773 }, { "epoch": 1.6453476788204822, "grad_norm": 123.15043640136719, "learning_rate": 1.604421304871455e-07, "loss": 14.3594, "step": 24774 }, { "epoch": 1.645414093112838, "grad_norm": 188.45921325683594, "learning_rate": 1.603837055627435e-07, "loss": 13.3906, "step": 24775 }, { "epoch": 1.6454805074051935, "grad_norm": 187.47683715820312, "learning_rate": 1.6032529035053876e-07, "loss": 12.2188, "step": 24776 }, { "epoch": 1.6455469216975493, "grad_norm": 178.82376098632812, "learning_rate": 1.6026688485120677e-07, "loss": 15.8594, "step": 24777 }, { "epoch": 1.645613335989905, "grad_norm": 260.0588073730469, "learning_rate": 1.602084890654233e-07, "loss": 20.4688, "step": 24778 }, { "epoch": 1.6456797502822607, "grad_norm": 218.5176544189453, "learning_rate": 1.6015010299386366e-07, "loss": 17.9219, "step": 24779 }, { "epoch": 1.6457461645746165, "grad_norm": 242.6587677001953, "learning_rate": 1.600917266372035e-07, "loss": 22.9375, "step": 24780 }, { "epoch": 1.6458125788669722, "grad_norm": 275.07574462890625, "learning_rate": 1.6003335999611778e-07, "loss": 20.125, "step": 24781 }, { "epoch": 1.6458789931593278, "grad_norm": 189.8355712890625, "learning_rate": 1.5997500307128186e-07, "loss": 17.875, "step": 24782 }, { "epoch": 1.6459454074516837, "grad_norm": 120.50956726074219, "learning_rate": 1.59916655863371e-07, "loss": 13.75, "step": 24783 }, { "epoch": 1.6460118217440394, "grad_norm": 158.18446350097656, "learning_rate": 1.5985831837305964e-07, "loss": 13.5625, "step": 24784 }, { "epoch": 1.646078236036395, "grad_norm": 291.4322204589844, "learning_rate": 1.5979999060102266e-07, "loss": 14.0, "step": 24785 }, { "epoch": 1.646144650328751, "grad_norm": 339.3775634765625, "learning_rate": 1.5974167254793503e-07, "loss": 14.25, "step": 24786 }, { "epoch": 1.6462110646211063, "grad_norm": 177.5742645263672, "learning_rate": 1.5968336421447116e-07, "loss": 22.5781, "step": 24787 }, { "epoch": 1.6462774789134622, "grad_norm": 643.6922607421875, "learning_rate": 1.596250656013055e-07, "loss": 21.0625, "step": 24788 }, { "epoch": 1.6463438932058179, "grad_norm": 155.8377685546875, "learning_rate": 1.595667767091129e-07, "loss": 14.2031, "step": 24789 }, { "epoch": 1.6464103074981735, "grad_norm": 202.3421630859375, "learning_rate": 1.5950849753856655e-07, "loss": 16.3125, "step": 24790 }, { "epoch": 1.6464767217905294, "grad_norm": 210.2573699951172, "learning_rate": 1.594502280903418e-07, "loss": 15.6719, "step": 24791 }, { "epoch": 1.646543136082885, "grad_norm": 1103.3929443359375, "learning_rate": 1.5939196836511193e-07, "loss": 21.5938, "step": 24792 }, { "epoch": 1.6466095503752407, "grad_norm": 198.3097381591797, "learning_rate": 1.5933371836355104e-07, "loss": 15.0859, "step": 24793 }, { "epoch": 1.6466759646675966, "grad_norm": 281.6152038574219, "learning_rate": 1.592754780863329e-07, "loss": 18.4219, "step": 24794 }, { "epoch": 1.6467423789599522, "grad_norm": 220.75015258789062, "learning_rate": 1.5921724753413123e-07, "loss": 15.1094, "step": 24795 }, { "epoch": 1.646808793252308, "grad_norm": 534.4005737304688, "learning_rate": 1.591590267076197e-07, "loss": 13.2031, "step": 24796 }, { "epoch": 1.6468752075446638, "grad_norm": 371.20648193359375, "learning_rate": 1.591008156074718e-07, "loss": 14.3906, "step": 24797 }, { "epoch": 1.6469416218370192, "grad_norm": 224.3359832763672, "learning_rate": 1.590426142343604e-07, "loss": 15.9375, "step": 24798 }, { "epoch": 1.647008036129375, "grad_norm": 294.1031494140625, "learning_rate": 1.5898442258895962e-07, "loss": 20.2344, "step": 24799 }, { "epoch": 1.6470744504217307, "grad_norm": 213.2449951171875, "learning_rate": 1.589262406719417e-07, "loss": 19.4531, "step": 24800 }, { "epoch": 1.6471408647140864, "grad_norm": 363.2937927246094, "learning_rate": 1.5886806848398015e-07, "loss": 18.25, "step": 24801 }, { "epoch": 1.6472072790064423, "grad_norm": 445.6298522949219, "learning_rate": 1.5880990602574772e-07, "loss": 19.5625, "step": 24802 }, { "epoch": 1.647273693298798, "grad_norm": 182.6721649169922, "learning_rate": 1.5875175329791744e-07, "loss": 11.9531, "step": 24803 }, { "epoch": 1.6473401075911536, "grad_norm": 176.5673065185547, "learning_rate": 1.5869361030116134e-07, "loss": 17.1719, "step": 24804 }, { "epoch": 1.6474065218835094, "grad_norm": 156.5658416748047, "learning_rate": 1.5863547703615298e-07, "loss": 14.625, "step": 24805 }, { "epoch": 1.647472936175865, "grad_norm": 180.951171875, "learning_rate": 1.5857735350356393e-07, "loss": 15.7422, "step": 24806 }, { "epoch": 1.6475393504682208, "grad_norm": 134.09800720214844, "learning_rate": 1.585192397040669e-07, "loss": 15.9688, "step": 24807 }, { "epoch": 1.6476057647605766, "grad_norm": 119.56689453125, "learning_rate": 1.5846113563833408e-07, "loss": 14.875, "step": 24808 }, { "epoch": 1.647672179052932, "grad_norm": 240.4866485595703, "learning_rate": 1.5840304130703762e-07, "loss": 15.6562, "step": 24809 }, { "epoch": 1.647738593345288, "grad_norm": 350.891845703125, "learning_rate": 1.5834495671084946e-07, "loss": 14.7812, "step": 24810 }, { "epoch": 1.6478050076376436, "grad_norm": 377.406494140625, "learning_rate": 1.582868818504417e-07, "loss": 15.6094, "step": 24811 }, { "epoch": 1.6478714219299992, "grad_norm": 135.7036590576172, "learning_rate": 1.5822881672648558e-07, "loss": 11.375, "step": 24812 }, { "epoch": 1.6479378362223551, "grad_norm": 199.19027709960938, "learning_rate": 1.5817076133965346e-07, "loss": 12.5469, "step": 24813 }, { "epoch": 1.6480042505147108, "grad_norm": 277.3501281738281, "learning_rate": 1.5811271569061636e-07, "loss": 15.6094, "step": 24814 }, { "epoch": 1.6480706648070664, "grad_norm": 241.9658660888672, "learning_rate": 1.5805467978004595e-07, "loss": 14.125, "step": 24815 }, { "epoch": 1.6481370790994223, "grad_norm": 165.88035583496094, "learning_rate": 1.5799665360861336e-07, "loss": 17.1719, "step": 24816 }, { "epoch": 1.648203493391778, "grad_norm": 139.31324768066406, "learning_rate": 1.5793863717699008e-07, "loss": 14.7344, "step": 24817 }, { "epoch": 1.6482699076841336, "grad_norm": 540.9365844726562, "learning_rate": 1.578806304858471e-07, "loss": 14.0625, "step": 24818 }, { "epoch": 1.6483363219764895, "grad_norm": 524.9176025390625, "learning_rate": 1.5782263353585535e-07, "loss": 16.1484, "step": 24819 }, { "epoch": 1.648402736268845, "grad_norm": 1417.5040283203125, "learning_rate": 1.577646463276857e-07, "loss": 15.8438, "step": 24820 }, { "epoch": 1.6484691505612008, "grad_norm": 223.60435485839844, "learning_rate": 1.57706668862009e-07, "loss": 13.875, "step": 24821 }, { "epoch": 1.6485355648535565, "grad_norm": 423.2520751953125, "learning_rate": 1.5764870113949612e-07, "loss": 15.3906, "step": 24822 }, { "epoch": 1.648601979145912, "grad_norm": 305.5118408203125, "learning_rate": 1.5759074316081687e-07, "loss": 15.6719, "step": 24823 }, { "epoch": 1.648668393438268, "grad_norm": 250.7974853515625, "learning_rate": 1.5753279492664262e-07, "loss": 22.3281, "step": 24824 }, { "epoch": 1.6487348077306236, "grad_norm": 306.1184387207031, "learning_rate": 1.574748564376429e-07, "loss": 17.4688, "step": 24825 }, { "epoch": 1.6488012220229793, "grad_norm": 310.7344665527344, "learning_rate": 1.5741692769448833e-07, "loss": 13.8906, "step": 24826 }, { "epoch": 1.6488676363153352, "grad_norm": 245.439697265625, "learning_rate": 1.573590086978488e-07, "loss": 11.8906, "step": 24827 }, { "epoch": 1.6489340506076908, "grad_norm": 346.44256591796875, "learning_rate": 1.5730109944839465e-07, "loss": 20.2031, "step": 24828 }, { "epoch": 1.6490004649000465, "grad_norm": 440.56768798828125, "learning_rate": 1.572431999467949e-07, "loss": 13.1406, "step": 24829 }, { "epoch": 1.6490668791924024, "grad_norm": 161.68399047851562, "learning_rate": 1.5718531019372038e-07, "loss": 17.2188, "step": 24830 }, { "epoch": 1.6491332934847578, "grad_norm": 289.5184326171875, "learning_rate": 1.5712743018983988e-07, "loss": 17.5625, "step": 24831 }, { "epoch": 1.6491997077771137, "grad_norm": 178.42308044433594, "learning_rate": 1.570695599358234e-07, "loss": 11.6562, "step": 24832 }, { "epoch": 1.6492661220694693, "grad_norm": 149.64727783203125, "learning_rate": 1.5701169943234004e-07, "loss": 19.5, "step": 24833 }, { "epoch": 1.649332536361825, "grad_norm": 543.7596435546875, "learning_rate": 1.5695384868005924e-07, "loss": 12.9375, "step": 24834 }, { "epoch": 1.6493989506541809, "grad_norm": 258.8646545410156, "learning_rate": 1.5689600767965027e-07, "loss": 13.5312, "step": 24835 }, { "epoch": 1.6494653649465365, "grad_norm": 304.5738830566406, "learning_rate": 1.5683817643178232e-07, "loss": 25.8281, "step": 24836 }, { "epoch": 1.6495317792388922, "grad_norm": 263.97991943359375, "learning_rate": 1.567803549371237e-07, "loss": 33.0469, "step": 24837 }, { "epoch": 1.649598193531248, "grad_norm": 438.81976318359375, "learning_rate": 1.5672254319634415e-07, "loss": 16.0234, "step": 24838 }, { "epoch": 1.6496646078236037, "grad_norm": 208.72940063476562, "learning_rate": 1.5666474121011176e-07, "loss": 20.6719, "step": 24839 }, { "epoch": 1.6497310221159593, "grad_norm": 151.85101318359375, "learning_rate": 1.5660694897909533e-07, "loss": 12.4531, "step": 24840 }, { "epoch": 1.6497974364083152, "grad_norm": 138.11756896972656, "learning_rate": 1.5654916650396343e-07, "loss": 13.4531, "step": 24841 }, { "epoch": 1.6498638507006707, "grad_norm": 144.12393188476562, "learning_rate": 1.5649139378538435e-07, "loss": 16.3281, "step": 24842 }, { "epoch": 1.6499302649930265, "grad_norm": 124.54273986816406, "learning_rate": 1.564336308240265e-07, "loss": 12.3438, "step": 24843 }, { "epoch": 1.6499966792853822, "grad_norm": 831.9243774414062, "learning_rate": 1.5637587762055815e-07, "loss": 14.9844, "step": 24844 }, { "epoch": 1.6500630935777378, "grad_norm": 286.38671875, "learning_rate": 1.5631813417564665e-07, "loss": 21.5469, "step": 24845 }, { "epoch": 1.6501295078700937, "grad_norm": 236.13507080078125, "learning_rate": 1.5626040048996115e-07, "loss": 20.375, "step": 24846 }, { "epoch": 1.6501959221624494, "grad_norm": 202.5302734375, "learning_rate": 1.5620267656416841e-07, "loss": 13.5, "step": 24847 }, { "epoch": 1.650262336454805, "grad_norm": 200.14279174804688, "learning_rate": 1.561449623989366e-07, "loss": 26.1875, "step": 24848 }, { "epoch": 1.650328750747161, "grad_norm": 202.90899658203125, "learning_rate": 1.5608725799493328e-07, "loss": 18.6094, "step": 24849 }, { "epoch": 1.6503951650395166, "grad_norm": 421.4411926269531, "learning_rate": 1.5602956335282613e-07, "loss": 24.0312, "step": 24850 }, { "epoch": 1.6504615793318722, "grad_norm": 204.35260009765625, "learning_rate": 1.5597187847328185e-07, "loss": 17.1562, "step": 24851 }, { "epoch": 1.650527993624228, "grad_norm": 168.7353973388672, "learning_rate": 1.559142033569687e-07, "loss": 16.9531, "step": 24852 }, { "epoch": 1.6505944079165835, "grad_norm": 457.72161865234375, "learning_rate": 1.55856538004553e-07, "loss": 18.6328, "step": 24853 }, { "epoch": 1.6506608222089394, "grad_norm": 155.9185333251953, "learning_rate": 1.5579888241670201e-07, "loss": 16.6797, "step": 24854 }, { "epoch": 1.650727236501295, "grad_norm": 309.46197509765625, "learning_rate": 1.557412365940831e-07, "loss": 12.0625, "step": 24855 }, { "epoch": 1.6507936507936507, "grad_norm": 155.92559814453125, "learning_rate": 1.5568360053736264e-07, "loss": 15.1406, "step": 24856 }, { "epoch": 1.6508600650860066, "grad_norm": 589.9507446289062, "learning_rate": 1.5562597424720726e-07, "loss": 22.7344, "step": 24857 }, { "epoch": 1.6509264793783622, "grad_norm": 309.9376525878906, "learning_rate": 1.5556835772428377e-07, "loss": 12.6797, "step": 24858 }, { "epoch": 1.6509928936707179, "grad_norm": 246.0657196044922, "learning_rate": 1.555107509692587e-07, "loss": 15.3125, "step": 24859 }, { "epoch": 1.6510593079630738, "grad_norm": 328.9976501464844, "learning_rate": 1.5545315398279823e-07, "loss": 16.3594, "step": 24860 }, { "epoch": 1.6511257222554294, "grad_norm": 510.213134765625, "learning_rate": 1.5539556676556888e-07, "loss": 21.6875, "step": 24861 }, { "epoch": 1.651192136547785, "grad_norm": 174.45120239257812, "learning_rate": 1.5533798931823615e-07, "loss": 15.25, "step": 24862 }, { "epoch": 1.651258550840141, "grad_norm": 188.217041015625, "learning_rate": 1.5528042164146693e-07, "loss": 17.4375, "step": 24863 }, { "epoch": 1.6513249651324964, "grad_norm": 236.8739471435547, "learning_rate": 1.5522286373592652e-07, "loss": 18.8906, "step": 24864 }, { "epoch": 1.6513913794248523, "grad_norm": 232.59201049804688, "learning_rate": 1.5516531560228085e-07, "loss": 16.125, "step": 24865 }, { "epoch": 1.651457793717208, "grad_norm": 312.3777770996094, "learning_rate": 1.5510777724119573e-07, "loss": 12.5938, "step": 24866 }, { "epoch": 1.6515242080095636, "grad_norm": 150.18751525878906, "learning_rate": 1.5505024865333682e-07, "loss": 16.7344, "step": 24867 }, { "epoch": 1.6515906223019194, "grad_norm": 275.9169921875, "learning_rate": 1.5499272983936895e-07, "loss": 16.4062, "step": 24868 }, { "epoch": 1.651657036594275, "grad_norm": 241.74610900878906, "learning_rate": 1.5493522079995836e-07, "loss": 16.7344, "step": 24869 }, { "epoch": 1.6517234508866308, "grad_norm": 180.81826782226562, "learning_rate": 1.548777215357696e-07, "loss": 16.2031, "step": 24870 }, { "epoch": 1.6517898651789866, "grad_norm": 204.0933837890625, "learning_rate": 1.5482023204746807e-07, "loss": 14.8203, "step": 24871 }, { "epoch": 1.6518562794713423, "grad_norm": 159.04051208496094, "learning_rate": 1.5476275233571878e-07, "loss": 16.875, "step": 24872 }, { "epoch": 1.651922693763698, "grad_norm": 222.02978515625, "learning_rate": 1.547052824011865e-07, "loss": 13.7188, "step": 24873 }, { "epoch": 1.6519891080560538, "grad_norm": 492.8690490722656, "learning_rate": 1.546478222445361e-07, "loss": 12.8438, "step": 24874 }, { "epoch": 1.6520555223484092, "grad_norm": 222.76795959472656, "learning_rate": 1.5459037186643252e-07, "loss": 15.7812, "step": 24875 }, { "epoch": 1.6521219366407651, "grad_norm": 122.32037353515625, "learning_rate": 1.5453293126753952e-07, "loss": 16.9531, "step": 24876 }, { "epoch": 1.6521883509331208, "grad_norm": 145.35641479492188, "learning_rate": 1.5447550044852253e-07, "loss": 16.3906, "step": 24877 }, { "epoch": 1.6522547652254764, "grad_norm": 196.51272583007812, "learning_rate": 1.5441807941004515e-07, "loss": 11.7812, "step": 24878 }, { "epoch": 1.6523211795178323, "grad_norm": 127.42164611816406, "learning_rate": 1.5436066815277182e-07, "loss": 12.4062, "step": 24879 }, { "epoch": 1.652387593810188, "grad_norm": 121.57000732421875, "learning_rate": 1.543032666773667e-07, "loss": 16.0781, "step": 24880 }, { "epoch": 1.6524540081025436, "grad_norm": 268.0760803222656, "learning_rate": 1.542458749844937e-07, "loss": 12.2812, "step": 24881 }, { "epoch": 1.6525204223948995, "grad_norm": 171.9470672607422, "learning_rate": 1.5418849307481675e-07, "loss": 13.375, "step": 24882 }, { "epoch": 1.6525868366872551, "grad_norm": 339.4295654296875, "learning_rate": 1.5413112094899984e-07, "loss": 17.25, "step": 24883 }, { "epoch": 1.6526532509796108, "grad_norm": 225.8066864013672, "learning_rate": 1.5407375860770588e-07, "loss": 18.7031, "step": 24884 }, { "epoch": 1.6527196652719667, "grad_norm": 210.7998504638672, "learning_rate": 1.5401640605159938e-07, "loss": 15.7266, "step": 24885 }, { "epoch": 1.652786079564322, "grad_norm": 141.92852783203125, "learning_rate": 1.53959063281343e-07, "loss": 12.1094, "step": 24886 }, { "epoch": 1.652852493856678, "grad_norm": 274.19573974609375, "learning_rate": 1.5390173029760034e-07, "loss": 14.6953, "step": 24887 }, { "epoch": 1.6529189081490336, "grad_norm": 344.5126647949219, "learning_rate": 1.5384440710103452e-07, "loss": 23.1875, "step": 24888 }, { "epoch": 1.6529853224413893, "grad_norm": 446.3424987792969, "learning_rate": 1.5378709369230903e-07, "loss": 26.4375, "step": 24889 }, { "epoch": 1.6530517367337452, "grad_norm": 537.0325927734375, "learning_rate": 1.5372979007208586e-07, "loss": 16.2969, "step": 24890 }, { "epoch": 1.6531181510261008, "grad_norm": 144.70797729492188, "learning_rate": 1.5367249624102884e-07, "loss": 12.8125, "step": 24891 }, { "epoch": 1.6531845653184565, "grad_norm": 638.2757568359375, "learning_rate": 1.5361521219980056e-07, "loss": 14.9688, "step": 24892 }, { "epoch": 1.6532509796108124, "grad_norm": 168.55685424804688, "learning_rate": 1.5355793794906303e-07, "loss": 14.8594, "step": 24893 }, { "epoch": 1.653317393903168, "grad_norm": 186.58302307128906, "learning_rate": 1.5350067348947959e-07, "loss": 12.0547, "step": 24894 }, { "epoch": 1.6533838081955237, "grad_norm": 277.54693603515625, "learning_rate": 1.5344341882171208e-07, "loss": 14.0, "step": 24895 }, { "epoch": 1.6534502224878795, "grad_norm": 518.6716918945312, "learning_rate": 1.5338617394642283e-07, "loss": 13.6094, "step": 24896 }, { "epoch": 1.653516636780235, "grad_norm": 118.95182037353516, "learning_rate": 1.5332893886427422e-07, "loss": 12.7891, "step": 24897 }, { "epoch": 1.6535830510725908, "grad_norm": 174.66403198242188, "learning_rate": 1.5327171357592826e-07, "loss": 16.7109, "step": 24898 }, { "epoch": 1.6536494653649465, "grad_norm": 147.66177368164062, "learning_rate": 1.5321449808204678e-07, "loss": 13.875, "step": 24899 }, { "epoch": 1.6537158796573022, "grad_norm": 1483.5574951171875, "learning_rate": 1.53157292383292e-07, "loss": 14.9688, "step": 24900 }, { "epoch": 1.653782293949658, "grad_norm": 174.78271484375, "learning_rate": 1.5310009648032485e-07, "loss": 12.3281, "step": 24901 }, { "epoch": 1.6538487082420137, "grad_norm": 133.7943115234375, "learning_rate": 1.530429103738079e-07, "loss": 14.4062, "step": 24902 }, { "epoch": 1.6539151225343693, "grad_norm": 195.6826934814453, "learning_rate": 1.5298573406440196e-07, "loss": 14.8594, "step": 24903 }, { "epoch": 1.6539815368267252, "grad_norm": 308.19390869140625, "learning_rate": 1.5292856755276862e-07, "loss": 22.5938, "step": 24904 }, { "epoch": 1.6540479511190809, "grad_norm": 106.14767456054688, "learning_rate": 1.528714108395691e-07, "loss": 11.9219, "step": 24905 }, { "epoch": 1.6541143654114365, "grad_norm": 384.7738342285156, "learning_rate": 1.528142639254647e-07, "loss": 18.9062, "step": 24906 }, { "epoch": 1.6541807797037924, "grad_norm": 301.5255126953125, "learning_rate": 1.527571268111164e-07, "loss": 14.2656, "step": 24907 }, { "epoch": 1.6542471939961478, "grad_norm": 1160.6070556640625, "learning_rate": 1.5269999949718527e-07, "loss": 16.1719, "step": 24908 }, { "epoch": 1.6543136082885037, "grad_norm": 215.819580078125, "learning_rate": 1.526428819843316e-07, "loss": 18.0156, "step": 24909 }, { "epoch": 1.6543800225808594, "grad_norm": 525.449462890625, "learning_rate": 1.5258577427321684e-07, "loss": 20.7812, "step": 24910 }, { "epoch": 1.654446436873215, "grad_norm": 695.5877075195312, "learning_rate": 1.5252867636450095e-07, "loss": 21.0156, "step": 24911 }, { "epoch": 1.654512851165571, "grad_norm": 172.0767059326172, "learning_rate": 1.5247158825884464e-07, "loss": 13.5312, "step": 24912 }, { "epoch": 1.6545792654579266, "grad_norm": 608.4347534179688, "learning_rate": 1.5241450995690842e-07, "loss": 12.8125, "step": 24913 }, { "epoch": 1.6546456797502822, "grad_norm": 188.6566162109375, "learning_rate": 1.5235744145935248e-07, "loss": 12.6406, "step": 24914 }, { "epoch": 1.654712094042638, "grad_norm": 188.4800567626953, "learning_rate": 1.5230038276683644e-07, "loss": 18.9531, "step": 24915 }, { "epoch": 1.6547785083349937, "grad_norm": 336.60980224609375, "learning_rate": 1.5224333388002132e-07, "loss": 18.5625, "step": 24916 }, { "epoch": 1.6548449226273494, "grad_norm": 230.20167541503906, "learning_rate": 1.5218629479956614e-07, "loss": 15.125, "step": 24917 }, { "epoch": 1.6549113369197053, "grad_norm": 196.74813842773438, "learning_rate": 1.5212926552613103e-07, "loss": 24.8438, "step": 24918 }, { "epoch": 1.6549777512120607, "grad_norm": 278.9327697753906, "learning_rate": 1.5207224606037572e-07, "loss": 18.1562, "step": 24919 }, { "epoch": 1.6550441655044166, "grad_norm": 198.92135620117188, "learning_rate": 1.5201523640295966e-07, "loss": 15.875, "step": 24920 }, { "epoch": 1.6551105797967722, "grad_norm": 179.4119415283203, "learning_rate": 1.5195823655454244e-07, "loss": 13.0703, "step": 24921 }, { "epoch": 1.6551769940891279, "grad_norm": 156.00094604492188, "learning_rate": 1.519012465157835e-07, "loss": 15.4062, "step": 24922 }, { "epoch": 1.6552434083814838, "grad_norm": 257.4842529296875, "learning_rate": 1.5184426628734148e-07, "loss": 19.5312, "step": 24923 }, { "epoch": 1.6553098226738394, "grad_norm": 169.10096740722656, "learning_rate": 1.5178729586987639e-07, "loss": 15.9531, "step": 24924 }, { "epoch": 1.655376236966195, "grad_norm": 2612.609375, "learning_rate": 1.5173033526404645e-07, "loss": 14.3281, "step": 24925 }, { "epoch": 1.655442651258551, "grad_norm": 228.07064819335938, "learning_rate": 1.5167338447051092e-07, "loss": 16.2344, "step": 24926 }, { "epoch": 1.6555090655509066, "grad_norm": 265.94244384765625, "learning_rate": 1.5161644348992853e-07, "loss": 16.3125, "step": 24927 }, { "epoch": 1.6555754798432623, "grad_norm": 225.7395782470703, "learning_rate": 1.5155951232295783e-07, "loss": 10.5938, "step": 24928 }, { "epoch": 1.6556418941356181, "grad_norm": 272.8168640136719, "learning_rate": 1.5150259097025742e-07, "loss": 17.0859, "step": 24929 }, { "epoch": 1.6557083084279736, "grad_norm": 150.7448272705078, "learning_rate": 1.5144567943248588e-07, "loss": 14.3125, "step": 24930 }, { "epoch": 1.6557747227203294, "grad_norm": 133.20156860351562, "learning_rate": 1.5138877771030157e-07, "loss": 19.9062, "step": 24931 }, { "epoch": 1.655841137012685, "grad_norm": 115.06976318359375, "learning_rate": 1.5133188580436218e-07, "loss": 15.5312, "step": 24932 }, { "epoch": 1.6559075513050407, "grad_norm": 218.5712432861328, "learning_rate": 1.5127500371532654e-07, "loss": 18.7188, "step": 24933 }, { "epoch": 1.6559739655973966, "grad_norm": 167.61354064941406, "learning_rate": 1.5121813144385198e-07, "loss": 15.6406, "step": 24934 }, { "epoch": 1.6560403798897523, "grad_norm": 207.89183044433594, "learning_rate": 1.5116126899059678e-07, "loss": 21.375, "step": 24935 }, { "epoch": 1.656106794182108, "grad_norm": 407.50244140625, "learning_rate": 1.511044163562184e-07, "loss": 21.3125, "step": 24936 }, { "epoch": 1.6561732084744638, "grad_norm": 362.6851806640625, "learning_rate": 1.5104757354137476e-07, "loss": 15.5781, "step": 24937 }, { "epoch": 1.6562396227668195, "grad_norm": 159.9490203857422, "learning_rate": 1.5099074054672323e-07, "loss": 17.0938, "step": 24938 }, { "epoch": 1.6563060370591751, "grad_norm": 450.8476257324219, "learning_rate": 1.5093391737292138e-07, "loss": 16.8438, "step": 24939 }, { "epoch": 1.656372451351531, "grad_norm": 121.05587005615234, "learning_rate": 1.5087710402062603e-07, "loss": 12.5625, "step": 24940 }, { "epoch": 1.6564388656438864, "grad_norm": 169.4484405517578, "learning_rate": 1.508203004904951e-07, "loss": 19.4062, "step": 24941 }, { "epoch": 1.6565052799362423, "grad_norm": 268.28662109375, "learning_rate": 1.5076350678318505e-07, "loss": 16.0625, "step": 24942 }, { "epoch": 1.656571694228598, "grad_norm": 127.1741943359375, "learning_rate": 1.5070672289935303e-07, "loss": 17.6875, "step": 24943 }, { "epoch": 1.6566381085209536, "grad_norm": 196.28277587890625, "learning_rate": 1.50649948839656e-07, "loss": 16.25, "step": 24944 }, { "epoch": 1.6567045228133095, "grad_norm": 161.90829467773438, "learning_rate": 1.5059318460475046e-07, "loss": 14.3281, "step": 24945 }, { "epoch": 1.6567709371056651, "grad_norm": 290.41326904296875, "learning_rate": 1.5053643019529328e-07, "loss": 11.7969, "step": 24946 }, { "epoch": 1.6568373513980208, "grad_norm": 107.68984985351562, "learning_rate": 1.5047968561194102e-07, "loss": 15.7344, "step": 24947 }, { "epoch": 1.6569037656903767, "grad_norm": 327.9673767089844, "learning_rate": 1.5042295085534951e-07, "loss": 15.7344, "step": 24948 }, { "epoch": 1.6569701799827323, "grad_norm": 144.17044067382812, "learning_rate": 1.5036622592617577e-07, "loss": 12.4453, "step": 24949 }, { "epoch": 1.657036594275088, "grad_norm": 79.1612548828125, "learning_rate": 1.5030951082507536e-07, "loss": 12.1094, "step": 24950 }, { "epoch": 1.6571030085674439, "grad_norm": 220.50283813476562, "learning_rate": 1.502528055527047e-07, "loss": 15.6797, "step": 24951 }, { "epoch": 1.6571694228597993, "grad_norm": 355.8869934082031, "learning_rate": 1.5019611010971954e-07, "loss": 20.7812, "step": 24952 }, { "epoch": 1.6572358371521552, "grad_norm": 227.69102478027344, "learning_rate": 1.5013942449677598e-07, "loss": 15.7344, "step": 24953 }, { "epoch": 1.6573022514445108, "grad_norm": 267.3878479003906, "learning_rate": 1.5008274871452898e-07, "loss": 14.5, "step": 24954 }, { "epoch": 1.6573686657368665, "grad_norm": 225.6598358154297, "learning_rate": 1.5002608276363514e-07, "loss": 15.4766, "step": 24955 }, { "epoch": 1.6574350800292224, "grad_norm": 86.03932189941406, "learning_rate": 1.4996942664474922e-07, "loss": 15.0625, "step": 24956 }, { "epoch": 1.657501494321578, "grad_norm": 137.50953674316406, "learning_rate": 1.4991278035852683e-07, "loss": 13.2031, "step": 24957 }, { "epoch": 1.6575679086139337, "grad_norm": 151.18209838867188, "learning_rate": 1.4985614390562328e-07, "loss": 15.2969, "step": 24958 }, { "epoch": 1.6576343229062895, "grad_norm": 129.35617065429688, "learning_rate": 1.497995172866935e-07, "loss": 18.6562, "step": 24959 }, { "epoch": 1.6577007371986452, "grad_norm": 439.5549011230469, "learning_rate": 1.497429005023928e-07, "loss": 17.7031, "step": 24960 }, { "epoch": 1.6577671514910008, "grad_norm": 177.29302978515625, "learning_rate": 1.4968629355337603e-07, "loss": 18.5469, "step": 24961 }, { "epoch": 1.6578335657833567, "grad_norm": 139.2902069091797, "learning_rate": 1.4962969644029755e-07, "loss": 10.125, "step": 24962 }, { "epoch": 1.6578999800757122, "grad_norm": 181.4283447265625, "learning_rate": 1.4957310916381271e-07, "loss": 21.0938, "step": 24963 }, { "epoch": 1.657966394368068, "grad_norm": 403.2251281738281, "learning_rate": 1.4951653172457567e-07, "loss": 17.7344, "step": 24964 }, { "epoch": 1.6580328086604237, "grad_norm": 213.97836303710938, "learning_rate": 1.494599641232407e-07, "loss": 15.3594, "step": 24965 }, { "epoch": 1.6580992229527793, "grad_norm": 155.2154998779297, "learning_rate": 1.4940340636046289e-07, "loss": 15.3125, "step": 24966 }, { "epoch": 1.6581656372451352, "grad_norm": 261.81707763671875, "learning_rate": 1.4934685843689566e-07, "loss": 15.3672, "step": 24967 }, { "epoch": 1.6582320515374909, "grad_norm": 237.28009033203125, "learning_rate": 1.4929032035319355e-07, "loss": 13.0625, "step": 24968 }, { "epoch": 1.6582984658298465, "grad_norm": 280.4202575683594, "learning_rate": 1.4923379211001043e-07, "loss": 19.8438, "step": 24969 }, { "epoch": 1.6583648801222024, "grad_norm": 645.8557739257812, "learning_rate": 1.4917727370800027e-07, "loss": 19.7188, "step": 24970 }, { "epoch": 1.658431294414558, "grad_norm": 133.34072875976562, "learning_rate": 1.491207651478168e-07, "loss": 15.2031, "step": 24971 }, { "epoch": 1.6584977087069137, "grad_norm": 332.1906433105469, "learning_rate": 1.4906426643011383e-07, "loss": 18.2812, "step": 24972 }, { "epoch": 1.6585641229992696, "grad_norm": 197.737060546875, "learning_rate": 1.490077775555446e-07, "loss": 14.6875, "step": 24973 }, { "epoch": 1.658630537291625, "grad_norm": 174.61831665039062, "learning_rate": 1.4895129852476263e-07, "loss": 18.5312, "step": 24974 }, { "epoch": 1.658696951583981, "grad_norm": 188.7240753173828, "learning_rate": 1.4889482933842135e-07, "loss": 21.0781, "step": 24975 }, { "epoch": 1.6587633658763365, "grad_norm": 172.47750854492188, "learning_rate": 1.488383699971738e-07, "loss": 13.2656, "step": 24976 }, { "epoch": 1.6588297801686922, "grad_norm": 182.19850158691406, "learning_rate": 1.4878192050167325e-07, "loss": 16.875, "step": 24977 }, { "epoch": 1.658896194461048, "grad_norm": 203.1860809326172, "learning_rate": 1.4872548085257286e-07, "loss": 13.875, "step": 24978 }, { "epoch": 1.6589626087534037, "grad_norm": 392.09735107421875, "learning_rate": 1.4866905105052474e-07, "loss": 22.8125, "step": 24979 }, { "epoch": 1.6590290230457594, "grad_norm": 226.2626495361328, "learning_rate": 1.486126310961826e-07, "loss": 18.1406, "step": 24980 }, { "epoch": 1.6590954373381153, "grad_norm": 634.3818359375, "learning_rate": 1.4855622099019838e-07, "loss": 10.9688, "step": 24981 }, { "epoch": 1.659161851630471, "grad_norm": 402.6131896972656, "learning_rate": 1.4849982073322485e-07, "loss": 12.9688, "step": 24982 }, { "epoch": 1.6592282659228266, "grad_norm": 156.79354858398438, "learning_rate": 1.4844343032591432e-07, "loss": 16.7031, "step": 24983 }, { "epoch": 1.6592946802151824, "grad_norm": 248.74713134765625, "learning_rate": 1.4838704976891926e-07, "loss": 15.6719, "step": 24984 }, { "epoch": 1.6593610945075379, "grad_norm": 148.5166473388672, "learning_rate": 1.4833067906289165e-07, "loss": 13.9688, "step": 24985 }, { "epoch": 1.6594275087998938, "grad_norm": 181.0801544189453, "learning_rate": 1.482743182084839e-07, "loss": 14.3281, "step": 24986 }, { "epoch": 1.6594939230922494, "grad_norm": 452.48297119140625, "learning_rate": 1.4821796720634728e-07, "loss": 14.9531, "step": 24987 }, { "epoch": 1.659560337384605, "grad_norm": 357.6058349609375, "learning_rate": 1.4816162605713446e-07, "loss": 15.5469, "step": 24988 }, { "epoch": 1.659626751676961, "grad_norm": 165.39381408691406, "learning_rate": 1.4810529476149646e-07, "loss": 16.5, "step": 24989 }, { "epoch": 1.6596931659693166, "grad_norm": 157.5460662841797, "learning_rate": 1.4804897332008537e-07, "loss": 15.9375, "step": 24990 }, { "epoch": 1.6597595802616723, "grad_norm": 227.0642852783203, "learning_rate": 1.479926617335523e-07, "loss": 20.875, "step": 24991 }, { "epoch": 1.6598259945540281, "grad_norm": 214.30235290527344, "learning_rate": 1.4793636000254916e-07, "loss": 16.0781, "step": 24992 }, { "epoch": 1.6598924088463838, "grad_norm": 158.52401733398438, "learning_rate": 1.4788006812772635e-07, "loss": 12.0547, "step": 24993 }, { "epoch": 1.6599588231387394, "grad_norm": 166.27911376953125, "learning_rate": 1.4782378610973611e-07, "loss": 17.0312, "step": 24994 }, { "epoch": 1.6600252374310953, "grad_norm": 472.7980041503906, "learning_rate": 1.477675139492286e-07, "loss": 14.3438, "step": 24995 }, { "epoch": 1.6600916517234507, "grad_norm": 245.90065002441406, "learning_rate": 1.4771125164685516e-07, "loss": 16.8359, "step": 24996 }, { "epoch": 1.6601580660158066, "grad_norm": 162.78848266601562, "learning_rate": 1.4765499920326642e-07, "loss": 16.1875, "step": 24997 }, { "epoch": 1.6602244803081623, "grad_norm": 331.3971252441406, "learning_rate": 1.4759875661911315e-07, "loss": 11.0469, "step": 24998 }, { "epoch": 1.660290894600518, "grad_norm": 414.06024169921875, "learning_rate": 1.4754252389504595e-07, "loss": 15.7344, "step": 24999 }, { "epoch": 1.6603573088928738, "grad_norm": 325.1572265625, "learning_rate": 1.4748630103171554e-07, "loss": 18.5625, "step": 25000 }, { "epoch": 1.6604237231852295, "grad_norm": 793.7909545898438, "learning_rate": 1.4743008802977153e-07, "loss": 20.6875, "step": 25001 }, { "epoch": 1.6604901374775851, "grad_norm": 214.54315185546875, "learning_rate": 1.473738848898649e-07, "loss": 14.7031, "step": 25002 }, { "epoch": 1.660556551769941, "grad_norm": 576.534912109375, "learning_rate": 1.4731769161264574e-07, "loss": 13.25, "step": 25003 }, { "epoch": 1.6606229660622966, "grad_norm": 302.9020690917969, "learning_rate": 1.4726150819876338e-07, "loss": 16.6719, "step": 25004 }, { "epoch": 1.6606893803546523, "grad_norm": 605.7962646484375, "learning_rate": 1.4720533464886852e-07, "loss": 17.0156, "step": 25005 }, { "epoch": 1.6607557946470082, "grad_norm": 232.94949340820312, "learning_rate": 1.4714917096361057e-07, "loss": 16.8438, "step": 25006 }, { "epoch": 1.6608222089393636, "grad_norm": 450.089111328125, "learning_rate": 1.470930171436391e-07, "loss": 12.9062, "step": 25007 }, { "epoch": 1.6608886232317195, "grad_norm": 169.5866241455078, "learning_rate": 1.4703687318960378e-07, "loss": 15.7188, "step": 25008 }, { "epoch": 1.6609550375240751, "grad_norm": 180.13706970214844, "learning_rate": 1.4698073910215415e-07, "loss": 15.6094, "step": 25009 }, { "epoch": 1.6610214518164308, "grad_norm": 753.99755859375, "learning_rate": 1.4692461488193942e-07, "loss": 16.2656, "step": 25010 }, { "epoch": 1.6610878661087867, "grad_norm": 241.98330688476562, "learning_rate": 1.4686850052960897e-07, "loss": 17.9531, "step": 25011 }, { "epoch": 1.6611542804011423, "grad_norm": 220.0245361328125, "learning_rate": 1.4681239604581142e-07, "loss": 19.0625, "step": 25012 }, { "epoch": 1.661220694693498, "grad_norm": 332.4740295410156, "learning_rate": 1.4675630143119655e-07, "loss": 19.5625, "step": 25013 }, { "epoch": 1.6612871089858539, "grad_norm": 141.75521850585938, "learning_rate": 1.4670021668641252e-07, "loss": 11.6406, "step": 25014 }, { "epoch": 1.6613535232782095, "grad_norm": 167.761474609375, "learning_rate": 1.4664414181210828e-07, "loss": 16.9062, "step": 25015 }, { "epoch": 1.6614199375705652, "grad_norm": 158.63401794433594, "learning_rate": 1.4658807680893258e-07, "loss": 18.0625, "step": 25016 }, { "epoch": 1.661486351862921, "grad_norm": 215.951171875, "learning_rate": 1.465320216775342e-07, "loss": 18.6406, "step": 25017 }, { "epoch": 1.6615527661552765, "grad_norm": 180.39378356933594, "learning_rate": 1.4647597641856068e-07, "loss": 18.375, "step": 25018 }, { "epoch": 1.6616191804476323, "grad_norm": 110.38764190673828, "learning_rate": 1.464199410326614e-07, "loss": 13.0781, "step": 25019 }, { "epoch": 1.661685594739988, "grad_norm": 239.20542907714844, "learning_rate": 1.463639155204839e-07, "loss": 13.7734, "step": 25020 }, { "epoch": 1.6617520090323437, "grad_norm": 129.92910766601562, "learning_rate": 1.4630789988267633e-07, "loss": 12.5469, "step": 25021 }, { "epoch": 1.6618184233246995, "grad_norm": 194.95382690429688, "learning_rate": 1.462518941198867e-07, "loss": 16.0625, "step": 25022 }, { "epoch": 1.6618848376170552, "grad_norm": 163.02210998535156, "learning_rate": 1.461958982327628e-07, "loss": 11.6094, "step": 25023 }, { "epoch": 1.6619512519094108, "grad_norm": 147.96473693847656, "learning_rate": 1.4613991222195255e-07, "loss": 13.1406, "step": 25024 }, { "epoch": 1.6620176662017667, "grad_norm": 190.80828857421875, "learning_rate": 1.4608393608810356e-07, "loss": 25.0, "step": 25025 }, { "epoch": 1.6620840804941224, "grad_norm": 128.92098999023438, "learning_rate": 1.4602796983186273e-07, "loss": 12.5625, "step": 25026 }, { "epoch": 1.662150494786478, "grad_norm": 210.6087188720703, "learning_rate": 1.4597201345387845e-07, "loss": 13.9688, "step": 25027 }, { "epoch": 1.662216909078834, "grad_norm": 137.3203582763672, "learning_rate": 1.4591606695479719e-07, "loss": 19.3281, "step": 25028 }, { "epoch": 1.6622833233711893, "grad_norm": 393.2651062011719, "learning_rate": 1.4586013033526634e-07, "loss": 14.9375, "step": 25029 }, { "epoch": 1.6623497376635452, "grad_norm": 522.7659912109375, "learning_rate": 1.4580420359593304e-07, "loss": 28.75, "step": 25030 }, { "epoch": 1.6624161519559009, "grad_norm": 288.2536926269531, "learning_rate": 1.457482867374441e-07, "loss": 17.625, "step": 25031 }, { "epoch": 1.6624825662482565, "grad_norm": 231.87181091308594, "learning_rate": 1.4569237976044636e-07, "loss": 12.125, "step": 25032 }, { "epoch": 1.6625489805406124, "grad_norm": 140.4229736328125, "learning_rate": 1.456364826655868e-07, "loss": 16.9688, "step": 25033 }, { "epoch": 1.662615394832968, "grad_norm": 152.73915100097656, "learning_rate": 1.4558059545351142e-07, "loss": 14.4375, "step": 25034 }, { "epoch": 1.6626818091253237, "grad_norm": 310.5121765136719, "learning_rate": 1.4552471812486733e-07, "loss": 22.5938, "step": 25035 }, { "epoch": 1.6627482234176796, "grad_norm": 386.9742126464844, "learning_rate": 1.4546885068030047e-07, "loss": 18.0938, "step": 25036 }, { "epoch": 1.6628146377100352, "grad_norm": 347.0582275390625, "learning_rate": 1.4541299312045707e-07, "loss": 19.4219, "step": 25037 }, { "epoch": 1.662881052002391, "grad_norm": 141.47909545898438, "learning_rate": 1.4535714544598343e-07, "loss": 12.75, "step": 25038 }, { "epoch": 1.6629474662947468, "grad_norm": 247.04379272460938, "learning_rate": 1.4530130765752557e-07, "loss": 13.0625, "step": 25039 }, { "epoch": 1.6630138805871022, "grad_norm": 121.39620971679688, "learning_rate": 1.4524547975572933e-07, "loss": 13.8125, "step": 25040 }, { "epoch": 1.663080294879458, "grad_norm": 202.11997985839844, "learning_rate": 1.4518966174124047e-07, "loss": 19.1719, "step": 25041 }, { "epoch": 1.6631467091718137, "grad_norm": 203.45594787597656, "learning_rate": 1.4513385361470486e-07, "loss": 14.9844, "step": 25042 }, { "epoch": 1.6632131234641694, "grad_norm": 191.58546447753906, "learning_rate": 1.4507805537676755e-07, "loss": 15.4062, "step": 25043 }, { "epoch": 1.6632795377565253, "grad_norm": 100.68270874023438, "learning_rate": 1.4502226702807475e-07, "loss": 13.0, "step": 25044 }, { "epoch": 1.663345952048881, "grad_norm": 147.22340393066406, "learning_rate": 1.4496648856927107e-07, "loss": 14.5156, "step": 25045 }, { "epoch": 1.6634123663412366, "grad_norm": 79.61996459960938, "learning_rate": 1.4491072000100202e-07, "loss": 16.375, "step": 25046 }, { "epoch": 1.6634787806335924, "grad_norm": 295.7335510253906, "learning_rate": 1.4485496132391273e-07, "loss": 16.4688, "step": 25047 }, { "epoch": 1.663545194925948, "grad_norm": 452.7401123046875, "learning_rate": 1.4479921253864813e-07, "loss": 13.9375, "step": 25048 }, { "epoch": 1.6636116092183038, "grad_norm": 184.92369079589844, "learning_rate": 1.4474347364585305e-07, "loss": 15.3906, "step": 25049 }, { "epoch": 1.6636780235106596, "grad_norm": 333.34564208984375, "learning_rate": 1.446877446461725e-07, "loss": 17.2969, "step": 25050 }, { "epoch": 1.663744437803015, "grad_norm": 243.01405334472656, "learning_rate": 1.4463202554025056e-07, "loss": 13.75, "step": 25051 }, { "epoch": 1.663810852095371, "grad_norm": 454.964111328125, "learning_rate": 1.4457631632873258e-07, "loss": 11.3906, "step": 25052 }, { "epoch": 1.6638772663877266, "grad_norm": 1148.445068359375, "learning_rate": 1.4452061701226225e-07, "loss": 17.5, "step": 25053 }, { "epoch": 1.6639436806800822, "grad_norm": 230.33795166015625, "learning_rate": 1.444649275914841e-07, "loss": 15.6797, "step": 25054 }, { "epoch": 1.6640100949724381, "grad_norm": 164.85801696777344, "learning_rate": 1.4440924806704235e-07, "loss": 15.3281, "step": 25055 }, { "epoch": 1.6640765092647938, "grad_norm": 231.02569580078125, "learning_rate": 1.443535784395813e-07, "loss": 18.3906, "step": 25056 }, { "epoch": 1.6641429235571494, "grad_norm": 152.3562774658203, "learning_rate": 1.4429791870974416e-07, "loss": 14.3438, "step": 25057 }, { "epoch": 1.6642093378495053, "grad_norm": 157.27691650390625, "learning_rate": 1.442422688781757e-07, "loss": 15.75, "step": 25058 }, { "epoch": 1.664275752141861, "grad_norm": 97.94221496582031, "learning_rate": 1.4418662894551902e-07, "loss": 15.7344, "step": 25059 }, { "epoch": 1.6643421664342166, "grad_norm": 373.2418212890625, "learning_rate": 1.44130998912418e-07, "loss": 18.6875, "step": 25060 }, { "epoch": 1.6644085807265725, "grad_norm": 286.7054138183594, "learning_rate": 1.4407537877951602e-07, "loss": 12.0469, "step": 25061 }, { "epoch": 1.664474995018928, "grad_norm": 169.93214416503906, "learning_rate": 1.4401976854745645e-07, "loss": 15.8906, "step": 25062 }, { "epoch": 1.6645414093112838, "grad_norm": 149.3824920654297, "learning_rate": 1.4396416821688274e-07, "loss": 16.6797, "step": 25063 }, { "epoch": 1.6646078236036395, "grad_norm": 258.0405578613281, "learning_rate": 1.43908577788438e-07, "loss": 18.8281, "step": 25064 }, { "epoch": 1.6646742378959951, "grad_norm": 116.69766235351562, "learning_rate": 1.438529972627648e-07, "loss": 13.2344, "step": 25065 }, { "epoch": 1.664740652188351, "grad_norm": 855.8978271484375, "learning_rate": 1.4379742664050687e-07, "loss": 12.1719, "step": 25066 }, { "epoch": 1.6648070664807066, "grad_norm": 407.4504089355469, "learning_rate": 1.4374186592230641e-07, "loss": 15.0469, "step": 25067 }, { "epoch": 1.6648734807730623, "grad_norm": 156.97901916503906, "learning_rate": 1.4368631510880624e-07, "loss": 13.9375, "step": 25068 }, { "epoch": 1.6649398950654182, "grad_norm": 181.19970703125, "learning_rate": 1.4363077420064905e-07, "loss": 19.6094, "step": 25069 }, { "epoch": 1.6650063093577738, "grad_norm": 326.1842956542969, "learning_rate": 1.4357524319847726e-07, "loss": 15.5469, "step": 25070 }, { "epoch": 1.6650727236501295, "grad_norm": 182.140380859375, "learning_rate": 1.4351972210293317e-07, "loss": 18.7969, "step": 25071 }, { "epoch": 1.6651391379424854, "grad_norm": 149.26605224609375, "learning_rate": 1.4346421091465922e-07, "loss": 15.6875, "step": 25072 }, { "epoch": 1.6652055522348408, "grad_norm": 198.97247314453125, "learning_rate": 1.4340870963429706e-07, "loss": 13.6719, "step": 25073 }, { "epoch": 1.6652719665271967, "grad_norm": 214.07525634765625, "learning_rate": 1.4335321826248913e-07, "loss": 16.8125, "step": 25074 }, { "epoch": 1.6653383808195523, "grad_norm": 302.76953125, "learning_rate": 1.4329773679987744e-07, "loss": 15.4375, "step": 25075 }, { "epoch": 1.665404795111908, "grad_norm": 294.35687255859375, "learning_rate": 1.4324226524710314e-07, "loss": 19.7812, "step": 25076 }, { "epoch": 1.6654712094042639, "grad_norm": 694.638427734375, "learning_rate": 1.4318680360480873e-07, "loss": 21.0469, "step": 25077 }, { "epoch": 1.6655376236966195, "grad_norm": 173.37257385253906, "learning_rate": 1.43131351873635e-07, "loss": 15.6562, "step": 25078 }, { "epoch": 1.6656040379889752, "grad_norm": 1080.0478515625, "learning_rate": 1.4307591005422381e-07, "loss": 13.0234, "step": 25079 }, { "epoch": 1.665670452281331, "grad_norm": 113.55204010009766, "learning_rate": 1.430204781472163e-07, "loss": 15.0938, "step": 25080 }, { "epoch": 1.6657368665736867, "grad_norm": 362.87408447265625, "learning_rate": 1.4296505615325394e-07, "loss": 14.9062, "step": 25081 }, { "epoch": 1.6658032808660423, "grad_norm": 333.4412841796875, "learning_rate": 1.429096440729771e-07, "loss": 21.8281, "step": 25082 }, { "epoch": 1.6658696951583982, "grad_norm": 596.5767211914062, "learning_rate": 1.4285424190702788e-07, "loss": 16.3125, "step": 25083 }, { "epoch": 1.6659361094507537, "grad_norm": 160.73236083984375, "learning_rate": 1.4279884965604617e-07, "loss": 16.2031, "step": 25084 }, { "epoch": 1.6660025237431095, "grad_norm": 170.3847198486328, "learning_rate": 1.4274346732067312e-07, "loss": 18.2969, "step": 25085 }, { "epoch": 1.6660689380354652, "grad_norm": 123.86557006835938, "learning_rate": 1.4268809490154932e-07, "loss": 11.75, "step": 25086 }, { "epoch": 1.6661353523278208, "grad_norm": 304.9152526855469, "learning_rate": 1.426327323993154e-07, "loss": 17.4531, "step": 25087 }, { "epoch": 1.6662017666201767, "grad_norm": 314.2093505859375, "learning_rate": 1.4257737981461148e-07, "loss": 13.6875, "step": 25088 }, { "epoch": 1.6662681809125324, "grad_norm": 194.06240844726562, "learning_rate": 1.425220371480783e-07, "loss": 19.8125, "step": 25089 }, { "epoch": 1.666334595204888, "grad_norm": 334.364013671875, "learning_rate": 1.4246670440035535e-07, "loss": 17.6094, "step": 25090 }, { "epoch": 1.666401009497244, "grad_norm": 153.1213836669922, "learning_rate": 1.4241138157208343e-07, "loss": 13.7969, "step": 25091 }, { "epoch": 1.6664674237895996, "grad_norm": 105.80033111572266, "learning_rate": 1.4235606866390193e-07, "loss": 14.9531, "step": 25092 }, { "epoch": 1.6665338380819552, "grad_norm": 187.74618530273438, "learning_rate": 1.4230076567645088e-07, "loss": 18.5625, "step": 25093 }, { "epoch": 1.666600252374311, "grad_norm": 249.32144165039062, "learning_rate": 1.4224547261037002e-07, "loss": 16.9375, "step": 25094 }, { "epoch": 1.6666666666666665, "grad_norm": 241.84817504882812, "learning_rate": 1.4219018946629902e-07, "loss": 19.1875, "step": 25095 }, { "epoch": 1.6667330809590224, "grad_norm": 174.44883728027344, "learning_rate": 1.4213491624487718e-07, "loss": 18.6719, "step": 25096 }, { "epoch": 1.666799495251378, "grad_norm": 359.2127685546875, "learning_rate": 1.4207965294674407e-07, "loss": 14.2656, "step": 25097 }, { "epoch": 1.6668659095437337, "grad_norm": 332.62384033203125, "learning_rate": 1.4202439957253855e-07, "loss": 21.3438, "step": 25098 }, { "epoch": 1.6669323238360896, "grad_norm": 300.0279846191406, "learning_rate": 1.4196915612290038e-07, "loss": 17.4688, "step": 25099 }, { "epoch": 1.6669987381284452, "grad_norm": 264.8466491699219, "learning_rate": 1.4191392259846802e-07, "loss": 20.625, "step": 25100 }, { "epoch": 1.6670651524208009, "grad_norm": 189.30899047851562, "learning_rate": 1.4185869899988067e-07, "loss": 13.4219, "step": 25101 }, { "epoch": 1.6671315667131568, "grad_norm": 145.31556701660156, "learning_rate": 1.4180348532777708e-07, "loss": 19.6875, "step": 25102 }, { "epoch": 1.6671979810055124, "grad_norm": 100.26473999023438, "learning_rate": 1.4174828158279606e-07, "loss": 9.4453, "step": 25103 }, { "epoch": 1.667264395297868, "grad_norm": 174.24005126953125, "learning_rate": 1.4169308776557553e-07, "loss": 13.6719, "step": 25104 }, { "epoch": 1.667330809590224, "grad_norm": 267.420654296875, "learning_rate": 1.41637903876755e-07, "loss": 14.0312, "step": 25105 }, { "epoch": 1.6673972238825794, "grad_norm": 140.95716857910156, "learning_rate": 1.4158272991697196e-07, "loss": 13.4844, "step": 25106 }, { "epoch": 1.6674636381749353, "grad_norm": 259.7113342285156, "learning_rate": 1.4152756588686486e-07, "loss": 16.4844, "step": 25107 }, { "epoch": 1.667530052467291, "grad_norm": 351.4181823730469, "learning_rate": 1.4147241178707193e-07, "loss": 21.8125, "step": 25108 }, { "epoch": 1.6675964667596466, "grad_norm": 248.95591735839844, "learning_rate": 1.4141726761823102e-07, "loss": 15.5938, "step": 25109 }, { "epoch": 1.6676628810520024, "grad_norm": 291.3492126464844, "learning_rate": 1.4136213338098012e-07, "loss": 16.0781, "step": 25110 }, { "epoch": 1.667729295344358, "grad_norm": 204.99148559570312, "learning_rate": 1.4130700907595693e-07, "loss": 14.7031, "step": 25111 }, { "epoch": 1.6677957096367138, "grad_norm": 221.0844268798828, "learning_rate": 1.4125189470379905e-07, "loss": 12.3438, "step": 25112 }, { "epoch": 1.6678621239290696, "grad_norm": 206.18006896972656, "learning_rate": 1.411967902651442e-07, "loss": 20.3281, "step": 25113 }, { "epoch": 1.6679285382214253, "grad_norm": 429.6504211425781, "learning_rate": 1.4114169576062983e-07, "loss": 14.375, "step": 25114 }, { "epoch": 1.667994952513781, "grad_norm": 197.6865692138672, "learning_rate": 1.410866111908926e-07, "loss": 18.2188, "step": 25115 }, { "epoch": 1.6680613668061368, "grad_norm": 279.66912841796875, "learning_rate": 1.4103153655657063e-07, "loss": 15.7578, "step": 25116 }, { "epoch": 1.6681277810984922, "grad_norm": 264.6247253417969, "learning_rate": 1.409764718583003e-07, "loss": 17.9844, "step": 25117 }, { "epoch": 1.6681941953908481, "grad_norm": 179.39773559570312, "learning_rate": 1.4092141709671877e-07, "loss": 16.375, "step": 25118 }, { "epoch": 1.6682606096832038, "grad_norm": 370.7372741699219, "learning_rate": 1.40866372272463e-07, "loss": 18.4531, "step": 25119 }, { "epoch": 1.6683270239755594, "grad_norm": 258.7843017578125, "learning_rate": 1.408113373861698e-07, "loss": 18.5938, "step": 25120 }, { "epoch": 1.6683934382679153, "grad_norm": 183.60855102539062, "learning_rate": 1.407563124384752e-07, "loss": 19.7188, "step": 25121 }, { "epoch": 1.668459852560271, "grad_norm": 277.5854187011719, "learning_rate": 1.4070129743001658e-07, "loss": 16.1406, "step": 25122 }, { "epoch": 1.6685262668526266, "grad_norm": 1437.6375732421875, "learning_rate": 1.4064629236142954e-07, "loss": 12.8594, "step": 25123 }, { "epoch": 1.6685926811449825, "grad_norm": 137.27468872070312, "learning_rate": 1.4059129723335073e-07, "loss": 16.2969, "step": 25124 }, { "epoch": 1.6686590954373381, "grad_norm": 293.9103698730469, "learning_rate": 1.4053631204641624e-07, "loss": 13.5703, "step": 25125 }, { "epoch": 1.6687255097296938, "grad_norm": 140.98960876464844, "learning_rate": 1.4048133680126207e-07, "loss": 13.1094, "step": 25126 }, { "epoch": 1.6687919240220497, "grad_norm": 176.20550537109375, "learning_rate": 1.404263714985241e-07, "loss": 13.3594, "step": 25127 }, { "epoch": 1.668858338314405, "grad_norm": 238.47801208496094, "learning_rate": 1.4037141613883853e-07, "loss": 20.8281, "step": 25128 }, { "epoch": 1.668924752606761, "grad_norm": 227.7064666748047, "learning_rate": 1.4031647072284024e-07, "loss": 11.4219, "step": 25129 }, { "epoch": 1.6689911668991166, "grad_norm": 109.45519256591797, "learning_rate": 1.4026153525116567e-07, "loss": 14.1406, "step": 25130 }, { "epoch": 1.6690575811914723, "grad_norm": 433.749755859375, "learning_rate": 1.4020660972444985e-07, "loss": 14.6875, "step": 25131 }, { "epoch": 1.6691239954838282, "grad_norm": 480.79693603515625, "learning_rate": 1.4015169414332806e-07, "loss": 19.0312, "step": 25132 }, { "epoch": 1.6691904097761838, "grad_norm": 315.8089599609375, "learning_rate": 1.4009678850843564e-07, "loss": 18.2031, "step": 25133 }, { "epoch": 1.6692568240685395, "grad_norm": 289.2798156738281, "learning_rate": 1.400418928204078e-07, "loss": 18.6719, "step": 25134 }, { "epoch": 1.6693232383608954, "grad_norm": 87.93132781982422, "learning_rate": 1.3998700707987944e-07, "loss": 13.5938, "step": 25135 }, { "epoch": 1.669389652653251, "grad_norm": 118.26268768310547, "learning_rate": 1.3993213128748572e-07, "loss": 15.5938, "step": 25136 }, { "epoch": 1.6694560669456067, "grad_norm": 273.7467041015625, "learning_rate": 1.3987726544386068e-07, "loss": 18.9844, "step": 25137 }, { "epoch": 1.6695224812379625, "grad_norm": 174.6806182861328, "learning_rate": 1.3982240954963986e-07, "loss": 18.0781, "step": 25138 }, { "epoch": 1.669588895530318, "grad_norm": 165.7919921875, "learning_rate": 1.3976756360545729e-07, "loss": 13.125, "step": 25139 }, { "epoch": 1.6696553098226738, "grad_norm": 359.416015625, "learning_rate": 1.3971272761194741e-07, "loss": 21.3672, "step": 25140 }, { "epoch": 1.6697217241150295, "grad_norm": 322.0894775390625, "learning_rate": 1.3965790156974467e-07, "loss": 13.4219, "step": 25141 }, { "epoch": 1.6697881384073852, "grad_norm": 622.0099487304688, "learning_rate": 1.396030854794834e-07, "loss": 14.4219, "step": 25142 }, { "epoch": 1.669854552699741, "grad_norm": 239.8017120361328, "learning_rate": 1.3954827934179702e-07, "loss": 20.9062, "step": 25143 }, { "epoch": 1.6699209669920967, "grad_norm": 1287.383056640625, "learning_rate": 1.3949348315732056e-07, "loss": 28.4531, "step": 25144 }, { "epoch": 1.6699873812844523, "grad_norm": 159.8061065673828, "learning_rate": 1.3943869692668696e-07, "loss": 16.7188, "step": 25145 }, { "epoch": 1.6700537955768082, "grad_norm": 260.2119140625, "learning_rate": 1.3938392065053038e-07, "loss": 18.1094, "step": 25146 }, { "epoch": 1.6701202098691639, "grad_norm": 142.1746826171875, "learning_rate": 1.3932915432948422e-07, "loss": 16.8906, "step": 25147 }, { "epoch": 1.6701866241615195, "grad_norm": 279.0509948730469, "learning_rate": 1.3927439796418228e-07, "loss": 17.2812, "step": 25148 }, { "epoch": 1.6702530384538754, "grad_norm": 357.8185729980469, "learning_rate": 1.392196515552576e-07, "loss": 23.7344, "step": 25149 }, { "epoch": 1.6703194527462308, "grad_norm": 188.1571807861328, "learning_rate": 1.391649151033437e-07, "loss": 13.8438, "step": 25150 }, { "epoch": 1.6703858670385867, "grad_norm": 266.25909423828125, "learning_rate": 1.3911018860907376e-07, "loss": 16.2031, "step": 25151 }, { "epoch": 1.6704522813309424, "grad_norm": 145.8397216796875, "learning_rate": 1.3905547207308067e-07, "loss": 18.7031, "step": 25152 }, { "epoch": 1.670518695623298, "grad_norm": 295.07843017578125, "learning_rate": 1.390007654959977e-07, "loss": 14.7656, "step": 25153 }, { "epoch": 1.670585109915654, "grad_norm": 124.85188293457031, "learning_rate": 1.3894606887845682e-07, "loss": 13.375, "step": 25154 }, { "epoch": 1.6706515242080096, "grad_norm": 101.22227478027344, "learning_rate": 1.3889138222109186e-07, "loss": 15.0156, "step": 25155 }, { "epoch": 1.6707179385003652, "grad_norm": 237.939208984375, "learning_rate": 1.3883670552453463e-07, "loss": 16.875, "step": 25156 }, { "epoch": 1.670784352792721, "grad_norm": 153.01913452148438, "learning_rate": 1.3878203878941775e-07, "loss": 12.0625, "step": 25157 }, { "epoch": 1.6708507670850767, "grad_norm": 176.86886596679688, "learning_rate": 1.3872738201637368e-07, "loss": 13.8906, "step": 25158 }, { "epoch": 1.6709171813774324, "grad_norm": 200.00717163085938, "learning_rate": 1.3867273520603463e-07, "loss": 14.2344, "step": 25159 }, { "epoch": 1.6709835956697883, "grad_norm": 797.8112182617188, "learning_rate": 1.3861809835903272e-07, "loss": 19.5938, "step": 25160 }, { "epoch": 1.6710500099621437, "grad_norm": 229.82276916503906, "learning_rate": 1.3856347147600013e-07, "loss": 12.8438, "step": 25161 }, { "epoch": 1.6711164242544996, "grad_norm": 100.51569366455078, "learning_rate": 1.385088545575681e-07, "loss": 9.2656, "step": 25162 }, { "epoch": 1.6711828385468552, "grad_norm": 211.87167358398438, "learning_rate": 1.3845424760436942e-07, "loss": 15.0391, "step": 25163 }, { "epoch": 1.6712492528392109, "grad_norm": 301.6649169921875, "learning_rate": 1.3839965061703496e-07, "loss": 23.375, "step": 25164 }, { "epoch": 1.6713156671315668, "grad_norm": 363.1893615722656, "learning_rate": 1.3834506359619648e-07, "loss": 19.2969, "step": 25165 }, { "epoch": 1.6713820814239224, "grad_norm": 183.1330108642578, "learning_rate": 1.3829048654248542e-07, "loss": 12.5625, "step": 25166 }, { "epoch": 1.671448495716278, "grad_norm": 123.68959045410156, "learning_rate": 1.3823591945653333e-07, "loss": 15.4375, "step": 25167 }, { "epoch": 1.671514910008634, "grad_norm": 337.8734130859375, "learning_rate": 1.381813623389708e-07, "loss": 16.5312, "step": 25168 }, { "epoch": 1.6715813243009896, "grad_norm": 221.76190185546875, "learning_rate": 1.3812681519042978e-07, "loss": 13.9219, "step": 25169 }, { "epoch": 1.6716477385933453, "grad_norm": 191.4490966796875, "learning_rate": 1.3807227801154053e-07, "loss": 12.625, "step": 25170 }, { "epoch": 1.6717141528857011, "grad_norm": 339.3574523925781, "learning_rate": 1.380177508029341e-07, "loss": 24.375, "step": 25171 }, { "epoch": 1.6717805671780566, "grad_norm": 148.33163452148438, "learning_rate": 1.379632335652413e-07, "loss": 16.9219, "step": 25172 }, { "epoch": 1.6718469814704124, "grad_norm": 406.21917724609375, "learning_rate": 1.379087262990928e-07, "loss": 24.2969, "step": 25173 }, { "epoch": 1.671913395762768, "grad_norm": 177.68038940429688, "learning_rate": 1.37854229005119e-07, "loss": 16.2812, "step": 25174 }, { "epoch": 1.6719798100551237, "grad_norm": 200.01910400390625, "learning_rate": 1.377997416839506e-07, "loss": 14.5625, "step": 25175 }, { "epoch": 1.6720462243474796, "grad_norm": 150.70700073242188, "learning_rate": 1.3774526433621713e-07, "loss": 15.125, "step": 25176 }, { "epoch": 1.6721126386398353, "grad_norm": 164.18756103515625, "learning_rate": 1.376907969625497e-07, "loss": 14.7031, "step": 25177 }, { "epoch": 1.672179052932191, "grad_norm": 334.8578186035156, "learning_rate": 1.3763633956357767e-07, "loss": 14.9531, "step": 25178 }, { "epoch": 1.6722454672245468, "grad_norm": 179.11614990234375, "learning_rate": 1.3758189213993122e-07, "loss": 20.3438, "step": 25179 }, { "epoch": 1.6723118815169025, "grad_norm": 138.8618927001953, "learning_rate": 1.3752745469224013e-07, "loss": 15.0, "step": 25180 }, { "epoch": 1.6723782958092581, "grad_norm": 243.35557556152344, "learning_rate": 1.3747302722113407e-07, "loss": 18.0625, "step": 25181 }, { "epoch": 1.672444710101614, "grad_norm": 196.13377380371094, "learning_rate": 1.3741860972724272e-07, "loss": 12.4844, "step": 25182 }, { "epoch": 1.6725111243939694, "grad_norm": 332.13458251953125, "learning_rate": 1.373642022111957e-07, "loss": 16.375, "step": 25183 }, { "epoch": 1.6725775386863253, "grad_norm": 333.42938232421875, "learning_rate": 1.3730980467362196e-07, "loss": 15.5469, "step": 25184 }, { "epoch": 1.672643952978681, "grad_norm": 304.3075866699219, "learning_rate": 1.3725541711515066e-07, "loss": 14.0312, "step": 25185 }, { "epoch": 1.6727103672710366, "grad_norm": 171.75350952148438, "learning_rate": 1.3720103953641172e-07, "loss": 17.875, "step": 25186 }, { "epoch": 1.6727767815633925, "grad_norm": 299.0529479980469, "learning_rate": 1.3714667193803343e-07, "loss": 15.1719, "step": 25187 }, { "epoch": 1.6728431958557481, "grad_norm": 258.4412536621094, "learning_rate": 1.3709231432064482e-07, "loss": 17.1719, "step": 25188 }, { "epoch": 1.6729096101481038, "grad_norm": 278.87908935546875, "learning_rate": 1.3703796668487478e-07, "loss": 21.1406, "step": 25189 }, { "epoch": 1.6729760244404597, "grad_norm": 208.71250915527344, "learning_rate": 1.3698362903135185e-07, "loss": 15.7656, "step": 25190 }, { "epoch": 1.6730424387328153, "grad_norm": 179.50338745117188, "learning_rate": 1.369293013607048e-07, "loss": 16.5625, "step": 25191 }, { "epoch": 1.673108853025171, "grad_norm": 172.96571350097656, "learning_rate": 1.3687498367356209e-07, "loss": 13.1797, "step": 25192 }, { "epoch": 1.6731752673175269, "grad_norm": 286.73211669921875, "learning_rate": 1.3682067597055136e-07, "loss": 17.1562, "step": 25193 }, { "epoch": 1.6732416816098823, "grad_norm": 288.93316650390625, "learning_rate": 1.3676637825230175e-07, "loss": 15.1875, "step": 25194 }, { "epoch": 1.6733080959022382, "grad_norm": 133.7747344970703, "learning_rate": 1.3671209051944078e-07, "loss": 14.5312, "step": 25195 }, { "epoch": 1.6733745101945938, "grad_norm": 151.98410034179688, "learning_rate": 1.3665781277259647e-07, "loss": 16.1094, "step": 25196 }, { "epoch": 1.6734409244869495, "grad_norm": 419.7115478515625, "learning_rate": 1.366035450123968e-07, "loss": 20.0, "step": 25197 }, { "epoch": 1.6735073387793054, "grad_norm": 168.16355895996094, "learning_rate": 1.3654928723946934e-07, "loss": 15.8828, "step": 25198 }, { "epoch": 1.673573753071661, "grad_norm": 215.31280517578125, "learning_rate": 1.3649503945444195e-07, "loss": 17.7031, "step": 25199 }, { "epoch": 1.6736401673640167, "grad_norm": 154.75704956054688, "learning_rate": 1.3644080165794226e-07, "loss": 11.8125, "step": 25200 }, { "epoch": 1.6737065816563725, "grad_norm": 128.7505645751953, "learning_rate": 1.3638657385059692e-07, "loss": 11.2969, "step": 25201 }, { "epoch": 1.6737729959487282, "grad_norm": 157.97650146484375, "learning_rate": 1.3633235603303418e-07, "loss": 14.8906, "step": 25202 }, { "epoch": 1.6738394102410838, "grad_norm": 182.7496337890625, "learning_rate": 1.3627814820588047e-07, "loss": 17.4688, "step": 25203 }, { "epoch": 1.6739058245334397, "grad_norm": 148.40487670898438, "learning_rate": 1.362239503697631e-07, "loss": 13.5078, "step": 25204 }, { "epoch": 1.6739722388257952, "grad_norm": 447.069091796875, "learning_rate": 1.3616976252530886e-07, "loss": 20.2109, "step": 25205 }, { "epoch": 1.674038653118151, "grad_norm": 206.6367950439453, "learning_rate": 1.361155846731451e-07, "loss": 15.0625, "step": 25206 }, { "epoch": 1.6741050674105067, "grad_norm": 940.0115356445312, "learning_rate": 1.360614168138976e-07, "loss": 23.7656, "step": 25207 }, { "epoch": 1.6741714817028623, "grad_norm": 155.09457397460938, "learning_rate": 1.3600725894819398e-07, "loss": 11.8359, "step": 25208 }, { "epoch": 1.6742378959952182, "grad_norm": 135.04049682617188, "learning_rate": 1.3595311107665985e-07, "loss": 14.4688, "step": 25209 }, { "epoch": 1.6743043102875739, "grad_norm": 122.70467376708984, "learning_rate": 1.3589897319992193e-07, "loss": 15.75, "step": 25210 }, { "epoch": 1.6743707245799295, "grad_norm": 193.55453491210938, "learning_rate": 1.3584484531860642e-07, "loss": 19.3125, "step": 25211 }, { "epoch": 1.6744371388722854, "grad_norm": 317.40338134765625, "learning_rate": 1.3579072743333943e-07, "loss": 12.1641, "step": 25212 }, { "epoch": 1.674503553164641, "grad_norm": 635.2227783203125, "learning_rate": 1.3573661954474702e-07, "loss": 14.125, "step": 25213 }, { "epoch": 1.6745699674569967, "grad_norm": 121.92414093017578, "learning_rate": 1.3568252165345518e-07, "loss": 17.0781, "step": 25214 }, { "epoch": 1.6746363817493526, "grad_norm": 649.1119384765625, "learning_rate": 1.3562843376008916e-07, "loss": 15.6094, "step": 25215 }, { "epoch": 1.674702796041708, "grad_norm": 153.89109802246094, "learning_rate": 1.3557435586527543e-07, "loss": 13.8438, "step": 25216 }, { "epoch": 1.674769210334064, "grad_norm": 140.4676971435547, "learning_rate": 1.3552028796963887e-07, "loss": 17.5156, "step": 25217 }, { "epoch": 1.6748356246264196, "grad_norm": 109.57628631591797, "learning_rate": 1.354662300738051e-07, "loss": 15.4219, "step": 25218 }, { "epoch": 1.6749020389187752, "grad_norm": 151.131591796875, "learning_rate": 1.3541218217839945e-07, "loss": 17.75, "step": 25219 }, { "epoch": 1.674968453211131, "grad_norm": 176.4338836669922, "learning_rate": 1.3535814428404713e-07, "loss": 19.6719, "step": 25220 }, { "epoch": 1.6750348675034867, "grad_norm": 157.91683959960938, "learning_rate": 1.353041163913733e-07, "loss": 11.6719, "step": 25221 }, { "epoch": 1.6751012817958424, "grad_norm": 530.1910400390625, "learning_rate": 1.3525009850100276e-07, "loss": 32.9688, "step": 25222 }, { "epoch": 1.6751676960881983, "grad_norm": 166.52037048339844, "learning_rate": 1.351960906135604e-07, "loss": 15.4062, "step": 25223 }, { "epoch": 1.675234110380554, "grad_norm": 186.40501403808594, "learning_rate": 1.3514209272967102e-07, "loss": 15.25, "step": 25224 }, { "epoch": 1.6753005246729096, "grad_norm": 221.18052673339844, "learning_rate": 1.3508810484995948e-07, "loss": 27.0625, "step": 25225 }, { "epoch": 1.6753669389652655, "grad_norm": 377.9537353515625, "learning_rate": 1.350341269750498e-07, "loss": 18.8125, "step": 25226 }, { "epoch": 1.6754333532576209, "grad_norm": 170.22805786132812, "learning_rate": 1.3498015910556649e-07, "loss": 14.6875, "step": 25227 }, { "epoch": 1.6754997675499768, "grad_norm": 198.7698211669922, "learning_rate": 1.3492620124213394e-07, "loss": 17.25, "step": 25228 }, { "epoch": 1.6755661818423326, "grad_norm": 117.24041748046875, "learning_rate": 1.3487225338537623e-07, "loss": 13.8047, "step": 25229 }, { "epoch": 1.675632596134688, "grad_norm": 162.46893310546875, "learning_rate": 1.348183155359175e-07, "loss": 16.7031, "step": 25230 }, { "epoch": 1.675699010427044, "grad_norm": 282.652099609375, "learning_rate": 1.347643876943818e-07, "loss": 19.0156, "step": 25231 }, { "epoch": 1.6757654247193996, "grad_norm": 299.94378662109375, "learning_rate": 1.3471046986139222e-07, "loss": 16.1406, "step": 25232 }, { "epoch": 1.6758318390117553, "grad_norm": 377.21649169921875, "learning_rate": 1.3465656203757357e-07, "loss": 20.4062, "step": 25233 }, { "epoch": 1.6758982533041111, "grad_norm": 115.08787536621094, "learning_rate": 1.346026642235485e-07, "loss": 12.5, "step": 25234 }, { "epoch": 1.6759646675964668, "grad_norm": 214.7025146484375, "learning_rate": 1.3454877641994078e-07, "loss": 13.7266, "step": 25235 }, { "epoch": 1.6760310818888224, "grad_norm": 179.273681640625, "learning_rate": 1.3449489862737373e-07, "loss": 15.7969, "step": 25236 }, { "epoch": 1.6760974961811783, "grad_norm": 123.83705139160156, "learning_rate": 1.3444103084647074e-07, "loss": 12.5781, "step": 25237 }, { "epoch": 1.6761639104735337, "grad_norm": 149.46005249023438, "learning_rate": 1.3438717307785473e-07, "loss": 15.2188, "step": 25238 }, { "epoch": 1.6762303247658896, "grad_norm": 111.97615051269531, "learning_rate": 1.3433332532214892e-07, "loss": 12.375, "step": 25239 }, { "epoch": 1.6762967390582455, "grad_norm": 142.14398193359375, "learning_rate": 1.3427948757997564e-07, "loss": 10.4844, "step": 25240 }, { "epoch": 1.676363153350601, "grad_norm": 146.3848876953125, "learning_rate": 1.3422565985195856e-07, "loss": 13.9062, "step": 25241 }, { "epoch": 1.6764295676429568, "grad_norm": 244.49732971191406, "learning_rate": 1.3417184213871957e-07, "loss": 18.7188, "step": 25242 }, { "epoch": 1.6764959819353125, "grad_norm": 171.84310913085938, "learning_rate": 1.341180344408813e-07, "loss": 15.125, "step": 25243 }, { "epoch": 1.6765623962276681, "grad_norm": 118.1689682006836, "learning_rate": 1.3406423675906642e-07, "loss": 11.6719, "step": 25244 }, { "epoch": 1.676628810520024, "grad_norm": 147.84820556640625, "learning_rate": 1.3401044909389737e-07, "loss": 15.2188, "step": 25245 }, { "epoch": 1.6766952248123796, "grad_norm": 165.35693359375, "learning_rate": 1.3395667144599565e-07, "loss": 13.2266, "step": 25246 }, { "epoch": 1.6767616391047353, "grad_norm": 141.89535522460938, "learning_rate": 1.3390290381598413e-07, "loss": 13.9844, "step": 25247 }, { "epoch": 1.6768280533970912, "grad_norm": 453.7483215332031, "learning_rate": 1.3384914620448429e-07, "loss": 18.7031, "step": 25248 }, { "epoch": 1.6768944676894466, "grad_norm": 462.9569396972656, "learning_rate": 1.3379539861211797e-07, "loss": 17.6094, "step": 25249 }, { "epoch": 1.6769608819818025, "grad_norm": 198.69068908691406, "learning_rate": 1.3374166103950701e-07, "loss": 21.125, "step": 25250 }, { "epoch": 1.6770272962741584, "grad_norm": 237.44541931152344, "learning_rate": 1.3368793348727303e-07, "loss": 16.0469, "step": 25251 }, { "epoch": 1.6770937105665138, "grad_norm": 328.58380126953125, "learning_rate": 1.3363421595603751e-07, "loss": 15.1406, "step": 25252 }, { "epoch": 1.6771601248588697, "grad_norm": 121.37659454345703, "learning_rate": 1.33580508446422e-07, "loss": 12.9531, "step": 25253 }, { "epoch": 1.6772265391512253, "grad_norm": 668.538818359375, "learning_rate": 1.3352681095904705e-07, "loss": 13.3594, "step": 25254 }, { "epoch": 1.677292953443581, "grad_norm": 200.35585021972656, "learning_rate": 1.3347312349453488e-07, "loss": 17.2344, "step": 25255 }, { "epoch": 1.6773593677359369, "grad_norm": 210.58865356445312, "learning_rate": 1.3341944605350563e-07, "loss": 13.4688, "step": 25256 }, { "epoch": 1.6774257820282925, "grad_norm": 157.56727600097656, "learning_rate": 1.3336577863658062e-07, "loss": 14.5938, "step": 25257 }, { "epoch": 1.6774921963206482, "grad_norm": 284.2850646972656, "learning_rate": 1.333121212443805e-07, "loss": 20.7969, "step": 25258 }, { "epoch": 1.677558610613004, "grad_norm": 232.08921813964844, "learning_rate": 1.3325847387752598e-07, "loss": 20.0938, "step": 25259 }, { "epoch": 1.6776250249053595, "grad_norm": 204.6021270751953, "learning_rate": 1.3320483653663772e-07, "loss": 11.6328, "step": 25260 }, { "epoch": 1.6776914391977154, "grad_norm": 100.87801361083984, "learning_rate": 1.3315120922233603e-07, "loss": 11.9844, "step": 25261 }, { "epoch": 1.6777578534900712, "grad_norm": 251.94407653808594, "learning_rate": 1.3309759193524128e-07, "loss": 20.6875, "step": 25262 }, { "epoch": 1.6778242677824267, "grad_norm": 155.43919372558594, "learning_rate": 1.3304398467597367e-07, "loss": 14.4375, "step": 25263 }, { "epoch": 1.6778906820747825, "grad_norm": 440.80975341796875, "learning_rate": 1.3299038744515367e-07, "loss": 12.0938, "step": 25264 }, { "epoch": 1.6779570963671382, "grad_norm": 141.9542236328125, "learning_rate": 1.3293680024340038e-07, "loss": 13.8906, "step": 25265 }, { "epoch": 1.6780235106594938, "grad_norm": 164.23944091796875, "learning_rate": 1.328832230713347e-07, "loss": 16.2656, "step": 25266 }, { "epoch": 1.6780899249518497, "grad_norm": 332.21435546875, "learning_rate": 1.3282965592957562e-07, "loss": 18.2656, "step": 25267 }, { "epoch": 1.6781563392442054, "grad_norm": 211.45350646972656, "learning_rate": 1.327760988187431e-07, "loss": 14.0078, "step": 25268 }, { "epoch": 1.678222753536561, "grad_norm": 367.39166259765625, "learning_rate": 1.3272255173945656e-07, "loss": 15.2969, "step": 25269 }, { "epoch": 1.678289167828917, "grad_norm": 192.1044158935547, "learning_rate": 1.326690146923357e-07, "loss": 16.75, "step": 25270 }, { "epoch": 1.6783555821212723, "grad_norm": 181.1266326904297, "learning_rate": 1.3261548767799902e-07, "loss": 12.8281, "step": 25271 }, { "epoch": 1.6784219964136282, "grad_norm": 121.89523315429688, "learning_rate": 1.3256197069706676e-07, "loss": 17.2656, "step": 25272 }, { "epoch": 1.678488410705984, "grad_norm": 179.15415954589844, "learning_rate": 1.3250846375015712e-07, "loss": 13.1875, "step": 25273 }, { "epoch": 1.6785548249983395, "grad_norm": 176.035888671875, "learning_rate": 1.3245496683788937e-07, "loss": 17.1094, "step": 25274 }, { "epoch": 1.6786212392906954, "grad_norm": 180.41348266601562, "learning_rate": 1.3240147996088237e-07, "loss": 10.4453, "step": 25275 }, { "epoch": 1.678687653583051, "grad_norm": 122.25028991699219, "learning_rate": 1.3234800311975468e-07, "loss": 13.8047, "step": 25276 }, { "epoch": 1.6787540678754067, "grad_norm": 128.73707580566406, "learning_rate": 1.322945363151251e-07, "loss": 17.8281, "step": 25277 }, { "epoch": 1.6788204821677626, "grad_norm": 286.7418518066406, "learning_rate": 1.322410795476121e-07, "loss": 12.6406, "step": 25278 }, { "epoch": 1.6788868964601182, "grad_norm": 226.82884216308594, "learning_rate": 1.321876328178335e-07, "loss": 19.25, "step": 25279 }, { "epoch": 1.678953310752474, "grad_norm": 236.93824768066406, "learning_rate": 1.321341961264083e-07, "loss": 13.875, "step": 25280 }, { "epoch": 1.6790197250448298, "grad_norm": 379.11773681640625, "learning_rate": 1.320807694739542e-07, "loss": 16.2969, "step": 25281 }, { "epoch": 1.6790861393371852, "grad_norm": 114.00653839111328, "learning_rate": 1.3202735286108925e-07, "loss": 15.1719, "step": 25282 }, { "epoch": 1.679152553629541, "grad_norm": 226.00759887695312, "learning_rate": 1.319739462884313e-07, "loss": 13.5156, "step": 25283 }, { "epoch": 1.679218967921897, "grad_norm": 196.3681640625, "learning_rate": 1.3192054975659827e-07, "loss": 16.7969, "step": 25284 }, { "epoch": 1.6792853822142524, "grad_norm": 281.2684631347656, "learning_rate": 1.3186716326620772e-07, "loss": 17.1719, "step": 25285 }, { "epoch": 1.6793517965066083, "grad_norm": 294.8760986328125, "learning_rate": 1.3181378681787758e-07, "loss": 13.75, "step": 25286 }, { "epoch": 1.679418210798964, "grad_norm": 141.32643127441406, "learning_rate": 1.3176042041222436e-07, "loss": 13.5, "step": 25287 }, { "epoch": 1.6794846250913196, "grad_norm": 209.28927612304688, "learning_rate": 1.3170706404986641e-07, "loss": 14.5938, "step": 25288 }, { "epoch": 1.6795510393836754, "grad_norm": 120.8055419921875, "learning_rate": 1.316537177314202e-07, "loss": 14.2812, "step": 25289 }, { "epoch": 1.679617453676031, "grad_norm": 152.24526977539062, "learning_rate": 1.316003814575032e-07, "loss": 19.4375, "step": 25290 }, { "epoch": 1.6796838679683868, "grad_norm": 339.0668029785156, "learning_rate": 1.315470552287321e-07, "loss": 13.7812, "step": 25291 }, { "epoch": 1.6797502822607426, "grad_norm": 1213.9832763671875, "learning_rate": 1.3149373904572414e-07, "loss": 14.625, "step": 25292 }, { "epoch": 1.679816696553098, "grad_norm": 132.98388671875, "learning_rate": 1.3144043290909534e-07, "loss": 13.4219, "step": 25293 }, { "epoch": 1.679883110845454, "grad_norm": 298.6749572753906, "learning_rate": 1.3138713681946322e-07, "loss": 16.1719, "step": 25294 }, { "epoch": 1.6799495251378098, "grad_norm": 151.3690643310547, "learning_rate": 1.3133385077744362e-07, "loss": 14.3906, "step": 25295 }, { "epoch": 1.6800159394301653, "grad_norm": 116.54745483398438, "learning_rate": 1.3128057478365295e-07, "loss": 12.9688, "step": 25296 }, { "epoch": 1.6800823537225211, "grad_norm": 150.71578979492188, "learning_rate": 1.3122730883870814e-07, "loss": 13.7812, "step": 25297 }, { "epoch": 1.6801487680148768, "grad_norm": 125.9801025390625, "learning_rate": 1.3117405294322448e-07, "loss": 11.0312, "step": 25298 }, { "epoch": 1.6802151823072324, "grad_norm": 187.29469299316406, "learning_rate": 1.3112080709781848e-07, "loss": 16.4844, "step": 25299 }, { "epoch": 1.6802815965995883, "grad_norm": 117.0516357421875, "learning_rate": 1.3106757130310597e-07, "loss": 16.125, "step": 25300 }, { "epoch": 1.680348010891944, "grad_norm": 351.8733825683594, "learning_rate": 1.310143455597027e-07, "loss": 21.0625, "step": 25301 }, { "epoch": 1.6804144251842996, "grad_norm": 182.29513549804688, "learning_rate": 1.309611298682245e-07, "loss": 17.7344, "step": 25302 }, { "epoch": 1.6804808394766555, "grad_norm": 301.6922607421875, "learning_rate": 1.30907924229287e-07, "loss": 17.9219, "step": 25303 }, { "epoch": 1.680547253769011, "grad_norm": 149.153564453125, "learning_rate": 1.30854728643505e-07, "loss": 13.0703, "step": 25304 }, { "epoch": 1.6806136680613668, "grad_norm": 139.71682739257812, "learning_rate": 1.3080154311149473e-07, "loss": 10.8594, "step": 25305 }, { "epoch": 1.6806800823537227, "grad_norm": 351.4768981933594, "learning_rate": 1.3074836763387087e-07, "loss": 13.5781, "step": 25306 }, { "epoch": 1.6807464966460781, "grad_norm": 195.40318298339844, "learning_rate": 1.3069520221124852e-07, "loss": 18.2812, "step": 25307 }, { "epoch": 1.680812910938434, "grad_norm": 255.8126983642578, "learning_rate": 1.3064204684424295e-07, "loss": 17.9062, "step": 25308 }, { "epoch": 1.6808793252307896, "grad_norm": 154.2095947265625, "learning_rate": 1.3058890153346902e-07, "loss": 15.1875, "step": 25309 }, { "epoch": 1.6809457395231453, "grad_norm": 113.6834945678711, "learning_rate": 1.3053576627954088e-07, "loss": 13.375, "step": 25310 }, { "epoch": 1.6810121538155012, "grad_norm": 209.29624938964844, "learning_rate": 1.3048264108307417e-07, "loss": 15.8281, "step": 25311 }, { "epoch": 1.6810785681078568, "grad_norm": 230.97796630859375, "learning_rate": 1.304295259446826e-07, "loss": 14.5938, "step": 25312 }, { "epoch": 1.6811449824002125, "grad_norm": 204.28353881835938, "learning_rate": 1.303764208649809e-07, "loss": 16.6406, "step": 25313 }, { "epoch": 1.6812113966925684, "grad_norm": 352.0077209472656, "learning_rate": 1.3032332584458328e-07, "loss": 12.5156, "step": 25314 }, { "epoch": 1.6812778109849238, "grad_norm": 185.722412109375, "learning_rate": 1.3027024088410389e-07, "loss": 19.5625, "step": 25315 }, { "epoch": 1.6813442252772797, "grad_norm": 204.47669982910156, "learning_rate": 1.3021716598415688e-07, "loss": 19.4375, "step": 25316 }, { "epoch": 1.6814106395696355, "grad_norm": 178.8354949951172, "learning_rate": 1.3016410114535636e-07, "loss": 14.6406, "step": 25317 }, { "epoch": 1.681477053861991, "grad_norm": 190.38975524902344, "learning_rate": 1.301110463683155e-07, "loss": 13.9219, "step": 25318 }, { "epoch": 1.6815434681543469, "grad_norm": 383.2427978515625, "learning_rate": 1.3005800165364888e-07, "loss": 11.4844, "step": 25319 }, { "epoch": 1.6816098824467025, "grad_norm": 238.43624877929688, "learning_rate": 1.300049670019695e-07, "loss": 14.5781, "step": 25320 }, { "epoch": 1.6816762967390582, "grad_norm": 270.7567138671875, "learning_rate": 1.2995194241389095e-07, "loss": 18.9531, "step": 25321 }, { "epoch": 1.681742711031414, "grad_norm": 120.85359954833984, "learning_rate": 1.2989892789002666e-07, "loss": 14.7969, "step": 25322 }, { "epoch": 1.6818091253237697, "grad_norm": 132.21139526367188, "learning_rate": 1.2984592343098976e-07, "loss": 14.7656, "step": 25323 }, { "epoch": 1.6818755396161253, "grad_norm": 168.90675354003906, "learning_rate": 1.2979292903739358e-07, "loss": 16.25, "step": 25324 }, { "epoch": 1.6819419539084812, "grad_norm": 196.9850311279297, "learning_rate": 1.2973994470985117e-07, "loss": 15.5859, "step": 25325 }, { "epoch": 1.6820083682008367, "grad_norm": 173.98895263671875, "learning_rate": 1.296869704489748e-07, "loss": 17.7344, "step": 25326 }, { "epoch": 1.6820747824931925, "grad_norm": 158.20558166503906, "learning_rate": 1.2963400625537813e-07, "loss": 14.5625, "step": 25327 }, { "epoch": 1.6821411967855484, "grad_norm": 254.9800262451172, "learning_rate": 1.2958105212967318e-07, "loss": 15.0156, "step": 25328 }, { "epoch": 1.6822076110779038, "grad_norm": 365.3626403808594, "learning_rate": 1.295281080724727e-07, "loss": 17.3438, "step": 25329 }, { "epoch": 1.6822740253702597, "grad_norm": 234.04248046875, "learning_rate": 1.2947517408438924e-07, "loss": 16.0469, "step": 25330 }, { "epoch": 1.6823404396626154, "grad_norm": 447.1850891113281, "learning_rate": 1.2942225016603504e-07, "loss": 19.1094, "step": 25331 }, { "epoch": 1.682406853954971, "grad_norm": 744.2071533203125, "learning_rate": 1.2936933631802193e-07, "loss": 22.4688, "step": 25332 }, { "epoch": 1.682473268247327, "grad_norm": 171.2146453857422, "learning_rate": 1.2931643254096249e-07, "loss": 15.0938, "step": 25333 }, { "epoch": 1.6825396825396826, "grad_norm": 173.7641143798828, "learning_rate": 1.292635388354687e-07, "loss": 15.4844, "step": 25334 }, { "epoch": 1.6826060968320382, "grad_norm": 98.12420654296875, "learning_rate": 1.292106552021518e-07, "loss": 15.3438, "step": 25335 }, { "epoch": 1.682672511124394, "grad_norm": 164.96058654785156, "learning_rate": 1.2915778164162428e-07, "loss": 14.4688, "step": 25336 }, { "epoch": 1.6827389254167495, "grad_norm": 125.7167739868164, "learning_rate": 1.291049181544972e-07, "loss": 12.1406, "step": 25337 }, { "epoch": 1.6828053397091054, "grad_norm": 242.9467010498047, "learning_rate": 1.290520647413822e-07, "loss": 19.9062, "step": 25338 }, { "epoch": 1.6828717540014613, "grad_norm": 143.68356323242188, "learning_rate": 1.2899922140289076e-07, "loss": 15.7969, "step": 25339 }, { "epoch": 1.6829381682938167, "grad_norm": 386.7071838378906, "learning_rate": 1.289463881396341e-07, "loss": 20.8906, "step": 25340 }, { "epoch": 1.6830045825861726, "grad_norm": 161.33287048339844, "learning_rate": 1.2889356495222327e-07, "loss": 14.3438, "step": 25341 }, { "epoch": 1.6830709968785282, "grad_norm": 142.25099182128906, "learning_rate": 1.2884075184126963e-07, "loss": 16.0391, "step": 25342 }, { "epoch": 1.683137411170884, "grad_norm": 285.1299133300781, "learning_rate": 1.287879488073834e-07, "loss": 16.7812, "step": 25343 }, { "epoch": 1.6832038254632398, "grad_norm": 198.84710693359375, "learning_rate": 1.2873515585117624e-07, "loss": 17.5625, "step": 25344 }, { "epoch": 1.6832702397555954, "grad_norm": 119.94572448730469, "learning_rate": 1.2868237297325813e-07, "loss": 15.9141, "step": 25345 }, { "epoch": 1.683336654047951, "grad_norm": 290.19561767578125, "learning_rate": 1.2862960017423996e-07, "loss": 17.3438, "step": 25346 }, { "epoch": 1.683403068340307, "grad_norm": 125.63602447509766, "learning_rate": 1.2857683745473214e-07, "loss": 11.8438, "step": 25347 }, { "epoch": 1.6834694826326624, "grad_norm": 403.00152587890625, "learning_rate": 1.2852408481534482e-07, "loss": 15.9844, "step": 25348 }, { "epoch": 1.6835358969250183, "grad_norm": 223.63433837890625, "learning_rate": 1.2847134225668855e-07, "loss": 17.5156, "step": 25349 }, { "epoch": 1.6836023112173741, "grad_norm": 260.9658203125, "learning_rate": 1.2841860977937335e-07, "loss": 18.8281, "step": 25350 }, { "epoch": 1.6836687255097296, "grad_norm": 576.0648803710938, "learning_rate": 1.2836588738400867e-07, "loss": 16.2031, "step": 25351 }, { "epoch": 1.6837351398020854, "grad_norm": 813.1222534179688, "learning_rate": 1.2831317507120531e-07, "loss": 14.3906, "step": 25352 }, { "epoch": 1.683801554094441, "grad_norm": 200.4315185546875, "learning_rate": 1.2826047284157216e-07, "loss": 19.3594, "step": 25353 }, { "epoch": 1.6838679683867968, "grad_norm": 110.72540283203125, "learning_rate": 1.2820778069571925e-07, "loss": 13.0938, "step": 25354 }, { "epoch": 1.6839343826791526, "grad_norm": 198.39207458496094, "learning_rate": 1.28155098634256e-07, "loss": 13.9531, "step": 25355 }, { "epoch": 1.6840007969715083, "grad_norm": 119.93766021728516, "learning_rate": 1.2810242665779213e-07, "loss": 15.125, "step": 25356 }, { "epoch": 1.684067211263864, "grad_norm": 303.3851318359375, "learning_rate": 1.2804976476693609e-07, "loss": 19.9688, "step": 25357 }, { "epoch": 1.6841336255562198, "grad_norm": 221.88792419433594, "learning_rate": 1.279971129622981e-07, "loss": 16.5156, "step": 25358 }, { "epoch": 1.6842000398485752, "grad_norm": 255.99603271484375, "learning_rate": 1.2794447124448638e-07, "loss": 16.2812, "step": 25359 }, { "epoch": 1.6842664541409311, "grad_norm": 189.0933074951172, "learning_rate": 1.278918396141102e-07, "loss": 20.7344, "step": 25360 }, { "epoch": 1.684332868433287, "grad_norm": 859.4766235351562, "learning_rate": 1.2783921807177832e-07, "loss": 19.5625, "step": 25361 }, { "epoch": 1.6843992827256424, "grad_norm": 197.0419158935547, "learning_rate": 1.2778660661809948e-07, "loss": 18.2812, "step": 25362 }, { "epoch": 1.6844656970179983, "grad_norm": 211.8947296142578, "learning_rate": 1.2773400525368217e-07, "loss": 16.1094, "step": 25363 }, { "epoch": 1.684532111310354, "grad_norm": 383.7095947265625, "learning_rate": 1.2768141397913513e-07, "loss": 30.3281, "step": 25364 }, { "epoch": 1.6845985256027096, "grad_norm": 472.9150695800781, "learning_rate": 1.2762883279506608e-07, "loss": 14.9062, "step": 25365 }, { "epoch": 1.6846649398950655, "grad_norm": 152.96788024902344, "learning_rate": 1.2757626170208414e-07, "loss": 14.2812, "step": 25366 }, { "epoch": 1.6847313541874211, "grad_norm": 117.47879028320312, "learning_rate": 1.2752370070079665e-07, "loss": 15.9062, "step": 25367 }, { "epoch": 1.6847977684797768, "grad_norm": 175.59425354003906, "learning_rate": 1.2747114979181196e-07, "loss": 15.2031, "step": 25368 }, { "epoch": 1.6848641827721327, "grad_norm": 338.43408203125, "learning_rate": 1.2741860897573774e-07, "loss": 21.5312, "step": 25369 }, { "epoch": 1.6849305970644881, "grad_norm": 248.66461181640625, "learning_rate": 1.27366078253182e-07, "loss": 17.8125, "step": 25370 }, { "epoch": 1.684997011356844, "grad_norm": 400.6050109863281, "learning_rate": 1.2731355762475226e-07, "loss": 20.7812, "step": 25371 }, { "epoch": 1.6850634256491999, "grad_norm": 287.3775939941406, "learning_rate": 1.2726104709105612e-07, "loss": 14.5312, "step": 25372 }, { "epoch": 1.6851298399415553, "grad_norm": 276.9024353027344, "learning_rate": 1.272085466527011e-07, "loss": 22.5938, "step": 25373 }, { "epoch": 1.6851962542339112, "grad_norm": 594.7587890625, "learning_rate": 1.271560563102938e-07, "loss": 21.1562, "step": 25374 }, { "epoch": 1.6852626685262668, "grad_norm": 287.8730773925781, "learning_rate": 1.2710357606444255e-07, "loss": 13.7656, "step": 25375 }, { "epoch": 1.6853290828186225, "grad_norm": 349.06646728515625, "learning_rate": 1.2705110591575342e-07, "loss": 16.3125, "step": 25376 }, { "epoch": 1.6853954971109784, "grad_norm": 276.4078063964844, "learning_rate": 1.269986458648338e-07, "loss": 13.0469, "step": 25377 }, { "epoch": 1.685461911403334, "grad_norm": 251.79612731933594, "learning_rate": 1.269461959122905e-07, "loss": 18.625, "step": 25378 }, { "epoch": 1.6855283256956897, "grad_norm": 256.5609436035156, "learning_rate": 1.268937560587301e-07, "loss": 12.6562, "step": 25379 }, { "epoch": 1.6855947399880455, "grad_norm": 288.67047119140625, "learning_rate": 1.2684132630475918e-07, "loss": 15.6562, "step": 25380 }, { "epoch": 1.6856611542804012, "grad_norm": 198.3355712890625, "learning_rate": 1.2678890665098462e-07, "loss": 14.4375, "step": 25381 }, { "epoch": 1.6857275685727569, "grad_norm": 293.7575378417969, "learning_rate": 1.2673649709801204e-07, "loss": 24.5469, "step": 25382 }, { "epoch": 1.6857939828651127, "grad_norm": 182.63832092285156, "learning_rate": 1.2668409764644862e-07, "loss": 15.5, "step": 25383 }, { "epoch": 1.6858603971574682, "grad_norm": 386.2640686035156, "learning_rate": 1.2663170829689962e-07, "loss": 16.2344, "step": 25384 }, { "epoch": 1.685926811449824, "grad_norm": 148.83135986328125, "learning_rate": 1.265793290499715e-07, "loss": 21.8594, "step": 25385 }, { "epoch": 1.6859932257421797, "grad_norm": 181.6973876953125, "learning_rate": 1.2652695990626995e-07, "loss": 13.5625, "step": 25386 }, { "epoch": 1.6860596400345353, "grad_norm": 290.93499755859375, "learning_rate": 1.2647460086640094e-07, "loss": 18.3125, "step": 25387 }, { "epoch": 1.6861260543268912, "grad_norm": 221.36155700683594, "learning_rate": 1.2642225193097e-07, "loss": 10.7656, "step": 25388 }, { "epoch": 1.6861924686192469, "grad_norm": 149.73699951171875, "learning_rate": 1.2636991310058298e-07, "loss": 14.7344, "step": 25389 }, { "epoch": 1.6862588829116025, "grad_norm": 158.03717041015625, "learning_rate": 1.2631758437584462e-07, "loss": 11.3906, "step": 25390 }, { "epoch": 1.6863252972039584, "grad_norm": 341.7645263671875, "learning_rate": 1.2626526575736107e-07, "loss": 14.4062, "step": 25391 }, { "epoch": 1.686391711496314, "grad_norm": 958.60205078125, "learning_rate": 1.2621295724573677e-07, "loss": 27.4062, "step": 25392 }, { "epoch": 1.6864581257886697, "grad_norm": 146.89964294433594, "learning_rate": 1.2616065884157723e-07, "loss": 13.4844, "step": 25393 }, { "epoch": 1.6865245400810256, "grad_norm": 195.65655517578125, "learning_rate": 1.2610837054548728e-07, "loss": 13.0, "step": 25394 }, { "epoch": 1.686590954373381, "grad_norm": 155.21051025390625, "learning_rate": 1.2605609235807202e-07, "loss": 17.8438, "step": 25395 }, { "epoch": 1.686657368665737, "grad_norm": 2314.369140625, "learning_rate": 1.260038242799355e-07, "loss": 16.9844, "step": 25396 }, { "epoch": 1.6867237829580926, "grad_norm": 110.41883850097656, "learning_rate": 1.2595156631168325e-07, "loss": 12.7812, "step": 25397 }, { "epoch": 1.6867901972504482, "grad_norm": 125.44739532470703, "learning_rate": 1.25899318453919e-07, "loss": 16.5781, "step": 25398 }, { "epoch": 1.686856611542804, "grad_norm": 120.15917205810547, "learning_rate": 1.2584708070724737e-07, "loss": 14.3906, "step": 25399 }, { "epoch": 1.6869230258351597, "grad_norm": 289.4845275878906, "learning_rate": 1.2579485307227265e-07, "loss": 18.9141, "step": 25400 }, { "epoch": 1.6869894401275154, "grad_norm": 231.0990753173828, "learning_rate": 1.2574263554959906e-07, "loss": 20.0938, "step": 25401 }, { "epoch": 1.6870558544198713, "grad_norm": 152.55340576171875, "learning_rate": 1.2569042813983045e-07, "loss": 14.9688, "step": 25402 }, { "epoch": 1.687122268712227, "grad_norm": 255.82785034179688, "learning_rate": 1.2563823084357106e-07, "loss": 18.1094, "step": 25403 }, { "epoch": 1.6871886830045826, "grad_norm": 254.36727905273438, "learning_rate": 1.2558604366142401e-07, "loss": 17.4531, "step": 25404 }, { "epoch": 1.6872550972969385, "grad_norm": 170.43756103515625, "learning_rate": 1.255338665939938e-07, "loss": 15.0, "step": 25405 }, { "epoch": 1.6873215115892939, "grad_norm": 393.87542724609375, "learning_rate": 1.2548169964188337e-07, "loss": 18.2812, "step": 25406 }, { "epoch": 1.6873879258816498, "grad_norm": 286.5780334472656, "learning_rate": 1.2542954280569616e-07, "loss": 20.4531, "step": 25407 }, { "epoch": 1.6874543401740054, "grad_norm": 313.57568359375, "learning_rate": 1.2537739608603616e-07, "loss": 16.9375, "step": 25408 }, { "epoch": 1.687520754466361, "grad_norm": 235.24935913085938, "learning_rate": 1.2532525948350581e-07, "loss": 13.4375, "step": 25409 }, { "epoch": 1.687587168758717, "grad_norm": 261.845703125, "learning_rate": 1.2527313299870845e-07, "loss": 13.4062, "step": 25410 }, { "epoch": 1.6876535830510726, "grad_norm": 224.410888671875, "learning_rate": 1.2522101663224716e-07, "loss": 14.6953, "step": 25411 }, { "epoch": 1.6877199973434283, "grad_norm": 191.11131286621094, "learning_rate": 1.2516891038472477e-07, "loss": 14.625, "step": 25412 }, { "epoch": 1.6877864116357841, "grad_norm": 116.06317138671875, "learning_rate": 1.2511681425674393e-07, "loss": 12.875, "step": 25413 }, { "epoch": 1.6878528259281398, "grad_norm": 176.2069854736328, "learning_rate": 1.2506472824890747e-07, "loss": 19.6562, "step": 25414 }, { "epoch": 1.6879192402204954, "grad_norm": 192.06752014160156, "learning_rate": 1.2501265236181734e-07, "loss": 13.7344, "step": 25415 }, { "epoch": 1.6879856545128513, "grad_norm": 255.12623596191406, "learning_rate": 1.2496058659607666e-07, "loss": 26.0625, "step": 25416 }, { "epoch": 1.6880520688052068, "grad_norm": 534.6220703125, "learning_rate": 1.2490853095228726e-07, "loss": 18.1094, "step": 25417 }, { "epoch": 1.6881184830975626, "grad_norm": 162.7140655517578, "learning_rate": 1.248564854310512e-07, "loss": 14.6172, "step": 25418 }, { "epoch": 1.6881848973899183, "grad_norm": 113.91728210449219, "learning_rate": 1.2480445003297079e-07, "loss": 14.3438, "step": 25419 }, { "epoch": 1.688251311682274, "grad_norm": 355.46014404296875, "learning_rate": 1.24752424758648e-07, "loss": 19.5625, "step": 25420 }, { "epoch": 1.6883177259746298, "grad_norm": 319.4665832519531, "learning_rate": 1.2470040960868412e-07, "loss": 17.9375, "step": 25421 }, { "epoch": 1.6883841402669855, "grad_norm": 109.81975555419922, "learning_rate": 1.2464840458368154e-07, "loss": 14.0, "step": 25422 }, { "epoch": 1.6884505545593411, "grad_norm": 229.31858825683594, "learning_rate": 1.2459640968424122e-07, "loss": 19.2031, "step": 25423 }, { "epoch": 1.688516968851697, "grad_norm": 622.4183349609375, "learning_rate": 1.2454442491096483e-07, "loss": 21.5938, "step": 25424 }, { "epoch": 1.6885833831440527, "grad_norm": 218.4623565673828, "learning_rate": 1.2449245026445376e-07, "loss": 18.7031, "step": 25425 }, { "epoch": 1.6886497974364083, "grad_norm": 229.84425354003906, "learning_rate": 1.244404857453092e-07, "loss": 15.4062, "step": 25426 }, { "epoch": 1.6887162117287642, "grad_norm": 242.62905883789062, "learning_rate": 1.2438853135413218e-07, "loss": 12.2344, "step": 25427 }, { "epoch": 1.6887826260211196, "grad_norm": 146.73533630371094, "learning_rate": 1.2433658709152394e-07, "loss": 20.4688, "step": 25428 }, { "epoch": 1.6888490403134755, "grad_norm": 166.078857421875, "learning_rate": 1.2428465295808478e-07, "loss": 19.5156, "step": 25429 }, { "epoch": 1.6889154546058311, "grad_norm": 454.8368835449219, "learning_rate": 1.2423272895441618e-07, "loss": 12.8984, "step": 25430 }, { "epoch": 1.6889818688981868, "grad_norm": 285.4532165527344, "learning_rate": 1.241808150811181e-07, "loss": 16.125, "step": 25431 }, { "epoch": 1.6890482831905427, "grad_norm": 254.51382446289062, "learning_rate": 1.2412891133879132e-07, "loss": 17.3438, "step": 25432 }, { "epoch": 1.6891146974828983, "grad_norm": 120.1074447631836, "learning_rate": 1.2407701772803624e-07, "loss": 15.5156, "step": 25433 }, { "epoch": 1.689181111775254, "grad_norm": 136.76144409179688, "learning_rate": 1.2402513424945328e-07, "loss": 16.5625, "step": 25434 }, { "epoch": 1.6892475260676099, "grad_norm": 217.45277404785156, "learning_rate": 1.2397326090364223e-07, "loss": 15.3281, "step": 25435 }, { "epoch": 1.6893139403599655, "grad_norm": 219.4400177001953, "learning_rate": 1.2392139769120369e-07, "loss": 18.3125, "step": 25436 }, { "epoch": 1.6893803546523212, "grad_norm": 217.33871459960938, "learning_rate": 1.2386954461273701e-07, "loss": 14.3125, "step": 25437 }, { "epoch": 1.689446768944677, "grad_norm": 290.0177001953125, "learning_rate": 1.238177016688423e-07, "loss": 16.125, "step": 25438 }, { "epoch": 1.6895131832370325, "grad_norm": 223.67440795898438, "learning_rate": 1.2376586886011907e-07, "loss": 14.8906, "step": 25439 }, { "epoch": 1.6895795975293884, "grad_norm": 463.87347412109375, "learning_rate": 1.2371404618716707e-07, "loss": 31.0156, "step": 25440 }, { "epoch": 1.689646011821744, "grad_norm": 260.548828125, "learning_rate": 1.236622336505857e-07, "loss": 17.375, "step": 25441 }, { "epoch": 1.6897124261140997, "grad_norm": 131.15087890625, "learning_rate": 1.2361043125097426e-07, "loss": 11.6562, "step": 25442 }, { "epoch": 1.6897788404064555, "grad_norm": 190.73008728027344, "learning_rate": 1.2355863898893194e-07, "loss": 14.5625, "step": 25443 }, { "epoch": 1.6898452546988112, "grad_norm": 108.3929672241211, "learning_rate": 1.2350685686505792e-07, "loss": 13.7656, "step": 25444 }, { "epoch": 1.6899116689911668, "grad_norm": 117.50628662109375, "learning_rate": 1.2345508487995148e-07, "loss": 14.5625, "step": 25445 }, { "epoch": 1.6899780832835227, "grad_norm": 404.4986572265625, "learning_rate": 1.2340332303421065e-07, "loss": 16.5625, "step": 25446 }, { "epoch": 1.6900444975758784, "grad_norm": 270.7894287109375, "learning_rate": 1.233515713284351e-07, "loss": 15.5078, "step": 25447 }, { "epoch": 1.690110911868234, "grad_norm": 558.0354614257812, "learning_rate": 1.2329982976322294e-07, "loss": 11.625, "step": 25448 }, { "epoch": 1.69017732616059, "grad_norm": 334.5543518066406, "learning_rate": 1.2324809833917283e-07, "loss": 11.75, "step": 25449 }, { "epoch": 1.6902437404529453, "grad_norm": 167.0690460205078, "learning_rate": 1.2319637705688314e-07, "loss": 12.6406, "step": 25450 }, { "epoch": 1.6903101547453012, "grad_norm": 510.2638854980469, "learning_rate": 1.2314466591695206e-07, "loss": 18.6328, "step": 25451 }, { "epoch": 1.6903765690376569, "grad_norm": 429.3954162597656, "learning_rate": 1.230929649199779e-07, "loss": 12.4609, "step": 25452 }, { "epoch": 1.6904429833300125, "grad_norm": 440.8197937011719, "learning_rate": 1.2304127406655896e-07, "loss": 17.9844, "step": 25453 }, { "epoch": 1.6905093976223684, "grad_norm": 111.90471649169922, "learning_rate": 1.2298959335729242e-07, "loss": 16.3281, "step": 25454 }, { "epoch": 1.690575811914724, "grad_norm": 246.51019287109375, "learning_rate": 1.2293792279277693e-07, "loss": 20.5156, "step": 25455 }, { "epoch": 1.6906422262070797, "grad_norm": 488.9840087890625, "learning_rate": 1.2288626237360967e-07, "loss": 18.2344, "step": 25456 }, { "epoch": 1.6907086404994356, "grad_norm": 251.8382568359375, "learning_rate": 1.2283461210038825e-07, "loss": 12.4219, "step": 25457 }, { "epoch": 1.6907750547917912, "grad_norm": 247.98133850097656, "learning_rate": 1.2278297197371035e-07, "loss": 12.9062, "step": 25458 }, { "epoch": 1.690841469084147, "grad_norm": 156.3382568359375, "learning_rate": 1.2273134199417335e-07, "loss": 20.2812, "step": 25459 }, { "epoch": 1.6909078833765028, "grad_norm": 211.2928924560547, "learning_rate": 1.2267972216237387e-07, "loss": 19.2031, "step": 25460 }, { "epoch": 1.6909742976688582, "grad_norm": 205.8748779296875, "learning_rate": 1.2262811247891003e-07, "loss": 15.6562, "step": 25461 }, { "epoch": 1.691040711961214, "grad_norm": 124.29154968261719, "learning_rate": 1.2257651294437787e-07, "loss": 13.6094, "step": 25462 }, { "epoch": 1.6911071262535697, "grad_norm": 652.74609375, "learning_rate": 1.2252492355937482e-07, "loss": 20.3281, "step": 25463 }, { "epoch": 1.6911735405459254, "grad_norm": 120.20482635498047, "learning_rate": 1.2247334432449741e-07, "loss": 11.1406, "step": 25464 }, { "epoch": 1.6912399548382813, "grad_norm": 171.7567596435547, "learning_rate": 1.2242177524034225e-07, "loss": 18.0625, "step": 25465 }, { "epoch": 1.691306369130637, "grad_norm": 348.9651184082031, "learning_rate": 1.223702163075061e-07, "loss": 25.2656, "step": 25466 }, { "epoch": 1.6913727834229926, "grad_norm": 251.2785186767578, "learning_rate": 1.2231866752658538e-07, "loss": 18.8281, "step": 25467 }, { "epoch": 1.6914391977153485, "grad_norm": 393.8915100097656, "learning_rate": 1.222671288981757e-07, "loss": 20.2031, "step": 25468 }, { "epoch": 1.691505612007704, "grad_norm": 138.10093688964844, "learning_rate": 1.2221560042287426e-07, "loss": 17.6875, "step": 25469 }, { "epoch": 1.6915720263000598, "grad_norm": 177.11846923828125, "learning_rate": 1.2216408210127638e-07, "loss": 17.5469, "step": 25470 }, { "epoch": 1.6916384405924156, "grad_norm": 288.4052429199219, "learning_rate": 1.2211257393397812e-07, "loss": 13.1719, "step": 25471 }, { "epoch": 1.691704854884771, "grad_norm": 242.26759338378906, "learning_rate": 1.2206107592157556e-07, "loss": 16.9844, "step": 25472 }, { "epoch": 1.691771269177127, "grad_norm": 168.18479919433594, "learning_rate": 1.2200958806466412e-07, "loss": 14.1719, "step": 25473 }, { "epoch": 1.6918376834694826, "grad_norm": 206.02560424804688, "learning_rate": 1.2195811036383942e-07, "loss": 17.7969, "step": 25474 }, { "epoch": 1.6919040977618383, "grad_norm": 118.71665954589844, "learning_rate": 1.2190664281969732e-07, "loss": 12.875, "step": 25475 }, { "epoch": 1.6919705120541941, "grad_norm": 120.61177825927734, "learning_rate": 1.2185518543283236e-07, "loss": 13.9688, "step": 25476 }, { "epoch": 1.6920369263465498, "grad_norm": 269.1175537109375, "learning_rate": 1.218037382038406e-07, "loss": 18.1406, "step": 25477 }, { "epoch": 1.6921033406389054, "grad_norm": 296.19281005859375, "learning_rate": 1.2175230113331668e-07, "loss": 23.5938, "step": 25478 }, { "epoch": 1.6921697549312613, "grad_norm": 175.9613494873047, "learning_rate": 1.2170087422185572e-07, "loss": 18.5781, "step": 25479 }, { "epoch": 1.692236169223617, "grad_norm": 120.25546264648438, "learning_rate": 1.2164945747005262e-07, "loss": 14.4219, "step": 25480 }, { "epoch": 1.6923025835159726, "grad_norm": 373.0701904296875, "learning_rate": 1.2159805087850205e-07, "loss": 14.3281, "step": 25481 }, { "epoch": 1.6923689978083285, "grad_norm": 380.3665466308594, "learning_rate": 1.2154665444779867e-07, "loss": 15.125, "step": 25482 }, { "epoch": 1.692435412100684, "grad_norm": 207.24647521972656, "learning_rate": 1.2149526817853706e-07, "loss": 16.5312, "step": 25483 }, { "epoch": 1.6925018263930398, "grad_norm": 274.470947265625, "learning_rate": 1.2144389207131188e-07, "loss": 21.0, "step": 25484 }, { "epoch": 1.6925682406853955, "grad_norm": 133.2754669189453, "learning_rate": 1.213925261267167e-07, "loss": 15.9062, "step": 25485 }, { "epoch": 1.6926346549777511, "grad_norm": 278.6233215332031, "learning_rate": 1.2134117034534664e-07, "loss": 15.125, "step": 25486 }, { "epoch": 1.692701069270107, "grad_norm": 141.68557739257812, "learning_rate": 1.2128982472779492e-07, "loss": 14.2656, "step": 25487 }, { "epoch": 1.6927674835624626, "grad_norm": 830.6768798828125, "learning_rate": 1.2123848927465596e-07, "loss": 20.6875, "step": 25488 }, { "epoch": 1.6928338978548183, "grad_norm": 202.1793975830078, "learning_rate": 1.2118716398652328e-07, "loss": 17.2812, "step": 25489 }, { "epoch": 1.6929003121471742, "grad_norm": 262.0810546875, "learning_rate": 1.2113584886399088e-07, "loss": 15.5, "step": 25490 }, { "epoch": 1.6929667264395298, "grad_norm": 195.5421905517578, "learning_rate": 1.2108454390765212e-07, "loss": 18.0, "step": 25491 }, { "epoch": 1.6930331407318855, "grad_norm": 220.50070190429688, "learning_rate": 1.2103324911810074e-07, "loss": 27.5625, "step": 25492 }, { "epoch": 1.6930995550242414, "grad_norm": 138.92153930664062, "learning_rate": 1.2098196449592945e-07, "loss": 12.8125, "step": 25493 }, { "epoch": 1.6931659693165968, "grad_norm": 105.70489501953125, "learning_rate": 1.2093069004173239e-07, "loss": 14.8906, "step": 25494 }, { "epoch": 1.6932323836089527, "grad_norm": 277.5660400390625, "learning_rate": 1.20879425756102e-07, "loss": 14.1562, "step": 25495 }, { "epoch": 1.6932987979013083, "grad_norm": 273.4808349609375, "learning_rate": 1.2082817163963144e-07, "loss": 22.1719, "step": 25496 }, { "epoch": 1.693365212193664, "grad_norm": 192.83750915527344, "learning_rate": 1.207769276929137e-07, "loss": 15.25, "step": 25497 }, { "epoch": 1.6934316264860199, "grad_norm": 244.82232666015625, "learning_rate": 1.2072569391654163e-07, "loss": 17.4844, "step": 25498 }, { "epoch": 1.6934980407783755, "grad_norm": 138.1641082763672, "learning_rate": 1.2067447031110734e-07, "loss": 11.0, "step": 25499 }, { "epoch": 1.6935644550707312, "grad_norm": 197.0797576904297, "learning_rate": 1.2062325687720409e-07, "loss": 12.75, "step": 25500 }, { "epoch": 1.693630869363087, "grad_norm": 231.43588256835938, "learning_rate": 1.2057205361542376e-07, "loss": 17.3125, "step": 25501 }, { "epoch": 1.6936972836554427, "grad_norm": 201.99948120117188, "learning_rate": 1.2052086052635879e-07, "loss": 14.7344, "step": 25502 }, { "epoch": 1.6937636979477984, "grad_norm": 114.77852630615234, "learning_rate": 1.2046967761060134e-07, "loss": 14.1875, "step": 25503 }, { "epoch": 1.6938301122401542, "grad_norm": 119.94985961914062, "learning_rate": 1.204185048687436e-07, "loss": 11.6562, "step": 25504 }, { "epoch": 1.6938965265325097, "grad_norm": 334.82806396484375, "learning_rate": 1.2036734230137735e-07, "loss": 15.5938, "step": 25505 }, { "epoch": 1.6939629408248655, "grad_norm": 256.1380920410156, "learning_rate": 1.2031618990909466e-07, "loss": 21.5625, "step": 25506 }, { "epoch": 1.6940293551172212, "grad_norm": 366.6845397949219, "learning_rate": 1.2026504769248658e-07, "loss": 21.1094, "step": 25507 }, { "epoch": 1.6940957694095768, "grad_norm": 809.23388671875, "learning_rate": 1.2021391565214557e-07, "loss": 15.0156, "step": 25508 }, { "epoch": 1.6941621837019327, "grad_norm": 366.4205322265625, "learning_rate": 1.2016279378866246e-07, "loss": 16.6719, "step": 25509 }, { "epoch": 1.6942285979942884, "grad_norm": 135.60951232910156, "learning_rate": 1.2011168210262878e-07, "loss": 13.0938, "step": 25510 }, { "epoch": 1.694295012286644, "grad_norm": 556.5594482421875, "learning_rate": 1.2006058059463586e-07, "loss": 17.0312, "step": 25511 }, { "epoch": 1.694361426579, "grad_norm": 128.05157470703125, "learning_rate": 1.2000948926527465e-07, "loss": 14.1719, "step": 25512 }, { "epoch": 1.6944278408713556, "grad_norm": 215.7818603515625, "learning_rate": 1.1995840811513636e-07, "loss": 18.3594, "step": 25513 }, { "epoch": 1.6944942551637112, "grad_norm": 161.024169921875, "learning_rate": 1.1990733714481182e-07, "loss": 11.1797, "step": 25514 }, { "epoch": 1.694560669456067, "grad_norm": 715.2501831054688, "learning_rate": 1.1985627635489138e-07, "loss": 15.3594, "step": 25515 }, { "epoch": 1.6946270837484225, "grad_norm": 531.530517578125, "learning_rate": 1.198052257459662e-07, "loss": 19.2656, "step": 25516 }, { "epoch": 1.6946934980407784, "grad_norm": 185.46144104003906, "learning_rate": 1.197541853186268e-07, "loss": 16.0, "step": 25517 }, { "epoch": 1.694759912333134, "grad_norm": 463.78668212890625, "learning_rate": 1.1970315507346306e-07, "loss": 11.3594, "step": 25518 }, { "epoch": 1.6948263266254897, "grad_norm": 94.37353515625, "learning_rate": 1.1965213501106596e-07, "loss": 13.3672, "step": 25519 }, { "epoch": 1.6948927409178456, "grad_norm": 246.20797729492188, "learning_rate": 1.1960112513202514e-07, "loss": 15.8125, "step": 25520 }, { "epoch": 1.6949591552102012, "grad_norm": 238.0900115966797, "learning_rate": 1.195501254369309e-07, "loss": 17.3438, "step": 25521 }, { "epoch": 1.695025569502557, "grad_norm": 110.80831146240234, "learning_rate": 1.1949913592637306e-07, "loss": 11.1719, "step": 25522 }, { "epoch": 1.6950919837949128, "grad_norm": 181.90687561035156, "learning_rate": 1.1944815660094164e-07, "loss": 17.4141, "step": 25523 }, { "epoch": 1.6951583980872684, "grad_norm": 89.06182861328125, "learning_rate": 1.1939718746122583e-07, "loss": 14.4844, "step": 25524 }, { "epoch": 1.695224812379624, "grad_norm": 282.8095397949219, "learning_rate": 1.1934622850781607e-07, "loss": 28.2188, "step": 25525 }, { "epoch": 1.69529122667198, "grad_norm": 181.21493530273438, "learning_rate": 1.1929527974130104e-07, "loss": 19.8438, "step": 25526 }, { "epoch": 1.6953576409643354, "grad_norm": 273.68341064453125, "learning_rate": 1.1924434116227034e-07, "loss": 16.0312, "step": 25527 }, { "epoch": 1.6954240552566913, "grad_norm": 211.77647399902344, "learning_rate": 1.1919341277131323e-07, "loss": 19.875, "step": 25528 }, { "epoch": 1.695490469549047, "grad_norm": 1146.8353271484375, "learning_rate": 1.1914249456901881e-07, "loss": 29.25, "step": 25529 }, { "epoch": 1.6955568838414026, "grad_norm": 123.085205078125, "learning_rate": 1.1909158655597617e-07, "loss": 17.1406, "step": 25530 }, { "epoch": 1.6956232981337584, "grad_norm": 226.72178649902344, "learning_rate": 1.1904068873277418e-07, "loss": 13.9219, "step": 25531 }, { "epoch": 1.695689712426114, "grad_norm": 128.62303161621094, "learning_rate": 1.1898980110000123e-07, "loss": 18.5625, "step": 25532 }, { "epoch": 1.6957561267184698, "grad_norm": 146.8354949951172, "learning_rate": 1.1893892365824654e-07, "loss": 13.4219, "step": 25533 }, { "epoch": 1.6958225410108256, "grad_norm": 424.64935302734375, "learning_rate": 1.1888805640809817e-07, "loss": 22.75, "step": 25534 }, { "epoch": 1.6958889553031813, "grad_norm": 123.74556732177734, "learning_rate": 1.188371993501447e-07, "loss": 14.5312, "step": 25535 }, { "epoch": 1.695955369595537, "grad_norm": 457.64569091796875, "learning_rate": 1.1878635248497437e-07, "loss": 17.1406, "step": 25536 }, { "epoch": 1.6960217838878928, "grad_norm": 198.8658447265625, "learning_rate": 1.1873551581317543e-07, "loss": 17.2344, "step": 25537 }, { "epoch": 1.6960881981802483, "grad_norm": 218.39637756347656, "learning_rate": 1.1868468933533582e-07, "loss": 14.4219, "step": 25538 }, { "epoch": 1.6961546124726041, "grad_norm": 194.7821807861328, "learning_rate": 1.1863387305204375e-07, "loss": 16.5781, "step": 25539 }, { "epoch": 1.6962210267649598, "grad_norm": 125.67886352539062, "learning_rate": 1.1858306696388642e-07, "loss": 15.6719, "step": 25540 }, { "epoch": 1.6962874410573154, "grad_norm": 180.23995971679688, "learning_rate": 1.1853227107145236e-07, "loss": 16.875, "step": 25541 }, { "epoch": 1.6963538553496713, "grad_norm": 154.05380249023438, "learning_rate": 1.1848148537532842e-07, "loss": 14.4844, "step": 25542 }, { "epoch": 1.696420269642027, "grad_norm": 175.1541748046875, "learning_rate": 1.1843070987610238e-07, "loss": 13.2188, "step": 25543 }, { "epoch": 1.6964866839343826, "grad_norm": 425.25750732421875, "learning_rate": 1.1837994457436162e-07, "loss": 22.6875, "step": 25544 }, { "epoch": 1.6965530982267385, "grad_norm": 269.74658203125, "learning_rate": 1.1832918947069337e-07, "loss": 20.8594, "step": 25545 }, { "epoch": 1.6966195125190942, "grad_norm": 180.54586791992188, "learning_rate": 1.1827844456568425e-07, "loss": 16.4531, "step": 25546 }, { "epoch": 1.6966859268114498, "grad_norm": 210.6194610595703, "learning_rate": 1.1822770985992214e-07, "loss": 16.2344, "step": 25547 }, { "epoch": 1.6967523411038057, "grad_norm": 437.7851867675781, "learning_rate": 1.1817698535399323e-07, "loss": 12.6562, "step": 25548 }, { "epoch": 1.6968187553961611, "grad_norm": 148.69052124023438, "learning_rate": 1.1812627104848438e-07, "loss": 12.1562, "step": 25549 }, { "epoch": 1.696885169688517, "grad_norm": 266.6640625, "learning_rate": 1.1807556694398236e-07, "loss": 20.5938, "step": 25550 }, { "epoch": 1.6969515839808726, "grad_norm": 553.6187133789062, "learning_rate": 1.1802487304107368e-07, "loss": 15.4062, "step": 25551 }, { "epoch": 1.6970179982732283, "grad_norm": 770.3931884765625, "learning_rate": 1.1797418934034465e-07, "loss": 14.1953, "step": 25552 }, { "epoch": 1.6970844125655842, "grad_norm": 480.9880065917969, "learning_rate": 1.1792351584238158e-07, "loss": 20.75, "step": 25553 }, { "epoch": 1.6971508268579398, "grad_norm": 218.92564392089844, "learning_rate": 1.1787285254777057e-07, "loss": 26.3906, "step": 25554 }, { "epoch": 1.6972172411502955, "grad_norm": 188.0690155029297, "learning_rate": 1.1782219945709781e-07, "loss": 16.0469, "step": 25555 }, { "epoch": 1.6972836554426514, "grad_norm": 112.01811218261719, "learning_rate": 1.177715565709494e-07, "loss": 16.4219, "step": 25556 }, { "epoch": 1.697350069735007, "grad_norm": 259.85614013671875, "learning_rate": 1.1772092388991051e-07, "loss": 16.5156, "step": 25557 }, { "epoch": 1.6974164840273627, "grad_norm": 281.67523193359375, "learning_rate": 1.1767030141456757e-07, "loss": 20.7969, "step": 25558 }, { "epoch": 1.6974828983197185, "grad_norm": 158.7449188232422, "learning_rate": 1.1761968914550557e-07, "loss": 13.4219, "step": 25559 }, { "epoch": 1.697549312612074, "grad_norm": 211.23089599609375, "learning_rate": 1.1756908708331025e-07, "loss": 15.4844, "step": 25560 }, { "epoch": 1.6976157269044299, "grad_norm": 139.9147491455078, "learning_rate": 1.1751849522856683e-07, "loss": 12.9062, "step": 25561 }, { "epoch": 1.6976821411967855, "grad_norm": 278.6538391113281, "learning_rate": 1.174679135818608e-07, "loss": 15.125, "step": 25562 }, { "epoch": 1.6977485554891412, "grad_norm": 180.79434204101562, "learning_rate": 1.1741734214377674e-07, "loss": 23.5469, "step": 25563 }, { "epoch": 1.697814969781497, "grad_norm": 119.76439666748047, "learning_rate": 1.1736678091490026e-07, "loss": 16.2812, "step": 25564 }, { "epoch": 1.6978813840738527, "grad_norm": 197.18653869628906, "learning_rate": 1.173162298958158e-07, "loss": 14.4844, "step": 25565 }, { "epoch": 1.6979477983662083, "grad_norm": 275.6305847167969, "learning_rate": 1.172656890871081e-07, "loss": 14.4062, "step": 25566 }, { "epoch": 1.6980142126585642, "grad_norm": 205.0542449951172, "learning_rate": 1.1721515848936191e-07, "loss": 15.6094, "step": 25567 }, { "epoch": 1.6980806269509199, "grad_norm": 257.4345703125, "learning_rate": 1.1716463810316168e-07, "loss": 19.3438, "step": 25568 }, { "epoch": 1.6981470412432755, "grad_norm": 362.3243713378906, "learning_rate": 1.1711412792909192e-07, "loss": 16.7344, "step": 25569 }, { "epoch": 1.6982134555356314, "grad_norm": 255.40145874023438, "learning_rate": 1.1706362796773705e-07, "loss": 19.2188, "step": 25570 }, { "epoch": 1.6982798698279868, "grad_norm": 345.20037841796875, "learning_rate": 1.1701313821968051e-07, "loss": 21.9062, "step": 25571 }, { "epoch": 1.6983462841203427, "grad_norm": 247.90765380859375, "learning_rate": 1.1696265868550737e-07, "loss": 17.375, "step": 25572 }, { "epoch": 1.6984126984126984, "grad_norm": 268.04522705078125, "learning_rate": 1.1691218936580071e-07, "loss": 18.0938, "step": 25573 }, { "epoch": 1.698479112705054, "grad_norm": 340.0032958984375, "learning_rate": 1.1686173026114466e-07, "loss": 16.9062, "step": 25574 }, { "epoch": 1.69854552699741, "grad_norm": 504.464599609375, "learning_rate": 1.1681128137212282e-07, "loss": 17.1094, "step": 25575 }, { "epoch": 1.6986119412897656, "grad_norm": 532.0255126953125, "learning_rate": 1.1676084269931874e-07, "loss": 13.6719, "step": 25576 }, { "epoch": 1.6986783555821212, "grad_norm": 115.87440490722656, "learning_rate": 1.1671041424331607e-07, "loss": 12.7812, "step": 25577 }, { "epoch": 1.698744769874477, "grad_norm": 362.5233459472656, "learning_rate": 1.1665999600469811e-07, "loss": 18.1875, "step": 25578 }, { "epoch": 1.6988111841668327, "grad_norm": 118.35658264160156, "learning_rate": 1.1660958798404752e-07, "loss": 13.4375, "step": 25579 }, { "epoch": 1.6988775984591884, "grad_norm": 191.98995971679688, "learning_rate": 1.1655919018194837e-07, "loss": 21.7188, "step": 25580 }, { "epoch": 1.6989440127515443, "grad_norm": 163.8838348388672, "learning_rate": 1.1650880259898277e-07, "loss": 15.2188, "step": 25581 }, { "epoch": 1.6990104270438997, "grad_norm": 917.9111938476562, "learning_rate": 1.164584252357339e-07, "loss": 21.0938, "step": 25582 }, { "epoch": 1.6990768413362556, "grad_norm": 97.90670013427734, "learning_rate": 1.1640805809278454e-07, "loss": 11.7344, "step": 25583 }, { "epoch": 1.6991432556286112, "grad_norm": 319.97222900390625, "learning_rate": 1.1635770117071742e-07, "loss": 29.3281, "step": 25584 }, { "epoch": 1.699209669920967, "grad_norm": 233.26661682128906, "learning_rate": 1.1630735447011441e-07, "loss": 21.6406, "step": 25585 }, { "epoch": 1.6992760842133228, "grad_norm": 285.2081604003906, "learning_rate": 1.1625701799155885e-07, "loss": 19.5625, "step": 25586 }, { "epoch": 1.6993424985056784, "grad_norm": 225.71177673339844, "learning_rate": 1.1620669173563224e-07, "loss": 18.2656, "step": 25587 }, { "epoch": 1.699408912798034, "grad_norm": 606.1556396484375, "learning_rate": 1.1615637570291714e-07, "loss": 19.875, "step": 25588 }, { "epoch": 1.69947532709039, "grad_norm": 383.9282531738281, "learning_rate": 1.1610606989399529e-07, "loss": 21.7344, "step": 25589 }, { "epoch": 1.6995417413827456, "grad_norm": 127.84004211425781, "learning_rate": 1.160557743094488e-07, "loss": 16.8438, "step": 25590 }, { "epoch": 1.6996081556751013, "grad_norm": 219.4220428466797, "learning_rate": 1.160054889498594e-07, "loss": 16.6094, "step": 25591 }, { "epoch": 1.6996745699674571, "grad_norm": 204.7715606689453, "learning_rate": 1.1595521381580875e-07, "loss": 15.4219, "step": 25592 }, { "epoch": 1.6997409842598126, "grad_norm": 464.45361328125, "learning_rate": 1.1590494890787838e-07, "loss": 11.6641, "step": 25593 }, { "epoch": 1.6998073985521684, "grad_norm": 318.095458984375, "learning_rate": 1.1585469422664985e-07, "loss": 20.2188, "step": 25594 }, { "epoch": 1.699873812844524, "grad_norm": 233.8553009033203, "learning_rate": 1.1580444977270443e-07, "loss": 13.4844, "step": 25595 }, { "epoch": 1.6999402271368798, "grad_norm": 318.5550231933594, "learning_rate": 1.1575421554662302e-07, "loss": 17.4531, "step": 25596 }, { "epoch": 1.7000066414292356, "grad_norm": 190.603271484375, "learning_rate": 1.1570399154898735e-07, "loss": 15.6406, "step": 25597 }, { "epoch": 1.7000730557215913, "grad_norm": 180.3448944091797, "learning_rate": 1.1565377778037778e-07, "loss": 14.1562, "step": 25598 }, { "epoch": 1.700139470013947, "grad_norm": 163.3728485107422, "learning_rate": 1.1560357424137535e-07, "loss": 11.1562, "step": 25599 }, { "epoch": 1.7002058843063028, "grad_norm": 139.72979736328125, "learning_rate": 1.1555338093256073e-07, "loss": 10.125, "step": 25600 }, { "epoch": 1.7002722985986585, "grad_norm": 504.73651123046875, "learning_rate": 1.1550319785451468e-07, "loss": 15.2344, "step": 25601 }, { "epoch": 1.7003387128910141, "grad_norm": 479.5087585449219, "learning_rate": 1.1545302500781762e-07, "loss": 12.9375, "step": 25602 }, { "epoch": 1.70040512718337, "grad_norm": 287.28399658203125, "learning_rate": 1.1540286239305008e-07, "loss": 22.0156, "step": 25603 }, { "epoch": 1.7004715414757254, "grad_norm": 372.18731689453125, "learning_rate": 1.1535271001079172e-07, "loss": 21.4375, "step": 25604 }, { "epoch": 1.7005379557680813, "grad_norm": 468.831298828125, "learning_rate": 1.1530256786162351e-07, "loss": 20.5312, "step": 25605 }, { "epoch": 1.700604370060437, "grad_norm": 208.55516052246094, "learning_rate": 1.1525243594612488e-07, "loss": 15.875, "step": 25606 }, { "epoch": 1.7006707843527926, "grad_norm": 132.3523712158203, "learning_rate": 1.1520231426487593e-07, "loss": 16.5312, "step": 25607 }, { "epoch": 1.7007371986451485, "grad_norm": 562.4256591796875, "learning_rate": 1.1515220281845628e-07, "loss": 24.9531, "step": 25608 }, { "epoch": 1.7008036129375042, "grad_norm": 183.50704956054688, "learning_rate": 1.1510210160744604e-07, "loss": 18.1094, "step": 25609 }, { "epoch": 1.7008700272298598, "grad_norm": 226.31719970703125, "learning_rate": 1.1505201063242398e-07, "loss": 20.3906, "step": 25610 }, { "epoch": 1.7009364415222157, "grad_norm": 288.0292663574219, "learning_rate": 1.150019298939704e-07, "loss": 25.7188, "step": 25611 }, { "epoch": 1.7010028558145713, "grad_norm": 1185.9468994140625, "learning_rate": 1.1495185939266395e-07, "loss": 12.7188, "step": 25612 }, { "epoch": 1.701069270106927, "grad_norm": 196.10342407226562, "learning_rate": 1.1490179912908393e-07, "loss": 24.7812, "step": 25613 }, { "epoch": 1.7011356843992829, "grad_norm": 176.97860717773438, "learning_rate": 1.1485174910380968e-07, "loss": 14.4766, "step": 25614 }, { "epoch": 1.7012020986916383, "grad_norm": 165.33485412597656, "learning_rate": 1.1480170931741995e-07, "loss": 13.75, "step": 25615 }, { "epoch": 1.7012685129839942, "grad_norm": 317.97021484375, "learning_rate": 1.147516797704936e-07, "loss": 16.2344, "step": 25616 }, { "epoch": 1.7013349272763498, "grad_norm": 192.69851684570312, "learning_rate": 1.1470166046360952e-07, "loss": 16.7188, "step": 25617 }, { "epoch": 1.7014013415687055, "grad_norm": 1147.855712890625, "learning_rate": 1.1465165139734578e-07, "loss": 14.6719, "step": 25618 }, { "epoch": 1.7014677558610614, "grad_norm": 596.7531127929688, "learning_rate": 1.146016525722816e-07, "loss": 13.6719, "step": 25619 }, { "epoch": 1.701534170153417, "grad_norm": 380.7615966796875, "learning_rate": 1.1455166398899474e-07, "loss": 15.0156, "step": 25620 }, { "epoch": 1.7016005844457727, "grad_norm": 241.4517059326172, "learning_rate": 1.1450168564806362e-07, "loss": 21.4688, "step": 25621 }, { "epoch": 1.7016669987381285, "grad_norm": 423.8030090332031, "learning_rate": 1.1445171755006644e-07, "loss": 17.3438, "step": 25622 }, { "epoch": 1.7017334130304842, "grad_norm": 300.8081359863281, "learning_rate": 1.1440175969558119e-07, "loss": 15.7812, "step": 25623 }, { "epoch": 1.7017998273228399, "grad_norm": 162.3988800048828, "learning_rate": 1.1435181208518574e-07, "loss": 13.7969, "step": 25624 }, { "epoch": 1.7018662416151957, "grad_norm": 485.9059143066406, "learning_rate": 1.1430187471945796e-07, "loss": 19.3594, "step": 25625 }, { "epoch": 1.7019326559075512, "grad_norm": 94.93731689453125, "learning_rate": 1.1425194759897517e-07, "loss": 12.8281, "step": 25626 }, { "epoch": 1.701999070199907, "grad_norm": 527.733154296875, "learning_rate": 1.1420203072431489e-07, "loss": 15.7188, "step": 25627 }, { "epoch": 1.7020654844922627, "grad_norm": 509.7036437988281, "learning_rate": 1.1415212409605523e-07, "loss": 18.0312, "step": 25628 }, { "epoch": 1.7021318987846183, "grad_norm": 407.6966857910156, "learning_rate": 1.1410222771477274e-07, "loss": 20.1094, "step": 25629 }, { "epoch": 1.7021983130769742, "grad_norm": 783.0986328125, "learning_rate": 1.1405234158104482e-07, "loss": 14.6719, "step": 25630 }, { "epoch": 1.7022647273693299, "grad_norm": 171.85614013671875, "learning_rate": 1.1400246569544847e-07, "loss": 13.25, "step": 25631 }, { "epoch": 1.7023311416616855, "grad_norm": 404.5690612792969, "learning_rate": 1.1395260005856078e-07, "loss": 22.5312, "step": 25632 }, { "epoch": 1.7023975559540414, "grad_norm": 1451.5975341796875, "learning_rate": 1.1390274467095851e-07, "loss": 14.875, "step": 25633 }, { "epoch": 1.702463970246397, "grad_norm": 380.0823974609375, "learning_rate": 1.1385289953321853e-07, "loss": 14.3594, "step": 25634 }, { "epoch": 1.7025303845387527, "grad_norm": 457.8905029296875, "learning_rate": 1.1380306464591683e-07, "loss": 17.1719, "step": 25635 }, { "epoch": 1.7025967988311086, "grad_norm": 178.48699951171875, "learning_rate": 1.1375324000963071e-07, "loss": 15.5, "step": 25636 }, { "epoch": 1.702663213123464, "grad_norm": 171.7808380126953, "learning_rate": 1.1370342562493574e-07, "loss": 22.5938, "step": 25637 }, { "epoch": 1.70272962741582, "grad_norm": 132.62277221679688, "learning_rate": 1.1365362149240853e-07, "loss": 12.4141, "step": 25638 }, { "epoch": 1.7027960417081756, "grad_norm": 250.0684814453125, "learning_rate": 1.1360382761262522e-07, "loss": 16.1094, "step": 25639 }, { "epoch": 1.7028624560005312, "grad_norm": 133.93898010253906, "learning_rate": 1.1355404398616164e-07, "loss": 17.6094, "step": 25640 }, { "epoch": 1.702928870292887, "grad_norm": 166.2353973388672, "learning_rate": 1.1350427061359369e-07, "loss": 15.0312, "step": 25641 }, { "epoch": 1.7029952845852427, "grad_norm": 174.8408660888672, "learning_rate": 1.1345450749549734e-07, "loss": 15.9219, "step": 25642 }, { "epoch": 1.7030616988775984, "grad_norm": 253.41525268554688, "learning_rate": 1.134047546324477e-07, "loss": 16.4844, "step": 25643 }, { "epoch": 1.7031281131699543, "grad_norm": 136.78643798828125, "learning_rate": 1.1335501202502106e-07, "loss": 15.25, "step": 25644 }, { "epoch": 1.70319452746231, "grad_norm": 163.39723205566406, "learning_rate": 1.133052796737921e-07, "loss": 14.1562, "step": 25645 }, { "epoch": 1.7032609417546656, "grad_norm": 332.5260009765625, "learning_rate": 1.1325555757933636e-07, "loss": 37.2188, "step": 25646 }, { "epoch": 1.7033273560470215, "grad_norm": 348.3746337890625, "learning_rate": 1.1320584574222903e-07, "loss": 31.2188, "step": 25647 }, { "epoch": 1.703393770339377, "grad_norm": 506.05096435546875, "learning_rate": 1.1315614416304531e-07, "loss": 12.7969, "step": 25648 }, { "epoch": 1.7034601846317328, "grad_norm": 142.30638122558594, "learning_rate": 1.1310645284235953e-07, "loss": 13.6562, "step": 25649 }, { "epoch": 1.7035265989240884, "grad_norm": 164.56671142578125, "learning_rate": 1.1305677178074735e-07, "loss": 14.2188, "step": 25650 }, { "epoch": 1.703593013216444, "grad_norm": 106.47209167480469, "learning_rate": 1.1300710097878275e-07, "loss": 16.1875, "step": 25651 }, { "epoch": 1.7036594275088, "grad_norm": 342.26165771484375, "learning_rate": 1.1295744043704047e-07, "loss": 25.7656, "step": 25652 }, { "epoch": 1.7037258418011556, "grad_norm": 435.78912353515625, "learning_rate": 1.1290779015609508e-07, "loss": 18.0781, "step": 25653 }, { "epoch": 1.7037922560935113, "grad_norm": 400.74798583984375, "learning_rate": 1.1285815013652088e-07, "loss": 15.1406, "step": 25654 }, { "epoch": 1.7038586703858671, "grad_norm": 131.3631591796875, "learning_rate": 1.1280852037889199e-07, "loss": 10.4062, "step": 25655 }, { "epoch": 1.7039250846782228, "grad_norm": 510.8754577636719, "learning_rate": 1.1275890088378281e-07, "loss": 29.4531, "step": 25656 }, { "epoch": 1.7039914989705784, "grad_norm": 339.33233642578125, "learning_rate": 1.1270929165176656e-07, "loss": 19.7344, "step": 25657 }, { "epoch": 1.7040579132629343, "grad_norm": 167.47036743164062, "learning_rate": 1.1265969268341802e-07, "loss": 14.7031, "step": 25658 }, { "epoch": 1.7041243275552898, "grad_norm": 198.5015106201172, "learning_rate": 1.1261010397931037e-07, "loss": 13.5781, "step": 25659 }, { "epoch": 1.7041907418476456, "grad_norm": 186.4150848388672, "learning_rate": 1.1256052554001727e-07, "loss": 16.0625, "step": 25660 }, { "epoch": 1.7042571561400013, "grad_norm": 221.750732421875, "learning_rate": 1.1251095736611227e-07, "loss": 14.6562, "step": 25661 }, { "epoch": 1.704323570432357, "grad_norm": 143.30908203125, "learning_rate": 1.1246139945816868e-07, "loss": 13.5859, "step": 25662 }, { "epoch": 1.7043899847247128, "grad_norm": 280.49761962890625, "learning_rate": 1.1241185181675994e-07, "loss": 20.5, "step": 25663 }, { "epoch": 1.7044563990170685, "grad_norm": 97.1354751586914, "learning_rate": 1.1236231444245913e-07, "loss": 13.1094, "step": 25664 }, { "epoch": 1.7045228133094241, "grad_norm": 115.80294036865234, "learning_rate": 1.1231278733583916e-07, "loss": 14.7422, "step": 25665 }, { "epoch": 1.70458922760178, "grad_norm": 316.1295166015625, "learning_rate": 1.1226327049747298e-07, "loss": 19.7031, "step": 25666 }, { "epoch": 1.7046556418941357, "grad_norm": 173.89048767089844, "learning_rate": 1.1221376392793369e-07, "loss": 17.7031, "step": 25667 }, { "epoch": 1.7047220561864913, "grad_norm": 177.17098999023438, "learning_rate": 1.1216426762779308e-07, "loss": 14.9375, "step": 25668 }, { "epoch": 1.7047884704788472, "grad_norm": 151.02456665039062, "learning_rate": 1.1211478159762478e-07, "loss": 12.2656, "step": 25669 }, { "epoch": 1.7048548847712026, "grad_norm": 155.892333984375, "learning_rate": 1.1206530583800056e-07, "loss": 14.9844, "step": 25670 }, { "epoch": 1.7049212990635585, "grad_norm": 338.2569885253906, "learning_rate": 1.1201584034949275e-07, "loss": 21.2188, "step": 25671 }, { "epoch": 1.7049877133559141, "grad_norm": 194.20164489746094, "learning_rate": 1.1196638513267375e-07, "loss": 13.9688, "step": 25672 }, { "epoch": 1.7050541276482698, "grad_norm": 148.3564453125, "learning_rate": 1.1191694018811571e-07, "loss": 15.4219, "step": 25673 }, { "epoch": 1.7051205419406257, "grad_norm": 218.85520935058594, "learning_rate": 1.118675055163899e-07, "loss": 17.1719, "step": 25674 }, { "epoch": 1.7051869562329813, "grad_norm": 262.9972839355469, "learning_rate": 1.1181808111806922e-07, "loss": 15.2344, "step": 25675 }, { "epoch": 1.705253370525337, "grad_norm": 430.6782531738281, "learning_rate": 1.1176866699372455e-07, "loss": 19.125, "step": 25676 }, { "epoch": 1.7053197848176929, "grad_norm": 445.505126953125, "learning_rate": 1.1171926314392777e-07, "loss": 18.7188, "step": 25677 }, { "epoch": 1.7053861991100485, "grad_norm": 116.07901000976562, "learning_rate": 1.1166986956925029e-07, "loss": 13.4375, "step": 25678 }, { "epoch": 1.7054526134024042, "grad_norm": 279.07257080078125, "learning_rate": 1.1162048627026355e-07, "loss": 18.6875, "step": 25679 }, { "epoch": 1.70551902769476, "grad_norm": 173.1627197265625, "learning_rate": 1.1157111324753876e-07, "loss": 15.5469, "step": 25680 }, { "epoch": 1.7055854419871155, "grad_norm": 371.84619140625, "learning_rate": 1.1152175050164725e-07, "loss": 14.1406, "step": 25681 }, { "epoch": 1.7056518562794714, "grad_norm": 191.09446716308594, "learning_rate": 1.1147239803315944e-07, "loss": 15.4062, "step": 25682 }, { "epoch": 1.705718270571827, "grad_norm": 264.81158447265625, "learning_rate": 1.1142305584264688e-07, "loss": 15.0625, "step": 25683 }, { "epoch": 1.7057846848641827, "grad_norm": 342.8394775390625, "learning_rate": 1.113737239306799e-07, "loss": 18.8281, "step": 25684 }, { "epoch": 1.7058510991565385, "grad_norm": 119.11206817626953, "learning_rate": 1.1132440229782924e-07, "loss": 13.7344, "step": 25685 }, { "epoch": 1.7059175134488942, "grad_norm": 297.7575988769531, "learning_rate": 1.1127509094466548e-07, "loss": 15.2188, "step": 25686 }, { "epoch": 1.7059839277412499, "grad_norm": 314.0950622558594, "learning_rate": 1.112257898717589e-07, "loss": 14.3984, "step": 25687 }, { "epoch": 1.7060503420336057, "grad_norm": 215.60556030273438, "learning_rate": 1.1117649907967997e-07, "loss": 17.6406, "step": 25688 }, { "epoch": 1.7061167563259614, "grad_norm": 168.50343322753906, "learning_rate": 1.1112721856899898e-07, "loss": 17.4531, "step": 25689 }, { "epoch": 1.706183170618317, "grad_norm": 192.0160369873047, "learning_rate": 1.1107794834028561e-07, "loss": 14.2969, "step": 25690 }, { "epoch": 1.706249584910673, "grad_norm": 578.4913330078125, "learning_rate": 1.1102868839410983e-07, "loss": 14.8125, "step": 25691 }, { "epoch": 1.7063159992030283, "grad_norm": 988.4155883789062, "learning_rate": 1.1097943873104165e-07, "loss": 18.8594, "step": 25692 }, { "epoch": 1.7063824134953842, "grad_norm": 556.6993408203125, "learning_rate": 1.1093019935165071e-07, "loss": 11.8906, "step": 25693 }, { "epoch": 1.7064488277877399, "grad_norm": 419.2991943359375, "learning_rate": 1.1088097025650656e-07, "loss": 18.9219, "step": 25694 }, { "epoch": 1.7065152420800955, "grad_norm": 153.262451171875, "learning_rate": 1.1083175144617885e-07, "loss": 17.5469, "step": 25695 }, { "epoch": 1.7065816563724514, "grad_norm": 179.87008666992188, "learning_rate": 1.1078254292123624e-07, "loss": 14.4062, "step": 25696 }, { "epoch": 1.706648070664807, "grad_norm": 216.0765380859375, "learning_rate": 1.1073334468224882e-07, "loss": 16.9062, "step": 25697 }, { "epoch": 1.7067144849571627, "grad_norm": 189.68760681152344, "learning_rate": 1.1068415672978515e-07, "loss": 17.9375, "step": 25698 }, { "epoch": 1.7067808992495186, "grad_norm": 188.25360107421875, "learning_rate": 1.1063497906441433e-07, "loss": 19.0781, "step": 25699 }, { "epoch": 1.7068473135418742, "grad_norm": 427.53082275390625, "learning_rate": 1.1058581168670523e-07, "loss": 16.0469, "step": 25700 }, { "epoch": 1.70691372783423, "grad_norm": 184.88267517089844, "learning_rate": 1.1053665459722661e-07, "loss": 16.3047, "step": 25701 }, { "epoch": 1.7069801421265858, "grad_norm": 135.0713653564453, "learning_rate": 1.1048750779654703e-07, "loss": 13.9062, "step": 25702 }, { "epoch": 1.7070465564189412, "grad_norm": 212.306396484375, "learning_rate": 1.1043837128523514e-07, "loss": 11.2344, "step": 25703 }, { "epoch": 1.707112970711297, "grad_norm": 157.86888122558594, "learning_rate": 1.1038924506385916e-07, "loss": 11.1406, "step": 25704 }, { "epoch": 1.7071793850036527, "grad_norm": 180.41285705566406, "learning_rate": 1.103401291329874e-07, "loss": 21.0625, "step": 25705 }, { "epoch": 1.7072457992960084, "grad_norm": 200.7631072998047, "learning_rate": 1.102910234931882e-07, "loss": 15.8906, "step": 25706 }, { "epoch": 1.7073122135883643, "grad_norm": 185.2394256591797, "learning_rate": 1.1024192814502908e-07, "loss": 10.4922, "step": 25707 }, { "epoch": 1.70737862788072, "grad_norm": 842.6781616210938, "learning_rate": 1.1019284308907862e-07, "loss": 13.6406, "step": 25708 }, { "epoch": 1.7074450421730756, "grad_norm": 158.38626098632812, "learning_rate": 1.10143768325904e-07, "loss": 15.8906, "step": 25709 }, { "epoch": 1.7075114564654315, "grad_norm": 194.07249450683594, "learning_rate": 1.1009470385607323e-07, "loss": 13.2812, "step": 25710 }, { "epoch": 1.707577870757787, "grad_norm": 1061.90576171875, "learning_rate": 1.1004564968015373e-07, "loss": 12.7969, "step": 25711 }, { "epoch": 1.7076442850501428, "grad_norm": 144.74362182617188, "learning_rate": 1.0999660579871317e-07, "loss": 19.2812, "step": 25712 }, { "epoch": 1.7077106993424986, "grad_norm": 105.90676879882812, "learning_rate": 1.0994757221231832e-07, "loss": 15.6562, "step": 25713 }, { "epoch": 1.707777113634854, "grad_norm": 237.40121459960938, "learning_rate": 1.0989854892153716e-07, "loss": 15.6875, "step": 25714 }, { "epoch": 1.70784352792721, "grad_norm": 263.29351806640625, "learning_rate": 1.0984953592693602e-07, "loss": 20.0938, "step": 25715 }, { "epoch": 1.7079099422195656, "grad_norm": 114.37184143066406, "learning_rate": 1.0980053322908222e-07, "loss": 12.625, "step": 25716 }, { "epoch": 1.7079763565119213, "grad_norm": 392.2239990234375, "learning_rate": 1.0975154082854255e-07, "loss": 23.6094, "step": 25717 }, { "epoch": 1.7080427708042771, "grad_norm": 487.20916748046875, "learning_rate": 1.0970255872588364e-07, "loss": 15.8438, "step": 25718 }, { "epoch": 1.7081091850966328, "grad_norm": 265.7515563964844, "learning_rate": 1.0965358692167214e-07, "loss": 21.7188, "step": 25719 }, { "epoch": 1.7081755993889884, "grad_norm": 132.3734588623047, "learning_rate": 1.0960462541647486e-07, "loss": 12.1719, "step": 25720 }, { "epoch": 1.7082420136813443, "grad_norm": 160.6936492919922, "learning_rate": 1.0955567421085731e-07, "loss": 16.0469, "step": 25721 }, { "epoch": 1.7083084279737, "grad_norm": 399.862060546875, "learning_rate": 1.0950673330538673e-07, "loss": 19.1094, "step": 25722 }, { "epoch": 1.7083748422660556, "grad_norm": 110.48066711425781, "learning_rate": 1.0945780270062866e-07, "loss": 14.3125, "step": 25723 }, { "epoch": 1.7084412565584115, "grad_norm": 111.23731994628906, "learning_rate": 1.0940888239714907e-07, "loss": 11.7969, "step": 25724 }, { "epoch": 1.708507670850767, "grad_norm": 261.6544189453125, "learning_rate": 1.0935997239551409e-07, "loss": 13.2344, "step": 25725 }, { "epoch": 1.7085740851431228, "grad_norm": 745.0756225585938, "learning_rate": 1.0931107269628925e-07, "loss": 16.7656, "step": 25726 }, { "epoch": 1.7086404994354785, "grad_norm": 163.30694580078125, "learning_rate": 1.0926218330004045e-07, "loss": 14.3438, "step": 25727 }, { "epoch": 1.7087069137278341, "grad_norm": 282.0787658691406, "learning_rate": 1.0921330420733321e-07, "loss": 19.625, "step": 25728 }, { "epoch": 1.70877332802019, "grad_norm": 228.52391052246094, "learning_rate": 1.0916443541873243e-07, "loss": 14.7344, "step": 25729 }, { "epoch": 1.7088397423125457, "grad_norm": 177.60736083984375, "learning_rate": 1.0911557693480411e-07, "loss": 12.0312, "step": 25730 }, { "epoch": 1.7089061566049013, "grad_norm": 369.97515869140625, "learning_rate": 1.09066728756113e-07, "loss": 16.2344, "step": 25731 }, { "epoch": 1.7089725708972572, "grad_norm": 187.901611328125, "learning_rate": 1.0901789088322422e-07, "loss": 13.1406, "step": 25732 }, { "epoch": 1.7090389851896128, "grad_norm": 249.7629852294922, "learning_rate": 1.0896906331670263e-07, "loss": 13.3438, "step": 25733 }, { "epoch": 1.7091053994819685, "grad_norm": 193.44911193847656, "learning_rate": 1.0892024605711336e-07, "loss": 16.9375, "step": 25734 }, { "epoch": 1.7091718137743244, "grad_norm": 239.49256896972656, "learning_rate": 1.0887143910502039e-07, "loss": 12.3906, "step": 25735 }, { "epoch": 1.7092382280666798, "grad_norm": 179.42039489746094, "learning_rate": 1.0882264246098927e-07, "loss": 13.2188, "step": 25736 }, { "epoch": 1.7093046423590357, "grad_norm": 103.58839416503906, "learning_rate": 1.0877385612558366e-07, "loss": 10.7031, "step": 25737 }, { "epoch": 1.7093710566513913, "grad_norm": 310.1231994628906, "learning_rate": 1.0872508009936798e-07, "loss": 15.6562, "step": 25738 }, { "epoch": 1.709437470943747, "grad_norm": 230.95974731445312, "learning_rate": 1.0867631438290703e-07, "loss": 15.0469, "step": 25739 }, { "epoch": 1.7095038852361029, "grad_norm": 347.0518798828125, "learning_rate": 1.0862755897676434e-07, "loss": 18.5312, "step": 25740 }, { "epoch": 1.7095702995284585, "grad_norm": 158.73300170898438, "learning_rate": 1.0857881388150414e-07, "loss": 13.1875, "step": 25741 }, { "epoch": 1.7096367138208142, "grad_norm": 127.14637756347656, "learning_rate": 1.0853007909769007e-07, "loss": 17.6719, "step": 25742 }, { "epoch": 1.70970312811317, "grad_norm": 222.64051818847656, "learning_rate": 1.0848135462588603e-07, "loss": 13.375, "step": 25743 }, { "epoch": 1.7097695424055257, "grad_norm": 128.11810302734375, "learning_rate": 1.0843264046665556e-07, "loss": 11.9219, "step": 25744 }, { "epoch": 1.7098359566978814, "grad_norm": 252.07546997070312, "learning_rate": 1.0838393662056244e-07, "loss": 15.7188, "step": 25745 }, { "epoch": 1.7099023709902372, "grad_norm": 243.95603942871094, "learning_rate": 1.0833524308816944e-07, "loss": 12.0938, "step": 25746 }, { "epoch": 1.7099687852825927, "grad_norm": 234.41763305664062, "learning_rate": 1.0828655987004054e-07, "loss": 19.9688, "step": 25747 }, { "epoch": 1.7100351995749485, "grad_norm": 153.3867645263672, "learning_rate": 1.082378869667383e-07, "loss": 13.625, "step": 25748 }, { "epoch": 1.7101016138673042, "grad_norm": 114.2532958984375, "learning_rate": 1.0818922437882604e-07, "loss": 11.4688, "step": 25749 }, { "epoch": 1.7101680281596598, "grad_norm": 178.6932373046875, "learning_rate": 1.0814057210686656e-07, "loss": 15.0938, "step": 25750 }, { "epoch": 1.7102344424520157, "grad_norm": 143.9534149169922, "learning_rate": 1.0809193015142282e-07, "loss": 18.4375, "step": 25751 }, { "epoch": 1.7103008567443714, "grad_norm": 180.14207458496094, "learning_rate": 1.0804329851305694e-07, "loss": 12.4062, "step": 25752 }, { "epoch": 1.710367271036727, "grad_norm": 463.63201904296875, "learning_rate": 1.0799467719233224e-07, "loss": 17.5938, "step": 25753 }, { "epoch": 1.710433685329083, "grad_norm": 331.8535461425781, "learning_rate": 1.0794606618981061e-07, "loss": 21.1094, "step": 25754 }, { "epoch": 1.7105000996214386, "grad_norm": 125.37671661376953, "learning_rate": 1.078974655060545e-07, "loss": 13.8438, "step": 25755 }, { "epoch": 1.7105665139137942, "grad_norm": 115.11560821533203, "learning_rate": 1.07848875141626e-07, "loss": 15.1406, "step": 25756 }, { "epoch": 1.71063292820615, "grad_norm": 154.4127960205078, "learning_rate": 1.0780029509708732e-07, "loss": 14.5469, "step": 25757 }, { "epoch": 1.7106993424985055, "grad_norm": 146.40338134765625, "learning_rate": 1.0775172537300037e-07, "loss": 15.6406, "step": 25758 }, { "epoch": 1.7107657567908614, "grad_norm": 192.1776885986328, "learning_rate": 1.0770316596992713e-07, "loss": 13.7109, "step": 25759 }, { "epoch": 1.710832171083217, "grad_norm": 220.6234893798828, "learning_rate": 1.076546168884288e-07, "loss": 13.5469, "step": 25760 }, { "epoch": 1.7108985853755727, "grad_norm": 240.7540283203125, "learning_rate": 1.0760607812906763e-07, "loss": 16.2188, "step": 25761 }, { "epoch": 1.7109649996679286, "grad_norm": 320.8638000488281, "learning_rate": 1.075575496924046e-07, "loss": 20.0625, "step": 25762 }, { "epoch": 1.7110314139602842, "grad_norm": 429.147216796875, "learning_rate": 1.0750903157900126e-07, "loss": 18.3125, "step": 25763 }, { "epoch": 1.71109782825264, "grad_norm": 304.26788330078125, "learning_rate": 1.0746052378941883e-07, "loss": 23.75, "step": 25764 }, { "epoch": 1.7111642425449958, "grad_norm": 178.33255004882812, "learning_rate": 1.0741202632421831e-07, "loss": 16.2812, "step": 25765 }, { "epoch": 1.7112306568373514, "grad_norm": 105.11449432373047, "learning_rate": 1.0736353918396079e-07, "loss": 12.4844, "step": 25766 }, { "epoch": 1.711297071129707, "grad_norm": 274.71087646484375, "learning_rate": 1.073150623692073e-07, "loss": 28.625, "step": 25767 }, { "epoch": 1.711363485422063, "grad_norm": 338.9111022949219, "learning_rate": 1.0726659588051812e-07, "loss": 18.7812, "step": 25768 }, { "epoch": 1.7114298997144184, "grad_norm": 197.84864807128906, "learning_rate": 1.072181397184545e-07, "loss": 13.7891, "step": 25769 }, { "epoch": 1.7114963140067743, "grad_norm": 254.59654235839844, "learning_rate": 1.0716969388357643e-07, "loss": 19.5469, "step": 25770 }, { "epoch": 1.71156272829913, "grad_norm": 115.17784118652344, "learning_rate": 1.0712125837644448e-07, "loss": 10.4375, "step": 25771 }, { "epoch": 1.7116291425914856, "grad_norm": 164.46934509277344, "learning_rate": 1.0707283319761895e-07, "loss": 14.6094, "step": 25772 }, { "epoch": 1.7116955568838415, "grad_norm": 296.6405944824219, "learning_rate": 1.0702441834766007e-07, "loss": 30.6875, "step": 25773 }, { "epoch": 1.711761971176197, "grad_norm": 223.35250854492188, "learning_rate": 1.0697601382712774e-07, "loss": 17.4688, "step": 25774 }, { "epoch": 1.7118283854685528, "grad_norm": 141.5470428466797, "learning_rate": 1.0692761963658192e-07, "loss": 13.0469, "step": 25775 }, { "epoch": 1.7118947997609086, "grad_norm": 467.9132080078125, "learning_rate": 1.0687923577658276e-07, "loss": 12.4766, "step": 25776 }, { "epoch": 1.7119612140532643, "grad_norm": 518.2460327148438, "learning_rate": 1.06830862247689e-07, "loss": 21.2656, "step": 25777 }, { "epoch": 1.71202762834562, "grad_norm": 95.46273040771484, "learning_rate": 1.0678249905046133e-07, "loss": 13.0938, "step": 25778 }, { "epoch": 1.7120940426379758, "grad_norm": 289.83544921875, "learning_rate": 1.067341461854584e-07, "loss": 16.4375, "step": 25779 }, { "epoch": 1.7121604569303313, "grad_norm": 250.41973876953125, "learning_rate": 1.0668580365323976e-07, "loss": 17.1094, "step": 25780 }, { "epoch": 1.7122268712226871, "grad_norm": 238.15121459960938, "learning_rate": 1.0663747145436474e-07, "loss": 18.2656, "step": 25781 }, { "epoch": 1.7122932855150428, "grad_norm": 165.54220581054688, "learning_rate": 1.0658914958939214e-07, "loss": 16.5625, "step": 25782 }, { "epoch": 1.7123596998073984, "grad_norm": 163.42506408691406, "learning_rate": 1.0654083805888125e-07, "loss": 16.8594, "step": 25783 }, { "epoch": 1.7124261140997543, "grad_norm": 196.82505798339844, "learning_rate": 1.0649253686339088e-07, "loss": 19.3125, "step": 25784 }, { "epoch": 1.71249252839211, "grad_norm": 118.57633972167969, "learning_rate": 1.0644424600347912e-07, "loss": 14.9844, "step": 25785 }, { "epoch": 1.7125589426844656, "grad_norm": 432.75982666015625, "learning_rate": 1.0639596547970564e-07, "loss": 14.25, "step": 25786 }, { "epoch": 1.7126253569768215, "grad_norm": 167.95620727539062, "learning_rate": 1.0634769529262799e-07, "loss": 14.7031, "step": 25787 }, { "epoch": 1.7126917712691772, "grad_norm": 194.07212829589844, "learning_rate": 1.0629943544280506e-07, "loss": 15.1562, "step": 25788 }, { "epoch": 1.7127581855615328, "grad_norm": 802.500732421875, "learning_rate": 1.0625118593079474e-07, "loss": 28.375, "step": 25789 }, { "epoch": 1.7128245998538887, "grad_norm": 198.05223083496094, "learning_rate": 1.0620294675715546e-07, "loss": 16.6094, "step": 25790 }, { "epoch": 1.7128910141462441, "grad_norm": 1052.542236328125, "learning_rate": 1.0615471792244513e-07, "loss": 20.9219, "step": 25791 }, { "epoch": 1.7129574284386, "grad_norm": 117.47980499267578, "learning_rate": 1.0610649942722172e-07, "loss": 15.125, "step": 25792 }, { "epoch": 1.7130238427309556, "grad_norm": 193.2723388671875, "learning_rate": 1.0605829127204246e-07, "loss": 12.2891, "step": 25793 }, { "epoch": 1.7130902570233113, "grad_norm": 173.39471435546875, "learning_rate": 1.060100934574658e-07, "loss": 12.25, "step": 25794 }, { "epoch": 1.7131566713156672, "grad_norm": 167.3128662109375, "learning_rate": 1.0596190598404875e-07, "loss": 11.7969, "step": 25795 }, { "epoch": 1.7132230856080228, "grad_norm": 122.46290588378906, "learning_rate": 1.0591372885234884e-07, "loss": 14.9609, "step": 25796 }, { "epoch": 1.7132894999003785, "grad_norm": 364.5428466796875, "learning_rate": 1.058655620629233e-07, "loss": 16.3906, "step": 25797 }, { "epoch": 1.7133559141927344, "grad_norm": 221.6320343017578, "learning_rate": 1.0581740561632958e-07, "loss": 10.8125, "step": 25798 }, { "epoch": 1.71342232848509, "grad_norm": 235.23280334472656, "learning_rate": 1.057692595131241e-07, "loss": 15.9062, "step": 25799 }, { "epoch": 1.7134887427774457, "grad_norm": 772.310546875, "learning_rate": 1.0572112375386454e-07, "loss": 22.6094, "step": 25800 }, { "epoch": 1.7135551570698015, "grad_norm": 292.85791015625, "learning_rate": 1.0567299833910726e-07, "loss": 13.2969, "step": 25801 }, { "epoch": 1.713621571362157, "grad_norm": 216.85482788085938, "learning_rate": 1.0562488326940888e-07, "loss": 14.2188, "step": 25802 }, { "epoch": 1.7136879856545129, "grad_norm": 176.6827392578125, "learning_rate": 1.0557677854532621e-07, "loss": 16.4141, "step": 25803 }, { "epoch": 1.7137543999468685, "grad_norm": 226.96221923828125, "learning_rate": 1.0552868416741567e-07, "loss": 12.75, "step": 25804 }, { "epoch": 1.7138208142392242, "grad_norm": 227.49464416503906, "learning_rate": 1.0548060013623339e-07, "loss": 16.4375, "step": 25805 }, { "epoch": 1.71388722853158, "grad_norm": 210.71890258789062, "learning_rate": 1.0543252645233602e-07, "loss": 15.2031, "step": 25806 }, { "epoch": 1.7139536428239357, "grad_norm": 126.33541107177734, "learning_rate": 1.0538446311627891e-07, "loss": 15.9453, "step": 25807 }, { "epoch": 1.7140200571162914, "grad_norm": 154.17681884765625, "learning_rate": 1.0533641012861894e-07, "loss": 15.5781, "step": 25808 }, { "epoch": 1.7140864714086472, "grad_norm": 158.01913452148438, "learning_rate": 1.0528836748991121e-07, "loss": 15.0, "step": 25809 }, { "epoch": 1.7141528857010029, "grad_norm": 1695.874755859375, "learning_rate": 1.0524033520071152e-07, "loss": 14.5469, "step": 25810 }, { "epoch": 1.7142192999933585, "grad_norm": 460.2096862792969, "learning_rate": 1.0519231326157607e-07, "loss": 19.8281, "step": 25811 }, { "epoch": 1.7142857142857144, "grad_norm": 91.85147857666016, "learning_rate": 1.0514430167305988e-07, "loss": 13.9531, "step": 25812 }, { "epoch": 1.7143521285780698, "grad_norm": 259.4097595214844, "learning_rate": 1.0509630043571827e-07, "loss": 14.7656, "step": 25813 }, { "epoch": 1.7144185428704257, "grad_norm": 189.649658203125, "learning_rate": 1.0504830955010668e-07, "loss": 10.7344, "step": 25814 }, { "epoch": 1.7144849571627814, "grad_norm": 282.3822937011719, "learning_rate": 1.0500032901678035e-07, "loss": 17.4219, "step": 25815 }, { "epoch": 1.714551371455137, "grad_norm": 159.0872344970703, "learning_rate": 1.0495235883629383e-07, "loss": 19.5, "step": 25816 }, { "epoch": 1.714617785747493, "grad_norm": 297.9964904785156, "learning_rate": 1.0490439900920257e-07, "loss": 16.875, "step": 25817 }, { "epoch": 1.7146842000398486, "grad_norm": 189.64508056640625, "learning_rate": 1.0485644953606098e-07, "loss": 14.7656, "step": 25818 }, { "epoch": 1.7147506143322042, "grad_norm": 216.94277954101562, "learning_rate": 1.0480851041742378e-07, "loss": 16.5781, "step": 25819 }, { "epoch": 1.71481702862456, "grad_norm": 114.1396484375, "learning_rate": 1.047605816538455e-07, "loss": 15.8516, "step": 25820 }, { "epoch": 1.7148834429169157, "grad_norm": 157.710205078125, "learning_rate": 1.0471266324588068e-07, "loss": 13.2344, "step": 25821 }, { "epoch": 1.7149498572092714, "grad_norm": 276.39996337890625, "learning_rate": 1.0466475519408347e-07, "loss": 19.1875, "step": 25822 }, { "epoch": 1.7150162715016273, "grad_norm": 256.0589904785156, "learning_rate": 1.0461685749900829e-07, "loss": 12.8906, "step": 25823 }, { "epoch": 1.7150826857939827, "grad_norm": 136.990234375, "learning_rate": 1.0456897016120858e-07, "loss": 15.0, "step": 25824 }, { "epoch": 1.7151491000863386, "grad_norm": 205.49000549316406, "learning_rate": 1.0452109318123902e-07, "loss": 12.2031, "step": 25825 }, { "epoch": 1.7152155143786945, "grad_norm": 197.846435546875, "learning_rate": 1.0447322655965308e-07, "loss": 13.25, "step": 25826 }, { "epoch": 1.71528192867105, "grad_norm": 358.2952575683594, "learning_rate": 1.0442537029700427e-07, "loss": 13.8125, "step": 25827 }, { "epoch": 1.7153483429634058, "grad_norm": 148.08660888671875, "learning_rate": 1.043775243938465e-07, "loss": 18.4844, "step": 25828 }, { "epoch": 1.7154147572557614, "grad_norm": 349.1478576660156, "learning_rate": 1.0432968885073301e-07, "loss": 22.3125, "step": 25829 }, { "epoch": 1.715481171548117, "grad_norm": 92.6115493774414, "learning_rate": 1.0428186366821722e-07, "loss": 10.0938, "step": 25830 }, { "epoch": 1.715547585840473, "grad_norm": 514.8524780273438, "learning_rate": 1.042340488468525e-07, "loss": 16.0781, "step": 25831 }, { "epoch": 1.7156140001328286, "grad_norm": 299.41082763671875, "learning_rate": 1.0418624438719148e-07, "loss": 17.4688, "step": 25832 }, { "epoch": 1.7156804144251843, "grad_norm": 336.1060485839844, "learning_rate": 1.0413845028978774e-07, "loss": 13.9531, "step": 25833 }, { "epoch": 1.7157468287175401, "grad_norm": 176.50355529785156, "learning_rate": 1.040906665551936e-07, "loss": 14.2969, "step": 25834 }, { "epoch": 1.7158132430098956, "grad_norm": 198.72732543945312, "learning_rate": 1.0404289318396209e-07, "loss": 17.6562, "step": 25835 }, { "epoch": 1.7158796573022514, "grad_norm": 339.25799560546875, "learning_rate": 1.0399513017664574e-07, "loss": 18.6094, "step": 25836 }, { "epoch": 1.7159460715946073, "grad_norm": 164.94151306152344, "learning_rate": 1.0394737753379723e-07, "loss": 15.0781, "step": 25837 }, { "epoch": 1.7160124858869628, "grad_norm": 146.9794921875, "learning_rate": 1.0389963525596834e-07, "loss": 12.7109, "step": 25838 }, { "epoch": 1.7160789001793186, "grad_norm": 162.57437133789062, "learning_rate": 1.038519033437122e-07, "loss": 16.0, "step": 25839 }, { "epoch": 1.7161453144716743, "grad_norm": 223.66371154785156, "learning_rate": 1.0380418179758021e-07, "loss": 17.0312, "step": 25840 }, { "epoch": 1.71621172876403, "grad_norm": 213.18824768066406, "learning_rate": 1.0375647061812476e-07, "loss": 15.7188, "step": 25841 }, { "epoch": 1.7162781430563858, "grad_norm": 212.4232940673828, "learning_rate": 1.037087698058976e-07, "loss": 15.2031, "step": 25842 }, { "epoch": 1.7163445573487415, "grad_norm": 123.95953369140625, "learning_rate": 1.0366107936145064e-07, "loss": 13.5156, "step": 25843 }, { "epoch": 1.7164109716410971, "grad_norm": 216.14796447753906, "learning_rate": 1.0361339928533542e-07, "loss": 11.9531, "step": 25844 }, { "epoch": 1.716477385933453, "grad_norm": 232.84933471679688, "learning_rate": 1.0356572957810361e-07, "loss": 15.5469, "step": 25845 }, { "epoch": 1.7165438002258084, "grad_norm": 309.3574523925781, "learning_rate": 1.0351807024030623e-07, "loss": 18.125, "step": 25846 }, { "epoch": 1.7166102145181643, "grad_norm": 356.4818420410156, "learning_rate": 1.0347042127249506e-07, "loss": 19.5, "step": 25847 }, { "epoch": 1.7166766288105202, "grad_norm": 375.4408264160156, "learning_rate": 1.0342278267522132e-07, "loss": 17.9062, "step": 25848 }, { "epoch": 1.7167430431028756, "grad_norm": 438.359130859375, "learning_rate": 1.0337515444903533e-07, "loss": 14.7031, "step": 25849 }, { "epoch": 1.7168094573952315, "grad_norm": 290.6999816894531, "learning_rate": 1.0332753659448901e-07, "loss": 27.3125, "step": 25850 }, { "epoch": 1.7168758716875872, "grad_norm": 161.54893493652344, "learning_rate": 1.0327992911213246e-07, "loss": 16.5156, "step": 25851 }, { "epoch": 1.7169422859799428, "grad_norm": 211.31333923339844, "learning_rate": 1.0323233200251669e-07, "loss": 19.3125, "step": 25852 }, { "epoch": 1.7170087002722987, "grad_norm": 294.8592529296875, "learning_rate": 1.0318474526619214e-07, "loss": 17.3125, "step": 25853 }, { "epoch": 1.7170751145646543, "grad_norm": 338.69000244140625, "learning_rate": 1.0313716890370926e-07, "loss": 18.3438, "step": 25854 }, { "epoch": 1.71714152885701, "grad_norm": 129.28024291992188, "learning_rate": 1.0308960291561852e-07, "loss": 15.0312, "step": 25855 }, { "epoch": 1.7172079431493659, "grad_norm": 356.0297546386719, "learning_rate": 1.0304204730247024e-07, "loss": 14.1562, "step": 25856 }, { "epoch": 1.7172743574417213, "grad_norm": 232.55079650878906, "learning_rate": 1.0299450206481397e-07, "loss": 16.0938, "step": 25857 }, { "epoch": 1.7173407717340772, "grad_norm": 228.5433349609375, "learning_rate": 1.029469672032005e-07, "loss": 19.0, "step": 25858 }, { "epoch": 1.717407186026433, "grad_norm": 248.7301025390625, "learning_rate": 1.0289944271817896e-07, "loss": 20.9531, "step": 25859 }, { "epoch": 1.7174736003187885, "grad_norm": 239.38145446777344, "learning_rate": 1.0285192861029934e-07, "loss": 13.3906, "step": 25860 }, { "epoch": 1.7175400146111444, "grad_norm": 235.36871337890625, "learning_rate": 1.0280442488011142e-07, "loss": 14.5469, "step": 25861 }, { "epoch": 1.7176064289035, "grad_norm": 476.1346130371094, "learning_rate": 1.0275693152816467e-07, "loss": 17.9062, "step": 25862 }, { "epoch": 1.7176728431958557, "grad_norm": 204.99490356445312, "learning_rate": 1.0270944855500796e-07, "loss": 21.7188, "step": 25863 }, { "epoch": 1.7177392574882115, "grad_norm": 176.17356872558594, "learning_rate": 1.0266197596119131e-07, "loss": 16.2812, "step": 25864 }, { "epoch": 1.7178056717805672, "grad_norm": 494.46697998046875, "learning_rate": 1.0261451374726337e-07, "loss": 31.9688, "step": 25865 }, { "epoch": 1.7178720860729229, "grad_norm": 118.9361343383789, "learning_rate": 1.0256706191377318e-07, "loss": 15.1719, "step": 25866 }, { "epoch": 1.7179385003652787, "grad_norm": 241.77200317382812, "learning_rate": 1.0251962046126982e-07, "loss": 15.4375, "step": 25867 }, { "epoch": 1.7180049146576342, "grad_norm": 481.8039245605469, "learning_rate": 1.0247218939030189e-07, "loss": 16.3438, "step": 25868 }, { "epoch": 1.71807132894999, "grad_norm": 165.7002716064453, "learning_rate": 1.0242476870141825e-07, "loss": 19.6406, "step": 25869 }, { "epoch": 1.718137743242346, "grad_norm": 444.2780456542969, "learning_rate": 1.0237735839516737e-07, "loss": 16.7188, "step": 25870 }, { "epoch": 1.7182041575347013, "grad_norm": 289.0910949707031, "learning_rate": 1.0232995847209724e-07, "loss": 18.5156, "step": 25871 }, { "epoch": 1.7182705718270572, "grad_norm": 104.964599609375, "learning_rate": 1.02282568932757e-07, "loss": 16.7344, "step": 25872 }, { "epoch": 1.7183369861194129, "grad_norm": 366.14373779296875, "learning_rate": 1.0223518977769408e-07, "loss": 20.2188, "step": 25873 }, { "epoch": 1.7184034004117685, "grad_norm": 150.88136291503906, "learning_rate": 1.0218782100745682e-07, "loss": 12.9375, "step": 25874 }, { "epoch": 1.7184698147041244, "grad_norm": 116.7618637084961, "learning_rate": 1.0214046262259323e-07, "loss": 15.0781, "step": 25875 }, { "epoch": 1.71853622899648, "grad_norm": 312.9597473144531, "learning_rate": 1.020931146236509e-07, "loss": 22.4688, "step": 25876 }, { "epoch": 1.7186026432888357, "grad_norm": 157.36187744140625, "learning_rate": 1.0204577701117778e-07, "loss": 10.6094, "step": 25877 }, { "epoch": 1.7186690575811916, "grad_norm": 239.31582641601562, "learning_rate": 1.0199844978572137e-07, "loss": 18.1875, "step": 25878 }, { "epoch": 1.718735471873547, "grad_norm": 190.97549438476562, "learning_rate": 1.0195113294782898e-07, "loss": 15.2812, "step": 25879 }, { "epoch": 1.718801886165903, "grad_norm": 327.03704833984375, "learning_rate": 1.0190382649804796e-07, "loss": 16.2188, "step": 25880 }, { "epoch": 1.7188683004582588, "grad_norm": 244.5810546875, "learning_rate": 1.0185653043692565e-07, "loss": 15.4375, "step": 25881 }, { "epoch": 1.7189347147506142, "grad_norm": 287.4345703125, "learning_rate": 1.0180924476500918e-07, "loss": 17.2031, "step": 25882 }, { "epoch": 1.71900112904297, "grad_norm": 142.3070526123047, "learning_rate": 1.0176196948284532e-07, "loss": 13.6094, "step": 25883 }, { "epoch": 1.7190675433353257, "grad_norm": 258.33367919921875, "learning_rate": 1.0171470459098109e-07, "loss": 21.4062, "step": 25884 }, { "epoch": 1.7191339576276814, "grad_norm": 1042.3974609375, "learning_rate": 1.0166745008996325e-07, "loss": 14.2656, "step": 25885 }, { "epoch": 1.7192003719200373, "grad_norm": 90.2541732788086, "learning_rate": 1.0162020598033827e-07, "loss": 10.0312, "step": 25886 }, { "epoch": 1.719266786212393, "grad_norm": 504.7471008300781, "learning_rate": 1.0157297226265282e-07, "loss": 12.8438, "step": 25887 }, { "epoch": 1.7193332005047486, "grad_norm": 166.27442932128906, "learning_rate": 1.0152574893745291e-07, "loss": 17.5781, "step": 25888 }, { "epoch": 1.7193996147971045, "grad_norm": 205.70960998535156, "learning_rate": 1.0147853600528533e-07, "loss": 16.3906, "step": 25889 }, { "epoch": 1.71946602908946, "grad_norm": 147.10519409179688, "learning_rate": 1.0143133346669585e-07, "loss": 16.9062, "step": 25890 }, { "epoch": 1.7195324433818158, "grad_norm": 90.2742919921875, "learning_rate": 1.013841413222305e-07, "loss": 15.9688, "step": 25891 }, { "epoch": 1.7195988576741716, "grad_norm": 1215.892822265625, "learning_rate": 1.0133695957243515e-07, "loss": 31.8125, "step": 25892 }, { "epoch": 1.719665271966527, "grad_norm": 161.77906799316406, "learning_rate": 1.0128978821785572e-07, "loss": 17.125, "step": 25893 }, { "epoch": 1.719731686258883, "grad_norm": 157.10618591308594, "learning_rate": 1.0124262725903787e-07, "loss": 15.5156, "step": 25894 }, { "epoch": 1.7197981005512386, "grad_norm": 260.3001403808594, "learning_rate": 1.0119547669652718e-07, "loss": 15.4688, "step": 25895 }, { "epoch": 1.7198645148435943, "grad_norm": 270.4018249511719, "learning_rate": 1.0114833653086852e-07, "loss": 18.6875, "step": 25896 }, { "epoch": 1.7199309291359501, "grad_norm": 199.02781677246094, "learning_rate": 1.0110120676260803e-07, "loss": 11.3906, "step": 25897 }, { "epoch": 1.7199973434283058, "grad_norm": 278.6924743652344, "learning_rate": 1.0105408739229027e-07, "loss": 21.3438, "step": 25898 }, { "epoch": 1.7200637577206614, "grad_norm": 510.16595458984375, "learning_rate": 1.0100697842046035e-07, "loss": 23.8125, "step": 25899 }, { "epoch": 1.7201301720130173, "grad_norm": 102.97457885742188, "learning_rate": 1.0095987984766341e-07, "loss": 16.7031, "step": 25900 }, { "epoch": 1.7201965863053728, "grad_norm": 258.4420166015625, "learning_rate": 1.0091279167444433e-07, "loss": 15.2188, "step": 25901 }, { "epoch": 1.7202630005977286, "grad_norm": 250.19337463378906, "learning_rate": 1.0086571390134724e-07, "loss": 21.2188, "step": 25902 }, { "epoch": 1.7203294148900845, "grad_norm": 170.60842895507812, "learning_rate": 1.0081864652891758e-07, "loss": 15.7656, "step": 25903 }, { "epoch": 1.72039582918244, "grad_norm": 384.2850646972656, "learning_rate": 1.0077158955769904e-07, "loss": 13.1406, "step": 25904 }, { "epoch": 1.7204622434747958, "grad_norm": 220.50115966796875, "learning_rate": 1.0072454298823619e-07, "loss": 15.8594, "step": 25905 }, { "epoch": 1.7205286577671515, "grad_norm": 163.35678100585938, "learning_rate": 1.0067750682107334e-07, "loss": 12.375, "step": 25906 }, { "epoch": 1.7205950720595071, "grad_norm": 133.3751678466797, "learning_rate": 1.0063048105675454e-07, "loss": 12.8594, "step": 25907 }, { "epoch": 1.720661486351863, "grad_norm": 168.95892333984375, "learning_rate": 1.0058346569582366e-07, "loss": 15.3438, "step": 25908 }, { "epoch": 1.7207279006442187, "grad_norm": 206.59896850585938, "learning_rate": 1.0053646073882483e-07, "loss": 16.4219, "step": 25909 }, { "epoch": 1.7207943149365743, "grad_norm": 205.71397399902344, "learning_rate": 1.0048946618630117e-07, "loss": 14.5625, "step": 25910 }, { "epoch": 1.7208607292289302, "grad_norm": 130.00753784179688, "learning_rate": 1.0044248203879702e-07, "loss": 17.1094, "step": 25911 }, { "epoch": 1.7209271435212856, "grad_norm": 179.7127685546875, "learning_rate": 1.0039550829685539e-07, "loss": 13.0938, "step": 25912 }, { "epoch": 1.7209935578136415, "grad_norm": 560.2591552734375, "learning_rate": 1.0034854496101986e-07, "loss": 17.2969, "step": 25913 }, { "epoch": 1.7210599721059974, "grad_norm": 157.8686981201172, "learning_rate": 1.0030159203183341e-07, "loss": 14.0312, "step": 25914 }, { "epoch": 1.7211263863983528, "grad_norm": 82.04119873046875, "learning_rate": 1.0025464950983952e-07, "loss": 15.8125, "step": 25915 }, { "epoch": 1.7211928006907087, "grad_norm": 269.7477111816406, "learning_rate": 1.0020771739558088e-07, "loss": 17.2812, "step": 25916 }, { "epoch": 1.7212592149830643, "grad_norm": 125.68821716308594, "learning_rate": 1.0016079568960079e-07, "loss": 15.7188, "step": 25917 }, { "epoch": 1.72132562927542, "grad_norm": 437.76300048828125, "learning_rate": 1.0011388439244128e-07, "loss": 27.7031, "step": 25918 }, { "epoch": 1.7213920435677759, "grad_norm": 118.25093841552734, "learning_rate": 1.0006698350464593e-07, "loss": 19.0625, "step": 25919 }, { "epoch": 1.7214584578601315, "grad_norm": 460.2245178222656, "learning_rate": 1.0002009302675651e-07, "loss": 11.8438, "step": 25920 }, { "epoch": 1.7215248721524872, "grad_norm": 471.0545654296875, "learning_rate": 9.99732129593156e-08, "loss": 11.2031, "step": 25921 }, { "epoch": 1.721591286444843, "grad_norm": 275.39776611328125, "learning_rate": 9.992634330286587e-08, "loss": 15.4062, "step": 25922 }, { "epoch": 1.7216577007371985, "grad_norm": 262.2104797363281, "learning_rate": 9.987948405794911e-08, "loss": 16.0625, "step": 25923 }, { "epoch": 1.7217241150295544, "grad_norm": 186.34593200683594, "learning_rate": 9.983263522510732e-08, "loss": 12.7656, "step": 25924 }, { "epoch": 1.7217905293219102, "grad_norm": 156.2871551513672, "learning_rate": 9.978579680488253e-08, "loss": 12.2656, "step": 25925 }, { "epoch": 1.7218569436142657, "grad_norm": 312.624267578125, "learning_rate": 9.973896879781685e-08, "loss": 12.0625, "step": 25926 }, { "epoch": 1.7219233579066215, "grad_norm": 213.31434631347656, "learning_rate": 9.969215120445118e-08, "loss": 20.8906, "step": 25927 }, { "epoch": 1.7219897721989772, "grad_norm": 121.37708282470703, "learning_rate": 9.964534402532799e-08, "loss": 14.7188, "step": 25928 }, { "epoch": 1.7220561864913329, "grad_norm": 190.54466247558594, "learning_rate": 9.959854726098805e-08, "loss": 14.3906, "step": 25929 }, { "epoch": 1.7221226007836887, "grad_norm": 189.08912658691406, "learning_rate": 9.955176091197304e-08, "loss": 16.1172, "step": 25930 }, { "epoch": 1.7221890150760444, "grad_norm": 129.9619903564453, "learning_rate": 9.950498497882387e-08, "loss": 11.2031, "step": 25931 }, { "epoch": 1.7222554293684, "grad_norm": 194.7342071533203, "learning_rate": 9.945821946208189e-08, "loss": 16.0625, "step": 25932 }, { "epoch": 1.722321843660756, "grad_norm": 136.89373779296875, "learning_rate": 9.941146436228787e-08, "loss": 13.3125, "step": 25933 }, { "epoch": 1.7223882579531113, "grad_norm": 1449.1190185546875, "learning_rate": 9.936471967998294e-08, "loss": 13.5, "step": 25934 }, { "epoch": 1.7224546722454672, "grad_norm": 356.0588684082031, "learning_rate": 9.931798541570724e-08, "loss": 16.0469, "step": 25935 }, { "epoch": 1.722521086537823, "grad_norm": 198.884765625, "learning_rate": 9.92712615700021e-08, "loss": 24.8438, "step": 25936 }, { "epoch": 1.7225875008301785, "grad_norm": 171.84446716308594, "learning_rate": 9.922454814340742e-08, "loss": 15.7031, "step": 25937 }, { "epoch": 1.7226539151225344, "grad_norm": 206.0782928466797, "learning_rate": 9.917784513646366e-08, "loss": 15.2656, "step": 25938 }, { "epoch": 1.72272032941489, "grad_norm": 228.6842041015625, "learning_rate": 9.913115254971128e-08, "loss": 16.0469, "step": 25939 }, { "epoch": 1.7227867437072457, "grad_norm": 234.1576690673828, "learning_rate": 9.908447038369016e-08, "loss": 12.4453, "step": 25940 }, { "epoch": 1.7228531579996016, "grad_norm": 401.1042785644531, "learning_rate": 9.903779863894046e-08, "loss": 17.7031, "step": 25941 }, { "epoch": 1.7229195722919572, "grad_norm": 104.28253936767578, "learning_rate": 9.899113731600206e-08, "loss": 13.5781, "step": 25942 }, { "epoch": 1.722985986584313, "grad_norm": 306.4199523925781, "learning_rate": 9.894448641541453e-08, "loss": 17.75, "step": 25943 }, { "epoch": 1.7230524008766688, "grad_norm": 431.91522216796875, "learning_rate": 9.889784593771765e-08, "loss": 18.1719, "step": 25944 }, { "epoch": 1.7231188151690242, "grad_norm": 310.2216491699219, "learning_rate": 9.88512158834508e-08, "loss": 17.0625, "step": 25945 }, { "epoch": 1.72318522946138, "grad_norm": 229.28106689453125, "learning_rate": 9.880459625315351e-08, "loss": 13.75, "step": 25946 }, { "epoch": 1.723251643753736, "grad_norm": 239.10739135742188, "learning_rate": 9.875798704736493e-08, "loss": 11.5469, "step": 25947 }, { "epoch": 1.7233180580460914, "grad_norm": 395.841064453125, "learning_rate": 9.871138826662451e-08, "loss": 11.8594, "step": 25948 }, { "epoch": 1.7233844723384473, "grad_norm": 200.8102569580078, "learning_rate": 9.866479991147058e-08, "loss": 12.3125, "step": 25949 }, { "epoch": 1.723450886630803, "grad_norm": 208.4303436279297, "learning_rate": 9.861822198244297e-08, "loss": 15.9062, "step": 25950 }, { "epoch": 1.7235173009231586, "grad_norm": 413.5968017578125, "learning_rate": 9.857165448007975e-08, "loss": 19.4844, "step": 25951 }, { "epoch": 1.7235837152155145, "grad_norm": 160.15164184570312, "learning_rate": 9.852509740491988e-08, "loss": 13.5469, "step": 25952 }, { "epoch": 1.72365012950787, "grad_norm": 237.56301879882812, "learning_rate": 9.847855075750188e-08, "loss": 12.3594, "step": 25953 }, { "epoch": 1.7237165438002258, "grad_norm": 184.19625854492188, "learning_rate": 9.843201453836414e-08, "loss": 15.0, "step": 25954 }, { "epoch": 1.7237829580925816, "grad_norm": 254.65428161621094, "learning_rate": 9.838548874804497e-08, "loss": 15.8594, "step": 25955 }, { "epoch": 1.723849372384937, "grad_norm": 181.57582092285156, "learning_rate": 9.833897338708274e-08, "loss": 15.3594, "step": 25956 }, { "epoch": 1.723915786677293, "grad_norm": 242.7000274658203, "learning_rate": 9.829246845601502e-08, "loss": 15.8594, "step": 25957 }, { "epoch": 1.7239822009696488, "grad_norm": 210.32923889160156, "learning_rate": 9.824597395538015e-08, "loss": 15.2656, "step": 25958 }, { "epoch": 1.7240486152620043, "grad_norm": 173.86300659179688, "learning_rate": 9.819948988571614e-08, "loss": 19.4219, "step": 25959 }, { "epoch": 1.7241150295543601, "grad_norm": 545.6322021484375, "learning_rate": 9.815301624756e-08, "loss": 14.0938, "step": 25960 }, { "epoch": 1.7241814438467158, "grad_norm": 140.8660888671875, "learning_rate": 9.810655304145022e-08, "loss": 13.2188, "step": 25961 }, { "epoch": 1.7242478581390714, "grad_norm": 514.1328125, "learning_rate": 9.806010026792344e-08, "loss": 15.1094, "step": 25962 }, { "epoch": 1.7243142724314273, "grad_norm": 138.68118286132812, "learning_rate": 9.801365792751748e-08, "loss": 15.4375, "step": 25963 }, { "epoch": 1.724380686723783, "grad_norm": 161.9535675048828, "learning_rate": 9.796722602076935e-08, "loss": 17.4375, "step": 25964 }, { "epoch": 1.7244471010161386, "grad_norm": 230.32081604003906, "learning_rate": 9.792080454821638e-08, "loss": 14.5781, "step": 25965 }, { "epoch": 1.7245135153084945, "grad_norm": 270.5963134765625, "learning_rate": 9.787439351039495e-08, "loss": 16.8438, "step": 25966 }, { "epoch": 1.72457992960085, "grad_norm": 328.42303466796875, "learning_rate": 9.782799290784272e-08, "loss": 16.5156, "step": 25967 }, { "epoch": 1.7246463438932058, "grad_norm": 442.441650390625, "learning_rate": 9.778160274109592e-08, "loss": 16.6875, "step": 25968 }, { "epoch": 1.7247127581855617, "grad_norm": 318.92645263671875, "learning_rate": 9.773522301069126e-08, "loss": 17.8906, "step": 25969 }, { "epoch": 1.7247791724779171, "grad_norm": 1665.996826171875, "learning_rate": 9.768885371716529e-08, "loss": 14.2812, "step": 25970 }, { "epoch": 1.724845586770273, "grad_norm": 201.50775146484375, "learning_rate": 9.764249486105436e-08, "loss": 15.25, "step": 25971 }, { "epoch": 1.7249120010626287, "grad_norm": 297.03656005859375, "learning_rate": 9.759614644289472e-08, "loss": 20.2969, "step": 25972 }, { "epoch": 1.7249784153549843, "grad_norm": 130.90621948242188, "learning_rate": 9.754980846322269e-08, "loss": 10.8672, "step": 25973 }, { "epoch": 1.7250448296473402, "grad_norm": 142.97991943359375, "learning_rate": 9.750348092257365e-08, "loss": 16.3594, "step": 25974 }, { "epoch": 1.7251112439396958, "grad_norm": 161.49192810058594, "learning_rate": 9.745716382148439e-08, "loss": 13.0312, "step": 25975 }, { "epoch": 1.7251776582320515, "grad_norm": 515.5780029296875, "learning_rate": 9.741085716049003e-08, "loss": 14.2812, "step": 25976 }, { "epoch": 1.7252440725244074, "grad_norm": 147.74761962890625, "learning_rate": 9.736456094012635e-08, "loss": 12.4922, "step": 25977 }, { "epoch": 1.725310486816763, "grad_norm": 350.5162353515625, "learning_rate": 9.731827516092894e-08, "loss": 20.0156, "step": 25978 }, { "epoch": 1.7253769011091187, "grad_norm": 231.5416717529297, "learning_rate": 9.727199982343325e-08, "loss": 17.7188, "step": 25979 }, { "epoch": 1.7254433154014746, "grad_norm": 206.2393341064453, "learning_rate": 9.722573492817454e-08, "loss": 12.3281, "step": 25980 }, { "epoch": 1.72550972969383, "grad_norm": 173.60147094726562, "learning_rate": 9.717948047568814e-08, "loss": 19.0781, "step": 25981 }, { "epoch": 1.7255761439861859, "grad_norm": 838.39453125, "learning_rate": 9.71332364665085e-08, "loss": 23.7656, "step": 25982 }, { "epoch": 1.7256425582785415, "grad_norm": 133.2779083251953, "learning_rate": 9.708700290117133e-08, "loss": 13.8906, "step": 25983 }, { "epoch": 1.7257089725708972, "grad_norm": 314.72735595703125, "learning_rate": 9.704077978021086e-08, "loss": 12.2578, "step": 25984 }, { "epoch": 1.725775386863253, "grad_norm": 212.38885498046875, "learning_rate": 9.69945671041621e-08, "loss": 10.125, "step": 25985 }, { "epoch": 1.7258418011556087, "grad_norm": 303.94122314453125, "learning_rate": 9.694836487355928e-08, "loss": 23.6875, "step": 25986 }, { "epoch": 1.7259082154479644, "grad_norm": 153.67308044433594, "learning_rate": 9.690217308893745e-08, "loss": 20.7812, "step": 25987 }, { "epoch": 1.7259746297403202, "grad_norm": 408.3115234375, "learning_rate": 9.685599175083004e-08, "loss": 12.6406, "step": 25988 }, { "epoch": 1.7260410440326759, "grad_norm": 133.14332580566406, "learning_rate": 9.680982085977207e-08, "loss": 15.1562, "step": 25989 }, { "epoch": 1.7261074583250315, "grad_norm": 177.7230682373047, "learning_rate": 9.676366041629714e-08, "loss": 13.8438, "step": 25990 }, { "epoch": 1.7261738726173874, "grad_norm": 323.1013488769531, "learning_rate": 9.671751042093934e-08, "loss": 13.8438, "step": 25991 }, { "epoch": 1.7262402869097428, "grad_norm": 154.27796936035156, "learning_rate": 9.66713708742326e-08, "loss": 12.4688, "step": 25992 }, { "epoch": 1.7263067012020987, "grad_norm": 468.07952880859375, "learning_rate": 9.662524177671039e-08, "loss": 22.0156, "step": 25993 }, { "epoch": 1.7263731154944544, "grad_norm": 546.9329833984375, "learning_rate": 9.657912312890659e-08, "loss": 18.1875, "step": 25994 }, { "epoch": 1.72643952978681, "grad_norm": 283.77093505859375, "learning_rate": 9.653301493135457e-08, "loss": 16.1406, "step": 25995 }, { "epoch": 1.726505944079166, "grad_norm": 139.6654052734375, "learning_rate": 9.648691718458756e-08, "loss": 12.8594, "step": 25996 }, { "epoch": 1.7265723583715216, "grad_norm": 1469.4830322265625, "learning_rate": 9.644082988913893e-08, "loss": 14.4922, "step": 25997 }, { "epoch": 1.7266387726638772, "grad_norm": 170.94908142089844, "learning_rate": 9.639475304554201e-08, "loss": 14.0234, "step": 25998 }, { "epoch": 1.726705186956233, "grad_norm": 101.14415740966797, "learning_rate": 9.634868665432916e-08, "loss": 14.0703, "step": 25999 }, { "epoch": 1.7267716012485888, "grad_norm": 401.15594482421875, "learning_rate": 9.630263071603395e-08, "loss": 23.3906, "step": 26000 }, { "epoch": 1.7268380155409444, "grad_norm": 120.2745590209961, "learning_rate": 9.625658523118862e-08, "loss": 14.5156, "step": 26001 }, { "epoch": 1.7269044298333003, "grad_norm": 401.9844665527344, "learning_rate": 9.621055020032597e-08, "loss": 15.8125, "step": 26002 }, { "epoch": 1.7269708441256557, "grad_norm": 184.81182861328125, "learning_rate": 9.616452562397859e-08, "loss": 16.8594, "step": 26003 }, { "epoch": 1.7270372584180116, "grad_norm": 137.85887145996094, "learning_rate": 9.611851150267892e-08, "loss": 13.9922, "step": 26004 }, { "epoch": 1.7271036727103672, "grad_norm": 164.39271545410156, "learning_rate": 9.607250783695875e-08, "loss": 17.4062, "step": 26005 }, { "epoch": 1.727170087002723, "grad_norm": 169.47755432128906, "learning_rate": 9.602651462735089e-08, "loss": 14.1562, "step": 26006 }, { "epoch": 1.7272365012950788, "grad_norm": 171.98680114746094, "learning_rate": 9.598053187438693e-08, "loss": 13.125, "step": 26007 }, { "epoch": 1.7273029155874344, "grad_norm": 246.3172149658203, "learning_rate": 9.593455957859886e-08, "loss": 16.7344, "step": 26008 }, { "epoch": 1.72736932987979, "grad_norm": 336.4993591308594, "learning_rate": 9.58885977405185e-08, "loss": 14.4531, "step": 26009 }, { "epoch": 1.727435744172146, "grad_norm": 301.70147705078125, "learning_rate": 9.584264636067751e-08, "loss": 18.2656, "step": 26010 }, { "epoch": 1.7275021584645016, "grad_norm": 160.5574493408203, "learning_rate": 9.579670543960749e-08, "loss": 14.2109, "step": 26011 }, { "epoch": 1.7275685727568573, "grad_norm": 419.95013427734375, "learning_rate": 9.57507749778399e-08, "loss": 19.0938, "step": 26012 }, { "epoch": 1.7276349870492131, "grad_norm": 310.8310852050781, "learning_rate": 9.570485497590552e-08, "loss": 21.1875, "step": 26013 }, { "epoch": 1.7277014013415686, "grad_norm": 182.9268798828125, "learning_rate": 9.565894543433639e-08, "loss": 16.5938, "step": 26014 }, { "epoch": 1.7277678156339245, "grad_norm": 113.51620483398438, "learning_rate": 9.561304635366285e-08, "loss": 10.9922, "step": 26015 }, { "epoch": 1.72783422992628, "grad_norm": 107.34459686279297, "learning_rate": 9.556715773441604e-08, "loss": 9.8125, "step": 26016 }, { "epoch": 1.7279006442186358, "grad_norm": 126.89913940429688, "learning_rate": 9.552127957712696e-08, "loss": 10.1875, "step": 26017 }, { "epoch": 1.7279670585109916, "grad_norm": 154.84010314941406, "learning_rate": 9.54754118823261e-08, "loss": 13.3906, "step": 26018 }, { "epoch": 1.7280334728033473, "grad_norm": 520.8294677734375, "learning_rate": 9.542955465054403e-08, "loss": 18.7344, "step": 26019 }, { "epoch": 1.728099887095703, "grad_norm": 102.4485092163086, "learning_rate": 9.538370788231154e-08, "loss": 12.875, "step": 26020 }, { "epoch": 1.7281663013880588, "grad_norm": 374.0919494628906, "learning_rate": 9.533787157815831e-08, "loss": 16.25, "step": 26021 }, { "epoch": 1.7282327156804145, "grad_norm": 174.86575317382812, "learning_rate": 9.529204573861527e-08, "loss": 14.1875, "step": 26022 }, { "epoch": 1.7282991299727701, "grad_norm": 248.6500244140625, "learning_rate": 9.524623036421198e-08, "loss": 15.5781, "step": 26023 }, { "epoch": 1.728365544265126, "grad_norm": 492.7840576171875, "learning_rate": 9.520042545547857e-08, "loss": 13.6016, "step": 26024 }, { "epoch": 1.7284319585574814, "grad_norm": 227.93861389160156, "learning_rate": 9.515463101294496e-08, "loss": 13.2344, "step": 26025 }, { "epoch": 1.7284983728498373, "grad_norm": 289.76690673828125, "learning_rate": 9.510884703714095e-08, "loss": 17.9531, "step": 26026 }, { "epoch": 1.728564787142193, "grad_norm": 110.02981567382812, "learning_rate": 9.506307352859566e-08, "loss": 14.0781, "step": 26027 }, { "epoch": 1.7286312014345486, "grad_norm": 300.5476379394531, "learning_rate": 9.501731048783923e-08, "loss": 19.5, "step": 26028 }, { "epoch": 1.7286976157269045, "grad_norm": 74.21595001220703, "learning_rate": 9.497155791540057e-08, "loss": 10.3438, "step": 26029 }, { "epoch": 1.7287640300192602, "grad_norm": 231.8955078125, "learning_rate": 9.492581581180903e-08, "loss": 14.8906, "step": 26030 }, { "epoch": 1.7288304443116158, "grad_norm": 131.72579956054688, "learning_rate": 9.488008417759385e-08, "loss": 15.7969, "step": 26031 }, { "epoch": 1.7288968586039717, "grad_norm": 306.80462646484375, "learning_rate": 9.483436301328385e-08, "loss": 15.3984, "step": 26032 }, { "epoch": 1.7289632728963273, "grad_norm": 188.97669982910156, "learning_rate": 9.478865231940802e-08, "loss": 21.3125, "step": 26033 }, { "epoch": 1.729029687188683, "grad_norm": 193.29417419433594, "learning_rate": 9.474295209649508e-08, "loss": 12.5625, "step": 26034 }, { "epoch": 1.7290961014810389, "grad_norm": 211.7382354736328, "learning_rate": 9.469726234507357e-08, "loss": 18.4219, "step": 26035 }, { "epoch": 1.7291625157733943, "grad_norm": 127.00166320800781, "learning_rate": 9.465158306567223e-08, "loss": 14.3984, "step": 26036 }, { "epoch": 1.7292289300657502, "grad_norm": 892.2772216796875, "learning_rate": 9.460591425881948e-08, "loss": 22.3906, "step": 26037 }, { "epoch": 1.7292953443581058, "grad_norm": 138.33148193359375, "learning_rate": 9.456025592504302e-08, "loss": 20.2031, "step": 26038 }, { "epoch": 1.7293617586504615, "grad_norm": 256.57513427734375, "learning_rate": 9.451460806487187e-08, "loss": 17.2188, "step": 26039 }, { "epoch": 1.7294281729428174, "grad_norm": 154.9647216796875, "learning_rate": 9.446897067883331e-08, "loss": 16.4219, "step": 26040 }, { "epoch": 1.729494587235173, "grad_norm": 219.03477478027344, "learning_rate": 9.442334376745564e-08, "loss": 11.7812, "step": 26041 }, { "epoch": 1.7295610015275287, "grad_norm": 201.07192993164062, "learning_rate": 9.437772733126647e-08, "loss": 13.4375, "step": 26042 }, { "epoch": 1.7296274158198846, "grad_norm": 256.44085693359375, "learning_rate": 9.43321213707936e-08, "loss": 22.3125, "step": 26043 }, { "epoch": 1.7296938301122402, "grad_norm": 131.94053649902344, "learning_rate": 9.428652588656449e-08, "loss": 13.7031, "step": 26044 }, { "epoch": 1.7297602444045959, "grad_norm": 462.759521484375, "learning_rate": 9.424094087910672e-08, "loss": 16.0938, "step": 26045 }, { "epoch": 1.7298266586969517, "grad_norm": 185.2436065673828, "learning_rate": 9.419536634894709e-08, "loss": 15.3281, "step": 26046 }, { "epoch": 1.7298930729893072, "grad_norm": 206.67977905273438, "learning_rate": 9.414980229661363e-08, "loss": 13.6562, "step": 26047 }, { "epoch": 1.729959487281663, "grad_norm": 323.5589294433594, "learning_rate": 9.410424872263256e-08, "loss": 15.5938, "step": 26048 }, { "epoch": 1.7300259015740187, "grad_norm": 249.48036193847656, "learning_rate": 9.405870562753126e-08, "loss": 15.4375, "step": 26049 }, { "epoch": 1.7300923158663744, "grad_norm": 313.10211181640625, "learning_rate": 9.401317301183653e-08, "loss": 20.8125, "step": 26050 }, { "epoch": 1.7301587301587302, "grad_norm": 466.55792236328125, "learning_rate": 9.396765087607506e-08, "loss": 18.1562, "step": 26051 }, { "epoch": 1.7302251444510859, "grad_norm": 138.0037078857422, "learning_rate": 9.392213922077297e-08, "loss": 16.25, "step": 26052 }, { "epoch": 1.7302915587434415, "grad_norm": 471.6734619140625, "learning_rate": 9.387663804645762e-08, "loss": 22.375, "step": 26053 }, { "epoch": 1.7303579730357974, "grad_norm": 355.32781982421875, "learning_rate": 9.38311473536545e-08, "loss": 13.5781, "step": 26054 }, { "epoch": 1.730424387328153, "grad_norm": 187.0042266845703, "learning_rate": 9.378566714289016e-08, "loss": 16.1875, "step": 26055 }, { "epoch": 1.7304908016205087, "grad_norm": 263.4911193847656, "learning_rate": 9.374019741469075e-08, "loss": 18.5, "step": 26056 }, { "epoch": 1.7305572159128646, "grad_norm": 169.1753387451172, "learning_rate": 9.369473816958207e-08, "loss": 17.2812, "step": 26057 }, { "epoch": 1.73062363020522, "grad_norm": 167.57818603515625, "learning_rate": 9.364928940809003e-08, "loss": 14.9062, "step": 26058 }, { "epoch": 1.730690044497576, "grad_norm": 264.27899169921875, "learning_rate": 9.360385113074066e-08, "loss": 17.0078, "step": 26059 }, { "epoch": 1.7307564587899316, "grad_norm": 185.68870544433594, "learning_rate": 9.355842333805885e-08, "loss": 16.1406, "step": 26060 }, { "epoch": 1.7308228730822872, "grad_norm": 190.0272216796875, "learning_rate": 9.351300603057088e-08, "loss": 16.0781, "step": 26061 }, { "epoch": 1.730889287374643, "grad_norm": 158.62484741210938, "learning_rate": 9.346759920880166e-08, "loss": 17.1328, "step": 26062 }, { "epoch": 1.7309557016669987, "grad_norm": 146.66799926757812, "learning_rate": 9.34222028732764e-08, "loss": 17.3125, "step": 26063 }, { "epoch": 1.7310221159593544, "grad_norm": 252.2549591064453, "learning_rate": 9.337681702452039e-08, "loss": 16.5781, "step": 26064 }, { "epoch": 1.7310885302517103, "grad_norm": 283.2375183105469, "learning_rate": 9.33314416630585e-08, "loss": 25.0938, "step": 26065 }, { "epoch": 1.731154944544066, "grad_norm": 182.72262573242188, "learning_rate": 9.32860767894158e-08, "loss": 22.3594, "step": 26066 }, { "epoch": 1.7312213588364216, "grad_norm": 178.06166076660156, "learning_rate": 9.324072240411707e-08, "loss": 17.875, "step": 26067 }, { "epoch": 1.7312877731287775, "grad_norm": 188.7586669921875, "learning_rate": 9.319537850768655e-08, "loss": 22.8281, "step": 26068 }, { "epoch": 1.731354187421133, "grad_norm": 656.4755859375, "learning_rate": 9.315004510064884e-08, "loss": 17.9375, "step": 26069 }, { "epoch": 1.7314206017134888, "grad_norm": 161.6780242919922, "learning_rate": 9.310472218352882e-08, "loss": 18.5469, "step": 26070 }, { "epoch": 1.7314870160058444, "grad_norm": 927.461669921875, "learning_rate": 9.305940975685034e-08, "loss": 25.8281, "step": 26071 }, { "epoch": 1.7315534302982, "grad_norm": 620.41455078125, "learning_rate": 9.30141078211375e-08, "loss": 17.7656, "step": 26072 }, { "epoch": 1.731619844590556, "grad_norm": 132.83241271972656, "learning_rate": 9.296881637691456e-08, "loss": 16.2656, "step": 26073 }, { "epoch": 1.7316862588829116, "grad_norm": 364.4736022949219, "learning_rate": 9.292353542470532e-08, "loss": 22.0625, "step": 26074 }, { "epoch": 1.7317526731752673, "grad_norm": 292.7309875488281, "learning_rate": 9.28782649650336e-08, "loss": 16.8594, "step": 26075 }, { "epoch": 1.7318190874676231, "grad_norm": 140.97897338867188, "learning_rate": 9.283300499842317e-08, "loss": 17.7969, "step": 26076 }, { "epoch": 1.7318855017599788, "grad_norm": 297.061279296875, "learning_rate": 9.278775552539709e-08, "loss": 17.6406, "step": 26077 }, { "epoch": 1.7319519160523345, "grad_norm": 152.98614501953125, "learning_rate": 9.274251654647947e-08, "loss": 15.8281, "step": 26078 }, { "epoch": 1.7320183303446903, "grad_norm": 181.5089569091797, "learning_rate": 9.269728806219313e-08, "loss": 16.4844, "step": 26079 }, { "epoch": 1.7320847446370458, "grad_norm": 160.7008514404297, "learning_rate": 9.265207007306131e-08, "loss": 19.6875, "step": 26080 }, { "epoch": 1.7321511589294016, "grad_norm": 491.9972839355469, "learning_rate": 9.260686257960714e-08, "loss": 20.6875, "step": 26081 }, { "epoch": 1.7322175732217573, "grad_norm": 195.16184997558594, "learning_rate": 9.256166558235367e-08, "loss": 14.4922, "step": 26082 }, { "epoch": 1.732283987514113, "grad_norm": 421.77008056640625, "learning_rate": 9.251647908182347e-08, "loss": 15.5625, "step": 26083 }, { "epoch": 1.7323504018064688, "grad_norm": 255.205322265625, "learning_rate": 9.247130307853956e-08, "loss": 23.0312, "step": 26084 }, { "epoch": 1.7324168160988245, "grad_norm": 161.0686798095703, "learning_rate": 9.242613757302397e-08, "loss": 13.5, "step": 26085 }, { "epoch": 1.7324832303911801, "grad_norm": 211.0172576904297, "learning_rate": 9.238098256579985e-08, "loss": 17.9844, "step": 26086 }, { "epoch": 1.732549644683536, "grad_norm": 209.2916259765625, "learning_rate": 9.233583805738898e-08, "loss": 14.2891, "step": 26087 }, { "epoch": 1.7326160589758917, "grad_norm": 215.9180450439453, "learning_rate": 9.229070404831362e-08, "loss": 15.8281, "step": 26088 }, { "epoch": 1.7326824732682473, "grad_norm": 187.84732055664062, "learning_rate": 9.224558053909615e-08, "loss": 17.3281, "step": 26089 }, { "epoch": 1.7327488875606032, "grad_norm": 422.8512268066406, "learning_rate": 9.220046753025845e-08, "loss": 13.6719, "step": 26090 }, { "epoch": 1.7328153018529586, "grad_norm": 258.511962890625, "learning_rate": 9.215536502232201e-08, "loss": 15.3594, "step": 26091 }, { "epoch": 1.7328817161453145, "grad_norm": 274.01885986328125, "learning_rate": 9.21102730158091e-08, "loss": 17.3438, "step": 26092 }, { "epoch": 1.7329481304376702, "grad_norm": 203.73977661132812, "learning_rate": 9.206519151124082e-08, "loss": 18.9688, "step": 26093 }, { "epoch": 1.7330145447300258, "grad_norm": 154.4518280029297, "learning_rate": 9.202012050913888e-08, "loss": 16.8125, "step": 26094 }, { "epoch": 1.7330809590223817, "grad_norm": 119.2303466796875, "learning_rate": 9.197506001002464e-08, "loss": 15.8594, "step": 26095 }, { "epoch": 1.7331473733147373, "grad_norm": 532.8281860351562, "learning_rate": 9.193001001441936e-08, "loss": 18.9062, "step": 26096 }, { "epoch": 1.733213787607093, "grad_norm": 165.4431610107422, "learning_rate": 9.188497052284405e-08, "loss": 16.3906, "step": 26097 }, { "epoch": 1.7332802018994489, "grad_norm": 286.6299133300781, "learning_rate": 9.183994153581987e-08, "loss": 20.7188, "step": 26098 }, { "epoch": 1.7333466161918045, "grad_norm": 652.0991821289062, "learning_rate": 9.179492305386727e-08, "loss": 21.625, "step": 26099 }, { "epoch": 1.7334130304841602, "grad_norm": 323.6949157714844, "learning_rate": 9.174991507750773e-08, "loss": 12.4219, "step": 26100 }, { "epoch": 1.733479444776516, "grad_norm": 96.44269561767578, "learning_rate": 9.170491760726118e-08, "loss": 10.4453, "step": 26101 }, { "epoch": 1.7335458590688715, "grad_norm": 132.44166564941406, "learning_rate": 9.165993064364841e-08, "loss": 14.3984, "step": 26102 }, { "epoch": 1.7336122733612274, "grad_norm": 199.4113311767578, "learning_rate": 9.161495418718979e-08, "loss": 15.7656, "step": 26103 }, { "epoch": 1.733678687653583, "grad_norm": 216.85702514648438, "learning_rate": 9.156998823840556e-08, "loss": 15.5625, "step": 26104 }, { "epoch": 1.7337451019459387, "grad_norm": 175.4969940185547, "learning_rate": 9.152503279781588e-08, "loss": 14.5, "step": 26105 }, { "epoch": 1.7338115162382945, "grad_norm": 257.7153625488281, "learning_rate": 9.148008786594074e-08, "loss": 14.1719, "step": 26106 }, { "epoch": 1.7338779305306502, "grad_norm": 299.2811584472656, "learning_rate": 9.14351534433e-08, "loss": 16.8438, "step": 26107 }, { "epoch": 1.7339443448230059, "grad_norm": 146.84873962402344, "learning_rate": 9.139022953041364e-08, "loss": 19.0625, "step": 26108 }, { "epoch": 1.7340107591153617, "grad_norm": 104.94176483154297, "learning_rate": 9.134531612780128e-08, "loss": 13.6719, "step": 26109 }, { "epoch": 1.7340771734077174, "grad_norm": 250.64027404785156, "learning_rate": 9.130041323598204e-08, "loss": 15.0625, "step": 26110 }, { "epoch": 1.734143587700073, "grad_norm": 200.078125, "learning_rate": 9.125552085547595e-08, "loss": 13.7188, "step": 26111 }, { "epoch": 1.734210001992429, "grad_norm": 171.7620849609375, "learning_rate": 9.12106389868017e-08, "loss": 14.7969, "step": 26112 }, { "epoch": 1.7342764162847844, "grad_norm": 204.51824951171875, "learning_rate": 9.11657676304789e-08, "loss": 23.3828, "step": 26113 }, { "epoch": 1.7343428305771402, "grad_norm": 148.4524383544922, "learning_rate": 9.112090678702633e-08, "loss": 14.2109, "step": 26114 }, { "epoch": 1.7344092448694959, "grad_norm": 268.0737609863281, "learning_rate": 9.107605645696326e-08, "loss": 15.3594, "step": 26115 }, { "epoch": 1.7344756591618515, "grad_norm": 157.29403686523438, "learning_rate": 9.103121664080793e-08, "loss": 15.5156, "step": 26116 }, { "epoch": 1.7345420734542074, "grad_norm": 212.7645263671875, "learning_rate": 9.098638733907959e-08, "loss": 15.1797, "step": 26117 }, { "epoch": 1.734608487746563, "grad_norm": 247.84417724609375, "learning_rate": 9.09415685522964e-08, "loss": 21.2188, "step": 26118 }, { "epoch": 1.7346749020389187, "grad_norm": 321.4837951660156, "learning_rate": 9.089676028097704e-08, "loss": 20.4062, "step": 26119 }, { "epoch": 1.7347413163312746, "grad_norm": 444.1607360839844, "learning_rate": 9.085196252563964e-08, "loss": 14.4375, "step": 26120 }, { "epoch": 1.7348077306236303, "grad_norm": 197.66152954101562, "learning_rate": 9.080717528680259e-08, "loss": 16.0938, "step": 26121 }, { "epoch": 1.734874144915986, "grad_norm": 368.37347412109375, "learning_rate": 9.07623985649838e-08, "loss": 15.6562, "step": 26122 }, { "epoch": 1.7349405592083418, "grad_norm": 109.38034057617188, "learning_rate": 9.071763236070151e-08, "loss": 17.0781, "step": 26123 }, { "epoch": 1.7350069735006972, "grad_norm": 316.0385437011719, "learning_rate": 9.067287667447287e-08, "loss": 14.6484, "step": 26124 }, { "epoch": 1.735073387793053, "grad_norm": 211.1905975341797, "learning_rate": 9.062813150681647e-08, "loss": 14.25, "step": 26125 }, { "epoch": 1.7351398020854087, "grad_norm": 187.54171752929688, "learning_rate": 9.058339685824934e-08, "loss": 12.6719, "step": 26126 }, { "epoch": 1.7352062163777644, "grad_norm": 163.9710693359375, "learning_rate": 9.053867272928906e-08, "loss": 12.5625, "step": 26127 }, { "epoch": 1.7352726306701203, "grad_norm": 129.3474578857422, "learning_rate": 9.049395912045288e-08, "loss": 16.1875, "step": 26128 }, { "epoch": 1.735339044962476, "grad_norm": 144.58807373046875, "learning_rate": 9.044925603225828e-08, "loss": 14.2812, "step": 26129 }, { "epoch": 1.7354054592548316, "grad_norm": 152.63711547851562, "learning_rate": 9.040456346522207e-08, "loss": 17.6719, "step": 26130 }, { "epoch": 1.7354718735471875, "grad_norm": 202.72377014160156, "learning_rate": 9.035988141986162e-08, "loss": 15.6562, "step": 26131 }, { "epoch": 1.7355382878395431, "grad_norm": 235.34007263183594, "learning_rate": 9.031520989669339e-08, "loss": 18.7031, "step": 26132 }, { "epoch": 1.7356047021318988, "grad_norm": 247.91519165039062, "learning_rate": 9.02705488962342e-08, "loss": 20.4844, "step": 26133 }, { "epoch": 1.7356711164242546, "grad_norm": 141.17623901367188, "learning_rate": 9.022589841900075e-08, "loss": 14.0625, "step": 26134 }, { "epoch": 1.73573753071661, "grad_norm": 626.0313110351562, "learning_rate": 9.01812584655095e-08, "loss": 25.6094, "step": 26135 }, { "epoch": 1.735803945008966, "grad_norm": 148.1427764892578, "learning_rate": 9.013662903627673e-08, "loss": 13.0938, "step": 26136 }, { "epoch": 1.7358703593013216, "grad_norm": 160.10964965820312, "learning_rate": 9.009201013181911e-08, "loss": 10.6719, "step": 26137 }, { "epoch": 1.7359367735936773, "grad_norm": 126.45365142822266, "learning_rate": 9.00474017526519e-08, "loss": 15.2969, "step": 26138 }, { "epoch": 1.7360031878860331, "grad_norm": 128.18563842773438, "learning_rate": 9.000280389929216e-08, "loss": 12.4609, "step": 26139 }, { "epoch": 1.7360696021783888, "grad_norm": 508.1146545410156, "learning_rate": 8.9958216572255e-08, "loss": 20.1562, "step": 26140 }, { "epoch": 1.7361360164707444, "grad_norm": 92.67943572998047, "learning_rate": 8.991363977205623e-08, "loss": 14.625, "step": 26141 }, { "epoch": 1.7362024307631003, "grad_norm": 157.24363708496094, "learning_rate": 8.986907349921214e-08, "loss": 15.4844, "step": 26142 }, { "epoch": 1.736268845055456, "grad_norm": 138.22837829589844, "learning_rate": 8.982451775423749e-08, "loss": 13.4531, "step": 26143 }, { "epoch": 1.7363352593478116, "grad_norm": 190.91683959960938, "learning_rate": 8.977997253764801e-08, "loss": 17.4844, "step": 26144 }, { "epoch": 1.7364016736401675, "grad_norm": 116.10053253173828, "learning_rate": 8.973543784995908e-08, "loss": 16.0938, "step": 26145 }, { "epoch": 1.736468087932523, "grad_norm": 90.57270812988281, "learning_rate": 8.969091369168558e-08, "loss": 15.8281, "step": 26146 }, { "epoch": 1.7365345022248788, "grad_norm": 160.7576141357422, "learning_rate": 8.964640006334267e-08, "loss": 14.2344, "step": 26147 }, { "epoch": 1.7366009165172345, "grad_norm": 154.5744171142578, "learning_rate": 8.960189696544551e-08, "loss": 18.9062, "step": 26148 }, { "epoch": 1.7366673308095901, "grad_norm": 283.64691162109375, "learning_rate": 8.955740439850823e-08, "loss": 18.8906, "step": 26149 }, { "epoch": 1.736733745101946, "grad_norm": 201.59469604492188, "learning_rate": 8.951292236304631e-08, "loss": 18.3125, "step": 26150 }, { "epoch": 1.7368001593943017, "grad_norm": 176.60276794433594, "learning_rate": 8.946845085957366e-08, "loss": 13.4609, "step": 26151 }, { "epoch": 1.7368665736866573, "grad_norm": 365.3612365722656, "learning_rate": 8.9423989888605e-08, "loss": 11.3984, "step": 26152 }, { "epoch": 1.7369329879790132, "grad_norm": 140.71844482421875, "learning_rate": 8.937953945065446e-08, "loss": 11.9453, "step": 26153 }, { "epoch": 1.7369994022713688, "grad_norm": 197.27195739746094, "learning_rate": 8.933509954623653e-08, "loss": 21.4062, "step": 26154 }, { "epoch": 1.7370658165637245, "grad_norm": 216.62315368652344, "learning_rate": 8.929067017586477e-08, "loss": 19.9844, "step": 26155 }, { "epoch": 1.7371322308560804, "grad_norm": 354.8457946777344, "learning_rate": 8.92462513400537e-08, "loss": 15.6562, "step": 26156 }, { "epoch": 1.7371986451484358, "grad_norm": 250.64759826660156, "learning_rate": 8.920184303931655e-08, "loss": 13.9844, "step": 26157 }, { "epoch": 1.7372650594407917, "grad_norm": 552.9765625, "learning_rate": 8.915744527416735e-08, "loss": 21.0, "step": 26158 }, { "epoch": 1.7373314737331473, "grad_norm": 338.05517578125, "learning_rate": 8.911305804511971e-08, "loss": 17.7812, "step": 26159 }, { "epoch": 1.737397888025503, "grad_norm": 422.7544250488281, "learning_rate": 8.906868135268686e-08, "loss": 25.5156, "step": 26160 }, { "epoch": 1.7374643023178589, "grad_norm": 386.80157470703125, "learning_rate": 8.902431519738218e-08, "loss": 26.6562, "step": 26161 }, { "epoch": 1.7375307166102145, "grad_norm": 155.04498291015625, "learning_rate": 8.897995957971916e-08, "loss": 12.5, "step": 26162 }, { "epoch": 1.7375971309025702, "grad_norm": 110.00071716308594, "learning_rate": 8.893561450021037e-08, "loss": 15.8594, "step": 26163 }, { "epoch": 1.737663545194926, "grad_norm": 146.8643341064453, "learning_rate": 8.88912799593693e-08, "loss": 13.7656, "step": 26164 }, { "epoch": 1.7377299594872817, "grad_norm": 111.38101196289062, "learning_rate": 8.884695595770831e-08, "loss": 11.9844, "step": 26165 }, { "epoch": 1.7377963737796374, "grad_norm": 124.9444808959961, "learning_rate": 8.880264249574043e-08, "loss": 14.6562, "step": 26166 }, { "epoch": 1.7378627880719932, "grad_norm": 337.6846618652344, "learning_rate": 8.875833957397805e-08, "loss": 23.4062, "step": 26167 }, { "epoch": 1.7379292023643487, "grad_norm": 156.72447204589844, "learning_rate": 8.871404719293385e-08, "loss": 12.7734, "step": 26168 }, { "epoch": 1.7379956166567045, "grad_norm": 444.6737976074219, "learning_rate": 8.86697653531201e-08, "loss": 15.2031, "step": 26169 }, { "epoch": 1.7380620309490602, "grad_norm": 163.9584197998047, "learning_rate": 8.862549405504905e-08, "loss": 16.5, "step": 26170 }, { "epoch": 1.7381284452414159, "grad_norm": 396.63702392578125, "learning_rate": 8.858123329923251e-08, "loss": 15.9062, "step": 26171 }, { "epoch": 1.7381948595337717, "grad_norm": 425.7588806152344, "learning_rate": 8.853698308618295e-08, "loss": 15.4219, "step": 26172 }, { "epoch": 1.7382612738261274, "grad_norm": 123.09089660644531, "learning_rate": 8.849274341641188e-08, "loss": 19.2812, "step": 26173 }, { "epoch": 1.738327688118483, "grad_norm": 177.2875518798828, "learning_rate": 8.844851429043121e-08, "loss": 11.5781, "step": 26174 }, { "epoch": 1.738394102410839, "grad_norm": 243.22418212890625, "learning_rate": 8.840429570875241e-08, "loss": 16.4688, "step": 26175 }, { "epoch": 1.7384605167031946, "grad_norm": 152.20050048828125, "learning_rate": 8.836008767188729e-08, "loss": 13.1484, "step": 26176 }, { "epoch": 1.7385269309955502, "grad_norm": 98.19361877441406, "learning_rate": 8.831589018034657e-08, "loss": 16.0, "step": 26177 }, { "epoch": 1.738593345287906, "grad_norm": 214.55958557128906, "learning_rate": 8.827170323464217e-08, "loss": 20.2188, "step": 26178 }, { "epoch": 1.7386597595802615, "grad_norm": 221.4049072265625, "learning_rate": 8.822752683528512e-08, "loss": 18.8438, "step": 26179 }, { "epoch": 1.7387261738726174, "grad_norm": 264.2300720214844, "learning_rate": 8.81833609827859e-08, "loss": 14.9375, "step": 26180 }, { "epoch": 1.738792588164973, "grad_norm": 382.7991027832031, "learning_rate": 8.81392056776562e-08, "loss": 18.0781, "step": 26181 }, { "epoch": 1.7388590024573287, "grad_norm": 302.0975341796875, "learning_rate": 8.80950609204062e-08, "loss": 15.4688, "step": 26182 }, { "epoch": 1.7389254167496846, "grad_norm": 218.11312866210938, "learning_rate": 8.805092671154657e-08, "loss": 20.9219, "step": 26183 }, { "epoch": 1.7389918310420402, "grad_norm": 188.8838653564453, "learning_rate": 8.800680305158803e-08, "loss": 16.2344, "step": 26184 }, { "epoch": 1.739058245334396, "grad_norm": 200.5750274658203, "learning_rate": 8.796268994104094e-08, "loss": 15.5781, "step": 26185 }, { "epoch": 1.7391246596267518, "grad_norm": 474.0824890136719, "learning_rate": 8.791858738041558e-08, "loss": 22.9688, "step": 26186 }, { "epoch": 1.7391910739191074, "grad_norm": 340.4132385253906, "learning_rate": 8.787449537022219e-08, "loss": 13.3281, "step": 26187 }, { "epoch": 1.739257488211463, "grad_norm": 605.9867553710938, "learning_rate": 8.783041391097024e-08, "loss": 21.9844, "step": 26188 }, { "epoch": 1.739323902503819, "grad_norm": 317.56011962890625, "learning_rate": 8.778634300317057e-08, "loss": 15.4375, "step": 26189 }, { "epoch": 1.7393903167961744, "grad_norm": 162.767822265625, "learning_rate": 8.77422826473322e-08, "loss": 13.0156, "step": 26190 }, { "epoch": 1.7394567310885303, "grad_norm": 153.18943786621094, "learning_rate": 8.769823284396505e-08, "loss": 15.1562, "step": 26191 }, { "epoch": 1.739523145380886, "grad_norm": 198.2942657470703, "learning_rate": 8.765419359357873e-08, "loss": 15.625, "step": 26192 }, { "epoch": 1.7395895596732416, "grad_norm": 269.18365478515625, "learning_rate": 8.76101648966826e-08, "loss": 17.1094, "step": 26193 }, { "epoch": 1.7396559739655975, "grad_norm": 173.8445587158203, "learning_rate": 8.756614675378593e-08, "loss": 13.9688, "step": 26194 }, { "epoch": 1.7397223882579531, "grad_norm": 480.3148498535156, "learning_rate": 8.752213916539819e-08, "loss": 16.2188, "step": 26195 }, { "epoch": 1.7397888025503088, "grad_norm": 138.6425018310547, "learning_rate": 8.747814213202797e-08, "loss": 12.75, "step": 26196 }, { "epoch": 1.7398552168426646, "grad_norm": 135.7149658203125, "learning_rate": 8.743415565418444e-08, "loss": 15.2031, "step": 26197 }, { "epoch": 1.7399216311350203, "grad_norm": 110.61782836914062, "learning_rate": 8.739017973237628e-08, "loss": 16.2031, "step": 26198 }, { "epoch": 1.739988045427376, "grad_norm": 168.2418212890625, "learning_rate": 8.734621436711242e-08, "loss": 16.6562, "step": 26199 }, { "epoch": 1.7400544597197318, "grad_norm": 264.6153869628906, "learning_rate": 8.730225955890113e-08, "loss": 14.5312, "step": 26200 }, { "epoch": 1.7401208740120873, "grad_norm": 103.6810531616211, "learning_rate": 8.725831530825133e-08, "loss": 13.3125, "step": 26201 }, { "epoch": 1.7401872883044431, "grad_norm": 282.6298522949219, "learning_rate": 8.721438161567063e-08, "loss": 13.7031, "step": 26202 }, { "epoch": 1.7402537025967988, "grad_norm": 156.80856323242188, "learning_rate": 8.717045848166793e-08, "loss": 17.8438, "step": 26203 }, { "epoch": 1.7403201168891544, "grad_norm": 118.54673767089844, "learning_rate": 8.712654590675084e-08, "loss": 16.1719, "step": 26204 }, { "epoch": 1.7403865311815103, "grad_norm": 469.965087890625, "learning_rate": 8.70826438914275e-08, "loss": 23.9844, "step": 26205 }, { "epoch": 1.740452945473866, "grad_norm": 116.722412109375, "learning_rate": 8.703875243620563e-08, "loss": 15.5, "step": 26206 }, { "epoch": 1.7405193597662216, "grad_norm": 187.0040283203125, "learning_rate": 8.699487154159313e-08, "loss": 12.4688, "step": 26207 }, { "epoch": 1.7405857740585775, "grad_norm": 160.0634307861328, "learning_rate": 8.69510012080974e-08, "loss": 14.5469, "step": 26208 }, { "epoch": 1.7406521883509332, "grad_norm": 201.88702392578125, "learning_rate": 8.690714143622624e-08, "loss": 16.9141, "step": 26209 }, { "epoch": 1.7407186026432888, "grad_norm": 162.47999572753906, "learning_rate": 8.686329222648637e-08, "loss": 13.8438, "step": 26210 }, { "epoch": 1.7407850169356447, "grad_norm": 514.0052490234375, "learning_rate": 8.68194535793858e-08, "loss": 22.5938, "step": 26211 }, { "epoch": 1.7408514312280001, "grad_norm": 209.6240997314453, "learning_rate": 8.677562549543093e-08, "loss": 19.0, "step": 26212 }, { "epoch": 1.740917845520356, "grad_norm": 158.76336669921875, "learning_rate": 8.673180797512913e-08, "loss": 12.3906, "step": 26213 }, { "epoch": 1.7409842598127117, "grad_norm": 468.62725830078125, "learning_rate": 8.66880010189871e-08, "loss": 27.125, "step": 26214 }, { "epoch": 1.7410506741050673, "grad_norm": 249.4393310546875, "learning_rate": 8.664420462751166e-08, "loss": 16.2656, "step": 26215 }, { "epoch": 1.7411170883974232, "grad_norm": 212.9421844482422, "learning_rate": 8.660041880120938e-08, "loss": 15.625, "step": 26216 }, { "epoch": 1.7411835026897788, "grad_norm": 163.62535095214844, "learning_rate": 8.655664354058678e-08, "loss": 12.5, "step": 26217 }, { "epoch": 1.7412499169821345, "grad_norm": 159.3642578125, "learning_rate": 8.651287884615032e-08, "loss": 17.3594, "step": 26218 }, { "epoch": 1.7413163312744904, "grad_norm": 271.76824951171875, "learning_rate": 8.646912471840584e-08, "loss": 14.125, "step": 26219 }, { "epoch": 1.741382745566846, "grad_norm": 331.82159423828125, "learning_rate": 8.642538115786002e-08, "loss": 13.8438, "step": 26220 }, { "epoch": 1.7414491598592017, "grad_norm": 162.91770935058594, "learning_rate": 8.638164816501847e-08, "loss": 14.8438, "step": 26221 }, { "epoch": 1.7415155741515576, "grad_norm": 207.4838104248047, "learning_rate": 8.633792574038723e-08, "loss": 16.6719, "step": 26222 }, { "epoch": 1.741581988443913, "grad_norm": 133.31304931640625, "learning_rate": 8.6294213884472e-08, "loss": 17.1406, "step": 26223 }, { "epoch": 1.7416484027362689, "grad_norm": 439.73577880859375, "learning_rate": 8.625051259777837e-08, "loss": 17.5, "step": 26224 }, { "epoch": 1.7417148170286245, "grad_norm": 206.45962524414062, "learning_rate": 8.620682188081196e-08, "loss": 20.2969, "step": 26225 }, { "epoch": 1.7417812313209802, "grad_norm": 543.1194458007812, "learning_rate": 8.616314173407835e-08, "loss": 17.5156, "step": 26226 }, { "epoch": 1.741847645613336, "grad_norm": 279.2398681640625, "learning_rate": 8.611947215808213e-08, "loss": 14.3594, "step": 26227 }, { "epoch": 1.7419140599056917, "grad_norm": 297.7392883300781, "learning_rate": 8.607581315332924e-08, "loss": 13.7656, "step": 26228 }, { "epoch": 1.7419804741980474, "grad_norm": 153.80540466308594, "learning_rate": 8.603216472032415e-08, "loss": 10.8281, "step": 26229 }, { "epoch": 1.7420468884904032, "grad_norm": 237.52293395996094, "learning_rate": 8.598852685957203e-08, "loss": 14.6094, "step": 26230 }, { "epoch": 1.7421133027827589, "grad_norm": 218.5663604736328, "learning_rate": 8.594489957157758e-08, "loss": 12.8984, "step": 26231 }, { "epoch": 1.7421797170751145, "grad_norm": 318.66094970703125, "learning_rate": 8.590128285684539e-08, "loss": 27.7969, "step": 26232 }, { "epoch": 1.7422461313674704, "grad_norm": 123.51360321044922, "learning_rate": 8.585767671588006e-08, "loss": 13.7969, "step": 26233 }, { "epoch": 1.7423125456598259, "grad_norm": 196.33108520507812, "learning_rate": 8.58140811491862e-08, "loss": 17.4062, "step": 26234 }, { "epoch": 1.7423789599521817, "grad_norm": 309.29766845703125, "learning_rate": 8.577049615726761e-08, "loss": 17.2812, "step": 26235 }, { "epoch": 1.7424453742445374, "grad_norm": 103.69471740722656, "learning_rate": 8.572692174062901e-08, "loss": 10.75, "step": 26236 }, { "epoch": 1.742511788536893, "grad_norm": 120.188232421875, "learning_rate": 8.5683357899774e-08, "loss": 16.0469, "step": 26237 }, { "epoch": 1.742578202829249, "grad_norm": 130.2578582763672, "learning_rate": 8.56398046352067e-08, "loss": 16.4688, "step": 26238 }, { "epoch": 1.7426446171216046, "grad_norm": 116.37169647216797, "learning_rate": 8.559626194743097e-08, "loss": 14.3906, "step": 26239 }, { "epoch": 1.7427110314139602, "grad_norm": 191.07513427734375, "learning_rate": 8.55527298369505e-08, "loss": 16.0469, "step": 26240 }, { "epoch": 1.742777445706316, "grad_norm": 649.8890991210938, "learning_rate": 8.550920830426844e-08, "loss": 12.0156, "step": 26241 }, { "epoch": 1.7428438599986718, "grad_norm": 189.81910705566406, "learning_rate": 8.546569734988884e-08, "loss": 17.9453, "step": 26242 }, { "epoch": 1.7429102742910274, "grad_norm": 112.52415466308594, "learning_rate": 8.542219697431452e-08, "loss": 12.5547, "step": 26243 }, { "epoch": 1.7429766885833833, "grad_norm": 110.71949005126953, "learning_rate": 8.537870717804885e-08, "loss": 18.9844, "step": 26244 }, { "epoch": 1.7430431028757387, "grad_norm": 95.6597900390625, "learning_rate": 8.533522796159498e-08, "loss": 13.1953, "step": 26245 }, { "epoch": 1.7431095171680946, "grad_norm": 315.8277893066406, "learning_rate": 8.529175932545574e-08, "loss": 16.2812, "step": 26246 }, { "epoch": 1.7431759314604502, "grad_norm": 137.94920349121094, "learning_rate": 8.524830127013394e-08, "loss": 15.4375, "step": 26247 }, { "epoch": 1.743242345752806, "grad_norm": 133.88734436035156, "learning_rate": 8.520485379613251e-08, "loss": 18.8125, "step": 26248 }, { "epoch": 1.7433087600451618, "grad_norm": 579.4607543945312, "learning_rate": 8.51614169039534e-08, "loss": 16.3281, "step": 26249 }, { "epoch": 1.7433751743375174, "grad_norm": 143.8913116455078, "learning_rate": 8.511799059409996e-08, "loss": 15.8125, "step": 26250 }, { "epoch": 1.743441588629873, "grad_norm": 128.01278686523438, "learning_rate": 8.507457486707382e-08, "loss": 15.0625, "step": 26251 }, { "epoch": 1.743508002922229, "grad_norm": 222.75845336914062, "learning_rate": 8.503116972337721e-08, "loss": 17.7422, "step": 26252 }, { "epoch": 1.7435744172145846, "grad_norm": 230.57736206054688, "learning_rate": 8.498777516351285e-08, "loss": 13.875, "step": 26253 }, { "epoch": 1.7436408315069403, "grad_norm": 267.6309509277344, "learning_rate": 8.494439118798202e-08, "loss": 20.0, "step": 26254 }, { "epoch": 1.7437072457992961, "grad_norm": 193.6896514892578, "learning_rate": 8.490101779728687e-08, "loss": 17.0469, "step": 26255 }, { "epoch": 1.7437736600916516, "grad_norm": 182.2393035888672, "learning_rate": 8.48576549919291e-08, "loss": 17.8594, "step": 26256 }, { "epoch": 1.7438400743840075, "grad_norm": 91.5650634765625, "learning_rate": 8.48143027724103e-08, "loss": 12.4844, "step": 26257 }, { "epoch": 1.743906488676363, "grad_norm": 132.0998992919922, "learning_rate": 8.477096113923188e-08, "loss": 18.1719, "step": 26258 }, { "epoch": 1.7439729029687188, "grad_norm": 497.2096252441406, "learning_rate": 8.472763009289541e-08, "loss": 20.6719, "step": 26259 }, { "epoch": 1.7440393172610746, "grad_norm": 218.81802368164062, "learning_rate": 8.468430963390172e-08, "loss": 16.5938, "step": 26260 }, { "epoch": 1.7441057315534303, "grad_norm": 152.9831085205078, "learning_rate": 8.464099976275218e-08, "loss": 15.3125, "step": 26261 }, { "epoch": 1.744172145845786, "grad_norm": 822.7430419921875, "learning_rate": 8.459770047994763e-08, "loss": 11.5625, "step": 26262 }, { "epoch": 1.7442385601381418, "grad_norm": 155.0045928955078, "learning_rate": 8.455441178598921e-08, "loss": 12.8281, "step": 26263 }, { "epoch": 1.7443049744304975, "grad_norm": 329.8954162597656, "learning_rate": 8.45111336813773e-08, "loss": 17.0781, "step": 26264 }, { "epoch": 1.7443713887228531, "grad_norm": 149.99896240234375, "learning_rate": 8.446786616661295e-08, "loss": 15.0625, "step": 26265 }, { "epoch": 1.744437803015209, "grad_norm": 283.47113037109375, "learning_rate": 8.442460924219608e-08, "loss": 18.7812, "step": 26266 }, { "epoch": 1.7445042173075644, "grad_norm": 2820.651611328125, "learning_rate": 8.438136290862774e-08, "loss": 12.6406, "step": 26267 }, { "epoch": 1.7445706315999203, "grad_norm": 157.49453735351562, "learning_rate": 8.433812716640753e-08, "loss": 16.2344, "step": 26268 }, { "epoch": 1.744637045892276, "grad_norm": 167.22186279296875, "learning_rate": 8.429490201603595e-08, "loss": 16.0, "step": 26269 }, { "epoch": 1.7447034601846316, "grad_norm": 387.54095458984375, "learning_rate": 8.425168745801292e-08, "loss": 14.375, "step": 26270 }, { "epoch": 1.7447698744769875, "grad_norm": 758.6829223632812, "learning_rate": 8.420848349283827e-08, "loss": 12.625, "step": 26271 }, { "epoch": 1.7448362887693432, "grad_norm": 201.6976318359375, "learning_rate": 8.416529012101192e-08, "loss": 15.3281, "step": 26272 }, { "epoch": 1.7449027030616988, "grad_norm": 102.96226501464844, "learning_rate": 8.412210734303349e-08, "loss": 13.5938, "step": 26273 }, { "epoch": 1.7449691173540547, "grad_norm": 492.7320556640625, "learning_rate": 8.4078935159402e-08, "loss": 19.1406, "step": 26274 }, { "epoch": 1.7450355316464103, "grad_norm": 124.86959838867188, "learning_rate": 8.403577357061764e-08, "loss": 11.6953, "step": 26275 }, { "epoch": 1.745101945938766, "grad_norm": 191.2132110595703, "learning_rate": 8.39926225771791e-08, "loss": 17.9688, "step": 26276 }, { "epoch": 1.7451683602311219, "grad_norm": 203.28419494628906, "learning_rate": 8.394948217958564e-08, "loss": 12.0938, "step": 26277 }, { "epoch": 1.7452347745234773, "grad_norm": 169.6829071044922, "learning_rate": 8.390635237833643e-08, "loss": 14.9531, "step": 26278 }, { "epoch": 1.7453011888158332, "grad_norm": 301.3877868652344, "learning_rate": 8.38632331739304e-08, "loss": 20.4062, "step": 26279 }, { "epoch": 1.7453676031081888, "grad_norm": 196.60946655273438, "learning_rate": 8.382012456686594e-08, "loss": 14.0312, "step": 26280 }, { "epoch": 1.7454340174005445, "grad_norm": 185.46066284179688, "learning_rate": 8.37770265576423e-08, "loss": 14.125, "step": 26281 }, { "epoch": 1.7455004316929004, "grad_norm": 174.00933837890625, "learning_rate": 8.373393914675753e-08, "loss": 15.375, "step": 26282 }, { "epoch": 1.745566845985256, "grad_norm": 195.1215057373047, "learning_rate": 8.369086233471012e-08, "loss": 14.0469, "step": 26283 }, { "epoch": 1.7456332602776117, "grad_norm": 248.17630004882812, "learning_rate": 8.364779612199846e-08, "loss": 17.5938, "step": 26284 }, { "epoch": 1.7456996745699676, "grad_norm": 193.71383666992188, "learning_rate": 8.360474050912081e-08, "loss": 16.2031, "step": 26285 }, { "epoch": 1.7457660888623232, "grad_norm": 214.05189514160156, "learning_rate": 8.3561695496575e-08, "loss": 14.3594, "step": 26286 }, { "epoch": 1.7458325031546789, "grad_norm": 142.23385620117188, "learning_rate": 8.351866108485928e-08, "loss": 15.7031, "step": 26287 }, { "epoch": 1.7458989174470347, "grad_norm": 201.80120849609375, "learning_rate": 8.347563727447083e-08, "loss": 17.4844, "step": 26288 }, { "epoch": 1.7459653317393902, "grad_norm": 250.62229919433594, "learning_rate": 8.343262406590778e-08, "loss": 16.7812, "step": 26289 }, { "epoch": 1.746031746031746, "grad_norm": 689.063232421875, "learning_rate": 8.3389621459668e-08, "loss": 17.8906, "step": 26290 }, { "epoch": 1.7460981603241017, "grad_norm": 290.9963073730469, "learning_rate": 8.334662945624804e-08, "loss": 19.5312, "step": 26291 }, { "epoch": 1.7461645746164574, "grad_norm": 257.60626220703125, "learning_rate": 8.330364805614609e-08, "loss": 13.3281, "step": 26292 }, { "epoch": 1.7462309889088132, "grad_norm": 249.1885223388672, "learning_rate": 8.326067725985875e-08, "loss": 18.8594, "step": 26293 }, { "epoch": 1.7462974032011689, "grad_norm": 128.58279418945312, "learning_rate": 8.321771706788328e-08, "loss": 21.0156, "step": 26294 }, { "epoch": 1.7463638174935245, "grad_norm": 260.35430908203125, "learning_rate": 8.317476748071661e-08, "loss": 19.9688, "step": 26295 }, { "epoch": 1.7464302317858804, "grad_norm": 388.85675048828125, "learning_rate": 8.313182849885547e-08, "loss": 17.5156, "step": 26296 }, { "epoch": 1.746496646078236, "grad_norm": 315.15960693359375, "learning_rate": 8.308890012279679e-08, "loss": 16.5469, "step": 26297 }, { "epoch": 1.7465630603705917, "grad_norm": 106.3709945678711, "learning_rate": 8.304598235303706e-08, "loss": 14.3438, "step": 26298 }, { "epoch": 1.7466294746629476, "grad_norm": 235.67776489257812, "learning_rate": 8.300307519007233e-08, "loss": 13.7344, "step": 26299 }, { "epoch": 1.746695888955303, "grad_norm": 734.9844360351562, "learning_rate": 8.296017863439964e-08, "loss": 17.4688, "step": 26300 }, { "epoch": 1.746762303247659, "grad_norm": 105.05986785888672, "learning_rate": 8.291729268651471e-08, "loss": 12.7812, "step": 26301 }, { "epoch": 1.7468287175400146, "grad_norm": 159.8942413330078, "learning_rate": 8.287441734691359e-08, "loss": 11.8906, "step": 26302 }, { "epoch": 1.7468951318323702, "grad_norm": 116.34818267822266, "learning_rate": 8.283155261609243e-08, "loss": 10.1562, "step": 26303 }, { "epoch": 1.746961546124726, "grad_norm": 110.64582061767578, "learning_rate": 8.278869849454717e-08, "loss": 14.1094, "step": 26304 }, { "epoch": 1.7470279604170817, "grad_norm": 264.1698303222656, "learning_rate": 8.274585498277297e-08, "loss": 17.4688, "step": 26305 }, { "epoch": 1.7470943747094374, "grad_norm": 198.79347229003906, "learning_rate": 8.270302208126611e-08, "loss": 17.4688, "step": 26306 }, { "epoch": 1.7471607890017933, "grad_norm": 222.98507690429688, "learning_rate": 8.266019979052163e-08, "loss": 14.7969, "step": 26307 }, { "epoch": 1.747227203294149, "grad_norm": 128.87872314453125, "learning_rate": 8.261738811103503e-08, "loss": 14.2656, "step": 26308 }, { "epoch": 1.7472936175865046, "grad_norm": 127.38488006591797, "learning_rate": 8.257458704330144e-08, "loss": 13.9844, "step": 26309 }, { "epoch": 1.7473600318788605, "grad_norm": 358.130859375, "learning_rate": 8.253179658781595e-08, "loss": 15.7344, "step": 26310 }, { "epoch": 1.747426446171216, "grad_norm": 292.86004638671875, "learning_rate": 8.24890167450737e-08, "loss": 15.0781, "step": 26311 }, { "epoch": 1.7474928604635718, "grad_norm": 325.5134582519531, "learning_rate": 8.244624751556961e-08, "loss": 14.7188, "step": 26312 }, { "epoch": 1.7475592747559274, "grad_norm": 340.5531921386719, "learning_rate": 8.240348889979787e-08, "loss": 19.5625, "step": 26313 }, { "epoch": 1.747625689048283, "grad_norm": 239.7089080810547, "learning_rate": 8.236074089825373e-08, "loss": 20.6562, "step": 26314 }, { "epoch": 1.747692103340639, "grad_norm": 461.51055908203125, "learning_rate": 8.231800351143126e-08, "loss": 12.2969, "step": 26315 }, { "epoch": 1.7477585176329946, "grad_norm": 301.5216064453125, "learning_rate": 8.227527673982504e-08, "loss": 18.5625, "step": 26316 }, { "epoch": 1.7478249319253503, "grad_norm": 146.0976104736328, "learning_rate": 8.223256058392913e-08, "loss": 14.2656, "step": 26317 }, { "epoch": 1.7478913462177061, "grad_norm": 131.5243377685547, "learning_rate": 8.21898550442378e-08, "loss": 13.9844, "step": 26318 }, { "epoch": 1.7479577605100618, "grad_norm": 109.517822265625, "learning_rate": 8.214716012124489e-08, "loss": 11.1406, "step": 26319 }, { "epoch": 1.7480241748024175, "grad_norm": 468.3892822265625, "learning_rate": 8.210447581544466e-08, "loss": 17.0625, "step": 26320 }, { "epoch": 1.7480905890947733, "grad_norm": 266.62176513671875, "learning_rate": 8.206180212733038e-08, "loss": 9.9844, "step": 26321 }, { "epoch": 1.7481570033871288, "grad_norm": 613.5281372070312, "learning_rate": 8.201913905739578e-08, "loss": 23.4844, "step": 26322 }, { "epoch": 1.7482234176794846, "grad_norm": 306.1884765625, "learning_rate": 8.197648660613455e-08, "loss": 14.7031, "step": 26323 }, { "epoch": 1.7482898319718403, "grad_norm": 537.4653930664062, "learning_rate": 8.193384477403986e-08, "loss": 20.3281, "step": 26324 }, { "epoch": 1.748356246264196, "grad_norm": 255.75967407226562, "learning_rate": 8.189121356160511e-08, "loss": 19.8125, "step": 26325 }, { "epoch": 1.7484226605565518, "grad_norm": 269.18951416015625, "learning_rate": 8.184859296932334e-08, "loss": 17.0156, "step": 26326 }, { "epoch": 1.7484890748489075, "grad_norm": 240.5056610107422, "learning_rate": 8.180598299768771e-08, "loss": 12.9844, "step": 26327 }, { "epoch": 1.7485554891412631, "grad_norm": 331.8773498535156, "learning_rate": 8.176338364719093e-08, "loss": 12.0312, "step": 26328 }, { "epoch": 1.748621903433619, "grad_norm": 202.70628356933594, "learning_rate": 8.172079491832606e-08, "loss": 15.4219, "step": 26329 }, { "epoch": 1.7486883177259747, "grad_norm": 245.71473693847656, "learning_rate": 8.167821681158515e-08, "loss": 16.3281, "step": 26330 }, { "epoch": 1.7487547320183303, "grad_norm": 230.13275146484375, "learning_rate": 8.163564932746147e-08, "loss": 15.8438, "step": 26331 }, { "epoch": 1.7488211463106862, "grad_norm": 402.97430419921875, "learning_rate": 8.159309246644685e-08, "loss": 14.1016, "step": 26332 }, { "epoch": 1.7488875606030416, "grad_norm": 124.43500518798828, "learning_rate": 8.155054622903368e-08, "loss": 16.0312, "step": 26333 }, { "epoch": 1.7489539748953975, "grad_norm": 283.5324401855469, "learning_rate": 8.150801061571433e-08, "loss": 26.5781, "step": 26334 }, { "epoch": 1.7490203891877532, "grad_norm": 411.38665771484375, "learning_rate": 8.146548562698052e-08, "loss": 17.4844, "step": 26335 }, { "epoch": 1.7490868034801088, "grad_norm": 118.6633529663086, "learning_rate": 8.142297126332443e-08, "loss": 15.4531, "step": 26336 }, { "epoch": 1.7491532177724647, "grad_norm": 122.91126251220703, "learning_rate": 8.138046752523786e-08, "loss": 15.4375, "step": 26337 }, { "epoch": 1.7492196320648203, "grad_norm": 199.40603637695312, "learning_rate": 8.1337974413212e-08, "loss": 14.125, "step": 26338 }, { "epoch": 1.749286046357176, "grad_norm": 664.428466796875, "learning_rate": 8.12954919277391e-08, "loss": 17.125, "step": 26339 }, { "epoch": 1.7493524606495319, "grad_norm": 435.79998779296875, "learning_rate": 8.12530200693099e-08, "loss": 20.4688, "step": 26340 }, { "epoch": 1.7494188749418875, "grad_norm": 182.9720916748047, "learning_rate": 8.12105588384161e-08, "loss": 14.4688, "step": 26341 }, { "epoch": 1.7494852892342432, "grad_norm": 289.4696960449219, "learning_rate": 8.116810823554876e-08, "loss": 18.625, "step": 26342 }, { "epoch": 1.749551703526599, "grad_norm": 105.29559326171875, "learning_rate": 8.112566826119904e-08, "loss": 15.2188, "step": 26343 }, { "epoch": 1.7496181178189545, "grad_norm": 123.31398010253906, "learning_rate": 8.108323891585733e-08, "loss": 15.7188, "step": 26344 }, { "epoch": 1.7496845321113104, "grad_norm": 190.15914916992188, "learning_rate": 8.104082020001524e-08, "loss": 15.9531, "step": 26345 }, { "epoch": 1.749750946403666, "grad_norm": 156.63807678222656, "learning_rate": 8.099841211416292e-08, "loss": 12.3047, "step": 26346 }, { "epoch": 1.7498173606960217, "grad_norm": 237.10292053222656, "learning_rate": 8.095601465879098e-08, "loss": 18.8594, "step": 26347 }, { "epoch": 1.7498837749883775, "grad_norm": 97.14147186279297, "learning_rate": 8.091362783438992e-08, "loss": 16.3906, "step": 26348 }, { "epoch": 1.7499501892807332, "grad_norm": 279.40606689453125, "learning_rate": 8.087125164145004e-08, "loss": 12.7031, "step": 26349 }, { "epoch": 1.7500166035730889, "grad_norm": 124.65924835205078, "learning_rate": 8.082888608046156e-08, "loss": 18.5, "step": 26350 }, { "epoch": 1.7500830178654447, "grad_norm": 246.8675537109375, "learning_rate": 8.078653115191458e-08, "loss": 16.1094, "step": 26351 }, { "epoch": 1.7501494321578004, "grad_norm": 135.89163208007812, "learning_rate": 8.074418685629869e-08, "loss": 13.2656, "step": 26352 }, { "epoch": 1.750215846450156, "grad_norm": 180.98162841796875, "learning_rate": 8.070185319410427e-08, "loss": 16.7812, "step": 26353 }, { "epoch": 1.750282260742512, "grad_norm": 610.8787841796875, "learning_rate": 8.065953016582061e-08, "loss": 14.75, "step": 26354 }, { "epoch": 1.7503486750348674, "grad_norm": 221.974853515625, "learning_rate": 8.06172177719373e-08, "loss": 16.5469, "step": 26355 }, { "epoch": 1.7504150893272232, "grad_norm": 170.8473358154297, "learning_rate": 8.057491601294398e-08, "loss": 17.7812, "step": 26356 }, { "epoch": 1.7504815036195789, "grad_norm": 162.45547485351562, "learning_rate": 8.053262488932977e-08, "loss": 14.3438, "step": 26357 }, { "epoch": 1.7505479179119345, "grad_norm": 585.0145874023438, "learning_rate": 8.049034440158398e-08, "loss": 16.4375, "step": 26358 }, { "epoch": 1.7506143322042904, "grad_norm": 105.32299041748047, "learning_rate": 8.044807455019586e-08, "loss": 15.3906, "step": 26359 }, { "epoch": 1.750680746496646, "grad_norm": 198.91554260253906, "learning_rate": 8.040581533565383e-08, "loss": 17.8438, "step": 26360 }, { "epoch": 1.7507471607890017, "grad_norm": 214.88339233398438, "learning_rate": 8.036356675844735e-08, "loss": 21.3125, "step": 26361 }, { "epoch": 1.7508135750813576, "grad_norm": 196.41793823242188, "learning_rate": 8.032132881906472e-08, "loss": 12.9219, "step": 26362 }, { "epoch": 1.7508799893737133, "grad_norm": 364.8415832519531, "learning_rate": 8.027910151799445e-08, "loss": 12.4531, "step": 26363 }, { "epoch": 1.750946403666069, "grad_norm": 248.91700744628906, "learning_rate": 8.023688485572545e-08, "loss": 13.8125, "step": 26364 }, { "epoch": 1.7510128179584248, "grad_norm": 292.8507385253906, "learning_rate": 8.019467883274577e-08, "loss": 12.6406, "step": 26365 }, { "epoch": 1.7510792322507802, "grad_norm": 141.0903778076172, "learning_rate": 8.015248344954362e-08, "loss": 16.1875, "step": 26366 }, { "epoch": 1.751145646543136, "grad_norm": 843.2088012695312, "learning_rate": 8.011029870660701e-08, "loss": 29.7422, "step": 26367 }, { "epoch": 1.7512120608354917, "grad_norm": 242.7064208984375, "learning_rate": 8.006812460442435e-08, "loss": 13.0859, "step": 26368 }, { "epoch": 1.7512784751278474, "grad_norm": 189.48016357421875, "learning_rate": 8.00259611434827e-08, "loss": 16.0, "step": 26369 }, { "epoch": 1.7513448894202033, "grad_norm": 232.5322265625, "learning_rate": 7.998380832427066e-08, "loss": 20.0938, "step": 26370 }, { "epoch": 1.751411303712559, "grad_norm": 293.2953186035156, "learning_rate": 7.994166614727527e-08, "loss": 12.9062, "step": 26371 }, { "epoch": 1.7514777180049146, "grad_norm": 185.9793701171875, "learning_rate": 7.989953461298404e-08, "loss": 18.4219, "step": 26372 }, { "epoch": 1.7515441322972705, "grad_norm": 230.44393920898438, "learning_rate": 7.985741372188448e-08, "loss": 17.5312, "step": 26373 }, { "epoch": 1.7516105465896261, "grad_norm": 307.146728515625, "learning_rate": 7.981530347446386e-08, "loss": 13.4844, "step": 26374 }, { "epoch": 1.7516769608819818, "grad_norm": 170.92242431640625, "learning_rate": 7.977320387120911e-08, "loss": 12.9375, "step": 26375 }, { "epoch": 1.7517433751743376, "grad_norm": 205.96128845214844, "learning_rate": 7.973111491260753e-08, "loss": 18.3438, "step": 26376 }, { "epoch": 1.751809789466693, "grad_norm": 250.45555114746094, "learning_rate": 7.968903659914538e-08, "loss": 20.3438, "step": 26377 }, { "epoch": 1.751876203759049, "grad_norm": 200.9956817626953, "learning_rate": 7.964696893131007e-08, "loss": 13.0938, "step": 26378 }, { "epoch": 1.7519426180514046, "grad_norm": 227.71896362304688, "learning_rate": 7.960491190958785e-08, "loss": 13.3438, "step": 26379 }, { "epoch": 1.7520090323437603, "grad_norm": 210.50604248046875, "learning_rate": 7.956286553446523e-08, "loss": 17.75, "step": 26380 }, { "epoch": 1.7520754466361161, "grad_norm": 369.4295654296875, "learning_rate": 7.95208298064286e-08, "loss": 26.5312, "step": 26381 }, { "epoch": 1.7521418609284718, "grad_norm": 233.87562561035156, "learning_rate": 7.947880472596425e-08, "loss": 16.3906, "step": 26382 }, { "epoch": 1.7522082752208274, "grad_norm": 260.30902099609375, "learning_rate": 7.943679029355821e-08, "loss": 23.7031, "step": 26383 }, { "epoch": 1.7522746895131833, "grad_norm": 422.9404602050781, "learning_rate": 7.939478650969688e-08, "loss": 23.1406, "step": 26384 }, { "epoch": 1.752341103805539, "grad_norm": 235.3041534423828, "learning_rate": 7.935279337486556e-08, "loss": 16.2188, "step": 26385 }, { "epoch": 1.7524075180978946, "grad_norm": 351.6361999511719, "learning_rate": 7.931081088955016e-08, "loss": 15.0156, "step": 26386 }, { "epoch": 1.7524739323902505, "grad_norm": 132.09579467773438, "learning_rate": 7.926883905423653e-08, "loss": 14.8438, "step": 26387 }, { "epoch": 1.752540346682606, "grad_norm": 265.8781433105469, "learning_rate": 7.922687786940996e-08, "loss": 15.7344, "step": 26388 }, { "epoch": 1.7526067609749618, "grad_norm": 157.085693359375, "learning_rate": 7.918492733555604e-08, "loss": 14.1094, "step": 26389 }, { "epoch": 1.7526731752673175, "grad_norm": 410.4643249511719, "learning_rate": 7.914298745315994e-08, "loss": 16.5156, "step": 26390 }, { "epoch": 1.7527395895596731, "grad_norm": 154.98683166503906, "learning_rate": 7.910105822270652e-08, "loss": 14.2969, "step": 26391 }, { "epoch": 1.752806003852029, "grad_norm": 159.5713348388672, "learning_rate": 7.905913964468136e-08, "loss": 14.1094, "step": 26392 }, { "epoch": 1.7528724181443847, "grad_norm": 172.24537658691406, "learning_rate": 7.901723171956886e-08, "loss": 21.25, "step": 26393 }, { "epoch": 1.7529388324367403, "grad_norm": 269.31683349609375, "learning_rate": 7.897533444785387e-08, "loss": 24.0, "step": 26394 }, { "epoch": 1.7530052467290962, "grad_norm": 167.64439392089844, "learning_rate": 7.893344783002121e-08, "loss": 10.3203, "step": 26395 }, { "epoch": 1.7530716610214518, "grad_norm": 155.2164764404297, "learning_rate": 7.889157186655526e-08, "loss": 19.1406, "step": 26396 }, { "epoch": 1.7531380753138075, "grad_norm": 570.1160278320312, "learning_rate": 7.884970655794054e-08, "loss": 14.0703, "step": 26397 }, { "epoch": 1.7532044896061634, "grad_norm": 151.3242950439453, "learning_rate": 7.880785190466133e-08, "loss": 12.6875, "step": 26398 }, { "epoch": 1.7532709038985188, "grad_norm": 250.53761291503906, "learning_rate": 7.876600790720134e-08, "loss": 16.9531, "step": 26399 }, { "epoch": 1.7533373181908747, "grad_norm": 183.57521057128906, "learning_rate": 7.872417456604519e-08, "loss": 14.3125, "step": 26400 }, { "epoch": 1.7534037324832303, "grad_norm": 205.2632598876953, "learning_rate": 7.86823518816767e-08, "loss": 23.125, "step": 26401 }, { "epoch": 1.753470146775586, "grad_norm": 164.00540161132812, "learning_rate": 7.86405398545792e-08, "loss": 12.3438, "step": 26402 }, { "epoch": 1.7535365610679419, "grad_norm": 252.86856079101562, "learning_rate": 7.859873848523702e-08, "loss": 16.1562, "step": 26403 }, { "epoch": 1.7536029753602975, "grad_norm": 210.0837860107422, "learning_rate": 7.855694777413302e-08, "loss": 18.3359, "step": 26404 }, { "epoch": 1.7536693896526532, "grad_norm": 204.79408264160156, "learning_rate": 7.851516772175104e-08, "loss": 16.8438, "step": 26405 }, { "epoch": 1.753735803945009, "grad_norm": 149.31005859375, "learning_rate": 7.847339832857413e-08, "loss": 15.4688, "step": 26406 }, { "epoch": 1.7538022182373647, "grad_norm": 245.6005859375, "learning_rate": 7.843163959508581e-08, "loss": 14.0781, "step": 26407 }, { "epoch": 1.7538686325297204, "grad_norm": 253.87490844726562, "learning_rate": 7.838989152176855e-08, "loss": 22.3594, "step": 26408 }, { "epoch": 1.7539350468220762, "grad_norm": 790.4627075195312, "learning_rate": 7.834815410910589e-08, "loss": 23.0781, "step": 26409 }, { "epoch": 1.7540014611144317, "grad_norm": 132.5728759765625, "learning_rate": 7.83064273575802e-08, "loss": 13.6875, "step": 26410 }, { "epoch": 1.7540678754067875, "grad_norm": 297.1310119628906, "learning_rate": 7.82647112676742e-08, "loss": 22.3125, "step": 26411 }, { "epoch": 1.7541342896991432, "grad_norm": 125.50235748291016, "learning_rate": 7.822300583987052e-08, "loss": 14.7656, "step": 26412 }, { "epoch": 1.7542007039914989, "grad_norm": 129.67092895507812, "learning_rate": 7.818131107465164e-08, "loss": 13.875, "step": 26413 }, { "epoch": 1.7542671182838547, "grad_norm": 192.6528778076172, "learning_rate": 7.813962697249988e-08, "loss": 17.5938, "step": 26414 }, { "epoch": 1.7543335325762104, "grad_norm": 151.89903259277344, "learning_rate": 7.809795353389736e-08, "loss": 15.2031, "step": 26415 }, { "epoch": 1.754399946868566, "grad_norm": 661.2876586914062, "learning_rate": 7.805629075932585e-08, "loss": 20.0938, "step": 26416 }, { "epoch": 1.754466361160922, "grad_norm": 146.1642303466797, "learning_rate": 7.801463864926782e-08, "loss": 16.4219, "step": 26417 }, { "epoch": 1.7545327754532776, "grad_norm": 159.41705322265625, "learning_rate": 7.79729972042047e-08, "loss": 17.0469, "step": 26418 }, { "epoch": 1.7545991897456332, "grad_norm": 142.4676513671875, "learning_rate": 7.793136642461828e-08, "loss": 13.9062, "step": 26419 }, { "epoch": 1.754665604037989, "grad_norm": 215.3150634765625, "learning_rate": 7.78897463109901e-08, "loss": 19.3125, "step": 26420 }, { "epoch": 1.7547320183303445, "grad_norm": 414.434814453125, "learning_rate": 7.784813686380165e-08, "loss": 15.1719, "step": 26421 }, { "epoch": 1.7547984326227004, "grad_norm": 187.2335968017578, "learning_rate": 7.78065380835341e-08, "loss": 16.6875, "step": 26422 }, { "epoch": 1.754864846915056, "grad_norm": 240.7632598876953, "learning_rate": 7.776494997066907e-08, "loss": 21.2188, "step": 26423 }, { "epoch": 1.7549312612074117, "grad_norm": 215.57618713378906, "learning_rate": 7.772337252568683e-08, "loss": 15.8281, "step": 26424 }, { "epoch": 1.7549976754997676, "grad_norm": 338.2196044921875, "learning_rate": 7.768180574906923e-08, "loss": 12.2344, "step": 26425 }, { "epoch": 1.7550640897921232, "grad_norm": 123.04195404052734, "learning_rate": 7.764024964129646e-08, "loss": 11.7734, "step": 26426 }, { "epoch": 1.755130504084479, "grad_norm": 598.8456420898438, "learning_rate": 7.759870420284941e-08, "loss": 26.3125, "step": 26427 }, { "epoch": 1.7551969183768348, "grad_norm": 184.6223907470703, "learning_rate": 7.755716943420865e-08, "loss": 13.75, "step": 26428 }, { "epoch": 1.7552633326691904, "grad_norm": 259.9255676269531, "learning_rate": 7.751564533585486e-08, "loss": 20.2812, "step": 26429 }, { "epoch": 1.755329746961546, "grad_norm": 210.1807098388672, "learning_rate": 7.747413190826779e-08, "loss": 20.7344, "step": 26430 }, { "epoch": 1.755396161253902, "grad_norm": 1064.2978515625, "learning_rate": 7.743262915192839e-08, "loss": 10.5938, "step": 26431 }, { "epoch": 1.7554625755462574, "grad_norm": 163.7341766357422, "learning_rate": 7.739113706731615e-08, "loss": 16.2344, "step": 26432 }, { "epoch": 1.7555289898386133, "grad_norm": 146.14944458007812, "learning_rate": 7.734965565491114e-08, "loss": 11.5625, "step": 26433 }, { "epoch": 1.7555954041309692, "grad_norm": 145.58352661132812, "learning_rate": 7.730818491519342e-08, "loss": 14.0156, "step": 26434 }, { "epoch": 1.7556618184233246, "grad_norm": 241.64013671875, "learning_rate": 7.72667248486425e-08, "loss": 21.5938, "step": 26435 }, { "epoch": 1.7557282327156805, "grad_norm": 219.66683959960938, "learning_rate": 7.722527545573799e-08, "loss": 14.625, "step": 26436 }, { "epoch": 1.7557946470080361, "grad_norm": 154.08624267578125, "learning_rate": 7.718383673695939e-08, "loss": 12.8594, "step": 26437 }, { "epoch": 1.7558610613003918, "grad_norm": 653.4202270507812, "learning_rate": 7.71424086927861e-08, "loss": 15.9219, "step": 26438 }, { "epoch": 1.7559274755927476, "grad_norm": 305.282470703125, "learning_rate": 7.71009913236973e-08, "loss": 14.0312, "step": 26439 }, { "epoch": 1.7559938898851033, "grad_norm": 110.04700469970703, "learning_rate": 7.705958463017215e-08, "loss": 14.5156, "step": 26440 }, { "epoch": 1.756060304177459, "grad_norm": 693.571533203125, "learning_rate": 7.701818861268927e-08, "loss": 26.8125, "step": 26441 }, { "epoch": 1.7561267184698148, "grad_norm": 268.1062316894531, "learning_rate": 7.697680327172807e-08, "loss": 14.7812, "step": 26442 }, { "epoch": 1.7561931327621703, "grad_norm": 338.46063232421875, "learning_rate": 7.693542860776669e-08, "loss": 19.0781, "step": 26443 }, { "epoch": 1.7562595470545261, "grad_norm": 362.9146728515625, "learning_rate": 7.689406462128412e-08, "loss": 18.4844, "step": 26444 }, { "epoch": 1.756325961346882, "grad_norm": 136.42117309570312, "learning_rate": 7.685271131275873e-08, "loss": 20.7188, "step": 26445 }, { "epoch": 1.7563923756392374, "grad_norm": 613.45361328125, "learning_rate": 7.68113686826688e-08, "loss": 29.4531, "step": 26446 }, { "epoch": 1.7564587899315933, "grad_norm": 118.08555603027344, "learning_rate": 7.677003673149263e-08, "loss": 14.7812, "step": 26447 }, { "epoch": 1.756525204223949, "grad_norm": 319.156005859375, "learning_rate": 7.67287154597085e-08, "loss": 18.25, "step": 26448 }, { "epoch": 1.7565916185163046, "grad_norm": 164.64222717285156, "learning_rate": 7.668740486779402e-08, "loss": 17.1094, "step": 26449 }, { "epoch": 1.7566580328086605, "grad_norm": 285.14007568359375, "learning_rate": 7.664610495622714e-08, "loss": 16.5703, "step": 26450 }, { "epoch": 1.7567244471010162, "grad_norm": 203.70846557617188, "learning_rate": 7.660481572548583e-08, "loss": 17.0625, "step": 26451 }, { "epoch": 1.7567908613933718, "grad_norm": 178.2967987060547, "learning_rate": 7.656353717604747e-08, "loss": 13.9062, "step": 26452 }, { "epoch": 1.7568572756857277, "grad_norm": 796.984130859375, "learning_rate": 7.652226930838957e-08, "loss": 24.6406, "step": 26453 }, { "epoch": 1.7569236899780831, "grad_norm": 176.318359375, "learning_rate": 7.648101212298986e-08, "loss": 14.3906, "step": 26454 }, { "epoch": 1.756990104270439, "grad_norm": 233.9280548095703, "learning_rate": 7.643976562032484e-08, "loss": 23.2969, "step": 26455 }, { "epoch": 1.7570565185627949, "grad_norm": 343.6685485839844, "learning_rate": 7.639852980087246e-08, "loss": 17.5938, "step": 26456 }, { "epoch": 1.7571229328551503, "grad_norm": 132.73159790039062, "learning_rate": 7.635730466510903e-08, "loss": 10.7812, "step": 26457 }, { "epoch": 1.7571893471475062, "grad_norm": 124.64004516601562, "learning_rate": 7.631609021351182e-08, "loss": 12.2969, "step": 26458 }, { "epoch": 1.7572557614398618, "grad_norm": 137.1598663330078, "learning_rate": 7.627488644655733e-08, "loss": 16.75, "step": 26459 }, { "epoch": 1.7573221757322175, "grad_norm": 183.36492919921875, "learning_rate": 7.623369336472241e-08, "loss": 11.5625, "step": 26460 }, { "epoch": 1.7573885900245734, "grad_norm": 96.34974670410156, "learning_rate": 7.619251096848345e-08, "loss": 13.0469, "step": 26461 }, { "epoch": 1.757455004316929, "grad_norm": 8957.9189453125, "learning_rate": 7.615133925831685e-08, "loss": 20.6719, "step": 26462 }, { "epoch": 1.7575214186092847, "grad_norm": 182.39093017578125, "learning_rate": 7.611017823469868e-08, "loss": 15.0938, "step": 26463 }, { "epoch": 1.7575878329016406, "grad_norm": 98.35343170166016, "learning_rate": 7.606902789810543e-08, "loss": 13.4219, "step": 26464 }, { "epoch": 1.757654247193996, "grad_norm": 173.8397674560547, "learning_rate": 7.602788824901285e-08, "loss": 16.9062, "step": 26465 }, { "epoch": 1.7577206614863519, "grad_norm": 381.79388427734375, "learning_rate": 7.598675928789677e-08, "loss": 23.0312, "step": 26466 }, { "epoch": 1.7577870757787077, "grad_norm": 254.6836395263672, "learning_rate": 7.594564101523305e-08, "loss": 18.6094, "step": 26467 }, { "epoch": 1.7578534900710632, "grad_norm": 166.0313262939453, "learning_rate": 7.59045334314975e-08, "loss": 10.9531, "step": 26468 }, { "epoch": 1.757919904363419, "grad_norm": 195.60667419433594, "learning_rate": 7.58634365371651e-08, "loss": 16.0, "step": 26469 }, { "epoch": 1.7579863186557747, "grad_norm": 227.72962951660156, "learning_rate": 7.58223503327119e-08, "loss": 15.0938, "step": 26470 }, { "epoch": 1.7580527329481304, "grad_norm": 164.88719177246094, "learning_rate": 7.578127481861274e-08, "loss": 16.2656, "step": 26471 }, { "epoch": 1.7581191472404862, "grad_norm": 259.55078125, "learning_rate": 7.574020999534259e-08, "loss": 15.6406, "step": 26472 }, { "epoch": 1.758185561532842, "grad_norm": 351.718994140625, "learning_rate": 7.569915586337716e-08, "loss": 16.2969, "step": 26473 }, { "epoch": 1.7582519758251975, "grad_norm": 738.551025390625, "learning_rate": 7.565811242319064e-08, "loss": 14.5938, "step": 26474 }, { "epoch": 1.7583183901175534, "grad_norm": 112.75862884521484, "learning_rate": 7.561707967525822e-08, "loss": 15.5, "step": 26475 }, { "epoch": 1.7583848044099089, "grad_norm": 256.17718505859375, "learning_rate": 7.557605762005425e-08, "loss": 19.8906, "step": 26476 }, { "epoch": 1.7584512187022647, "grad_norm": 250.9843292236328, "learning_rate": 7.553504625805351e-08, "loss": 12.875, "step": 26477 }, { "epoch": 1.7585176329946206, "grad_norm": 116.81549072265625, "learning_rate": 7.549404558973016e-08, "loss": 13.8438, "step": 26478 }, { "epoch": 1.758584047286976, "grad_norm": 476.1893615722656, "learning_rate": 7.545305561555881e-08, "loss": 14.2031, "step": 26479 }, { "epoch": 1.758650461579332, "grad_norm": 266.5490417480469, "learning_rate": 7.54120763360131e-08, "loss": 9.4609, "step": 26480 }, { "epoch": 1.7587168758716876, "grad_norm": 108.44517517089844, "learning_rate": 7.537110775156763e-08, "loss": 13.6953, "step": 26481 }, { "epoch": 1.7587832901640432, "grad_norm": 335.0329284667969, "learning_rate": 7.533014986269582e-08, "loss": 11.7188, "step": 26482 }, { "epoch": 1.758849704456399, "grad_norm": 218.98394775390625, "learning_rate": 7.52892026698717e-08, "loss": 9.6875, "step": 26483 }, { "epoch": 1.7589161187487548, "grad_norm": 124.67513275146484, "learning_rate": 7.524826617356883e-08, "loss": 14.25, "step": 26484 }, { "epoch": 1.7589825330411104, "grad_norm": 645.4005737304688, "learning_rate": 7.52073403742608e-08, "loss": 10.4219, "step": 26485 }, { "epoch": 1.7590489473334663, "grad_norm": 248.21458435058594, "learning_rate": 7.51664252724209e-08, "loss": 24.5312, "step": 26486 }, { "epoch": 1.7591153616258217, "grad_norm": 198.8101348876953, "learning_rate": 7.512552086852276e-08, "loss": 15.6562, "step": 26487 }, { "epoch": 1.7591817759181776, "grad_norm": 114.53759002685547, "learning_rate": 7.508462716303898e-08, "loss": 13.9062, "step": 26488 }, { "epoch": 1.7592481902105335, "grad_norm": 313.31268310546875, "learning_rate": 7.504374415644322e-08, "loss": 19.375, "step": 26489 }, { "epoch": 1.759314604502889, "grad_norm": 142.4486846923828, "learning_rate": 7.500287184920783e-08, "loss": 15.0469, "step": 26490 }, { "epoch": 1.7593810187952448, "grad_norm": 366.0886535644531, "learning_rate": 7.496201024180582e-08, "loss": 14.1562, "step": 26491 }, { "epoch": 1.7594474330876004, "grad_norm": 219.75692749023438, "learning_rate": 7.492115933470988e-08, "loss": 13.9375, "step": 26492 }, { "epoch": 1.759513847379956, "grad_norm": 329.13385009765625, "learning_rate": 7.488031912839276e-08, "loss": 16.2109, "step": 26493 }, { "epoch": 1.759580261672312, "grad_norm": 176.32669067382812, "learning_rate": 7.483948962332632e-08, "loss": 12.25, "step": 26494 }, { "epoch": 1.7596466759646676, "grad_norm": 266.5724182128906, "learning_rate": 7.479867081998348e-08, "loss": 14.0156, "step": 26495 }, { "epoch": 1.7597130902570233, "grad_norm": 167.6455078125, "learning_rate": 7.4757862718836e-08, "loss": 15.5469, "step": 26496 }, { "epoch": 1.7597795045493791, "grad_norm": 203.0379180908203, "learning_rate": 7.471706532035604e-08, "loss": 16.7969, "step": 26497 }, { "epoch": 1.7598459188417346, "grad_norm": 127.64952087402344, "learning_rate": 7.467627862501546e-08, "loss": 14.1719, "step": 26498 }, { "epoch": 1.7599123331340905, "grad_norm": 108.09134674072266, "learning_rate": 7.46355026332861e-08, "loss": 15.7344, "step": 26499 }, { "epoch": 1.7599787474264463, "grad_norm": 99.63542175292969, "learning_rate": 7.459473734563971e-08, "loss": 10.7344, "step": 26500 }, { "epoch": 1.7600451617188018, "grad_norm": 453.6127014160156, "learning_rate": 7.455398276254788e-08, "loss": 21.5469, "step": 26501 }, { "epoch": 1.7601115760111576, "grad_norm": 138.69677734375, "learning_rate": 7.451323888448169e-08, "loss": 17.0312, "step": 26502 }, { "epoch": 1.7601779903035133, "grad_norm": 298.0712890625, "learning_rate": 7.447250571191299e-08, "loss": 14.8438, "step": 26503 }, { "epoch": 1.760244404595869, "grad_norm": 130.68032836914062, "learning_rate": 7.443178324531241e-08, "loss": 16.0625, "step": 26504 }, { "epoch": 1.7603108188882248, "grad_norm": 176.19442749023438, "learning_rate": 7.439107148515133e-08, "loss": 20.4062, "step": 26505 }, { "epoch": 1.7603772331805805, "grad_norm": 159.12750244140625, "learning_rate": 7.435037043190062e-08, "loss": 16.0781, "step": 26506 }, { "epoch": 1.7604436474729361, "grad_norm": 148.8953857421875, "learning_rate": 7.4309680086031e-08, "loss": 15.0, "step": 26507 }, { "epoch": 1.760510061765292, "grad_norm": 205.33987426757812, "learning_rate": 7.426900044801321e-08, "loss": 13.3906, "step": 26508 }, { "epoch": 1.7605764760576474, "grad_norm": 165.5389862060547, "learning_rate": 7.422833151831786e-08, "loss": 18.9688, "step": 26509 }, { "epoch": 1.7606428903500033, "grad_norm": 890.61962890625, "learning_rate": 7.418767329741538e-08, "loss": 15.2812, "step": 26510 }, { "epoch": 1.7607093046423592, "grad_norm": 231.24562072753906, "learning_rate": 7.414702578577592e-08, "loss": 24.3594, "step": 26511 }, { "epoch": 1.7607757189347146, "grad_norm": 331.4763488769531, "learning_rate": 7.410638898387012e-08, "loss": 18.5312, "step": 26512 }, { "epoch": 1.7608421332270705, "grad_norm": 244.81814575195312, "learning_rate": 7.406576289216737e-08, "loss": 17.2031, "step": 26513 }, { "epoch": 1.7609085475194262, "grad_norm": 129.67495727539062, "learning_rate": 7.402514751113808e-08, "loss": 13.8125, "step": 26514 }, { "epoch": 1.7609749618117818, "grad_norm": 362.4737548828125, "learning_rate": 7.398454284125188e-08, "loss": 18.3438, "step": 26515 }, { "epoch": 1.7610413761041377, "grad_norm": 279.8298034667969, "learning_rate": 7.39439488829785e-08, "loss": 18.9531, "step": 26516 }, { "epoch": 1.7611077903964933, "grad_norm": 209.0885009765625, "learning_rate": 7.390336563678756e-08, "loss": 15.8594, "step": 26517 }, { "epoch": 1.761174204688849, "grad_norm": 178.96214294433594, "learning_rate": 7.386279310314858e-08, "loss": 16.3906, "step": 26518 }, { "epoch": 1.7612406189812049, "grad_norm": 478.18707275390625, "learning_rate": 7.382223128253051e-08, "loss": 18.0781, "step": 26519 }, { "epoch": 1.7613070332735603, "grad_norm": 206.61805725097656, "learning_rate": 7.378168017540309e-08, "loss": 18.75, "step": 26520 }, { "epoch": 1.7613734475659162, "grad_norm": 156.54331970214844, "learning_rate": 7.374113978223495e-08, "loss": 15.125, "step": 26521 }, { "epoch": 1.761439861858272, "grad_norm": 118.45928192138672, "learning_rate": 7.370061010349527e-08, "loss": 11.6328, "step": 26522 }, { "epoch": 1.7615062761506275, "grad_norm": 208.23324584960938, "learning_rate": 7.366009113965266e-08, "loss": 13.9531, "step": 26523 }, { "epoch": 1.7615726904429834, "grad_norm": 285.4051513671875, "learning_rate": 7.36195828911761e-08, "loss": 18.2812, "step": 26524 }, { "epoch": 1.761639104735339, "grad_norm": 253.87400817871094, "learning_rate": 7.357908535853386e-08, "loss": 14.8594, "step": 26525 }, { "epoch": 1.7617055190276947, "grad_norm": 730.6211547851562, "learning_rate": 7.35385985421948e-08, "loss": 21.0, "step": 26526 }, { "epoch": 1.7617719333200506, "grad_norm": 106.82018280029297, "learning_rate": 7.349812244262676e-08, "loss": 12.7969, "step": 26527 }, { "epoch": 1.7618383476124062, "grad_norm": 275.96832275390625, "learning_rate": 7.345765706029838e-08, "loss": 15.3828, "step": 26528 }, { "epoch": 1.7619047619047619, "grad_norm": 130.8168487548828, "learning_rate": 7.34172023956775e-08, "loss": 13.9219, "step": 26529 }, { "epoch": 1.7619711761971177, "grad_norm": 114.91158294677734, "learning_rate": 7.337675844923196e-08, "loss": 13.2812, "step": 26530 }, { "epoch": 1.7620375904894732, "grad_norm": 267.0753479003906, "learning_rate": 7.333632522142997e-08, "loss": 15.5938, "step": 26531 }, { "epoch": 1.762104004781829, "grad_norm": 488.2171936035156, "learning_rate": 7.3295902712739e-08, "loss": 20.6719, "step": 26532 }, { "epoch": 1.762170419074185, "grad_norm": 197.7195281982422, "learning_rate": 7.325549092362648e-08, "loss": 13.875, "step": 26533 }, { "epoch": 1.7622368333665404, "grad_norm": 208.60462951660156, "learning_rate": 7.321508985456027e-08, "loss": 16.0312, "step": 26534 }, { "epoch": 1.7623032476588962, "grad_norm": 601.0870361328125, "learning_rate": 7.317469950600741e-08, "loss": 27.8125, "step": 26535 }, { "epoch": 1.7623696619512519, "grad_norm": 215.58973693847656, "learning_rate": 7.31343198784351e-08, "loss": 16.2344, "step": 26536 }, { "epoch": 1.7624360762436075, "grad_norm": 268.1944580078125, "learning_rate": 7.309395097231052e-08, "loss": 14.75, "step": 26537 }, { "epoch": 1.7625024905359634, "grad_norm": 237.09288024902344, "learning_rate": 7.305359278810075e-08, "loss": 9.7656, "step": 26538 }, { "epoch": 1.762568904828319, "grad_norm": 130.39657592773438, "learning_rate": 7.30132453262724e-08, "loss": 14.0312, "step": 26539 }, { "epoch": 1.7626353191206747, "grad_norm": 114.6617202758789, "learning_rate": 7.297290858729244e-08, "loss": 19.7656, "step": 26540 }, { "epoch": 1.7627017334130306, "grad_norm": 246.4390869140625, "learning_rate": 7.293258257162704e-08, "loss": 13.25, "step": 26541 }, { "epoch": 1.762768147705386, "grad_norm": 122.97862243652344, "learning_rate": 7.289226727974329e-08, "loss": 14.7812, "step": 26542 }, { "epoch": 1.762834561997742, "grad_norm": 233.52239990234375, "learning_rate": 7.285196271210703e-08, "loss": 16.7969, "step": 26543 }, { "epoch": 1.7629009762900978, "grad_norm": 179.01319885253906, "learning_rate": 7.281166886918466e-08, "loss": 17.0625, "step": 26544 }, { "epoch": 1.7629673905824532, "grad_norm": 206.86642456054688, "learning_rate": 7.277138575144215e-08, "loss": 21.3594, "step": 26545 }, { "epoch": 1.763033804874809, "grad_norm": 146.61276245117188, "learning_rate": 7.273111335934567e-08, "loss": 16.3281, "step": 26546 }, { "epoch": 1.7631002191671648, "grad_norm": 274.4325256347656, "learning_rate": 7.269085169336098e-08, "loss": 15.3594, "step": 26547 }, { "epoch": 1.7631666334595204, "grad_norm": 260.4654541015625, "learning_rate": 7.265060075395379e-08, "loss": 27.1875, "step": 26548 }, { "epoch": 1.7632330477518763, "grad_norm": 235.6290740966797, "learning_rate": 7.261036054158965e-08, "loss": 16.8906, "step": 26549 }, { "epoch": 1.763299462044232, "grad_norm": 226.43804931640625, "learning_rate": 7.257013105673415e-08, "loss": 15.2031, "step": 26550 }, { "epoch": 1.7633658763365876, "grad_norm": 224.25051879882812, "learning_rate": 7.252991229985272e-08, "loss": 18.0078, "step": 26551 }, { "epoch": 1.7634322906289435, "grad_norm": 177.70977783203125, "learning_rate": 7.248970427141021e-08, "loss": 14.5938, "step": 26552 }, { "epoch": 1.763498704921299, "grad_norm": 187.11605834960938, "learning_rate": 7.244950697187225e-08, "loss": 13.7031, "step": 26553 }, { "epoch": 1.7635651192136548, "grad_norm": 233.97006225585938, "learning_rate": 7.240932040170333e-08, "loss": 14.9219, "step": 26554 }, { "epoch": 1.7636315335060107, "grad_norm": 316.640380859375, "learning_rate": 7.236914456136856e-08, "loss": 13.5156, "step": 26555 }, { "epoch": 1.763697947798366, "grad_norm": 211.88888549804688, "learning_rate": 7.232897945133253e-08, "loss": 19.5625, "step": 26556 }, { "epoch": 1.763764362090722, "grad_norm": 211.0606231689453, "learning_rate": 7.228882507206025e-08, "loss": 17.6562, "step": 26557 }, { "epoch": 1.7638307763830776, "grad_norm": 137.3230438232422, "learning_rate": 7.224868142401541e-08, "loss": 11.8281, "step": 26558 }, { "epoch": 1.7638971906754333, "grad_norm": 198.58554077148438, "learning_rate": 7.220854850766333e-08, "loss": 14.2188, "step": 26559 }, { "epoch": 1.7639636049677891, "grad_norm": 103.71370697021484, "learning_rate": 7.216842632346753e-08, "loss": 10.2656, "step": 26560 }, { "epoch": 1.7640300192601448, "grad_norm": 176.08961486816406, "learning_rate": 7.21283148718923e-08, "loss": 23.0938, "step": 26561 }, { "epoch": 1.7640964335525005, "grad_norm": 235.09735107421875, "learning_rate": 7.208821415340182e-08, "loss": 17.1719, "step": 26562 }, { "epoch": 1.7641628478448563, "grad_norm": 510.5304260253906, "learning_rate": 7.204812416845974e-08, "loss": 17.5, "step": 26563 }, { "epoch": 1.7642292621372118, "grad_norm": 236.19320678710938, "learning_rate": 7.200804491752988e-08, "loss": 18.4375, "step": 26564 }, { "epoch": 1.7642956764295676, "grad_norm": 110.90215301513672, "learning_rate": 7.1967976401076e-08, "loss": 12.8125, "step": 26565 }, { "epoch": 1.7643620907219235, "grad_norm": 363.99371337890625, "learning_rate": 7.192791861956105e-08, "loss": 18.0703, "step": 26566 }, { "epoch": 1.764428505014279, "grad_norm": 269.90582275390625, "learning_rate": 7.188787157344911e-08, "loss": 15.5938, "step": 26567 }, { "epoch": 1.7644949193066348, "grad_norm": 120.48639678955078, "learning_rate": 7.184783526320304e-08, "loss": 14.0469, "step": 26568 }, { "epoch": 1.7645613335989905, "grad_norm": 269.7485656738281, "learning_rate": 7.18078096892859e-08, "loss": 19.5781, "step": 26569 }, { "epoch": 1.7646277478913461, "grad_norm": 241.0064239501953, "learning_rate": 7.176779485216089e-08, "loss": 17.1562, "step": 26570 }, { "epoch": 1.764694162183702, "grad_norm": 174.759521484375, "learning_rate": 7.172779075229074e-08, "loss": 16.9844, "step": 26571 }, { "epoch": 1.7647605764760577, "grad_norm": 455.46075439453125, "learning_rate": 7.168779739013819e-08, "loss": 23.7812, "step": 26572 }, { "epoch": 1.7648269907684133, "grad_norm": 150.91363525390625, "learning_rate": 7.16478147661661e-08, "loss": 20.8906, "step": 26573 }, { "epoch": 1.7648934050607692, "grad_norm": 181.1534881591797, "learning_rate": 7.160784288083665e-08, "loss": 13.7812, "step": 26574 }, { "epoch": 1.7649598193531246, "grad_norm": 203.2832489013672, "learning_rate": 7.156788173461226e-08, "loss": 16.7656, "step": 26575 }, { "epoch": 1.7650262336454805, "grad_norm": 111.4338150024414, "learning_rate": 7.152793132795532e-08, "loss": 13.0938, "step": 26576 }, { "epoch": 1.7650926479378364, "grad_norm": 163.7503204345703, "learning_rate": 7.14879916613278e-08, "loss": 13.9844, "step": 26577 }, { "epoch": 1.7651590622301918, "grad_norm": 263.2359924316406, "learning_rate": 7.144806273519177e-08, "loss": 16.5781, "step": 26578 }, { "epoch": 1.7652254765225477, "grad_norm": 684.9260864257812, "learning_rate": 7.140814455000943e-08, "loss": 27.375, "step": 26579 }, { "epoch": 1.7652918908149033, "grad_norm": 162.3395538330078, "learning_rate": 7.136823710624173e-08, "loss": 19.2188, "step": 26580 }, { "epoch": 1.765358305107259, "grad_norm": 303.666259765625, "learning_rate": 7.13283404043512e-08, "loss": 23.9688, "step": 26581 }, { "epoch": 1.7654247193996149, "grad_norm": 1549.60546875, "learning_rate": 7.12884544447987e-08, "loss": 13.75, "step": 26582 }, { "epoch": 1.7654911336919705, "grad_norm": 459.0602722167969, "learning_rate": 7.124857922804573e-08, "loss": 20.3125, "step": 26583 }, { "epoch": 1.7655575479843262, "grad_norm": 153.22853088378906, "learning_rate": 7.120871475455392e-08, "loss": 13.1406, "step": 26584 }, { "epoch": 1.765623962276682, "grad_norm": 199.34593200683594, "learning_rate": 7.116886102478403e-08, "loss": 16.9844, "step": 26585 }, { "epoch": 1.7656903765690377, "grad_norm": 502.9111022949219, "learning_rate": 7.112901803919712e-08, "loss": 20.9219, "step": 26586 }, { "epoch": 1.7657567908613934, "grad_norm": 417.6197509765625, "learning_rate": 7.108918579825407e-08, "loss": 19.6094, "step": 26587 }, { "epoch": 1.7658232051537492, "grad_norm": 217.1965789794922, "learning_rate": 7.104936430241582e-08, "loss": 13.7969, "step": 26588 }, { "epoch": 1.7658896194461047, "grad_norm": 240.03016662597656, "learning_rate": 7.100955355214267e-08, "loss": 14.5625, "step": 26589 }, { "epoch": 1.7659560337384606, "grad_norm": 834.3767700195312, "learning_rate": 7.096975354789558e-08, "loss": 18.9375, "step": 26590 }, { "epoch": 1.7660224480308162, "grad_norm": 174.10374450683594, "learning_rate": 7.092996429013442e-08, "loss": 17.4375, "step": 26591 }, { "epoch": 1.7660888623231719, "grad_norm": 178.5448760986328, "learning_rate": 7.089018577931993e-08, "loss": 15.1719, "step": 26592 }, { "epoch": 1.7661552766155277, "grad_norm": 163.10992431640625, "learning_rate": 7.085041801591185e-08, "loss": 15.4844, "step": 26593 }, { "epoch": 1.7662216909078834, "grad_norm": 297.69793701171875, "learning_rate": 7.081066100037036e-08, "loss": 14.6562, "step": 26594 }, { "epoch": 1.766288105200239, "grad_norm": 395.7687683105469, "learning_rate": 7.077091473315533e-08, "loss": 20.0156, "step": 26595 }, { "epoch": 1.766354519492595, "grad_norm": 296.5072937011719, "learning_rate": 7.07311792147267e-08, "loss": 23.8438, "step": 26596 }, { "epoch": 1.7664209337849506, "grad_norm": 193.5880126953125, "learning_rate": 7.06914544455437e-08, "loss": 15.5156, "step": 26597 }, { "epoch": 1.7664873480773062, "grad_norm": 133.4832763671875, "learning_rate": 7.065174042606636e-08, "loss": 12.8984, "step": 26598 }, { "epoch": 1.766553762369662, "grad_norm": 141.7232208251953, "learning_rate": 7.061203715675357e-08, "loss": 14.0547, "step": 26599 }, { "epoch": 1.7666201766620175, "grad_norm": 299.54443359375, "learning_rate": 7.057234463806494e-08, "loss": 17.1094, "step": 26600 }, { "epoch": 1.7666865909543734, "grad_norm": 399.8680725097656, "learning_rate": 7.053266287045934e-08, "loss": 34.8281, "step": 26601 }, { "epoch": 1.766753005246729, "grad_norm": 191.67654418945312, "learning_rate": 7.049299185439606e-08, "loss": 11.4062, "step": 26602 }, { "epoch": 1.7668194195390847, "grad_norm": 127.69969177246094, "learning_rate": 7.045333159033384e-08, "loss": 11.9844, "step": 26603 }, { "epoch": 1.7668858338314406, "grad_norm": 1137.09814453125, "learning_rate": 7.041368207873155e-08, "loss": 14.5156, "step": 26604 }, { "epoch": 1.7669522481237963, "grad_norm": 245.7794952392578, "learning_rate": 7.037404332004748e-08, "loss": 22.6562, "step": 26605 }, { "epoch": 1.767018662416152, "grad_norm": 283.08892822265625, "learning_rate": 7.033441531474083e-08, "loss": 24.0312, "step": 26606 }, { "epoch": 1.7670850767085078, "grad_norm": 146.6776885986328, "learning_rate": 7.029479806326933e-08, "loss": 14.1562, "step": 26607 }, { "epoch": 1.7671514910008634, "grad_norm": 167.44418334960938, "learning_rate": 7.02551915660915e-08, "loss": 20.7188, "step": 26608 }, { "epoch": 1.767217905293219, "grad_norm": 100.97808837890625, "learning_rate": 7.021559582366554e-08, "loss": 19.9062, "step": 26609 }, { "epoch": 1.767284319585575, "grad_norm": 256.986328125, "learning_rate": 7.017601083644942e-08, "loss": 19.2188, "step": 26610 }, { "epoch": 1.7673507338779304, "grad_norm": 173.96409606933594, "learning_rate": 7.01364366049011e-08, "loss": 12.9531, "step": 26611 }, { "epoch": 1.7674171481702863, "grad_norm": 261.11224365234375, "learning_rate": 7.009687312947831e-08, "loss": 10.1484, "step": 26612 }, { "epoch": 1.767483562462642, "grad_norm": 574.078369140625, "learning_rate": 7.00573204106385e-08, "loss": 16.375, "step": 26613 }, { "epoch": 1.7675499767549976, "grad_norm": 160.58755493164062, "learning_rate": 7.001777844883961e-08, "loss": 10.0938, "step": 26614 }, { "epoch": 1.7676163910473535, "grad_norm": 312.8530578613281, "learning_rate": 6.99782472445386e-08, "loss": 15.1406, "step": 26615 }, { "epoch": 1.7676828053397091, "grad_norm": 196.79368591308594, "learning_rate": 6.993872679819301e-08, "loss": 19.4375, "step": 26616 }, { "epoch": 1.7677492196320648, "grad_norm": 1087.7110595703125, "learning_rate": 6.989921711025992e-08, "loss": 16.6484, "step": 26617 }, { "epoch": 1.7678156339244206, "grad_norm": 308.84381103515625, "learning_rate": 6.985971818119651e-08, "loss": 15.2344, "step": 26618 }, { "epoch": 1.7678820482167763, "grad_norm": 198.3328094482422, "learning_rate": 6.98202300114592e-08, "loss": 16.5781, "step": 26619 }, { "epoch": 1.767948462509132, "grad_norm": 157.56398010253906, "learning_rate": 6.978075260150529e-08, "loss": 13.2656, "step": 26620 }, { "epoch": 1.7680148768014878, "grad_norm": 294.9845886230469, "learning_rate": 6.97412859517914e-08, "loss": 19.0938, "step": 26621 }, { "epoch": 1.7680812910938433, "grad_norm": 199.17074584960938, "learning_rate": 6.970183006277352e-08, "loss": 18.8594, "step": 26622 }, { "epoch": 1.7681477053861991, "grad_norm": 202.3933868408203, "learning_rate": 6.966238493490884e-08, "loss": 17.2812, "step": 26623 }, { "epoch": 1.7682141196785548, "grad_norm": 142.38551330566406, "learning_rate": 6.962295056865297e-08, "loss": 15.0312, "step": 26624 }, { "epoch": 1.7682805339709105, "grad_norm": 128.76409912109375, "learning_rate": 6.958352696446235e-08, "loss": 16.4531, "step": 26625 }, { "epoch": 1.7683469482632663, "grad_norm": 213.8857421875, "learning_rate": 6.954411412279293e-08, "loss": 17.7344, "step": 26626 }, { "epoch": 1.768413362555622, "grad_norm": 242.31646728515625, "learning_rate": 6.95047120441008e-08, "loss": 10.8125, "step": 26627 }, { "epoch": 1.7684797768479776, "grad_norm": 236.24961853027344, "learning_rate": 6.946532072884148e-08, "loss": 13.8438, "step": 26628 }, { "epoch": 1.7685461911403335, "grad_norm": 150.41244506835938, "learning_rate": 6.942594017747105e-08, "loss": 13.2969, "step": 26629 }, { "epoch": 1.7686126054326892, "grad_norm": 418.44622802734375, "learning_rate": 6.938657039044427e-08, "loss": 18.6406, "step": 26630 }, { "epoch": 1.7686790197250448, "grad_norm": 199.8572998046875, "learning_rate": 6.934721136821741e-08, "loss": 20.875, "step": 26631 }, { "epoch": 1.7687454340174007, "grad_norm": 446.50634765625, "learning_rate": 6.930786311124525e-08, "loss": 20.4531, "step": 26632 }, { "epoch": 1.7688118483097561, "grad_norm": 140.39373779296875, "learning_rate": 6.926852561998298e-08, "loss": 13.4844, "step": 26633 }, { "epoch": 1.768878262602112, "grad_norm": 347.4007873535156, "learning_rate": 6.922919889488576e-08, "loss": 18.8906, "step": 26634 }, { "epoch": 1.7689446768944677, "grad_norm": 162.7534637451172, "learning_rate": 6.918988293640848e-08, "loss": 18.125, "step": 26635 }, { "epoch": 1.7690110911868233, "grad_norm": 97.85620880126953, "learning_rate": 6.915057774500588e-08, "loss": 17.25, "step": 26636 }, { "epoch": 1.7690775054791792, "grad_norm": 129.4241180419922, "learning_rate": 6.91112833211328e-08, "loss": 11.4844, "step": 26637 }, { "epoch": 1.7691439197715348, "grad_norm": 292.4856262207031, "learning_rate": 6.907199966524335e-08, "loss": 14.4844, "step": 26638 }, { "epoch": 1.7692103340638905, "grad_norm": 176.22669982910156, "learning_rate": 6.903272677779238e-08, "loss": 12.9688, "step": 26639 }, { "epoch": 1.7692767483562464, "grad_norm": 259.6336975097656, "learning_rate": 6.899346465923383e-08, "loss": 17.75, "step": 26640 }, { "epoch": 1.769343162648602, "grad_norm": 272.3783264160156, "learning_rate": 6.895421331002216e-08, "loss": 16.0312, "step": 26641 }, { "epoch": 1.7694095769409577, "grad_norm": 153.39373779296875, "learning_rate": 6.89149727306112e-08, "loss": 11.1641, "step": 26642 }, { "epoch": 1.7694759912333136, "grad_norm": 323.65838623046875, "learning_rate": 6.887574292145515e-08, "loss": 16.9609, "step": 26643 }, { "epoch": 1.769542405525669, "grad_norm": 152.6136474609375, "learning_rate": 6.883652388300731e-08, "loss": 13.2188, "step": 26644 }, { "epoch": 1.7696088198180249, "grad_norm": 161.96681213378906, "learning_rate": 6.879731561572189e-08, "loss": 14.9844, "step": 26645 }, { "epoch": 1.7696752341103805, "grad_norm": 802.7642211914062, "learning_rate": 6.875811812005194e-08, "loss": 16.625, "step": 26646 }, { "epoch": 1.7697416484027362, "grad_norm": 139.35147094726562, "learning_rate": 6.871893139645113e-08, "loss": 13.2031, "step": 26647 }, { "epoch": 1.769808062695092, "grad_norm": 174.8388671875, "learning_rate": 6.867975544537274e-08, "loss": 12.9219, "step": 26648 }, { "epoch": 1.7698744769874477, "grad_norm": 141.83717346191406, "learning_rate": 6.864059026726998e-08, "loss": 15.2031, "step": 26649 }, { "epoch": 1.7699408912798034, "grad_norm": 198.99742126464844, "learning_rate": 6.86014358625957e-08, "loss": 20.9844, "step": 26650 }, { "epoch": 1.7700073055721592, "grad_norm": 194.11605834960938, "learning_rate": 6.85622922318031e-08, "loss": 12.6875, "step": 26651 }, { "epoch": 1.770073719864515, "grad_norm": 390.548095703125, "learning_rate": 6.852315937534459e-08, "loss": 17.9531, "step": 26652 }, { "epoch": 1.7701401341568705, "grad_norm": 208.1240234375, "learning_rate": 6.848403729367325e-08, "loss": 16.4688, "step": 26653 }, { "epoch": 1.7702065484492264, "grad_norm": 686.9681396484375, "learning_rate": 6.844492598724139e-08, "loss": 11.8203, "step": 26654 }, { "epoch": 1.7702729627415819, "grad_norm": 458.5649719238281, "learning_rate": 6.840582545650131e-08, "loss": 14.0625, "step": 26655 }, { "epoch": 1.7703393770339377, "grad_norm": 348.4136657714844, "learning_rate": 6.836673570190555e-08, "loss": 17.125, "step": 26656 }, { "epoch": 1.7704057913262934, "grad_norm": 637.1329345703125, "learning_rate": 6.832765672390617e-08, "loss": 16.3281, "step": 26657 }, { "epoch": 1.770472205618649, "grad_norm": 175.41452026367188, "learning_rate": 6.828858852295516e-08, "loss": 18.7188, "step": 26658 }, { "epoch": 1.770538619911005, "grad_norm": 160.28903198242188, "learning_rate": 6.82495310995046e-08, "loss": 12.4531, "step": 26659 }, { "epoch": 1.7706050342033606, "grad_norm": 157.57269287109375, "learning_rate": 6.821048445400635e-08, "loss": 15.4219, "step": 26660 }, { "epoch": 1.7706714484957162, "grad_norm": 146.11538696289062, "learning_rate": 6.817144858691148e-08, "loss": 16.3438, "step": 26661 }, { "epoch": 1.770737862788072, "grad_norm": 402.61529541015625, "learning_rate": 6.813242349867243e-08, "loss": 14.2188, "step": 26662 }, { "epoch": 1.7708042770804278, "grad_norm": 265.7395324707031, "learning_rate": 6.809340918973993e-08, "loss": 18.4375, "step": 26663 }, { "epoch": 1.7708706913727834, "grad_norm": 137.45269775390625, "learning_rate": 6.805440566056553e-08, "loss": 13.3906, "step": 26664 }, { "epoch": 1.7709371056651393, "grad_norm": 127.91893005371094, "learning_rate": 6.801541291160029e-08, "loss": 11.7969, "step": 26665 }, { "epoch": 1.7710035199574947, "grad_norm": 146.7561798095703, "learning_rate": 6.797643094329542e-08, "loss": 12.3281, "step": 26666 }, { "epoch": 1.7710699342498506, "grad_norm": 321.7397155761719, "learning_rate": 6.793745975610177e-08, "loss": 20.4062, "step": 26667 }, { "epoch": 1.7711363485422063, "grad_norm": 369.3148193359375, "learning_rate": 6.78984993504702e-08, "loss": 15.9219, "step": 26668 }, { "epoch": 1.771202762834562, "grad_norm": 388.7696228027344, "learning_rate": 6.785954972685103e-08, "loss": 19.75, "step": 26669 }, { "epoch": 1.7712691771269178, "grad_norm": 133.52972412109375, "learning_rate": 6.782061088569546e-08, "loss": 15.0625, "step": 26670 }, { "epoch": 1.7713355914192734, "grad_norm": 223.005126953125, "learning_rate": 6.778168282745333e-08, "loss": 19.0, "step": 26671 }, { "epoch": 1.771402005711629, "grad_norm": 254.12786865234375, "learning_rate": 6.774276555257519e-08, "loss": 17.0, "step": 26672 }, { "epoch": 1.771468420003985, "grad_norm": 109.4454345703125, "learning_rate": 6.770385906151111e-08, "loss": 20.5312, "step": 26673 }, { "epoch": 1.7715348342963406, "grad_norm": 585.9320068359375, "learning_rate": 6.766496335471117e-08, "loss": 14.5469, "step": 26674 }, { "epoch": 1.7716012485886963, "grad_norm": 205.6728057861328, "learning_rate": 6.762607843262535e-08, "loss": 30.0312, "step": 26675 }, { "epoch": 1.7716676628810522, "grad_norm": 186.7869110107422, "learning_rate": 6.758720429570364e-08, "loss": 15.2188, "step": 26676 }, { "epoch": 1.7717340771734076, "grad_norm": 259.92376708984375, "learning_rate": 6.75483409443951e-08, "loss": 18.4531, "step": 26677 }, { "epoch": 1.7718004914657635, "grad_norm": 186.64523315429688, "learning_rate": 6.750948837915005e-08, "loss": 15.7812, "step": 26678 }, { "epoch": 1.7718669057581191, "grad_norm": 202.38734436035156, "learning_rate": 6.747064660041734e-08, "loss": 16.5312, "step": 26679 }, { "epoch": 1.7719333200504748, "grad_norm": 423.695556640625, "learning_rate": 6.743181560864641e-08, "loss": 17.2031, "step": 26680 }, { "epoch": 1.7719997343428306, "grad_norm": 149.17466735839844, "learning_rate": 6.739299540428656e-08, "loss": 12.2031, "step": 26681 }, { "epoch": 1.7720661486351863, "grad_norm": 197.87118530273438, "learning_rate": 6.735418598778697e-08, "loss": 13.6875, "step": 26682 }, { "epoch": 1.772132562927542, "grad_norm": 105.11158752441406, "learning_rate": 6.731538735959597e-08, "loss": 14.4688, "step": 26683 }, { "epoch": 1.7721989772198978, "grad_norm": 217.5417022705078, "learning_rate": 6.727659952016307e-08, "loss": 17.3594, "step": 26684 }, { "epoch": 1.7722653915122535, "grad_norm": 135.63916015625, "learning_rate": 6.723782246993647e-08, "loss": 13.6562, "step": 26685 }, { "epoch": 1.7723318058046091, "grad_norm": 1267.850830078125, "learning_rate": 6.719905620936484e-08, "loss": 19.6094, "step": 26686 }, { "epoch": 1.772398220096965, "grad_norm": 201.87486267089844, "learning_rate": 6.716030073889667e-08, "loss": 14.625, "step": 26687 }, { "epoch": 1.7724646343893204, "grad_norm": 166.81838989257812, "learning_rate": 6.71215560589803e-08, "loss": 14.5469, "step": 26688 }, { "epoch": 1.7725310486816763, "grad_norm": 365.51519775390625, "learning_rate": 6.708282217006378e-08, "loss": 16.1719, "step": 26689 }, { "epoch": 1.772597462974032, "grad_norm": 1152.645263671875, "learning_rate": 6.704409907259545e-08, "loss": 26.5625, "step": 26690 }, { "epoch": 1.7726638772663876, "grad_norm": 152.3605194091797, "learning_rate": 6.70053867670226e-08, "loss": 14.6875, "step": 26691 }, { "epoch": 1.7727302915587435, "grad_norm": 308.02471923828125, "learning_rate": 6.696668525379378e-08, "loss": 14.1094, "step": 26692 }, { "epoch": 1.7727967058510992, "grad_norm": 186.87461853027344, "learning_rate": 6.692799453335618e-08, "loss": 14.4766, "step": 26693 }, { "epoch": 1.7728631201434548, "grad_norm": 245.60934448242188, "learning_rate": 6.688931460615732e-08, "loss": 11.3281, "step": 26694 }, { "epoch": 1.7729295344358107, "grad_norm": 352.2583312988281, "learning_rate": 6.685064547264518e-08, "loss": 20.0625, "step": 26695 }, { "epoch": 1.7729959487281663, "grad_norm": 157.13560485839844, "learning_rate": 6.681198713326652e-08, "loss": 18.0, "step": 26696 }, { "epoch": 1.773062363020522, "grad_norm": 413.87335205078125, "learning_rate": 6.677333958846865e-08, "loss": 20.9062, "step": 26697 }, { "epoch": 1.7731287773128779, "grad_norm": 210.63719177246094, "learning_rate": 6.673470283869865e-08, "loss": 21.0469, "step": 26698 }, { "epoch": 1.7731951916052333, "grad_norm": 178.58213806152344, "learning_rate": 6.669607688440349e-08, "loss": 18.5156, "step": 26699 }, { "epoch": 1.7732616058975892, "grad_norm": 212.20191955566406, "learning_rate": 6.665746172602993e-08, "loss": 11.4766, "step": 26700 }, { "epoch": 1.7733280201899448, "grad_norm": 177.286376953125, "learning_rate": 6.661885736402484e-08, "loss": 17.1562, "step": 26701 }, { "epoch": 1.7733944344823005, "grad_norm": 166.6929168701172, "learning_rate": 6.658026379883441e-08, "loss": 13.2734, "step": 26702 }, { "epoch": 1.7734608487746564, "grad_norm": 118.62730407714844, "learning_rate": 6.654168103090518e-08, "loss": 14.125, "step": 26703 }, { "epoch": 1.773527263067012, "grad_norm": 559.0181884765625, "learning_rate": 6.650310906068357e-08, "loss": 24.4531, "step": 26704 }, { "epoch": 1.7735936773593677, "grad_norm": 561.4877319335938, "learning_rate": 6.646454788861577e-08, "loss": 13.5781, "step": 26705 }, { "epoch": 1.7736600916517236, "grad_norm": 263.70562744140625, "learning_rate": 6.642599751514777e-08, "loss": 13.4688, "step": 26706 }, { "epoch": 1.7737265059440792, "grad_norm": 1038.7763671875, "learning_rate": 6.638745794072554e-08, "loss": 17.6875, "step": 26707 }, { "epoch": 1.7737929202364349, "grad_norm": 115.54944610595703, "learning_rate": 6.63489291657946e-08, "loss": 11.1172, "step": 26708 }, { "epoch": 1.7738593345287907, "grad_norm": 286.028564453125, "learning_rate": 6.631041119080128e-08, "loss": 21.2969, "step": 26709 }, { "epoch": 1.7739257488211462, "grad_norm": 217.2678680419922, "learning_rate": 6.627190401619042e-08, "loss": 15.4219, "step": 26710 }, { "epoch": 1.773992163113502, "grad_norm": 222.36416625976562, "learning_rate": 6.623340764240782e-08, "loss": 18.6406, "step": 26711 }, { "epoch": 1.7740585774058577, "grad_norm": 117.89707946777344, "learning_rate": 6.619492206989873e-08, "loss": 13.25, "step": 26712 }, { "epoch": 1.7741249916982134, "grad_norm": 152.64608764648438, "learning_rate": 6.615644729910829e-08, "loss": 15.0781, "step": 26713 }, { "epoch": 1.7741914059905692, "grad_norm": 252.0772705078125, "learning_rate": 6.611798333048158e-08, "loss": 17.0781, "step": 26714 }, { "epoch": 1.774257820282925, "grad_norm": 131.90380859375, "learning_rate": 6.607953016446366e-08, "loss": 13.1406, "step": 26715 }, { "epoch": 1.7743242345752805, "grad_norm": 197.56028747558594, "learning_rate": 6.604108780149897e-08, "loss": 16.3281, "step": 26716 }, { "epoch": 1.7743906488676364, "grad_norm": 178.8931427001953, "learning_rate": 6.600265624203271e-08, "loss": 12.375, "step": 26717 }, { "epoch": 1.774457063159992, "grad_norm": 324.32489013671875, "learning_rate": 6.596423548650898e-08, "loss": 15.2422, "step": 26718 }, { "epoch": 1.7745234774523477, "grad_norm": 207.18959045410156, "learning_rate": 6.592582553537241e-08, "loss": 16.7344, "step": 26719 }, { "epoch": 1.7745898917447036, "grad_norm": 275.03045654296875, "learning_rate": 6.588742638906719e-08, "loss": 24.2031, "step": 26720 }, { "epoch": 1.774656306037059, "grad_norm": 1094.787353515625, "learning_rate": 6.584903804803787e-08, "loss": 24.4844, "step": 26721 }, { "epoch": 1.774722720329415, "grad_norm": 106.11074829101562, "learning_rate": 6.581066051272776e-08, "loss": 13.4375, "step": 26722 }, { "epoch": 1.7747891346217706, "grad_norm": 359.2059020996094, "learning_rate": 6.577229378358162e-08, "loss": 15.2031, "step": 26723 }, { "epoch": 1.7748555489141262, "grad_norm": 202.67408752441406, "learning_rate": 6.573393786104275e-08, "loss": 15.4062, "step": 26724 }, { "epoch": 1.774921963206482, "grad_norm": 202.75030517578125, "learning_rate": 6.569559274555492e-08, "loss": 16.4219, "step": 26725 }, { "epoch": 1.7749883774988378, "grad_norm": 226.7096710205078, "learning_rate": 6.565725843756176e-08, "loss": 21.25, "step": 26726 }, { "epoch": 1.7750547917911934, "grad_norm": 258.8410339355469, "learning_rate": 6.561893493750669e-08, "loss": 15.1406, "step": 26727 }, { "epoch": 1.7751212060835493, "grad_norm": 196.91928100585938, "learning_rate": 6.558062224583294e-08, "loss": 20.75, "step": 26728 }, { "epoch": 1.775187620375905, "grad_norm": 173.6639404296875, "learning_rate": 6.55423203629839e-08, "loss": 20.0625, "step": 26729 }, { "epoch": 1.7752540346682606, "grad_norm": 459.9050598144531, "learning_rate": 6.550402928940214e-08, "loss": 14.9062, "step": 26730 }, { "epoch": 1.7753204489606165, "grad_norm": 181.4366912841797, "learning_rate": 6.546574902553103e-08, "loss": 18.8125, "step": 26731 }, { "epoch": 1.775386863252972, "grad_norm": 370.8844299316406, "learning_rate": 6.542747957181339e-08, "loss": 17.5312, "step": 26732 }, { "epoch": 1.7754532775453278, "grad_norm": 395.11212158203125, "learning_rate": 6.538922092869148e-08, "loss": 14.5469, "step": 26733 }, { "epoch": 1.7755196918376834, "grad_norm": 119.80274200439453, "learning_rate": 6.535097309660854e-08, "loss": 16.5156, "step": 26734 }, { "epoch": 1.775586106130039, "grad_norm": 156.89645385742188, "learning_rate": 6.53127360760063e-08, "loss": 13.4688, "step": 26735 }, { "epoch": 1.775652520422395, "grad_norm": 275.7352294921875, "learning_rate": 6.52745098673274e-08, "loss": 17.2188, "step": 26736 }, { "epoch": 1.7757189347147506, "grad_norm": 120.75643920898438, "learning_rate": 6.523629447101398e-08, "loss": 8.7891, "step": 26737 }, { "epoch": 1.7757853490071063, "grad_norm": 271.47222900390625, "learning_rate": 6.51980898875082e-08, "loss": 17.875, "step": 26738 }, { "epoch": 1.7758517632994621, "grad_norm": 654.1392211914062, "learning_rate": 6.51598961172517e-08, "loss": 13.8594, "step": 26739 }, { "epoch": 1.7759181775918178, "grad_norm": 135.1383056640625, "learning_rate": 6.512171316068671e-08, "loss": 20.3125, "step": 26740 }, { "epoch": 1.7759845918841735, "grad_norm": 319.61395263671875, "learning_rate": 6.50835410182543e-08, "loss": 20.5625, "step": 26741 }, { "epoch": 1.7760510061765293, "grad_norm": 223.74844360351562, "learning_rate": 6.50453796903968e-08, "loss": 21.3125, "step": 26742 }, { "epoch": 1.7761174204688848, "grad_norm": 511.6430969238281, "learning_rate": 6.500722917755497e-08, "loss": 16.9062, "step": 26743 }, { "epoch": 1.7761838347612406, "grad_norm": 162.3533935546875, "learning_rate": 6.496908948017044e-08, "loss": 16.7031, "step": 26744 }, { "epoch": 1.7762502490535963, "grad_norm": 274.3724060058594, "learning_rate": 6.49309605986842e-08, "loss": 15.3438, "step": 26745 }, { "epoch": 1.776316663345952, "grad_norm": 182.81690979003906, "learning_rate": 6.489284253353766e-08, "loss": 14.9609, "step": 26746 }, { "epoch": 1.7763830776383078, "grad_norm": 332.3346252441406, "learning_rate": 6.485473528517116e-08, "loss": 15.9375, "step": 26747 }, { "epoch": 1.7764494919306635, "grad_norm": 230.55755615234375, "learning_rate": 6.481663885402621e-08, "loss": 15.3594, "step": 26748 }, { "epoch": 1.7765159062230191, "grad_norm": 127.32157897949219, "learning_rate": 6.477855324054294e-08, "loss": 15.75, "step": 26749 }, { "epoch": 1.776582320515375, "grad_norm": 517.9224243164062, "learning_rate": 6.474047844516207e-08, "loss": 12.4297, "step": 26750 }, { "epoch": 1.7766487348077307, "grad_norm": 168.1137237548828, "learning_rate": 6.470241446832403e-08, "loss": 14.3516, "step": 26751 }, { "epoch": 1.7767151491000863, "grad_norm": 214.47775268554688, "learning_rate": 6.466436131046916e-08, "loss": 16.9844, "step": 26752 }, { "epoch": 1.7767815633924422, "grad_norm": 447.87481689453125, "learning_rate": 6.462631897203763e-08, "loss": 16.9844, "step": 26753 }, { "epoch": 1.7768479776847976, "grad_norm": 129.43350219726562, "learning_rate": 6.458828745346956e-08, "loss": 13.4219, "step": 26754 }, { "epoch": 1.7769143919771535, "grad_norm": 102.16096496582031, "learning_rate": 6.455026675520447e-08, "loss": 16.2188, "step": 26755 }, { "epoch": 1.7769808062695092, "grad_norm": 143.20855712890625, "learning_rate": 6.451225687768292e-08, "loss": 14.875, "step": 26756 }, { "epoch": 1.7770472205618648, "grad_norm": 527.3674926757812, "learning_rate": 6.447425782134396e-08, "loss": 17.7031, "step": 26757 }, { "epoch": 1.7771136348542207, "grad_norm": 112.01699829101562, "learning_rate": 6.443626958662728e-08, "loss": 19.875, "step": 26758 }, { "epoch": 1.7771800491465763, "grad_norm": 194.49664306640625, "learning_rate": 6.43982921739723e-08, "loss": 11.2969, "step": 26759 }, { "epoch": 1.777246463438932, "grad_norm": 126.82599639892578, "learning_rate": 6.436032558381855e-08, "loss": 13.2188, "step": 26760 }, { "epoch": 1.7773128777312879, "grad_norm": 303.59295654296875, "learning_rate": 6.432236981660488e-08, "loss": 15.625, "step": 26761 }, { "epoch": 1.7773792920236435, "grad_norm": 165.7812042236328, "learning_rate": 6.428442487277074e-08, "loss": 13.3281, "step": 26762 }, { "epoch": 1.7774457063159992, "grad_norm": 117.39046478271484, "learning_rate": 6.424649075275446e-08, "loss": 16.4375, "step": 26763 }, { "epoch": 1.777512120608355, "grad_norm": 338.49346923828125, "learning_rate": 6.420856745699543e-08, "loss": 18.8125, "step": 26764 }, { "epoch": 1.7775785349007105, "grad_norm": 233.79739379882812, "learning_rate": 6.417065498593199e-08, "loss": 19.8906, "step": 26765 }, { "epoch": 1.7776449491930664, "grad_norm": 150.9033203125, "learning_rate": 6.413275334000278e-08, "loss": 16.1562, "step": 26766 }, { "epoch": 1.777711363485422, "grad_norm": 109.12752532958984, "learning_rate": 6.409486251964624e-08, "loss": 14.0938, "step": 26767 }, { "epoch": 1.7777777777777777, "grad_norm": 264.7114562988281, "learning_rate": 6.405698252530067e-08, "loss": 19.0156, "step": 26768 }, { "epoch": 1.7778441920701336, "grad_norm": 496.5395202636719, "learning_rate": 6.401911335740417e-08, "loss": 20.8281, "step": 26769 }, { "epoch": 1.7779106063624892, "grad_norm": 182.92416381835938, "learning_rate": 6.398125501639495e-08, "loss": 12.6562, "step": 26770 }, { "epoch": 1.7779770206548449, "grad_norm": 109.70623779296875, "learning_rate": 6.394340750271088e-08, "loss": 13.3594, "step": 26771 }, { "epoch": 1.7780434349472007, "grad_norm": 632.6480712890625, "learning_rate": 6.39055708167895e-08, "loss": 12.7188, "step": 26772 }, { "epoch": 1.7781098492395564, "grad_norm": 759.8305053710938, "learning_rate": 6.386774495906899e-08, "loss": 26.3438, "step": 26773 }, { "epoch": 1.778176263531912, "grad_norm": 156.88002014160156, "learning_rate": 6.382992992998637e-08, "loss": 28.8594, "step": 26774 }, { "epoch": 1.778242677824268, "grad_norm": 248.076904296875, "learning_rate": 6.379212572997938e-08, "loss": 12.625, "step": 26775 }, { "epoch": 1.7783090921166234, "grad_norm": 217.24935913085938, "learning_rate": 6.375433235948513e-08, "loss": 15.3906, "step": 26776 }, { "epoch": 1.7783755064089792, "grad_norm": 359.9628601074219, "learning_rate": 6.371654981894092e-08, "loss": 12.25, "step": 26777 }, { "epoch": 1.7784419207013349, "grad_norm": 177.68072509765625, "learning_rate": 6.367877810878386e-08, "loss": 13.9375, "step": 26778 }, { "epoch": 1.7785083349936905, "grad_norm": 430.5247497558594, "learning_rate": 6.364101722945081e-08, "loss": 15.7344, "step": 26779 }, { "epoch": 1.7785747492860464, "grad_norm": 534.0552978515625, "learning_rate": 6.360326718137832e-08, "loss": 14.8281, "step": 26780 }, { "epoch": 1.778641163578402, "grad_norm": 117.05340576171875, "learning_rate": 6.356552796500359e-08, "loss": 13.4688, "step": 26781 }, { "epoch": 1.7787075778707577, "grad_norm": 308.2835693359375, "learning_rate": 6.352779958076271e-08, "loss": 14.2656, "step": 26782 }, { "epoch": 1.7787739921631136, "grad_norm": 214.43922424316406, "learning_rate": 6.349008202909212e-08, "loss": 12.6094, "step": 26783 }, { "epoch": 1.7788404064554693, "grad_norm": 122.75159454345703, "learning_rate": 6.345237531042835e-08, "loss": 15.8594, "step": 26784 }, { "epoch": 1.778906820747825, "grad_norm": 379.52716064453125, "learning_rate": 6.341467942520762e-08, "loss": 16.4219, "step": 26785 }, { "epoch": 1.7789732350401808, "grad_norm": 232.46409606933594, "learning_rate": 6.337699437386546e-08, "loss": 16.0, "step": 26786 }, { "epoch": 1.7790396493325362, "grad_norm": 153.8270263671875, "learning_rate": 6.33393201568384e-08, "loss": 14.9609, "step": 26787 }, { "epoch": 1.779106063624892, "grad_norm": 271.3073425292969, "learning_rate": 6.33016567745619e-08, "loss": 14.875, "step": 26788 }, { "epoch": 1.7791724779172478, "grad_norm": 184.62741088867188, "learning_rate": 6.32640042274717e-08, "loss": 15.0469, "step": 26789 }, { "epoch": 1.7792388922096034, "grad_norm": 164.69764709472656, "learning_rate": 6.322636251600333e-08, "loss": 21.3438, "step": 26790 }, { "epoch": 1.7793053065019593, "grad_norm": 185.71923828125, "learning_rate": 6.318873164059224e-08, "loss": 17.3906, "step": 26791 }, { "epoch": 1.779371720794315, "grad_norm": 121.74933624267578, "learning_rate": 6.315111160167363e-08, "loss": 13.5078, "step": 26792 }, { "epoch": 1.7794381350866706, "grad_norm": 228.89700317382812, "learning_rate": 6.311350239968294e-08, "loss": 14.6719, "step": 26793 }, { "epoch": 1.7795045493790265, "grad_norm": 247.21255493164062, "learning_rate": 6.30759040350548e-08, "loss": 12.7969, "step": 26794 }, { "epoch": 1.7795709636713821, "grad_norm": 119.20487976074219, "learning_rate": 6.303831650822455e-08, "loss": 15.1719, "step": 26795 }, { "epoch": 1.7796373779637378, "grad_norm": 402.2705383300781, "learning_rate": 6.300073981962662e-08, "loss": 12.1484, "step": 26796 }, { "epoch": 1.7797037922560937, "grad_norm": 358.6572265625, "learning_rate": 6.296317396969586e-08, "loss": 20.7812, "step": 26797 }, { "epoch": 1.779770206548449, "grad_norm": 324.9524230957031, "learning_rate": 6.292561895886673e-08, "loss": 17.8438, "step": 26798 }, { "epoch": 1.779836620840805, "grad_norm": 132.1038818359375, "learning_rate": 6.288807478757374e-08, "loss": 16.5781, "step": 26799 }, { "epoch": 1.7799030351331606, "grad_norm": 178.14047241210938, "learning_rate": 6.285054145625112e-08, "loss": 22.5156, "step": 26800 }, { "epoch": 1.7799694494255163, "grad_norm": 174.3317108154297, "learning_rate": 6.28130189653332e-08, "loss": 19.3594, "step": 26801 }, { "epoch": 1.7800358637178721, "grad_norm": 117.48812866210938, "learning_rate": 6.277550731525349e-08, "loss": 14.3594, "step": 26802 }, { "epoch": 1.7801022780102278, "grad_norm": 354.2734375, "learning_rate": 6.273800650644667e-08, "loss": 17.4375, "step": 26803 }, { "epoch": 1.7801686923025835, "grad_norm": 294.2231750488281, "learning_rate": 6.270051653934594e-08, "loss": 14.9844, "step": 26804 }, { "epoch": 1.7802351065949393, "grad_norm": 986.740966796875, "learning_rate": 6.266303741438506e-08, "loss": 13.9375, "step": 26805 }, { "epoch": 1.780301520887295, "grad_norm": 1349.7509765625, "learning_rate": 6.26255691319979e-08, "loss": 14.2656, "step": 26806 }, { "epoch": 1.7803679351796506, "grad_norm": 127.4338150024414, "learning_rate": 6.258811169261746e-08, "loss": 15.1719, "step": 26807 }, { "epoch": 1.7804343494720065, "grad_norm": 500.6315002441406, "learning_rate": 6.255066509667727e-08, "loss": 18.0, "step": 26808 }, { "epoch": 1.780500763764362, "grad_norm": 280.3976745605469, "learning_rate": 6.251322934461045e-08, "loss": 17.7969, "step": 26809 }, { "epoch": 1.7805671780567178, "grad_norm": 153.65219116210938, "learning_rate": 6.247580443685008e-08, "loss": 15.0625, "step": 26810 }, { "epoch": 1.7806335923490735, "grad_norm": 94.34908294677734, "learning_rate": 6.243839037382869e-08, "loss": 12.5938, "step": 26811 }, { "epoch": 1.7807000066414291, "grad_norm": 331.2492980957031, "learning_rate": 6.240098715597975e-08, "loss": 15.0, "step": 26812 }, { "epoch": 1.780766420933785, "grad_norm": 107.8221206665039, "learning_rate": 6.236359478373532e-08, "loss": 14.3125, "step": 26813 }, { "epoch": 1.7808328352261407, "grad_norm": 99.83516693115234, "learning_rate": 6.232621325752818e-08, "loss": 12.4219, "step": 26814 }, { "epoch": 1.7808992495184963, "grad_norm": 214.5417022705078, "learning_rate": 6.228884257779077e-08, "loss": 18.9531, "step": 26815 }, { "epoch": 1.7809656638108522, "grad_norm": 405.78961181640625, "learning_rate": 6.225148274495517e-08, "loss": 18.6094, "step": 26816 }, { "epoch": 1.7810320781032078, "grad_norm": 144.6468505859375, "learning_rate": 6.221413375945384e-08, "loss": 14.0, "step": 26817 }, { "epoch": 1.7810984923955635, "grad_norm": 274.6700439453125, "learning_rate": 6.217679562171874e-08, "loss": 26.4375, "step": 26818 }, { "epoch": 1.7811649066879194, "grad_norm": 250.1077880859375, "learning_rate": 6.213946833218131e-08, "loss": 19.375, "step": 26819 }, { "epoch": 1.7812313209802748, "grad_norm": 254.6688690185547, "learning_rate": 6.21021518912741e-08, "loss": 17.7969, "step": 26820 }, { "epoch": 1.7812977352726307, "grad_norm": 146.75633239746094, "learning_rate": 6.20648462994282e-08, "loss": 16.7031, "step": 26821 }, { "epoch": 1.7813641495649863, "grad_norm": 163.5991668701172, "learning_rate": 6.202755155707528e-08, "loss": 14.6406, "step": 26822 }, { "epoch": 1.781430563857342, "grad_norm": 110.36346435546875, "learning_rate": 6.199026766464688e-08, "loss": 8.9219, "step": 26823 }, { "epoch": 1.7814969781496979, "grad_norm": 228.47120666503906, "learning_rate": 6.195299462257409e-08, "loss": 15.2188, "step": 26824 }, { "epoch": 1.7815633924420535, "grad_norm": 166.48382568359375, "learning_rate": 6.191573243128811e-08, "loss": 11.9219, "step": 26825 }, { "epoch": 1.7816298067344092, "grad_norm": 110.57574462890625, "learning_rate": 6.187848109122029e-08, "loss": 12.7656, "step": 26826 }, { "epoch": 1.781696221026765, "grad_norm": 315.6418762207031, "learning_rate": 6.184124060280104e-08, "loss": 15.1172, "step": 26827 }, { "epoch": 1.7817626353191207, "grad_norm": 128.54502868652344, "learning_rate": 6.180401096646126e-08, "loss": 19.5, "step": 26828 }, { "epoch": 1.7818290496114764, "grad_norm": 114.50635528564453, "learning_rate": 6.17667921826318e-08, "loss": 13.9688, "step": 26829 }, { "epoch": 1.7818954639038322, "grad_norm": 110.46731567382812, "learning_rate": 6.172958425174302e-08, "loss": 12.6094, "step": 26830 }, { "epoch": 1.7819618781961877, "grad_norm": 346.9800720214844, "learning_rate": 6.169238717422542e-08, "loss": 14.7188, "step": 26831 }, { "epoch": 1.7820282924885436, "grad_norm": 123.02449798583984, "learning_rate": 6.165520095050925e-08, "loss": 15.2031, "step": 26832 }, { "epoch": 1.7820947067808992, "grad_norm": 176.65318298339844, "learning_rate": 6.161802558102447e-08, "loss": 11.8281, "step": 26833 }, { "epoch": 1.7821611210732549, "grad_norm": 179.15655517578125, "learning_rate": 6.158086106620153e-08, "loss": 18.2031, "step": 26834 }, { "epoch": 1.7822275353656107, "grad_norm": 346.62939453125, "learning_rate": 6.154370740646986e-08, "loss": 13.1875, "step": 26835 }, { "epoch": 1.7822939496579664, "grad_norm": 165.30738830566406, "learning_rate": 6.150656460225945e-08, "loss": 18.1172, "step": 26836 }, { "epoch": 1.782360363950322, "grad_norm": 203.3377685546875, "learning_rate": 6.146943265400007e-08, "loss": 18.5469, "step": 26837 }, { "epoch": 1.782426778242678, "grad_norm": 203.12083435058594, "learning_rate": 6.143231156212092e-08, "loss": 17.0469, "step": 26838 }, { "epoch": 1.7824931925350336, "grad_norm": 256.8232421875, "learning_rate": 6.139520132705167e-08, "loss": 14.4531, "step": 26839 }, { "epoch": 1.7825596068273892, "grad_norm": 116.99937438964844, "learning_rate": 6.135810194922152e-08, "loss": 13.5781, "step": 26840 }, { "epoch": 1.782626021119745, "grad_norm": 192.51058959960938, "learning_rate": 6.13210134290596e-08, "loss": 16.625, "step": 26841 }, { "epoch": 1.7826924354121005, "grad_norm": 512.3817138671875, "learning_rate": 6.128393576699487e-08, "loss": 17.9375, "step": 26842 }, { "epoch": 1.7827588497044564, "grad_norm": 257.0479736328125, "learning_rate": 6.124686896345644e-08, "loss": 15.5312, "step": 26843 }, { "epoch": 1.782825263996812, "grad_norm": 173.50933837890625, "learning_rate": 6.120981301887262e-08, "loss": 18.1875, "step": 26844 }, { "epoch": 1.7828916782891677, "grad_norm": 226.4384002685547, "learning_rate": 6.117276793367266e-08, "loss": 19.5, "step": 26845 }, { "epoch": 1.7829580925815236, "grad_norm": 677.6864624023438, "learning_rate": 6.113573370828451e-08, "loss": 24.875, "step": 26846 }, { "epoch": 1.7830245068738793, "grad_norm": 160.6301727294922, "learning_rate": 6.109871034313695e-08, "loss": 17.8594, "step": 26847 }, { "epoch": 1.783090921166235, "grad_norm": 179.73451232910156, "learning_rate": 6.1061697838658e-08, "loss": 14.5312, "step": 26848 }, { "epoch": 1.7831573354585908, "grad_norm": 256.0964050292969, "learning_rate": 6.102469619527617e-08, "loss": 15.3125, "step": 26849 }, { "epoch": 1.7832237497509464, "grad_norm": 94.76277923583984, "learning_rate": 6.09877054134188e-08, "loss": 13.1094, "step": 26850 }, { "epoch": 1.783290164043302, "grad_norm": 243.28439331054688, "learning_rate": 6.095072549351454e-08, "loss": 17.9844, "step": 26851 }, { "epoch": 1.783356578335658, "grad_norm": 139.79518127441406, "learning_rate": 6.091375643599073e-08, "loss": 10.625, "step": 26852 }, { "epoch": 1.7834229926280134, "grad_norm": 361.5723571777344, "learning_rate": 6.087679824127501e-08, "loss": 21.9375, "step": 26853 }, { "epoch": 1.7834894069203693, "grad_norm": 162.1166229248047, "learning_rate": 6.083985090979493e-08, "loss": 13.25, "step": 26854 }, { "epoch": 1.783555821212725, "grad_norm": 255.3829345703125, "learning_rate": 6.080291444197794e-08, "loss": 16.2656, "step": 26855 }, { "epoch": 1.7836222355050806, "grad_norm": 194.60438537597656, "learning_rate": 6.076598883825124e-08, "loss": 17.4531, "step": 26856 }, { "epoch": 1.7836886497974365, "grad_norm": 251.6312255859375, "learning_rate": 6.072907409904215e-08, "loss": 14.6406, "step": 26857 }, { "epoch": 1.7837550640897921, "grad_norm": 135.591552734375, "learning_rate": 6.069217022477713e-08, "loss": 18.1875, "step": 26858 }, { "epoch": 1.7838214783821478, "grad_norm": 654.2042236328125, "learning_rate": 6.065527721588381e-08, "loss": 21.6875, "step": 26859 }, { "epoch": 1.7838878926745037, "grad_norm": 316.3386535644531, "learning_rate": 6.061839507278854e-08, "loss": 15.3594, "step": 26860 }, { "epoch": 1.7839543069668593, "grad_norm": 415.70867919921875, "learning_rate": 6.058152379591785e-08, "loss": 12.4844, "step": 26861 }, { "epoch": 1.784020721259215, "grad_norm": 2476.706787109375, "learning_rate": 6.054466338569853e-08, "loss": 13.8125, "step": 26862 }, { "epoch": 1.7840871355515708, "grad_norm": 341.90423583984375, "learning_rate": 6.050781384255665e-08, "loss": 17.0938, "step": 26863 }, { "epoch": 1.7841535498439263, "grad_norm": 303.11334228515625, "learning_rate": 6.047097516691879e-08, "loss": 14.2812, "step": 26864 }, { "epoch": 1.7842199641362821, "grad_norm": 106.99288177490234, "learning_rate": 6.043414735921104e-08, "loss": 10.5, "step": 26865 }, { "epoch": 1.7842863784286378, "grad_norm": 147.73162841796875, "learning_rate": 6.039733041985895e-08, "loss": 18.125, "step": 26866 }, { "epoch": 1.7843527927209935, "grad_norm": 354.5250244140625, "learning_rate": 6.036052434928907e-08, "loss": 17.9531, "step": 26867 }, { "epoch": 1.7844192070133493, "grad_norm": 286.32904052734375, "learning_rate": 6.03237291479266e-08, "loss": 15.0625, "step": 26868 }, { "epoch": 1.784485621305705, "grad_norm": 242.54237365722656, "learning_rate": 6.028694481619745e-08, "loss": 20.7188, "step": 26869 }, { "epoch": 1.7845520355980606, "grad_norm": 280.2579040527344, "learning_rate": 6.025017135452704e-08, "loss": 14.6875, "step": 26870 }, { "epoch": 1.7846184498904165, "grad_norm": 178.01760864257812, "learning_rate": 6.021340876334091e-08, "loss": 11.4062, "step": 26871 }, { "epoch": 1.7846848641827722, "grad_norm": 153.08822631835938, "learning_rate": 6.017665704306385e-08, "loss": 11.8438, "step": 26872 }, { "epoch": 1.7847512784751278, "grad_norm": 244.8264923095703, "learning_rate": 6.013991619412162e-08, "loss": 13.6094, "step": 26873 }, { "epoch": 1.7848176927674837, "grad_norm": 425.2100524902344, "learning_rate": 6.010318621693866e-08, "loss": 14.6562, "step": 26874 }, { "epoch": 1.7848841070598391, "grad_norm": 539.6654663085938, "learning_rate": 6.006646711194008e-08, "loss": 24.375, "step": 26875 }, { "epoch": 1.784950521352195, "grad_norm": 136.95492553710938, "learning_rate": 6.002975887955065e-08, "loss": 23.8594, "step": 26876 }, { "epoch": 1.7850169356445507, "grad_norm": 160.16127014160156, "learning_rate": 5.999306152019501e-08, "loss": 20.2812, "step": 26877 }, { "epoch": 1.7850833499369063, "grad_norm": 167.4718475341797, "learning_rate": 5.995637503429751e-08, "loss": 15.3516, "step": 26878 }, { "epoch": 1.7851497642292622, "grad_norm": 174.82296752929688, "learning_rate": 5.991969942228259e-08, "loss": 13.8438, "step": 26879 }, { "epoch": 1.7852161785216178, "grad_norm": 356.3953552246094, "learning_rate": 5.988303468457457e-08, "loss": 19.0469, "step": 26880 }, { "epoch": 1.7852825928139735, "grad_norm": 174.72653198242188, "learning_rate": 5.984638082159743e-08, "loss": 14.5312, "step": 26881 }, { "epoch": 1.7853490071063294, "grad_norm": 233.9862823486328, "learning_rate": 5.98097378337754e-08, "loss": 18.2031, "step": 26882 }, { "epoch": 1.785415421398685, "grad_norm": 165.22848510742188, "learning_rate": 5.977310572153194e-08, "loss": 16.2344, "step": 26883 }, { "epoch": 1.7854818356910407, "grad_norm": 197.31393432617188, "learning_rate": 5.973648448529123e-08, "loss": 17.125, "step": 26884 }, { "epoch": 1.7855482499833966, "grad_norm": 475.4735412597656, "learning_rate": 5.969987412547661e-08, "loss": 16.1562, "step": 26885 }, { "epoch": 1.785614664275752, "grad_norm": 204.56753540039062, "learning_rate": 5.966327464251164e-08, "loss": 22.6719, "step": 26886 }, { "epoch": 1.7856810785681079, "grad_norm": 479.2003479003906, "learning_rate": 5.962668603681964e-08, "loss": 18.6719, "step": 26887 }, { "epoch": 1.7857474928604635, "grad_norm": 317.3549499511719, "learning_rate": 5.959010830882394e-08, "loss": 21.1562, "step": 26888 }, { "epoch": 1.7858139071528192, "grad_norm": 206.14291381835938, "learning_rate": 5.955354145894753e-08, "loss": 15.7656, "step": 26889 }, { "epoch": 1.785880321445175, "grad_norm": 176.80775451660156, "learning_rate": 5.9516985487613635e-08, "loss": 13.6562, "step": 26890 }, { "epoch": 1.7859467357375307, "grad_norm": 152.70947265625, "learning_rate": 5.94804403952448e-08, "loss": 17.0938, "step": 26891 }, { "epoch": 1.7860131500298864, "grad_norm": 244.7862548828125, "learning_rate": 5.944390618226392e-08, "loss": 15.7031, "step": 26892 }, { "epoch": 1.7860795643222422, "grad_norm": 151.45521545410156, "learning_rate": 5.940738284909353e-08, "loss": 15.4375, "step": 26893 }, { "epoch": 1.786145978614598, "grad_norm": 288.3533020019531, "learning_rate": 5.937087039615618e-08, "loss": 14.5156, "step": 26894 }, { "epoch": 1.7862123929069535, "grad_norm": 345.92095947265625, "learning_rate": 5.9334368823874106e-08, "loss": 18.2031, "step": 26895 }, { "epoch": 1.7862788071993094, "grad_norm": 344.22576904296875, "learning_rate": 5.929787813266996e-08, "loss": 21.4375, "step": 26896 }, { "epoch": 1.7863452214916649, "grad_norm": 248.82891845703125, "learning_rate": 5.926139832296506e-08, "loss": 13.8125, "step": 26897 }, { "epoch": 1.7864116357840207, "grad_norm": 190.9839630126953, "learning_rate": 5.92249293951822e-08, "loss": 12.7109, "step": 26898 }, { "epoch": 1.7864780500763764, "grad_norm": 177.64547729492188, "learning_rate": 5.918847134974281e-08, "loss": 12.2109, "step": 26899 }, { "epoch": 1.786544464368732, "grad_norm": 212.73387145996094, "learning_rate": 5.915202418706855e-08, "loss": 17.2344, "step": 26900 }, { "epoch": 1.786610878661088, "grad_norm": 154.58230590820312, "learning_rate": 5.91155879075812e-08, "loss": 15.5, "step": 26901 }, { "epoch": 1.7866772929534436, "grad_norm": 238.05490112304688, "learning_rate": 5.907916251170219e-08, "loss": 13.5312, "step": 26902 }, { "epoch": 1.7867437072457992, "grad_norm": 376.4456787109375, "learning_rate": 5.904274799985287e-08, "loss": 19.2734, "step": 26903 }, { "epoch": 1.786810121538155, "grad_norm": 205.29315185546875, "learning_rate": 5.9006344372454664e-08, "loss": 13.7969, "step": 26904 }, { "epoch": 1.7868765358305108, "grad_norm": 179.62367248535156, "learning_rate": 5.8969951629928125e-08, "loss": 15.0391, "step": 26905 }, { "epoch": 1.7869429501228664, "grad_norm": 106.91304016113281, "learning_rate": 5.8933569772694816e-08, "loss": 17.8281, "step": 26906 }, { "epoch": 1.7870093644152223, "grad_norm": 156.51966857910156, "learning_rate": 5.8897198801175164e-08, "loss": 11.5156, "step": 26907 }, { "epoch": 1.7870757787075777, "grad_norm": 233.89813232421875, "learning_rate": 5.8860838715790064e-08, "loss": 15.6875, "step": 26908 }, { "epoch": 1.7871421929999336, "grad_norm": 233.3765869140625, "learning_rate": 5.882448951696006e-08, "loss": 16.0312, "step": 26909 }, { "epoch": 1.7872086072922893, "grad_norm": 107.49835205078125, "learning_rate": 5.878815120510583e-08, "loss": 14.4844, "step": 26910 }, { "epoch": 1.787275021584645, "grad_norm": 142.48858642578125, "learning_rate": 5.8751823780647245e-08, "loss": 16.5781, "step": 26911 }, { "epoch": 1.7873414358770008, "grad_norm": 142.76797485351562, "learning_rate": 5.871550724400509e-08, "loss": 16.5625, "step": 26912 }, { "epoch": 1.7874078501693564, "grad_norm": 216.21482849121094, "learning_rate": 5.8679201595599026e-08, "loss": 19.0938, "step": 26913 }, { "epoch": 1.787474264461712, "grad_norm": 212.52513122558594, "learning_rate": 5.864290683584905e-08, "loss": 14.375, "step": 26914 }, { "epoch": 1.787540678754068, "grad_norm": 162.93678283691406, "learning_rate": 5.860662296517538e-08, "loss": 14.2578, "step": 26915 }, { "epoch": 1.7876070930464236, "grad_norm": 243.40234375, "learning_rate": 5.857034998399724e-08, "loss": 15.5, "step": 26916 }, { "epoch": 1.7876735073387793, "grad_norm": 115.93606567382812, "learning_rate": 5.853408789273451e-08, "loss": 14.1562, "step": 26917 }, { "epoch": 1.7877399216311352, "grad_norm": 307.06097412109375, "learning_rate": 5.849783669180652e-08, "loss": 18.3594, "step": 26918 }, { "epoch": 1.7878063359234906, "grad_norm": 133.39401245117188, "learning_rate": 5.846159638163273e-08, "loss": 15.375, "step": 26919 }, { "epoch": 1.7878727502158465, "grad_norm": 176.9988555908203, "learning_rate": 5.8425366962632225e-08, "loss": 13.4531, "step": 26920 }, { "epoch": 1.7879391645082021, "grad_norm": 221.9267120361328, "learning_rate": 5.8389148435224244e-08, "loss": 15.0156, "step": 26921 }, { "epoch": 1.7880055788005578, "grad_norm": 160.22413635253906, "learning_rate": 5.835294079982733e-08, "loss": 17.7344, "step": 26922 }, { "epoch": 1.7880719930929136, "grad_norm": 165.87112426757812, "learning_rate": 5.831674405686105e-08, "loss": 13.2812, "step": 26923 }, { "epoch": 1.7881384073852693, "grad_norm": 125.28572845458984, "learning_rate": 5.828055820674338e-08, "loss": 15.6719, "step": 26924 }, { "epoch": 1.788204821677625, "grad_norm": 593.605712890625, "learning_rate": 5.8244383249893223e-08, "loss": 26.0625, "step": 26925 }, { "epoch": 1.7882712359699808, "grad_norm": 105.72398376464844, "learning_rate": 5.820821918672902e-08, "loss": 11.8906, "step": 26926 }, { "epoch": 1.7883376502623365, "grad_norm": 202.36251831054688, "learning_rate": 5.8172066017668996e-08, "loss": 14.5938, "step": 26927 }, { "epoch": 1.7884040645546921, "grad_norm": 141.97271728515625, "learning_rate": 5.8135923743131476e-08, "loss": 12.6562, "step": 26928 }, { "epoch": 1.788470478847048, "grad_norm": 239.28810119628906, "learning_rate": 5.8099792363534686e-08, "loss": 18.8906, "step": 26929 }, { "epoch": 1.7885368931394034, "grad_norm": 100.58885192871094, "learning_rate": 5.806367187929595e-08, "loss": 15.375, "step": 26930 }, { "epoch": 1.7886033074317593, "grad_norm": 194.31385803222656, "learning_rate": 5.802756229083383e-08, "loss": 16.1719, "step": 26931 }, { "epoch": 1.788669721724115, "grad_norm": 269.46588134765625, "learning_rate": 5.799146359856566e-08, "loss": 15.625, "step": 26932 }, { "epoch": 1.7887361360164706, "grad_norm": 193.28704833984375, "learning_rate": 5.795537580290888e-08, "loss": 18.8281, "step": 26933 }, { "epoch": 1.7888025503088265, "grad_norm": 269.2447204589844, "learning_rate": 5.7919298904281264e-08, "loss": 15.1406, "step": 26934 }, { "epoch": 1.7888689646011822, "grad_norm": 157.6785125732422, "learning_rate": 5.788323290310004e-08, "loss": 13.2656, "step": 26935 }, { "epoch": 1.7889353788935378, "grad_norm": 145.56666564941406, "learning_rate": 5.784717779978199e-08, "loss": 11.6875, "step": 26936 }, { "epoch": 1.7890017931858937, "grad_norm": 302.2758483886719, "learning_rate": 5.781113359474488e-08, "loss": 16.3906, "step": 26937 }, { "epoch": 1.7890682074782494, "grad_norm": 498.7099914550781, "learning_rate": 5.777510028840504e-08, "loss": 14.9062, "step": 26938 }, { "epoch": 1.789134621770605, "grad_norm": 313.051025390625, "learning_rate": 5.77390778811796e-08, "loss": 14.3125, "step": 26939 }, { "epoch": 1.7892010360629609, "grad_norm": 160.04905700683594, "learning_rate": 5.77030663734851e-08, "loss": 18.6094, "step": 26940 }, { "epoch": 1.7892674503553163, "grad_norm": 103.14164733886719, "learning_rate": 5.766706576573821e-08, "loss": 13.6094, "step": 26941 }, { "epoch": 1.7893338646476722, "grad_norm": 254.96005249023438, "learning_rate": 5.763107605835527e-08, "loss": 18.0781, "step": 26942 }, { "epoch": 1.7894002789400278, "grad_norm": 256.6667785644531, "learning_rate": 5.759509725175293e-08, "loss": 14.0938, "step": 26943 }, { "epoch": 1.7894666932323835, "grad_norm": 303.0105285644531, "learning_rate": 5.7559129346346654e-08, "loss": 15.7812, "step": 26944 }, { "epoch": 1.7895331075247394, "grad_norm": 222.65191650390625, "learning_rate": 5.752317234255322e-08, "loss": 17.3281, "step": 26945 }, { "epoch": 1.789599521817095, "grad_norm": 472.7998962402344, "learning_rate": 5.748722624078817e-08, "loss": 14.6562, "step": 26946 }, { "epoch": 1.7896659361094507, "grad_norm": 236.54981994628906, "learning_rate": 5.74512910414674e-08, "loss": 11.0859, "step": 26947 }, { "epoch": 1.7897323504018066, "grad_norm": 205.8212432861328, "learning_rate": 5.741536674500658e-08, "loss": 17.3281, "step": 26948 }, { "epoch": 1.7897987646941622, "grad_norm": 109.47114562988281, "learning_rate": 5.737945335182126e-08, "loss": 12.6562, "step": 26949 }, { "epoch": 1.7898651789865179, "grad_norm": 263.968505859375, "learning_rate": 5.73435508623269e-08, "loss": 14.9062, "step": 26950 }, { "epoch": 1.7899315932788737, "grad_norm": 88.75465393066406, "learning_rate": 5.730765927693881e-08, "loss": 17.3125, "step": 26951 }, { "epoch": 1.7899980075712292, "grad_norm": 429.97515869140625, "learning_rate": 5.727177859607213e-08, "loss": 18.2188, "step": 26952 }, { "epoch": 1.790064421863585, "grad_norm": 510.65106201171875, "learning_rate": 5.723590882014184e-08, "loss": 26.4062, "step": 26953 }, { "epoch": 1.7901308361559407, "grad_norm": 190.3070526123047, "learning_rate": 5.7200049949563065e-08, "loss": 15.8281, "step": 26954 }, { "epoch": 1.7901972504482964, "grad_norm": 161.48284912109375, "learning_rate": 5.716420198475036e-08, "loss": 17.7031, "step": 26955 }, { "epoch": 1.7902636647406522, "grad_norm": 143.2560272216797, "learning_rate": 5.71283649261185e-08, "loss": 15.9375, "step": 26956 }, { "epoch": 1.790330079033008, "grad_norm": 300.24847412109375, "learning_rate": 5.709253877408204e-08, "loss": 17.2812, "step": 26957 }, { "epoch": 1.7903964933253635, "grad_norm": 240.51060485839844, "learning_rate": 5.705672352905533e-08, "loss": 16.4844, "step": 26958 }, { "epoch": 1.7904629076177194, "grad_norm": 271.8207702636719, "learning_rate": 5.702091919145269e-08, "loss": 14.9688, "step": 26959 }, { "epoch": 1.790529321910075, "grad_norm": 841.1875, "learning_rate": 5.698512576168857e-08, "loss": 30.1094, "step": 26960 }, { "epoch": 1.7905957362024307, "grad_norm": 153.65536499023438, "learning_rate": 5.694934324017631e-08, "loss": 14.75, "step": 26961 }, { "epoch": 1.7906621504947866, "grad_norm": 236.12872314453125, "learning_rate": 5.691357162733057e-08, "loss": 12.5156, "step": 26962 }, { "epoch": 1.790728564787142, "grad_norm": 154.6695556640625, "learning_rate": 5.68778109235648e-08, "loss": 17.375, "step": 26963 }, { "epoch": 1.790794979079498, "grad_norm": 158.2696075439453, "learning_rate": 5.6842061129292575e-08, "loss": 10.3828, "step": 26964 }, { "epoch": 1.7908613933718536, "grad_norm": 177.67713928222656, "learning_rate": 5.680632224492754e-08, "loss": 15.9688, "step": 26965 }, { "epoch": 1.7909278076642092, "grad_norm": 247.12832641601562, "learning_rate": 5.6770594270883045e-08, "loss": 12.3125, "step": 26966 }, { "epoch": 1.790994221956565, "grad_norm": 147.55699157714844, "learning_rate": 5.6734877207572526e-08, "loss": 12.9531, "step": 26967 }, { "epoch": 1.7910606362489208, "grad_norm": 175.51394653320312, "learning_rate": 5.669917105540911e-08, "loss": 21.2656, "step": 26968 }, { "epoch": 1.7911270505412764, "grad_norm": 294.8791198730469, "learning_rate": 5.666347581480546e-08, "loss": 15.9688, "step": 26969 }, { "epoch": 1.7911934648336323, "grad_norm": 229.22386169433594, "learning_rate": 5.6627791486175027e-08, "loss": 17.5469, "step": 26970 }, { "epoch": 1.791259879125988, "grad_norm": 349.43084716796875, "learning_rate": 5.659211806993025e-08, "loss": 23.0859, "step": 26971 }, { "epoch": 1.7913262934183436, "grad_norm": 211.209228515625, "learning_rate": 5.6556455566483806e-08, "loss": 16.7969, "step": 26972 }, { "epoch": 1.7913927077106995, "grad_norm": 287.0152893066406, "learning_rate": 5.6520803976248254e-08, "loss": 17.2344, "step": 26973 }, { "epoch": 1.791459122003055, "grad_norm": 164.92001342773438, "learning_rate": 5.648516329963615e-08, "loss": 16.1406, "step": 26974 }, { "epoch": 1.7915255362954108, "grad_norm": 255.36752319335938, "learning_rate": 5.644953353705928e-08, "loss": 13.3438, "step": 26975 }, { "epoch": 1.7915919505877664, "grad_norm": 366.3238525390625, "learning_rate": 5.641391468893053e-08, "loss": 15.0938, "step": 26976 }, { "epoch": 1.791658364880122, "grad_norm": 143.6311798095703, "learning_rate": 5.637830675566124e-08, "loss": 12.9141, "step": 26977 }, { "epoch": 1.791724779172478, "grad_norm": 207.36460876464844, "learning_rate": 5.634270973766364e-08, "loss": 16.2812, "step": 26978 }, { "epoch": 1.7917911934648336, "grad_norm": 185.2391815185547, "learning_rate": 5.6307123635349394e-08, "loss": 19.0625, "step": 26979 }, { "epoch": 1.7918576077571893, "grad_norm": 125.58830261230469, "learning_rate": 5.627154844913029e-08, "loss": 10.8906, "step": 26980 }, { "epoch": 1.7919240220495452, "grad_norm": 177.46267700195312, "learning_rate": 5.623598417941766e-08, "loss": 16.8438, "step": 26981 }, { "epoch": 1.7919904363419008, "grad_norm": 121.7055435180664, "learning_rate": 5.620043082662307e-08, "loss": 16.7344, "step": 26982 }, { "epoch": 1.7920568506342565, "grad_norm": 238.1724395751953, "learning_rate": 5.6164888391157404e-08, "loss": 17.6094, "step": 26983 }, { "epoch": 1.7921232649266123, "grad_norm": 164.6212615966797, "learning_rate": 5.6129356873432344e-08, "loss": 18.4219, "step": 26984 }, { "epoch": 1.7921896792189678, "grad_norm": 255.3919677734375, "learning_rate": 5.6093836273858444e-08, "loss": 13.5, "step": 26985 }, { "epoch": 1.7922560935113236, "grad_norm": 681.5491943359375, "learning_rate": 5.605832659284681e-08, "loss": 20.9844, "step": 26986 }, { "epoch": 1.7923225078036793, "grad_norm": 107.76194763183594, "learning_rate": 5.6022827830808136e-08, "loss": 12.5312, "step": 26987 }, { "epoch": 1.792388922096035, "grad_norm": 372.9545593261719, "learning_rate": 5.598733998815308e-08, "loss": 20.5156, "step": 26988 }, { "epoch": 1.7924553363883908, "grad_norm": 172.83053588867188, "learning_rate": 5.5951863065292095e-08, "loss": 15.1562, "step": 26989 }, { "epoch": 1.7925217506807465, "grad_norm": 88.9400405883789, "learning_rate": 5.591639706263551e-08, "loss": 20.7656, "step": 26990 }, { "epoch": 1.7925881649731021, "grad_norm": 197.58258056640625, "learning_rate": 5.588094198059379e-08, "loss": 15.25, "step": 26991 }, { "epoch": 1.792654579265458, "grad_norm": 177.23809814453125, "learning_rate": 5.5845497819576815e-08, "loss": 23.875, "step": 26992 }, { "epoch": 1.7927209935578137, "grad_norm": 270.9709777832031, "learning_rate": 5.5810064579994934e-08, "loss": 19.5312, "step": 26993 }, { "epoch": 1.7927874078501693, "grad_norm": 372.46478271484375, "learning_rate": 5.5774642262257474e-08, "loss": 25.6875, "step": 26994 }, { "epoch": 1.7928538221425252, "grad_norm": 295.66156005859375, "learning_rate": 5.573923086677468e-08, "loss": 16.8594, "step": 26995 }, { "epoch": 1.7929202364348806, "grad_norm": 174.32833862304688, "learning_rate": 5.5703830393955877e-08, "loss": 13.9219, "step": 26996 }, { "epoch": 1.7929866507272365, "grad_norm": 134.85409545898438, "learning_rate": 5.566844084421074e-08, "loss": 12.0938, "step": 26997 }, { "epoch": 1.7930530650195922, "grad_norm": 211.04898071289062, "learning_rate": 5.563306221794839e-08, "loss": 18.0, "step": 26998 }, { "epoch": 1.7931194793119478, "grad_norm": 229.5897979736328, "learning_rate": 5.5597694515578494e-08, "loss": 13.4219, "step": 26999 }, { "epoch": 1.7931858936043037, "grad_norm": 362.7469482421875, "learning_rate": 5.556233773750951e-08, "loss": 13.6719, "step": 27000 }, { "epoch": 1.7932523078966593, "grad_norm": 187.62400817871094, "learning_rate": 5.552699188415111e-08, "loss": 13.8516, "step": 27001 }, { "epoch": 1.793318722189015, "grad_norm": 123.01451110839844, "learning_rate": 5.5491656955911735e-08, "loss": 15.2344, "step": 27002 }, { "epoch": 1.7933851364813709, "grad_norm": 195.21852111816406, "learning_rate": 5.545633295320029e-08, "loss": 15.0938, "step": 27003 }, { "epoch": 1.7934515507737265, "grad_norm": 459.8405456542969, "learning_rate": 5.5421019876425334e-08, "loss": 19.9062, "step": 27004 }, { "epoch": 1.7935179650660822, "grad_norm": 107.60710144042969, "learning_rate": 5.5385717725995204e-08, "loss": 14.5469, "step": 27005 }, { "epoch": 1.793584379358438, "grad_norm": 174.8543701171875, "learning_rate": 5.535042650231858e-08, "loss": 23.6875, "step": 27006 }, { "epoch": 1.7936507936507935, "grad_norm": 92.01451873779297, "learning_rate": 5.5315146205803466e-08, "loss": 13.5781, "step": 27007 }, { "epoch": 1.7937172079431494, "grad_norm": 179.29417419433594, "learning_rate": 5.527987683685786e-08, "loss": 13.5703, "step": 27008 }, { "epoch": 1.793783622235505, "grad_norm": 91.29914855957031, "learning_rate": 5.524461839589012e-08, "loss": 12.5469, "step": 27009 }, { "epoch": 1.7938500365278607, "grad_norm": 221.45220947265625, "learning_rate": 5.520937088330768e-08, "loss": 19.3281, "step": 27010 }, { "epoch": 1.7939164508202166, "grad_norm": 129.17575073242188, "learning_rate": 5.517413429951845e-08, "loss": 13.5938, "step": 27011 }, { "epoch": 1.7939828651125722, "grad_norm": 269.7715759277344, "learning_rate": 5.513890864493009e-08, "loss": 17.0625, "step": 27012 }, { "epoch": 1.7940492794049279, "grad_norm": 116.85916137695312, "learning_rate": 5.510369391994996e-08, "loss": 12.7812, "step": 27013 }, { "epoch": 1.7941156936972837, "grad_norm": 245.64462280273438, "learning_rate": 5.506849012498538e-08, "loss": 16.0156, "step": 27014 }, { "epoch": 1.7941821079896394, "grad_norm": 161.29637145996094, "learning_rate": 5.503329726044381e-08, "loss": 15.6875, "step": 27015 }, { "epoch": 1.794248522281995, "grad_norm": 339.79351806640625, "learning_rate": 5.499811532673193e-08, "loss": 14.3125, "step": 27016 }, { "epoch": 1.794314936574351, "grad_norm": 714.5473022460938, "learning_rate": 5.496294432425708e-08, "loss": 13.4375, "step": 27017 }, { "epoch": 1.7943813508667064, "grad_norm": 394.5957336425781, "learning_rate": 5.492778425342592e-08, "loss": 19.1875, "step": 27018 }, { "epoch": 1.7944477651590622, "grad_norm": 162.17332458496094, "learning_rate": 5.489263511464515e-08, "loss": 15.4531, "step": 27019 }, { "epoch": 1.794514179451418, "grad_norm": 185.9333953857422, "learning_rate": 5.4857496908321424e-08, "loss": 20.5469, "step": 27020 }, { "epoch": 1.7945805937437735, "grad_norm": 108.28974914550781, "learning_rate": 5.482236963486131e-08, "loss": 14.2188, "step": 27021 }, { "epoch": 1.7946470080361294, "grad_norm": 304.6490783691406, "learning_rate": 5.478725329467071e-08, "loss": 15.3906, "step": 27022 }, { "epoch": 1.794713422328485, "grad_norm": 195.2476806640625, "learning_rate": 5.475214788815641e-08, "loss": 20.1094, "step": 27023 }, { "epoch": 1.7947798366208407, "grad_norm": 403.8119201660156, "learning_rate": 5.471705341572408e-08, "loss": 12.5938, "step": 27024 }, { "epoch": 1.7948462509131966, "grad_norm": 146.99095153808594, "learning_rate": 5.468196987777962e-08, "loss": 13.0312, "step": 27025 }, { "epoch": 1.7949126652055523, "grad_norm": 208.14378356933594, "learning_rate": 5.4646897274729376e-08, "loss": 16.7656, "step": 27026 }, { "epoch": 1.794979079497908, "grad_norm": 226.9109649658203, "learning_rate": 5.461183560697846e-08, "loss": 18.2812, "step": 27027 }, { "epoch": 1.7950454937902638, "grad_norm": 226.54954528808594, "learning_rate": 5.457678487493278e-08, "loss": 14.4844, "step": 27028 }, { "epoch": 1.7951119080826192, "grad_norm": 321.50640869140625, "learning_rate": 5.4541745078997556e-08, "loss": 19.4531, "step": 27029 }, { "epoch": 1.795178322374975, "grad_norm": 246.79588317871094, "learning_rate": 5.4506716219578364e-08, "loss": 15.7812, "step": 27030 }, { "epoch": 1.795244736667331, "grad_norm": 197.89276123046875, "learning_rate": 5.447169829708032e-08, "loss": 21.3906, "step": 27031 }, { "epoch": 1.7953111509596864, "grad_norm": 1937.1419677734375, "learning_rate": 5.4436691311908425e-08, "loss": 12.875, "step": 27032 }, { "epoch": 1.7953775652520423, "grad_norm": 361.54412841796875, "learning_rate": 5.4401695264467474e-08, "loss": 15.9062, "step": 27033 }, { "epoch": 1.795443979544398, "grad_norm": 165.0881805419922, "learning_rate": 5.43667101551627e-08, "loss": 12.25, "step": 27034 }, { "epoch": 1.7955103938367536, "grad_norm": 93.9129867553711, "learning_rate": 5.433173598439844e-08, "loss": 11.8281, "step": 27035 }, { "epoch": 1.7955768081291095, "grad_norm": 324.60382080078125, "learning_rate": 5.4296772752579264e-08, "loss": 12.6641, "step": 27036 }, { "epoch": 1.7956432224214651, "grad_norm": 168.5296173095703, "learning_rate": 5.4261820460109854e-08, "loss": 13.2656, "step": 27037 }, { "epoch": 1.7957096367138208, "grad_norm": 234.15707397460938, "learning_rate": 5.422687910739432e-08, "loss": 12.9062, "step": 27038 }, { "epoch": 1.7957760510061767, "grad_norm": 330.1966857910156, "learning_rate": 5.4191948694836674e-08, "loss": 14.375, "step": 27039 }, { "epoch": 1.795842465298532, "grad_norm": 335.6459045410156, "learning_rate": 5.415702922284149e-08, "loss": 16.2734, "step": 27040 }, { "epoch": 1.795908879590888, "grad_norm": 206.48068237304688, "learning_rate": 5.412212069181221e-08, "loss": 17.4531, "step": 27041 }, { "epoch": 1.7959752938832438, "grad_norm": 429.9043273925781, "learning_rate": 5.4087223102152856e-08, "loss": 15.4922, "step": 27042 }, { "epoch": 1.7960417081755993, "grad_norm": 126.8998031616211, "learning_rate": 5.40523364542671e-08, "loss": 13.8125, "step": 27043 }, { "epoch": 1.7961081224679551, "grad_norm": 145.35182189941406, "learning_rate": 5.40174607485584e-08, "loss": 10.1562, "step": 27044 }, { "epoch": 1.7961745367603108, "grad_norm": 165.77406311035156, "learning_rate": 5.3982595985430204e-08, "loss": 14.3281, "step": 27045 }, { "epoch": 1.7962409510526665, "grad_norm": 133.2537078857422, "learning_rate": 5.394774216528597e-08, "loss": 16.9688, "step": 27046 }, { "epoch": 1.7963073653450223, "grad_norm": 205.4800262451172, "learning_rate": 5.391289928852849e-08, "loss": 15.0312, "step": 27047 }, { "epoch": 1.796373779637378, "grad_norm": 225.22027587890625, "learning_rate": 5.3878067355561217e-08, "loss": 12.4219, "step": 27048 }, { "epoch": 1.7964401939297336, "grad_norm": 246.14598083496094, "learning_rate": 5.384324636678683e-08, "loss": 15.7656, "step": 27049 }, { "epoch": 1.7965066082220895, "grad_norm": 161.09228515625, "learning_rate": 5.380843632260823e-08, "loss": 14.0625, "step": 27050 }, { "epoch": 1.796573022514445, "grad_norm": 152.1573028564453, "learning_rate": 5.3773637223427984e-08, "loss": 15.2188, "step": 27051 }, { "epoch": 1.7966394368068008, "grad_norm": 403.274658203125, "learning_rate": 5.3738849069648764e-08, "loss": 24.25, "step": 27052 }, { "epoch": 1.7967058510991567, "grad_norm": 180.16445922851562, "learning_rate": 5.370407186167281e-08, "loss": 17.4375, "step": 27053 }, { "epoch": 1.7967722653915121, "grad_norm": 143.41661071777344, "learning_rate": 5.366930559990257e-08, "loss": 10.8125, "step": 27054 }, { "epoch": 1.796838679683868, "grad_norm": 139.43893432617188, "learning_rate": 5.363455028473996e-08, "loss": 11.6094, "step": 27055 }, { "epoch": 1.7969050939762237, "grad_norm": 149.76734924316406, "learning_rate": 5.359980591658741e-08, "loss": 10.125, "step": 27056 }, { "epoch": 1.7969715082685793, "grad_norm": 88.26734161376953, "learning_rate": 5.356507249584641e-08, "loss": 12.0, "step": 27057 }, { "epoch": 1.7970379225609352, "grad_norm": 187.58746337890625, "learning_rate": 5.3530350022918835e-08, "loss": 15.3438, "step": 27058 }, { "epoch": 1.7971043368532909, "grad_norm": 572.525634765625, "learning_rate": 5.3495638498206484e-08, "loss": 17.3281, "step": 27059 }, { "epoch": 1.7971707511456465, "grad_norm": 151.13035583496094, "learning_rate": 5.346093792211104e-08, "loss": 15.4375, "step": 27060 }, { "epoch": 1.7972371654380024, "grad_norm": 108.5231704711914, "learning_rate": 5.342624829503328e-08, "loss": 17.9531, "step": 27061 }, { "epoch": 1.7973035797303578, "grad_norm": 230.15768432617188, "learning_rate": 5.33915696173749e-08, "loss": 14.25, "step": 27062 }, { "epoch": 1.7973699940227137, "grad_norm": 385.60797119140625, "learning_rate": 5.335690188953734e-08, "loss": 19.3438, "step": 27063 }, { "epoch": 1.7974364083150696, "grad_norm": 135.0826416015625, "learning_rate": 5.332224511192085e-08, "loss": 29.5, "step": 27064 }, { "epoch": 1.797502822607425, "grad_norm": 279.7790832519531, "learning_rate": 5.3287599284927095e-08, "loss": 17.1406, "step": 27065 }, { "epoch": 1.7975692368997809, "grad_norm": 242.5946044921875, "learning_rate": 5.325296440895621e-08, "loss": 12.0781, "step": 27066 }, { "epoch": 1.7976356511921365, "grad_norm": 204.4095916748047, "learning_rate": 5.321834048440932e-08, "loss": 18.0156, "step": 27067 }, { "epoch": 1.7977020654844922, "grad_norm": 164.7870635986328, "learning_rate": 5.318372751168654e-08, "loss": 15.9688, "step": 27068 }, { "epoch": 1.797768479776848, "grad_norm": 80.21566009521484, "learning_rate": 5.314912549118855e-08, "loss": 9.7656, "step": 27069 }, { "epoch": 1.7978348940692037, "grad_norm": 164.5850830078125, "learning_rate": 5.311453442331537e-08, "loss": 16.125, "step": 27070 }, { "epoch": 1.7979013083615594, "grad_norm": 142.36514282226562, "learning_rate": 5.307995430846757e-08, "loss": 11.9766, "step": 27071 }, { "epoch": 1.7979677226539152, "grad_norm": 840.8176879882812, "learning_rate": 5.3045385147044374e-08, "loss": 18.5781, "step": 27072 }, { "epoch": 1.7980341369462707, "grad_norm": 176.26864624023438, "learning_rate": 5.3010826939446473e-08, "loss": 13.8438, "step": 27073 }, { "epoch": 1.7981005512386266, "grad_norm": 236.77230834960938, "learning_rate": 5.29762796860731e-08, "loss": 17.4609, "step": 27074 }, { "epoch": 1.7981669655309824, "grad_norm": 336.2553405761719, "learning_rate": 5.2941743387324046e-08, "loss": 11.8906, "step": 27075 }, { "epoch": 1.7982333798233379, "grad_norm": 186.9847869873047, "learning_rate": 5.290721804359888e-08, "loss": 17.2891, "step": 27076 }, { "epoch": 1.7982997941156937, "grad_norm": 294.1379089355469, "learning_rate": 5.2872703655296837e-08, "loss": 17.0469, "step": 27077 }, { "epoch": 1.7983662084080494, "grad_norm": 213.61094665527344, "learning_rate": 5.2838200222817156e-08, "loss": 9.9219, "step": 27078 }, { "epoch": 1.798432622700405, "grad_norm": 722.0380249023438, "learning_rate": 5.280370774655918e-08, "loss": 14.7344, "step": 27079 }, { "epoch": 1.798499036992761, "grad_norm": 264.8491516113281, "learning_rate": 5.2769226226921596e-08, "loss": 15.1562, "step": 27080 }, { "epoch": 1.7985654512851166, "grad_norm": 137.77008056640625, "learning_rate": 5.273475566430341e-08, "loss": 14.5312, "step": 27081 }, { "epoch": 1.7986318655774722, "grad_norm": 75.36256408691406, "learning_rate": 5.270029605910331e-08, "loss": 14.9219, "step": 27082 }, { "epoch": 1.798698279869828, "grad_norm": 85.9048843383789, "learning_rate": 5.2665847411719976e-08, "loss": 9.0938, "step": 27083 }, { "epoch": 1.7987646941621835, "grad_norm": 670.9632568359375, "learning_rate": 5.2631409722551865e-08, "loss": 25.0781, "step": 27084 }, { "epoch": 1.7988311084545394, "grad_norm": 116.64910888671875, "learning_rate": 5.259698299199744e-08, "loss": 18.4531, "step": 27085 }, { "epoch": 1.7988975227468953, "grad_norm": 177.5341033935547, "learning_rate": 5.25625672204546e-08, "loss": 18.75, "step": 27086 }, { "epoch": 1.7989639370392507, "grad_norm": 141.83926391601562, "learning_rate": 5.2528162408321805e-08, "loss": 15.6719, "step": 27087 }, { "epoch": 1.7990303513316066, "grad_norm": 151.64935302734375, "learning_rate": 5.249376855599685e-08, "loss": 16.3594, "step": 27088 }, { "epoch": 1.7990967656239623, "grad_norm": 198.14419555664062, "learning_rate": 5.2459385663877644e-08, "loss": 14.4844, "step": 27089 }, { "epoch": 1.799163179916318, "grad_norm": 679.03466796875, "learning_rate": 5.242501373236174e-08, "loss": 23.2344, "step": 27090 }, { "epoch": 1.7992295942086738, "grad_norm": 156.22640991210938, "learning_rate": 5.2390652761846954e-08, "loss": 16.5781, "step": 27091 }, { "epoch": 1.7992960085010294, "grad_norm": 144.2451629638672, "learning_rate": 5.235630275273073e-08, "loss": 14.2656, "step": 27092 }, { "epoch": 1.799362422793385, "grad_norm": 288.02960205078125, "learning_rate": 5.232196370541042e-08, "loss": 19.6562, "step": 27093 }, { "epoch": 1.799428837085741, "grad_norm": 629.2989501953125, "learning_rate": 5.228763562028293e-08, "loss": 25.7812, "step": 27094 }, { "epoch": 1.7994952513780964, "grad_norm": 145.50942993164062, "learning_rate": 5.2253318497745836e-08, "loss": 15.6172, "step": 27095 }, { "epoch": 1.7995616656704523, "grad_norm": 132.26934814453125, "learning_rate": 5.22190123381957e-08, "loss": 11.875, "step": 27096 }, { "epoch": 1.7996280799628082, "grad_norm": 297.0654602050781, "learning_rate": 5.2184717142029544e-08, "loss": 18.7969, "step": 27097 }, { "epoch": 1.7996944942551636, "grad_norm": 147.1315155029297, "learning_rate": 5.215043290964405e-08, "loss": 15.875, "step": 27098 }, { "epoch": 1.7997609085475195, "grad_norm": 481.89068603515625, "learning_rate": 5.2116159641435786e-08, "loss": 26.6719, "step": 27099 }, { "epoch": 1.7998273228398751, "grad_norm": 414.64947509765625, "learning_rate": 5.2081897337801107e-08, "loss": 15.2812, "step": 27100 }, { "epoch": 1.7998937371322308, "grad_norm": 162.3312530517578, "learning_rate": 5.204764599913658e-08, "loss": 12.0625, "step": 27101 }, { "epoch": 1.7999601514245867, "grad_norm": 86.04948425292969, "learning_rate": 5.201340562583834e-08, "loss": 11.8047, "step": 27102 }, { "epoch": 1.8000265657169423, "grad_norm": 209.7117462158203, "learning_rate": 5.197917621830206e-08, "loss": 17.3438, "step": 27103 }, { "epoch": 1.800092980009298, "grad_norm": 435.86248779296875, "learning_rate": 5.194495777692432e-08, "loss": 21.6875, "step": 27104 }, { "epoch": 1.8001593943016538, "grad_norm": 501.52191162109375, "learning_rate": 5.191075030210046e-08, "loss": 22.7344, "step": 27105 }, { "epoch": 1.8002258085940093, "grad_norm": 112.4549560546875, "learning_rate": 5.1876553794226394e-08, "loss": 14.7656, "step": 27106 }, { "epoch": 1.8002922228863651, "grad_norm": 369.99200439453125, "learning_rate": 5.18423682536977e-08, "loss": 15.3594, "step": 27107 }, { "epoch": 1.800358637178721, "grad_norm": 326.7244567871094, "learning_rate": 5.1808193680909605e-08, "loss": 13.8438, "step": 27108 }, { "epoch": 1.8004250514710765, "grad_norm": 319.1484680175781, "learning_rate": 5.177403007625769e-08, "loss": 15.8281, "step": 27109 }, { "epoch": 1.8004914657634323, "grad_norm": 204.4793243408203, "learning_rate": 5.173987744013708e-08, "loss": 16.1875, "step": 27110 }, { "epoch": 1.800557880055788, "grad_norm": 146.69688415527344, "learning_rate": 5.170573577294246e-08, "loss": 14.2656, "step": 27111 }, { "epoch": 1.8006242943481436, "grad_norm": 193.63302612304688, "learning_rate": 5.1671605075069404e-08, "loss": 12.9531, "step": 27112 }, { "epoch": 1.8006907086404995, "grad_norm": 262.8741455078125, "learning_rate": 5.1637485346912257e-08, "loss": 16.2422, "step": 27113 }, { "epoch": 1.8007571229328552, "grad_norm": 139.58006286621094, "learning_rate": 5.160337658886582e-08, "loss": 14.4375, "step": 27114 }, { "epoch": 1.8008235372252108, "grad_norm": 224.02041625976562, "learning_rate": 5.1569278801324554e-08, "loss": 18.8906, "step": 27115 }, { "epoch": 1.8008899515175667, "grad_norm": 283.029541015625, "learning_rate": 5.153519198468315e-08, "loss": 15.2344, "step": 27116 }, { "epoch": 1.8009563658099221, "grad_norm": 175.91925048828125, "learning_rate": 5.150111613933561e-08, "loss": 17.7344, "step": 27117 }, { "epoch": 1.801022780102278, "grad_norm": 271.7767333984375, "learning_rate": 5.146705126567641e-08, "loss": 23.6875, "step": 27118 }, { "epoch": 1.8010891943946339, "grad_norm": 237.6587371826172, "learning_rate": 5.143299736409901e-08, "loss": 18.3672, "step": 27119 }, { "epoch": 1.8011556086869893, "grad_norm": 436.2479248046875, "learning_rate": 5.1398954434998086e-08, "loss": 20.5625, "step": 27120 }, { "epoch": 1.8012220229793452, "grad_norm": 137.89581298828125, "learning_rate": 5.1364922478767e-08, "loss": 14.7734, "step": 27121 }, { "epoch": 1.8012884372717008, "grad_norm": 314.94573974609375, "learning_rate": 5.133090149579933e-08, "loss": 14.5312, "step": 27122 }, { "epoch": 1.8013548515640565, "grad_norm": 622.544189453125, "learning_rate": 5.129689148648886e-08, "loss": 17.7344, "step": 27123 }, { "epoch": 1.8014212658564124, "grad_norm": 184.8612060546875, "learning_rate": 5.1262892451229055e-08, "loss": 16.9688, "step": 27124 }, { "epoch": 1.801487680148768, "grad_norm": 306.64605712890625, "learning_rate": 5.122890439041261e-08, "loss": 16.4062, "step": 27125 }, { "epoch": 1.8015540944411237, "grad_norm": 257.57110595703125, "learning_rate": 5.119492730443353e-08, "loss": 18.4375, "step": 27126 }, { "epoch": 1.8016205087334796, "grad_norm": 281.2738342285156, "learning_rate": 5.116096119368418e-08, "loss": 18.4844, "step": 27127 }, { "epoch": 1.801686923025835, "grad_norm": 154.98031616210938, "learning_rate": 5.11270060585578e-08, "loss": 15.5156, "step": 27128 }, { "epoch": 1.8017533373181909, "grad_norm": 212.34136962890625, "learning_rate": 5.109306189944695e-08, "loss": 16.75, "step": 27129 }, { "epoch": 1.8018197516105467, "grad_norm": 159.7707061767578, "learning_rate": 5.105912871674456e-08, "loss": 18.3281, "step": 27130 }, { "epoch": 1.8018861659029022, "grad_norm": 128.70489501953125, "learning_rate": 5.102520651084285e-08, "loss": 13.9375, "step": 27131 }, { "epoch": 1.801952580195258, "grad_norm": 186.3326873779297, "learning_rate": 5.0991295282134414e-08, "loss": 16.2344, "step": 27132 }, { "epoch": 1.8020189944876137, "grad_norm": 324.50677490234375, "learning_rate": 5.095739503101126e-08, "loss": 19.5312, "step": 27133 }, { "epoch": 1.8020854087799694, "grad_norm": 160.63436889648438, "learning_rate": 5.092350575786608e-08, "loss": 16.3281, "step": 27134 }, { "epoch": 1.8021518230723252, "grad_norm": 184.28561401367188, "learning_rate": 5.088962746309022e-08, "loss": 18.3125, "step": 27135 }, { "epoch": 1.802218237364681, "grad_norm": 164.8873291015625, "learning_rate": 5.085576014707582e-08, "loss": 21.8125, "step": 27136 }, { "epoch": 1.8022846516570366, "grad_norm": 188.2382354736328, "learning_rate": 5.0821903810214896e-08, "loss": 16.0781, "step": 27137 }, { "epoch": 1.8023510659493924, "grad_norm": 673.4974975585938, "learning_rate": 5.0788058452898687e-08, "loss": 12.0312, "step": 27138 }, { "epoch": 1.8024174802417479, "grad_norm": 374.57525634765625, "learning_rate": 5.0754224075518994e-08, "loss": 15.4531, "step": 27139 }, { "epoch": 1.8024838945341037, "grad_norm": 576.0331420898438, "learning_rate": 5.072040067846695e-08, "loss": 21.0938, "step": 27140 }, { "epoch": 1.8025503088264596, "grad_norm": 225.0306396484375, "learning_rate": 5.068658826213401e-08, "loss": 19.1719, "step": 27141 }, { "epoch": 1.802616723118815, "grad_norm": 155.87966918945312, "learning_rate": 5.065278682691121e-08, "loss": 18.5625, "step": 27142 }, { "epoch": 1.802683137411171, "grad_norm": 140.0143280029297, "learning_rate": 5.061899637318967e-08, "loss": 14.0312, "step": 27143 }, { "epoch": 1.8027495517035266, "grad_norm": 176.25868225097656, "learning_rate": 5.0585216901359975e-08, "loss": 16.3281, "step": 27144 }, { "epoch": 1.8028159659958822, "grad_norm": 157.90272521972656, "learning_rate": 5.0551448411813024e-08, "loss": 12.3125, "step": 27145 }, { "epoch": 1.802882380288238, "grad_norm": 249.34390258789062, "learning_rate": 5.0517690904939405e-08, "loss": 15.7969, "step": 27146 }, { "epoch": 1.8029487945805938, "grad_norm": 309.872314453125, "learning_rate": 5.048394438112969e-08, "loss": 27.5781, "step": 27147 }, { "epoch": 1.8030152088729494, "grad_norm": 167.40518188476562, "learning_rate": 5.0450208840774114e-08, "loss": 17.7031, "step": 27148 }, { "epoch": 1.8030816231653053, "grad_norm": 109.72824096679688, "learning_rate": 5.041648428426326e-08, "loss": 12.6172, "step": 27149 }, { "epoch": 1.8031480374576607, "grad_norm": 200.72265625, "learning_rate": 5.03827707119866e-08, "loss": 21.2812, "step": 27150 }, { "epoch": 1.8032144517500166, "grad_norm": 226.63967895507812, "learning_rate": 5.03490681243347e-08, "loss": 12.2656, "step": 27151 }, { "epoch": 1.8032808660423725, "grad_norm": 111.12418365478516, "learning_rate": 5.031537652169704e-08, "loss": 10.1562, "step": 27152 }, { "epoch": 1.803347280334728, "grad_norm": 250.67129516601562, "learning_rate": 5.028169590446352e-08, "loss": 13.9844, "step": 27153 }, { "epoch": 1.8034136946270838, "grad_norm": 136.87896728515625, "learning_rate": 5.0248026273023716e-08, "loss": 16.7188, "step": 27154 }, { "epoch": 1.8034801089194394, "grad_norm": 111.0376968383789, "learning_rate": 5.02143676277671e-08, "loss": 14.0312, "step": 27155 }, { "epoch": 1.803546523211795, "grad_norm": 153.34671020507812, "learning_rate": 5.018071996908302e-08, "loss": 14.6562, "step": 27156 }, { "epoch": 1.803612937504151, "grad_norm": 160.7769012451172, "learning_rate": 5.0147083297360725e-08, "loss": 13.5625, "step": 27157 }, { "epoch": 1.8036793517965066, "grad_norm": 314.45721435546875, "learning_rate": 5.011345761298902e-08, "loss": 16.4531, "step": 27158 }, { "epoch": 1.8037457660888623, "grad_norm": 146.65231323242188, "learning_rate": 5.0079842916357365e-08, "loss": 18.875, "step": 27159 }, { "epoch": 1.8038121803812182, "grad_norm": 356.7129821777344, "learning_rate": 5.0046239207854226e-08, "loss": 22.3438, "step": 27160 }, { "epoch": 1.8038785946735736, "grad_norm": 140.83111572265625, "learning_rate": 5.0012646487868404e-08, "loss": 11.4922, "step": 27161 }, { "epoch": 1.8039450089659295, "grad_norm": 205.94189453125, "learning_rate": 4.997906475678848e-08, "loss": 21.5156, "step": 27162 }, { "epoch": 1.8040114232582853, "grad_norm": 111.78314208984375, "learning_rate": 4.994549401500303e-08, "loss": 14.0781, "step": 27163 }, { "epoch": 1.8040778375506408, "grad_norm": 575.2904663085938, "learning_rate": 4.991193426290008e-08, "loss": 21.4219, "step": 27164 }, { "epoch": 1.8041442518429966, "grad_norm": 152.3995819091797, "learning_rate": 4.98783855008682e-08, "loss": 22.375, "step": 27165 }, { "epoch": 1.8042106661353523, "grad_norm": 335.46197509765625, "learning_rate": 4.9844847729295204e-08, "loss": 16.2344, "step": 27166 }, { "epoch": 1.804277080427708, "grad_norm": 320.5553894042969, "learning_rate": 4.98113209485691e-08, "loss": 21.5, "step": 27167 }, { "epoch": 1.8043434947200638, "grad_norm": 230.14268493652344, "learning_rate": 4.9777805159077815e-08, "loss": 10.7031, "step": 27168 }, { "epoch": 1.8044099090124195, "grad_norm": 105.82353973388672, "learning_rate": 4.9744300361208804e-08, "loss": 11.6094, "step": 27169 }, { "epoch": 1.8044763233047751, "grad_norm": 584.2266235351562, "learning_rate": 4.971080655534987e-08, "loss": 25.2031, "step": 27170 }, { "epoch": 1.804542737597131, "grad_norm": 204.89649963378906, "learning_rate": 4.9677323741888374e-08, "loss": 15.9219, "step": 27171 }, { "epoch": 1.8046091518894865, "grad_norm": 223.18856811523438, "learning_rate": 4.9643851921211566e-08, "loss": 18.0938, "step": 27172 }, { "epoch": 1.8046755661818423, "grad_norm": 154.43667602539062, "learning_rate": 4.961039109370668e-08, "loss": 20.4844, "step": 27173 }, { "epoch": 1.8047419804741982, "grad_norm": 167.66696166992188, "learning_rate": 4.9576941259760863e-08, "loss": 9.2891, "step": 27174 }, { "epoch": 1.8048083947665536, "grad_norm": 684.602783203125, "learning_rate": 4.954350241976069e-08, "loss": 24.1719, "step": 27175 }, { "epoch": 1.8048748090589095, "grad_norm": 243.33267211914062, "learning_rate": 4.951007457409351e-08, "loss": 19.5312, "step": 27176 }, { "epoch": 1.8049412233512652, "grad_norm": 221.5083770751953, "learning_rate": 4.947665772314558e-08, "loss": 21.1094, "step": 27177 }, { "epoch": 1.8050076376436208, "grad_norm": 144.0144500732422, "learning_rate": 4.944325186730358e-08, "loss": 14.0625, "step": 27178 }, { "epoch": 1.8050740519359767, "grad_norm": 184.6792755126953, "learning_rate": 4.9409857006953884e-08, "loss": 14.4062, "step": 27179 }, { "epoch": 1.8051404662283324, "grad_norm": 200.30091857910156, "learning_rate": 4.9376473142482833e-08, "loss": 21.9688, "step": 27180 }, { "epoch": 1.805206880520688, "grad_norm": 321.5951232910156, "learning_rate": 4.934310027427657e-08, "loss": 11.7188, "step": 27181 }, { "epoch": 1.8052732948130439, "grad_norm": 166.6808319091797, "learning_rate": 4.930973840272124e-08, "loss": 13.2266, "step": 27182 }, { "epoch": 1.8053397091053995, "grad_norm": 143.89028930664062, "learning_rate": 4.92763875282024e-08, "loss": 17.7344, "step": 27183 }, { "epoch": 1.8054061233977552, "grad_norm": 306.3823547363281, "learning_rate": 4.924304765110643e-08, "loss": 17.6719, "step": 27184 }, { "epoch": 1.805472537690111, "grad_norm": 218.2613983154297, "learning_rate": 4.920971877181845e-08, "loss": 15.5156, "step": 27185 }, { "epoch": 1.8055389519824665, "grad_norm": 196.51931762695312, "learning_rate": 4.917640089072428e-08, "loss": 14.1094, "step": 27186 }, { "epoch": 1.8056053662748224, "grad_norm": 379.4331359863281, "learning_rate": 4.9143094008209154e-08, "loss": 15.5625, "step": 27187 }, { "epoch": 1.805671780567178, "grad_norm": 216.7561798095703, "learning_rate": 4.910979812465854e-08, "loss": 14.4531, "step": 27188 }, { "epoch": 1.8057381948595337, "grad_norm": 214.51681518554688, "learning_rate": 4.907651324045714e-08, "loss": 16.9375, "step": 27189 }, { "epoch": 1.8058046091518896, "grad_norm": 360.4029541015625, "learning_rate": 4.904323935599064e-08, "loss": 21.2812, "step": 27190 }, { "epoch": 1.8058710234442452, "grad_norm": 892.5703735351562, "learning_rate": 4.900997647164351e-08, "loss": 16.9531, "step": 27191 }, { "epoch": 1.8059374377366009, "grad_norm": 162.83193969726562, "learning_rate": 4.897672458780055e-08, "loss": 12.8984, "step": 27192 }, { "epoch": 1.8060038520289567, "grad_norm": 279.4352722167969, "learning_rate": 4.8943483704846465e-08, "loss": 18.2969, "step": 27193 }, { "epoch": 1.8060702663213124, "grad_norm": 230.1867218017578, "learning_rate": 4.891025382316571e-08, "loss": 18.25, "step": 27194 }, { "epoch": 1.806136680613668, "grad_norm": 290.9033203125, "learning_rate": 4.887703494314277e-08, "loss": 16.0781, "step": 27195 }, { "epoch": 1.806203094906024, "grad_norm": 203.0759735107422, "learning_rate": 4.884382706516188e-08, "loss": 19.9688, "step": 27196 }, { "epoch": 1.8062695091983794, "grad_norm": 176.57699584960938, "learning_rate": 4.881063018960696e-08, "loss": 16.1719, "step": 27197 }, { "epoch": 1.8063359234907352, "grad_norm": 234.09182739257812, "learning_rate": 4.877744431686248e-08, "loss": 15.2344, "step": 27198 }, { "epoch": 1.806402337783091, "grad_norm": 105.62008666992188, "learning_rate": 4.8744269447311806e-08, "loss": 14.5547, "step": 27199 }, { "epoch": 1.8064687520754465, "grad_norm": 185.10450744628906, "learning_rate": 4.871110558133884e-08, "loss": 16.75, "step": 27200 }, { "epoch": 1.8065351663678024, "grad_norm": 162.5513458251953, "learning_rate": 4.8677952719327397e-08, "loss": 15.3594, "step": 27201 }, { "epoch": 1.806601580660158, "grad_norm": 145.33132934570312, "learning_rate": 4.8644810861660725e-08, "loss": 16.3438, "step": 27202 }, { "epoch": 1.8066679949525137, "grad_norm": 193.53294372558594, "learning_rate": 4.86116800087224e-08, "loss": 19.8594, "step": 27203 }, { "epoch": 1.8067344092448696, "grad_norm": 131.84031677246094, "learning_rate": 4.857856016089556e-08, "loss": 15.1406, "step": 27204 }, { "epoch": 1.8068008235372253, "grad_norm": 251.34315490722656, "learning_rate": 4.854545131856313e-08, "loss": 14.4688, "step": 27205 }, { "epoch": 1.806867237829581, "grad_norm": 156.26065063476562, "learning_rate": 4.851235348210858e-08, "loss": 14.9219, "step": 27206 }, { "epoch": 1.8069336521219368, "grad_norm": 255.70501708984375, "learning_rate": 4.847926665191426e-08, "loss": 15.25, "step": 27207 }, { "epoch": 1.8070000664142922, "grad_norm": 154.70611572265625, "learning_rate": 4.84461908283631e-08, "loss": 12.25, "step": 27208 }, { "epoch": 1.807066480706648, "grad_norm": 567.4253540039062, "learning_rate": 4.841312601183778e-08, "loss": 13.5312, "step": 27209 }, { "epoch": 1.8071328949990038, "grad_norm": 204.6763458251953, "learning_rate": 4.838007220272067e-08, "loss": 14.9375, "step": 27210 }, { "epoch": 1.8071993092913594, "grad_norm": 259.6315002441406, "learning_rate": 4.8347029401394126e-08, "loss": 17.25, "step": 27211 }, { "epoch": 1.8072657235837153, "grad_norm": 394.8990783691406, "learning_rate": 4.83139976082404e-08, "loss": 16.6406, "step": 27212 }, { "epoch": 1.807332137876071, "grad_norm": 383.4065246582031, "learning_rate": 4.828097682364174e-08, "loss": 15.25, "step": 27213 }, { "epoch": 1.8073985521684266, "grad_norm": 244.7285614013672, "learning_rate": 4.824796704797962e-08, "loss": 13.5156, "step": 27214 }, { "epoch": 1.8074649664607825, "grad_norm": 165.51092529296875, "learning_rate": 4.821496828163652e-08, "loss": 15.75, "step": 27215 }, { "epoch": 1.8075313807531381, "grad_norm": 384.7915954589844, "learning_rate": 4.818198052499367e-08, "loss": 14.7109, "step": 27216 }, { "epoch": 1.8075977950454938, "grad_norm": 1104.5648193359375, "learning_rate": 4.8149003778432895e-08, "loss": 23.7188, "step": 27217 }, { "epoch": 1.8076642093378497, "grad_norm": 210.1620635986328, "learning_rate": 4.811603804233544e-08, "loss": 17.125, "step": 27218 }, { "epoch": 1.807730623630205, "grad_norm": 194.745849609375, "learning_rate": 4.8083083317082884e-08, "loss": 16.0469, "step": 27219 }, { "epoch": 1.807797037922561, "grad_norm": 763.6942138671875, "learning_rate": 4.805013960305626e-08, "loss": 17.7812, "step": 27220 }, { "epoch": 1.8078634522149166, "grad_norm": 232.03001403808594, "learning_rate": 4.801720690063693e-08, "loss": 16.2344, "step": 27221 }, { "epoch": 1.8079298665072723, "grad_norm": 28294.75390625, "learning_rate": 4.798428521020526e-08, "loss": 12.1094, "step": 27222 }, { "epoch": 1.8079962807996282, "grad_norm": 218.41793823242188, "learning_rate": 4.7951374532142596e-08, "loss": 15.0781, "step": 27223 }, { "epoch": 1.8080626950919838, "grad_norm": 255.86183166503906, "learning_rate": 4.791847486682943e-08, "loss": 14.2578, "step": 27224 }, { "epoch": 1.8081291093843395, "grad_norm": 170.61219787597656, "learning_rate": 4.788558621464633e-08, "loss": 14.6875, "step": 27225 }, { "epoch": 1.8081955236766953, "grad_norm": 380.5737609863281, "learning_rate": 4.785270857597368e-08, "loss": 18.75, "step": 27226 }, { "epoch": 1.808261937969051, "grad_norm": 179.83204650878906, "learning_rate": 4.781984195119204e-08, "loss": 14.7656, "step": 27227 }, { "epoch": 1.8083283522614066, "grad_norm": 121.97830963134766, "learning_rate": 4.7786986340681236e-08, "loss": 13.2344, "step": 27228 }, { "epoch": 1.8083947665537625, "grad_norm": 219.49615478515625, "learning_rate": 4.775414174482162e-08, "loss": 14.5781, "step": 27229 }, { "epoch": 1.808461180846118, "grad_norm": 152.6136016845703, "learning_rate": 4.772130816399289e-08, "loss": 14.6562, "step": 27230 }, { "epoch": 1.8085275951384738, "grad_norm": 156.34432983398438, "learning_rate": 4.7688485598574966e-08, "loss": 14.875, "step": 27231 }, { "epoch": 1.8085940094308295, "grad_norm": 208.58053588867188, "learning_rate": 4.765567404894755e-08, "loss": 18.2812, "step": 27232 }, { "epoch": 1.8086604237231851, "grad_norm": 95.93450927734375, "learning_rate": 4.76228735154901e-08, "loss": 14.5625, "step": 27233 }, { "epoch": 1.808726838015541, "grad_norm": 172.5395050048828, "learning_rate": 4.759008399858211e-08, "loss": 14.8906, "step": 27234 }, { "epoch": 1.8087932523078967, "grad_norm": 258.1904602050781, "learning_rate": 4.755730549860293e-08, "loss": 14.8438, "step": 27235 }, { "epoch": 1.8088596666002523, "grad_norm": 405.9841613769531, "learning_rate": 4.752453801593137e-08, "loss": 18.2656, "step": 27236 }, { "epoch": 1.8089260808926082, "grad_norm": 180.95957946777344, "learning_rate": 4.749178155094702e-08, "loss": 9.7969, "step": 27237 }, { "epoch": 1.8089924951849639, "grad_norm": 106.88650512695312, "learning_rate": 4.745903610402835e-08, "loss": 15.1562, "step": 27238 }, { "epoch": 1.8090589094773195, "grad_norm": 365.2715148925781, "learning_rate": 4.742630167555428e-08, "loss": 18.125, "step": 27239 }, { "epoch": 1.8091253237696754, "grad_norm": 244.48931884765625, "learning_rate": 4.739357826590351e-08, "loss": 19.1875, "step": 27240 }, { "epoch": 1.8091917380620308, "grad_norm": 556.0946655273438, "learning_rate": 4.7360865875454406e-08, "loss": 18.5312, "step": 27241 }, { "epoch": 1.8092581523543867, "grad_norm": 223.9905548095703, "learning_rate": 4.7328164504585545e-08, "loss": 16.5156, "step": 27242 }, { "epoch": 1.8093245666467423, "grad_norm": 123.34671020507812, "learning_rate": 4.7295474153675295e-08, "loss": 19.3438, "step": 27243 }, { "epoch": 1.809390980939098, "grad_norm": 169.74147033691406, "learning_rate": 4.726279482310136e-08, "loss": 19.4688, "step": 27244 }, { "epoch": 1.8094573952314539, "grad_norm": 225.21383666992188, "learning_rate": 4.723012651324221e-08, "loss": 16.7031, "step": 27245 }, { "epoch": 1.8095238095238095, "grad_norm": 564.6126708984375, "learning_rate": 4.719746922447565e-08, "loss": 12.7734, "step": 27246 }, { "epoch": 1.8095902238161652, "grad_norm": 220.52926635742188, "learning_rate": 4.716482295717905e-08, "loss": 19.4531, "step": 27247 }, { "epoch": 1.809656638108521, "grad_norm": 388.2171325683594, "learning_rate": 4.713218771173066e-08, "loss": 15.6719, "step": 27248 }, { "epoch": 1.8097230524008767, "grad_norm": 1222.58203125, "learning_rate": 4.709956348850752e-08, "loss": 26.7188, "step": 27249 }, { "epoch": 1.8097894666932324, "grad_norm": 160.97105407714844, "learning_rate": 4.7066950287887094e-08, "loss": 12.0859, "step": 27250 }, { "epoch": 1.8098558809855883, "grad_norm": 386.2282409667969, "learning_rate": 4.703434811024676e-08, "loss": 21.0, "step": 27251 }, { "epoch": 1.8099222952779437, "grad_norm": 250.95481872558594, "learning_rate": 4.700175695596364e-08, "loss": 17.8125, "step": 27252 }, { "epoch": 1.8099887095702996, "grad_norm": 222.28573608398438, "learning_rate": 4.696917682541446e-08, "loss": 17.7344, "step": 27253 }, { "epoch": 1.8100551238626552, "grad_norm": 287.0232238769531, "learning_rate": 4.693660771897656e-08, "loss": 18.8047, "step": 27254 }, { "epoch": 1.8101215381550109, "grad_norm": 293.2316589355469, "learning_rate": 4.690404963702632e-08, "loss": 15.1094, "step": 27255 }, { "epoch": 1.8101879524473667, "grad_norm": 191.43603515625, "learning_rate": 4.687150257994032e-08, "loss": 20.4219, "step": 27256 }, { "epoch": 1.8102543667397224, "grad_norm": 204.12501525878906, "learning_rate": 4.683896654809527e-08, "loss": 14.5781, "step": 27257 }, { "epoch": 1.810320781032078, "grad_norm": 223.5614471435547, "learning_rate": 4.680644154186741e-08, "loss": 14.5938, "step": 27258 }, { "epoch": 1.810387195324434, "grad_norm": 171.98631286621094, "learning_rate": 4.677392756163301e-08, "loss": 19.9062, "step": 27259 }, { "epoch": 1.8104536096167896, "grad_norm": 420.3955078125, "learning_rate": 4.674142460776831e-08, "loss": 22.5625, "step": 27260 }, { "epoch": 1.8105200239091452, "grad_norm": 282.68670654296875, "learning_rate": 4.670893268064879e-08, "loss": 19.9688, "step": 27261 }, { "epoch": 1.8105864382015011, "grad_norm": 167.63645935058594, "learning_rate": 4.667645178065094e-08, "loss": 17.4141, "step": 27262 }, { "epoch": 1.8106528524938565, "grad_norm": 551.37109375, "learning_rate": 4.664398190815011e-08, "loss": 16.5312, "step": 27263 }, { "epoch": 1.8107192667862124, "grad_norm": 238.5106658935547, "learning_rate": 4.66115230635219e-08, "loss": 21.6094, "step": 27264 }, { "epoch": 1.810785681078568, "grad_norm": 318.82977294921875, "learning_rate": 4.657907524714177e-08, "loss": 15.7344, "step": 27265 }, { "epoch": 1.8108520953709237, "grad_norm": 289.06512451171875, "learning_rate": 4.6546638459385226e-08, "loss": 14.2969, "step": 27266 }, { "epoch": 1.8109185096632796, "grad_norm": 303.67864990234375, "learning_rate": 4.6514212700627276e-08, "loss": 17.9688, "step": 27267 }, { "epoch": 1.8109849239556353, "grad_norm": 159.35537719726562, "learning_rate": 4.64817979712433e-08, "loss": 16.8906, "step": 27268 }, { "epoch": 1.811051338247991, "grad_norm": 771.7236328125, "learning_rate": 4.6449394271607766e-08, "loss": 16.0312, "step": 27269 }, { "epoch": 1.8111177525403468, "grad_norm": 176.47283935546875, "learning_rate": 4.641700160209605e-08, "loss": 15.8906, "step": 27270 }, { "epoch": 1.8111841668327024, "grad_norm": 232.7706298828125, "learning_rate": 4.638461996308252e-08, "loss": 15.5156, "step": 27271 }, { "epoch": 1.811250581125058, "grad_norm": 259.7520446777344, "learning_rate": 4.635224935494175e-08, "loss": 13.7969, "step": 27272 }, { "epoch": 1.811316995417414, "grad_norm": 220.41648864746094, "learning_rate": 4.631988977804835e-08, "loss": 13.6094, "step": 27273 }, { "epoch": 1.8113834097097694, "grad_norm": 230.4384002685547, "learning_rate": 4.628754123277656e-08, "loss": 17.75, "step": 27274 }, { "epoch": 1.8114498240021253, "grad_norm": 232.55581665039062, "learning_rate": 4.625520371950031e-08, "loss": 16.4219, "step": 27275 }, { "epoch": 1.811516238294481, "grad_norm": 193.17393493652344, "learning_rate": 4.622287723859419e-08, "loss": 16.4375, "step": 27276 }, { "epoch": 1.8115826525868366, "grad_norm": 135.13015747070312, "learning_rate": 4.619056179043179e-08, "loss": 14.9531, "step": 27277 }, { "epoch": 1.8116490668791925, "grad_norm": 145.51084899902344, "learning_rate": 4.6158257375386925e-08, "loss": 12.5, "step": 27278 }, { "epoch": 1.8117154811715481, "grad_norm": 268.8815612792969, "learning_rate": 4.612596399383328e-08, "loss": 17.7031, "step": 27279 }, { "epoch": 1.8117818954639038, "grad_norm": 125.94904327392578, "learning_rate": 4.609368164614447e-08, "loss": 15.75, "step": 27280 }, { "epoch": 1.8118483097562597, "grad_norm": 121.77349853515625, "learning_rate": 4.606141033269384e-08, "loss": 14.5938, "step": 27281 }, { "epoch": 1.8119147240486153, "grad_norm": 242.7513427734375, "learning_rate": 4.602915005385477e-08, "loss": 15.5156, "step": 27282 }, { "epoch": 1.811981138340971, "grad_norm": 184.8224334716797, "learning_rate": 4.5996900810000514e-08, "loss": 10.5312, "step": 27283 }, { "epoch": 1.8120475526333268, "grad_norm": 114.16036224365234, "learning_rate": 4.596466260150389e-08, "loss": 13.3125, "step": 27284 }, { "epoch": 1.8121139669256823, "grad_norm": 136.67547607421875, "learning_rate": 4.5932435428738035e-08, "loss": 14.5781, "step": 27285 }, { "epoch": 1.8121803812180381, "grad_norm": 248.4456329345703, "learning_rate": 4.590021929207544e-08, "loss": 17.375, "step": 27286 }, { "epoch": 1.8122467955103938, "grad_norm": 142.9591064453125, "learning_rate": 4.586801419188913e-08, "loss": 17.9375, "step": 27287 }, { "epoch": 1.8123132098027495, "grad_norm": 138.42173767089844, "learning_rate": 4.583582012855136e-08, "loss": 14.7188, "step": 27288 }, { "epoch": 1.8123796240951053, "grad_norm": 126.01813507080078, "learning_rate": 4.5803637102434624e-08, "loss": 15.2656, "step": 27289 }, { "epoch": 1.812446038387461, "grad_norm": 233.77581787109375, "learning_rate": 4.577146511391117e-08, "loss": 19.9688, "step": 27290 }, { "epoch": 1.8125124526798166, "grad_norm": 146.8875274658203, "learning_rate": 4.5739304163353256e-08, "loss": 16.2812, "step": 27291 }, { "epoch": 1.8125788669721725, "grad_norm": 227.53317260742188, "learning_rate": 4.5707154251132586e-08, "loss": 14.7734, "step": 27292 }, { "epoch": 1.8126452812645282, "grad_norm": 174.82171630859375, "learning_rate": 4.567501537762164e-08, "loss": 12.5781, "step": 27293 }, { "epoch": 1.8127116955568838, "grad_norm": 97.0578384399414, "learning_rate": 4.564288754319157e-08, "loss": 19.7031, "step": 27294 }, { "epoch": 1.8127781098492397, "grad_norm": 246.74533081054688, "learning_rate": 4.561077074821429e-08, "loss": 15.3125, "step": 27295 }, { "epoch": 1.8128445241415951, "grad_norm": 177.822265625, "learning_rate": 4.557866499306129e-08, "loss": 13.2734, "step": 27296 }, { "epoch": 1.812910938433951, "grad_norm": 278.732666015625, "learning_rate": 4.554657027810394e-08, "loss": 14.8281, "step": 27297 }, { "epoch": 1.8129773527263067, "grad_norm": 2690.918701171875, "learning_rate": 4.5514486603713484e-08, "loss": 11.8906, "step": 27298 }, { "epoch": 1.8130437670186623, "grad_norm": 96.00284576416016, "learning_rate": 4.54824139702612e-08, "loss": 14.0625, "step": 27299 }, { "epoch": 1.8131101813110182, "grad_norm": 356.5703125, "learning_rate": 4.545035237811767e-08, "loss": 17.9688, "step": 27300 }, { "epoch": 1.8131765956033739, "grad_norm": 183.01406860351562, "learning_rate": 4.541830182765438e-08, "loss": 20.125, "step": 27301 }, { "epoch": 1.8132430098957295, "grad_norm": 309.3520812988281, "learning_rate": 4.5386262319241475e-08, "loss": 28.1406, "step": 27302 }, { "epoch": 1.8133094241880854, "grad_norm": 131.27561950683594, "learning_rate": 4.535423385324999e-08, "loss": 16.2969, "step": 27303 }, { "epoch": 1.813375838480441, "grad_norm": 169.1138153076172, "learning_rate": 4.532221643005008e-08, "loss": 17.3594, "step": 27304 }, { "epoch": 1.8134422527727967, "grad_norm": 457.34765625, "learning_rate": 4.5290210050012324e-08, "loss": 22.6562, "step": 27305 }, { "epoch": 1.8135086670651526, "grad_norm": 129.1457061767578, "learning_rate": 4.5258214713506994e-08, "loss": 13.5312, "step": 27306 }, { "epoch": 1.813575081357508, "grad_norm": 188.96417236328125, "learning_rate": 4.522623042090412e-08, "loss": 15.7969, "step": 27307 }, { "epoch": 1.8136414956498639, "grad_norm": 323.1895751953125, "learning_rate": 4.5194257172573414e-08, "loss": 14.5938, "step": 27308 }, { "epoch": 1.8137079099422195, "grad_norm": 153.49349975585938, "learning_rate": 4.516229496888524e-08, "loss": 14.0625, "step": 27309 }, { "epoch": 1.8137743242345752, "grad_norm": 968.4833984375, "learning_rate": 4.513034381020897e-08, "loss": 13.7969, "step": 27310 }, { "epoch": 1.813840738526931, "grad_norm": 184.94894409179688, "learning_rate": 4.50984036969142e-08, "loss": 15.75, "step": 27311 }, { "epoch": 1.8139071528192867, "grad_norm": 271.4873352050781, "learning_rate": 4.506647462937052e-08, "loss": 13.1562, "step": 27312 }, { "epoch": 1.8139735671116424, "grad_norm": 252.00860595703125, "learning_rate": 4.5034556607947307e-08, "loss": 18.1875, "step": 27313 }, { "epoch": 1.8140399814039982, "grad_norm": 452.5859069824219, "learning_rate": 4.500264963301337e-08, "loss": 13.6406, "step": 27314 }, { "epoch": 1.814106395696354, "grad_norm": 284.5282897949219, "learning_rate": 4.4970753704938416e-08, "loss": 17.4531, "step": 27315 }, { "epoch": 1.8141728099887096, "grad_norm": 241.2311248779297, "learning_rate": 4.4938868824091034e-08, "loss": 17.5156, "step": 27316 }, { "epoch": 1.8142392242810654, "grad_norm": 143.5349884033203, "learning_rate": 4.490699499083994e-08, "loss": 18.9688, "step": 27317 }, { "epoch": 1.8143056385734209, "grad_norm": 209.34335327148438, "learning_rate": 4.487513220555406e-08, "loss": 17.7656, "step": 27318 }, { "epoch": 1.8143720528657767, "grad_norm": 577.7056884765625, "learning_rate": 4.4843280468601976e-08, "loss": 20.3438, "step": 27319 }, { "epoch": 1.8144384671581324, "grad_norm": 303.067626953125, "learning_rate": 4.4811439780351954e-08, "loss": 18.3594, "step": 27320 }, { "epoch": 1.814504881450488, "grad_norm": 112.68460845947266, "learning_rate": 4.477961014117238e-08, "loss": 11.6406, "step": 27321 }, { "epoch": 1.814571295742844, "grad_norm": 245.50408935546875, "learning_rate": 4.474779155143149e-08, "loss": 14.7188, "step": 27322 }, { "epoch": 1.8146377100351996, "grad_norm": 1378.1622314453125, "learning_rate": 4.4715984011497344e-08, "loss": 23.25, "step": 27323 }, { "epoch": 1.8147041243275552, "grad_norm": 123.75804901123047, "learning_rate": 4.468418752173797e-08, "loss": 11.6172, "step": 27324 }, { "epoch": 1.8147705386199111, "grad_norm": 126.1346664428711, "learning_rate": 4.465240208252086e-08, "loss": 15.125, "step": 27325 }, { "epoch": 1.8148369529122668, "grad_norm": 111.23799896240234, "learning_rate": 4.462062769421404e-08, "loss": 14.625, "step": 27326 }, { "epoch": 1.8149033672046224, "grad_norm": 243.28680419921875, "learning_rate": 4.458886435718479e-08, "loss": 22.8438, "step": 27327 }, { "epoch": 1.8149697814969783, "grad_norm": 252.587646484375, "learning_rate": 4.455711207180057e-08, "loss": 15.4375, "step": 27328 }, { "epoch": 1.8150361957893337, "grad_norm": 85.7817611694336, "learning_rate": 4.452537083842889e-08, "loss": 16.75, "step": 27329 }, { "epoch": 1.8151026100816896, "grad_norm": 274.54461669921875, "learning_rate": 4.449364065743666e-08, "loss": 14.0312, "step": 27330 }, { "epoch": 1.8151690243740453, "grad_norm": 160.1632843017578, "learning_rate": 4.4461921529191036e-08, "loss": 14.8906, "step": 27331 }, { "epoch": 1.815235438666401, "grad_norm": 157.068115234375, "learning_rate": 4.443021345405895e-08, "loss": 15.8906, "step": 27332 }, { "epoch": 1.8153018529587568, "grad_norm": 186.1058349609375, "learning_rate": 4.439851643240711e-08, "loss": 13.0312, "step": 27333 }, { "epoch": 1.8153682672511124, "grad_norm": 160.92080688476562, "learning_rate": 4.436683046460221e-08, "loss": 13.7031, "step": 27334 }, { "epoch": 1.815434681543468, "grad_norm": 114.90813446044922, "learning_rate": 4.433515555101075e-08, "loss": 10.2578, "step": 27335 }, { "epoch": 1.815501095835824, "grad_norm": 126.94082641601562, "learning_rate": 4.4303491691999095e-08, "loss": 12.4844, "step": 27336 }, { "epoch": 1.8155675101281796, "grad_norm": 700.4971313476562, "learning_rate": 4.427183888793362e-08, "loss": 16.3906, "step": 27337 }, { "epoch": 1.8156339244205353, "grad_norm": 171.42942810058594, "learning_rate": 4.4240197139180594e-08, "loss": 23.1719, "step": 27338 }, { "epoch": 1.8157003387128912, "grad_norm": 219.9073028564453, "learning_rate": 4.420856644610549e-08, "loss": 15.1562, "step": 27339 }, { "epoch": 1.8157667530052466, "grad_norm": 99.18643188476562, "learning_rate": 4.41769468090748e-08, "loss": 13.6719, "step": 27340 }, { "epoch": 1.8158331672976025, "grad_norm": 147.84657287597656, "learning_rate": 4.4145338228453896e-08, "loss": 17.6797, "step": 27341 }, { "epoch": 1.8158995815899581, "grad_norm": 214.75755310058594, "learning_rate": 4.41137407046086e-08, "loss": 13.8906, "step": 27342 }, { "epoch": 1.8159659958823138, "grad_norm": 82.72521209716797, "learning_rate": 4.4082154237904404e-08, "loss": 13.625, "step": 27343 }, { "epoch": 1.8160324101746697, "grad_norm": 191.5479736328125, "learning_rate": 4.4050578828706555e-08, "loss": 16.375, "step": 27344 }, { "epoch": 1.8160988244670253, "grad_norm": 201.29946899414062, "learning_rate": 4.401901447738032e-08, "loss": 16.875, "step": 27345 }, { "epoch": 1.816165238759381, "grad_norm": 226.14315795898438, "learning_rate": 4.398746118429108e-08, "loss": 16.875, "step": 27346 }, { "epoch": 1.8162316530517368, "grad_norm": 170.7612762451172, "learning_rate": 4.395591894980333e-08, "loss": 11.2812, "step": 27347 }, { "epoch": 1.8162980673440925, "grad_norm": 334.0626525878906, "learning_rate": 4.3924387774282536e-08, "loss": 19.3438, "step": 27348 }, { "epoch": 1.8163644816364481, "grad_norm": 110.5466079711914, "learning_rate": 4.3892867658092966e-08, "loss": 11.3906, "step": 27349 }, { "epoch": 1.816430895928804, "grad_norm": 251.760009765625, "learning_rate": 4.386135860159934e-08, "loss": 17.3594, "step": 27350 }, { "epoch": 1.8164973102211595, "grad_norm": 834.3196411132812, "learning_rate": 4.382986060516625e-08, "loss": 20.5625, "step": 27351 }, { "epoch": 1.8165637245135153, "grad_norm": 294.10797119140625, "learning_rate": 4.379837366915806e-08, "loss": 15.5312, "step": 27352 }, { "epoch": 1.816630138805871, "grad_norm": 166.0380401611328, "learning_rate": 4.376689779393861e-08, "loss": 14.2344, "step": 27353 }, { "epoch": 1.8166965530982266, "grad_norm": 153.05126953125, "learning_rate": 4.373543297987259e-08, "loss": 16.2969, "step": 27354 }, { "epoch": 1.8167629673905825, "grad_norm": 134.18955993652344, "learning_rate": 4.3703979227323495e-08, "loss": 14.9531, "step": 27355 }, { "epoch": 1.8168293816829382, "grad_norm": 403.9653015136719, "learning_rate": 4.367253653665526e-08, "loss": 16.1562, "step": 27356 }, { "epoch": 1.8168957959752938, "grad_norm": 325.3513488769531, "learning_rate": 4.364110490823192e-08, "loss": 19.8594, "step": 27357 }, { "epoch": 1.8169622102676497, "grad_norm": 245.75196838378906, "learning_rate": 4.3609684342416745e-08, "loss": 19.3281, "step": 27358 }, { "epoch": 1.8170286245600054, "grad_norm": 345.7121276855469, "learning_rate": 4.357827483957322e-08, "loss": 16.4844, "step": 27359 }, { "epoch": 1.817095038852361, "grad_norm": 202.50022888183594, "learning_rate": 4.3546876400064606e-08, "loss": 17.9062, "step": 27360 }, { "epoch": 1.8171614531447169, "grad_norm": 131.2678985595703, "learning_rate": 4.351548902425428e-08, "loss": 16.2344, "step": 27361 }, { "epoch": 1.8172278674370723, "grad_norm": 115.71707916259766, "learning_rate": 4.3484112712505186e-08, "loss": 14.5781, "step": 27362 }, { "epoch": 1.8172942817294282, "grad_norm": 225.3613739013672, "learning_rate": 4.345274746518057e-08, "loss": 17.9219, "step": 27363 }, { "epoch": 1.8173606960217839, "grad_norm": 101.74968719482422, "learning_rate": 4.3421393282642604e-08, "loss": 12.7656, "step": 27364 }, { "epoch": 1.8174271103141395, "grad_norm": 118.0396728515625, "learning_rate": 4.3390050165254655e-08, "loss": 16.8594, "step": 27365 }, { "epoch": 1.8174935246064954, "grad_norm": 180.85122680664062, "learning_rate": 4.335871811337888e-08, "loss": 16.7188, "step": 27366 }, { "epoch": 1.817559938898851, "grad_norm": 302.221923828125, "learning_rate": 4.332739712737787e-08, "loss": 16.3906, "step": 27367 }, { "epoch": 1.8176263531912067, "grad_norm": 293.761962890625, "learning_rate": 4.3296087207613795e-08, "loss": 19.125, "step": 27368 }, { "epoch": 1.8176927674835626, "grad_norm": 284.2803039550781, "learning_rate": 4.326478835444902e-08, "loss": 18.125, "step": 27369 }, { "epoch": 1.8177591817759182, "grad_norm": 392.3340148925781, "learning_rate": 4.3233500568245375e-08, "loss": 21.2969, "step": 27370 }, { "epoch": 1.8178255960682739, "grad_norm": 154.75360107421875, "learning_rate": 4.3202223849365114e-08, "loss": 14.7812, "step": 27371 }, { "epoch": 1.8178920103606298, "grad_norm": 188.39988708496094, "learning_rate": 4.3170958198169515e-08, "loss": 22.0781, "step": 27372 }, { "epoch": 1.8179584246529852, "grad_norm": 95.55889892578125, "learning_rate": 4.313970361502084e-08, "loss": 13.5625, "step": 27373 }, { "epoch": 1.818024838945341, "grad_norm": 235.30625915527344, "learning_rate": 4.310846010028013e-08, "loss": 19.1719, "step": 27374 }, { "epoch": 1.8180912532376967, "grad_norm": 203.7689666748047, "learning_rate": 4.3077227654308876e-08, "loss": 14.5156, "step": 27375 }, { "epoch": 1.8181576675300524, "grad_norm": 132.66603088378906, "learning_rate": 4.3046006277468574e-08, "loss": 18.1562, "step": 27376 }, { "epoch": 1.8182240818224082, "grad_norm": 211.23692321777344, "learning_rate": 4.3014795970120476e-08, "loss": 18.5156, "step": 27377 }, { "epoch": 1.818290496114764, "grad_norm": 137.9053192138672, "learning_rate": 4.2983596732624975e-08, "loss": 14.4062, "step": 27378 }, { "epoch": 1.8183569104071196, "grad_norm": 220.1365203857422, "learning_rate": 4.2952408565343655e-08, "loss": 19.0781, "step": 27379 }, { "epoch": 1.8184233246994754, "grad_norm": 235.406982421875, "learning_rate": 4.292123146863702e-08, "loss": 22.1094, "step": 27380 }, { "epoch": 1.818489738991831, "grad_norm": 342.7373046875, "learning_rate": 4.289006544286555e-08, "loss": 10.6328, "step": 27381 }, { "epoch": 1.8185561532841867, "grad_norm": 95.4413070678711, "learning_rate": 4.2858910488389964e-08, "loss": 14.0938, "step": 27382 }, { "epoch": 1.8186225675765426, "grad_norm": 215.42616271972656, "learning_rate": 4.2827766605570525e-08, "loss": 12.1719, "step": 27383 }, { "epoch": 1.818688981868898, "grad_norm": 329.5785217285156, "learning_rate": 4.2796633794767615e-08, "loss": 23.5625, "step": 27384 }, { "epoch": 1.818755396161254, "grad_norm": 319.1176452636719, "learning_rate": 4.276551205634138e-08, "loss": 15.4609, "step": 27385 }, { "epoch": 1.8188218104536096, "grad_norm": 312.81719970703125, "learning_rate": 4.273440139065143e-08, "loss": 19.0, "step": 27386 }, { "epoch": 1.8188882247459652, "grad_norm": 321.7813720703125, "learning_rate": 4.270330179805826e-08, "loss": 18.6094, "step": 27387 }, { "epoch": 1.818954639038321, "grad_norm": 285.6528015136719, "learning_rate": 4.267221327892112e-08, "loss": 13.7188, "step": 27388 }, { "epoch": 1.8190210533306768, "grad_norm": 407.4887390136719, "learning_rate": 4.264113583359985e-08, "loss": 12.9062, "step": 27389 }, { "epoch": 1.8190874676230324, "grad_norm": 119.58094024658203, "learning_rate": 4.2610069462453825e-08, "loss": 14.4219, "step": 27390 }, { "epoch": 1.8191538819153883, "grad_norm": 205.24082946777344, "learning_rate": 4.257901416584253e-08, "loss": 18.4844, "step": 27391 }, { "epoch": 1.819220296207744, "grad_norm": 161.69203186035156, "learning_rate": 4.254796994412502e-08, "loss": 18.9688, "step": 27392 }, { "epoch": 1.8192867105000996, "grad_norm": 507.62091064453125, "learning_rate": 4.251693679766055e-08, "loss": 13.4062, "step": 27393 }, { "epoch": 1.8193531247924555, "grad_norm": 99.28817749023438, "learning_rate": 4.2485914726807956e-08, "loss": 16.3047, "step": 27394 }, { "epoch": 1.819419539084811, "grad_norm": 195.9546661376953, "learning_rate": 4.245490373192628e-08, "loss": 8.8984, "step": 27395 }, { "epoch": 1.8194859533771668, "grad_norm": 212.4039306640625, "learning_rate": 4.242390381337413e-08, "loss": 11.6328, "step": 27396 }, { "epoch": 1.8195523676695224, "grad_norm": 347.50018310546875, "learning_rate": 4.23929149715101e-08, "loss": 14.0938, "step": 27397 }, { "epoch": 1.819618781961878, "grad_norm": 653.9368896484375, "learning_rate": 4.2361937206692565e-08, "loss": 23.9219, "step": 27398 }, { "epoch": 1.819685196254234, "grad_norm": 126.34720611572266, "learning_rate": 4.2330970519279915e-08, "loss": 16.2812, "step": 27399 }, { "epoch": 1.8197516105465896, "grad_norm": 280.03533935546875, "learning_rate": 4.230001490963042e-08, "loss": 19.5312, "step": 27400 }, { "epoch": 1.8198180248389453, "grad_norm": 287.598388671875, "learning_rate": 4.226907037810212e-08, "loss": 18.4844, "step": 27401 }, { "epoch": 1.8198844391313012, "grad_norm": 247.12954711914062, "learning_rate": 4.223813692505307e-08, "loss": 17.8281, "step": 27402 }, { "epoch": 1.8199508534236568, "grad_norm": 145.840087890625, "learning_rate": 4.2207214550840644e-08, "loss": 17.6562, "step": 27403 }, { "epoch": 1.8200172677160125, "grad_norm": 329.4765319824219, "learning_rate": 4.217630325582322e-08, "loss": 17.3594, "step": 27404 }, { "epoch": 1.8200836820083683, "grad_norm": 246.09939575195312, "learning_rate": 4.2145403040357855e-08, "loss": 18.0312, "step": 27405 }, { "epoch": 1.8201500963007238, "grad_norm": 320.2445373535156, "learning_rate": 4.211451390480203e-08, "loss": 15.7891, "step": 27406 }, { "epoch": 1.8202165105930797, "grad_norm": 480.3605651855469, "learning_rate": 4.208363584951335e-08, "loss": 18.8594, "step": 27407 }, { "epoch": 1.8202829248854353, "grad_norm": 149.7005615234375, "learning_rate": 4.2052768874848654e-08, "loss": 14.9844, "step": 27408 }, { "epoch": 1.820349339177791, "grad_norm": 188.84376525878906, "learning_rate": 4.2021912981165196e-08, "loss": 15.7969, "step": 27409 }, { "epoch": 1.8204157534701468, "grad_norm": 179.2437286376953, "learning_rate": 4.199106816882003e-08, "loss": 13.9688, "step": 27410 }, { "epoch": 1.8204821677625025, "grad_norm": 135.4834747314453, "learning_rate": 4.196023443816943e-08, "loss": 13.875, "step": 27411 }, { "epoch": 1.8205485820548581, "grad_norm": 113.61198425292969, "learning_rate": 4.1929411789570655e-08, "loss": 15.2656, "step": 27412 }, { "epoch": 1.820614996347214, "grad_norm": 618.6158447265625, "learning_rate": 4.1898600223379875e-08, "loss": 21.7188, "step": 27413 }, { "epoch": 1.8206814106395697, "grad_norm": 238.24435424804688, "learning_rate": 4.186779973995369e-08, "loss": 17.2422, "step": 27414 }, { "epoch": 1.8207478249319253, "grad_norm": 189.0668487548828, "learning_rate": 4.1837010339648257e-08, "loss": 21.5, "step": 27415 }, { "epoch": 1.8208142392242812, "grad_norm": 126.51496124267578, "learning_rate": 4.1806232022819854e-08, "loss": 16.6719, "step": 27416 }, { "epoch": 1.8208806535166366, "grad_norm": 149.78623962402344, "learning_rate": 4.177546478982419e-08, "loss": 15.2812, "step": 27417 }, { "epoch": 1.8209470678089925, "grad_norm": 319.50018310546875, "learning_rate": 4.174470864101775e-08, "loss": 20.375, "step": 27418 }, { "epoch": 1.8210134821013482, "grad_norm": 164.7349090576172, "learning_rate": 4.1713963576755716e-08, "loss": 16.1562, "step": 27419 }, { "epoch": 1.8210798963937038, "grad_norm": 200.91209411621094, "learning_rate": 4.168322959739401e-08, "loss": 10.4062, "step": 27420 }, { "epoch": 1.8211463106860597, "grad_norm": 213.74609375, "learning_rate": 4.1652506703288125e-08, "loss": 25.375, "step": 27421 }, { "epoch": 1.8212127249784154, "grad_norm": 1163.78271484375, "learning_rate": 4.162179489479345e-08, "loss": 11.6719, "step": 27422 }, { "epoch": 1.821279139270771, "grad_norm": 108.19284057617188, "learning_rate": 4.159109417226525e-08, "loss": 15.75, "step": 27423 }, { "epoch": 1.8213455535631269, "grad_norm": 195.69876098632812, "learning_rate": 4.156040453605869e-08, "loss": 13.2969, "step": 27424 }, { "epoch": 1.8214119678554825, "grad_norm": 234.6860809326172, "learning_rate": 4.1529725986528484e-08, "loss": 18.9375, "step": 27425 }, { "epoch": 1.8214783821478382, "grad_norm": 311.555419921875, "learning_rate": 4.149905852402991e-08, "loss": 20.1875, "step": 27426 }, { "epoch": 1.821544796440194, "grad_norm": 253.17483520507812, "learning_rate": 4.1468402148917445e-08, "loss": 15.3281, "step": 27427 }, { "epoch": 1.8216112107325495, "grad_norm": 267.23211669921875, "learning_rate": 4.143775686154572e-08, "loss": 17.2344, "step": 27428 }, { "epoch": 1.8216776250249054, "grad_norm": 139.762451171875, "learning_rate": 4.140712266226942e-08, "loss": 20.2188, "step": 27429 }, { "epoch": 1.821744039317261, "grad_norm": 166.16275024414062, "learning_rate": 4.1376499551442625e-08, "loss": 12.2812, "step": 27430 }, { "epoch": 1.8218104536096167, "grad_norm": 201.67442321777344, "learning_rate": 4.134588752941981e-08, "loss": 15.1406, "step": 27431 }, { "epoch": 1.8218768679019726, "grad_norm": 236.65289306640625, "learning_rate": 4.1315286596554924e-08, "loss": 14.8438, "step": 27432 }, { "epoch": 1.8219432821943282, "grad_norm": 295.3189697265625, "learning_rate": 4.128469675320201e-08, "loss": 16.0, "step": 27433 }, { "epoch": 1.8220096964866839, "grad_norm": 284.2839050292969, "learning_rate": 4.12541179997149e-08, "loss": 18.1406, "step": 27434 }, { "epoch": 1.8220761107790397, "grad_norm": 286.4992980957031, "learning_rate": 4.1223550336447424e-08, "loss": 15.9062, "step": 27435 }, { "epoch": 1.8221425250713954, "grad_norm": 170.64376831054688, "learning_rate": 4.119299376375274e-08, "loss": 14.25, "step": 27436 }, { "epoch": 1.822208939363751, "grad_norm": 227.54830932617188, "learning_rate": 4.116244828198501e-08, "loss": 9.9453, "step": 27437 }, { "epoch": 1.822275353656107, "grad_norm": 157.91018676757812, "learning_rate": 4.113191389149695e-08, "loss": 17.4062, "step": 27438 }, { "epoch": 1.8223417679484624, "grad_norm": 121.026123046875, "learning_rate": 4.110139059264206e-08, "loss": 20.5, "step": 27439 }, { "epoch": 1.8224081822408182, "grad_norm": 128.7362060546875, "learning_rate": 4.107087838577328e-08, "loss": 14.9219, "step": 27440 }, { "epoch": 1.822474596533174, "grad_norm": 142.1171875, "learning_rate": 4.104037727124388e-08, "loss": 18.7656, "step": 27441 }, { "epoch": 1.8225410108255296, "grad_norm": 184.7765350341797, "learning_rate": 4.1009887249406014e-08, "loss": 14.7344, "step": 27442 }, { "epoch": 1.8226074251178854, "grad_norm": 362.721435546875, "learning_rate": 4.09794083206132e-08, "loss": 15.5625, "step": 27443 }, { "epoch": 1.822673839410241, "grad_norm": 155.0651397705078, "learning_rate": 4.094894048521736e-08, "loss": 13.9844, "step": 27444 }, { "epoch": 1.8227402537025967, "grad_norm": 99.05110168457031, "learning_rate": 4.09184837435711e-08, "loss": 8.9219, "step": 27445 }, { "epoch": 1.8228066679949526, "grad_norm": 175.47470092773438, "learning_rate": 4.088803809602692e-08, "loss": 12.6094, "step": 27446 }, { "epoch": 1.8228730822873083, "grad_norm": 163.67596435546875, "learning_rate": 4.085760354293677e-08, "loss": 16.1562, "step": 27447 }, { "epoch": 1.822939496579664, "grad_norm": 261.2879943847656, "learning_rate": 4.08271800846528e-08, "loss": 18.5781, "step": 27448 }, { "epoch": 1.8230059108720198, "grad_norm": 384.2460632324219, "learning_rate": 4.079676772152707e-08, "loss": 20.1562, "step": 27449 }, { "epoch": 1.8230723251643752, "grad_norm": 292.1014709472656, "learning_rate": 4.076636645391096e-08, "loss": 15.9062, "step": 27450 }, { "epoch": 1.823138739456731, "grad_norm": 1352.6982421875, "learning_rate": 4.0735976282156635e-08, "loss": 17.0, "step": 27451 }, { "epoch": 1.8232051537490868, "grad_norm": 380.651611328125, "learning_rate": 4.070559720661537e-08, "loss": 21.1172, "step": 27452 }, { "epoch": 1.8232715680414424, "grad_norm": 144.04600524902344, "learning_rate": 4.067522922763844e-08, "loss": 11.9688, "step": 27453 }, { "epoch": 1.8233379823337983, "grad_norm": 248.32081604003906, "learning_rate": 4.064487234557734e-08, "loss": 18.5469, "step": 27454 }, { "epoch": 1.823404396626154, "grad_norm": 201.49765014648438, "learning_rate": 4.0614526560783236e-08, "loss": 20.6406, "step": 27455 }, { "epoch": 1.8234708109185096, "grad_norm": 692.9125366210938, "learning_rate": 4.058419187360696e-08, "loss": 15.5312, "step": 27456 }, { "epoch": 1.8235372252108655, "grad_norm": 391.8607177734375, "learning_rate": 4.0553868284399774e-08, "loss": 18.5156, "step": 27457 }, { "epoch": 1.8236036395032211, "grad_norm": 151.12351989746094, "learning_rate": 4.0523555793511853e-08, "loss": 15.125, "step": 27458 }, { "epoch": 1.8236700537955768, "grad_norm": 230.48329162597656, "learning_rate": 4.0493254401294365e-08, "loss": 16.3125, "step": 27459 }, { "epoch": 1.8237364680879327, "grad_norm": 90.63835906982422, "learning_rate": 4.0462964108097575e-08, "loss": 15.5469, "step": 27460 }, { "epoch": 1.823802882380288, "grad_norm": 179.31460571289062, "learning_rate": 4.0432684914271985e-08, "loss": 14.0938, "step": 27461 }, { "epoch": 1.823869296672644, "grad_norm": 125.01387023925781, "learning_rate": 4.040241682016765e-08, "loss": 11.6719, "step": 27462 }, { "epoch": 1.8239357109649996, "grad_norm": 244.64088439941406, "learning_rate": 4.037215982613506e-08, "loss": 19.7812, "step": 27463 }, { "epoch": 1.8240021252573553, "grad_norm": 153.48361206054688, "learning_rate": 4.034191393252362e-08, "loss": 15.8281, "step": 27464 }, { "epoch": 1.8240685395497112, "grad_norm": 156.19549560546875, "learning_rate": 4.031167913968381e-08, "loss": 16.0, "step": 27465 }, { "epoch": 1.8241349538420668, "grad_norm": 258.1057434082031, "learning_rate": 4.028145544796502e-08, "loss": 25.5938, "step": 27466 }, { "epoch": 1.8242013681344225, "grad_norm": 157.2666473388672, "learning_rate": 4.0251242857716864e-08, "loss": 13.5156, "step": 27467 }, { "epoch": 1.8242677824267783, "grad_norm": 164.9515380859375, "learning_rate": 4.022104136928906e-08, "loss": 13.2109, "step": 27468 }, { "epoch": 1.824334196719134, "grad_norm": 356.9450378417969, "learning_rate": 4.019085098303077e-08, "loss": 13.375, "step": 27469 }, { "epoch": 1.8244006110114896, "grad_norm": 203.89706420898438, "learning_rate": 4.016067169929127e-08, "loss": 15.3594, "step": 27470 }, { "epoch": 1.8244670253038455, "grad_norm": 195.72781372070312, "learning_rate": 4.0130503518419515e-08, "loss": 11.4688, "step": 27471 }, { "epoch": 1.824533439596201, "grad_norm": 163.57562255859375, "learning_rate": 4.0100346440764764e-08, "loss": 14.125, "step": 27472 }, { "epoch": 1.8245998538885568, "grad_norm": 171.64854431152344, "learning_rate": 4.007020046667553e-08, "loss": 15.2656, "step": 27473 }, { "epoch": 1.8246662681809125, "grad_norm": 130.458251953125, "learning_rate": 4.0040065596500974e-08, "loss": 12.0469, "step": 27474 }, { "epoch": 1.8247326824732681, "grad_norm": 409.579345703125, "learning_rate": 4.000994183058904e-08, "loss": 13.8359, "step": 27475 }, { "epoch": 1.824799096765624, "grad_norm": 121.61354064941406, "learning_rate": 3.997982916928877e-08, "loss": 14.6875, "step": 27476 }, { "epoch": 1.8248655110579797, "grad_norm": 193.49044799804688, "learning_rate": 3.994972761294824e-08, "loss": 14.3438, "step": 27477 }, { "epoch": 1.8249319253503353, "grad_norm": 233.0362548828125, "learning_rate": 3.99196371619156e-08, "loss": 25.0625, "step": 27478 }, { "epoch": 1.8249983396426912, "grad_norm": 156.3497314453125, "learning_rate": 3.988955781653902e-08, "loss": 19.3125, "step": 27479 }, { "epoch": 1.8250647539350469, "grad_norm": 404.997314453125, "learning_rate": 3.985948957716645e-08, "loss": 23.6719, "step": 27480 }, { "epoch": 1.8251311682274025, "grad_norm": 366.15325927734375, "learning_rate": 3.9829432444145495e-08, "loss": 19.9062, "step": 27481 }, { "epoch": 1.8251975825197584, "grad_norm": 455.6777648925781, "learning_rate": 3.979938641782421e-08, "loss": 22.6875, "step": 27482 }, { "epoch": 1.8252639968121138, "grad_norm": 157.01333618164062, "learning_rate": 3.976935149854976e-08, "loss": 13.75, "step": 27483 }, { "epoch": 1.8253304111044697, "grad_norm": 336.1252746582031, "learning_rate": 3.9739327686669875e-08, "loss": 11.3125, "step": 27484 }, { "epoch": 1.8253968253968254, "grad_norm": 264.4012756347656, "learning_rate": 3.9709314982531606e-08, "loss": 17.0, "step": 27485 }, { "epoch": 1.825463239689181, "grad_norm": 289.2637939453125, "learning_rate": 3.967931338648234e-08, "loss": 20.0312, "step": 27486 }, { "epoch": 1.8255296539815369, "grad_norm": 217.53086853027344, "learning_rate": 3.964932289886891e-08, "loss": 14.625, "step": 27487 }, { "epoch": 1.8255960682738925, "grad_norm": 254.46926879882812, "learning_rate": 3.96193435200386e-08, "loss": 10.6875, "step": 27488 }, { "epoch": 1.8256624825662482, "grad_norm": 277.1929016113281, "learning_rate": 3.9589375250337565e-08, "loss": 15.9688, "step": 27489 }, { "epoch": 1.825728896858604, "grad_norm": 256.4743957519531, "learning_rate": 3.9559418090113206e-08, "loss": 16.8594, "step": 27490 }, { "epoch": 1.8257953111509597, "grad_norm": 247.42213439941406, "learning_rate": 3.9529472039711467e-08, "loss": 15.4375, "step": 27491 }, { "epoch": 1.8258617254433154, "grad_norm": 258.6251220703125, "learning_rate": 3.9499537099478955e-08, "loss": 17.5469, "step": 27492 }, { "epoch": 1.8259281397356713, "grad_norm": 126.47811889648438, "learning_rate": 3.946961326976195e-08, "loss": 12.1719, "step": 27493 }, { "epoch": 1.8259945540280267, "grad_norm": 165.23660278320312, "learning_rate": 3.943970055090662e-08, "loss": 14.5312, "step": 27494 }, { "epoch": 1.8260609683203826, "grad_norm": 177.90548706054688, "learning_rate": 3.940979894325891e-08, "loss": 13.125, "step": 27495 }, { "epoch": 1.8261273826127382, "grad_norm": 155.6039276123047, "learning_rate": 3.9379908447164876e-08, "loss": 14.4375, "step": 27496 }, { "epoch": 1.8261937969050939, "grad_norm": 95.79127502441406, "learning_rate": 3.9350029062969916e-08, "loss": 15.3906, "step": 27497 }, { "epoch": 1.8262602111974497, "grad_norm": 173.9050750732422, "learning_rate": 3.9320160791020077e-08, "loss": 15.4219, "step": 27498 }, { "epoch": 1.8263266254898054, "grad_norm": 287.79058837890625, "learning_rate": 3.9290303631660636e-08, "loss": 13.75, "step": 27499 }, { "epoch": 1.826393039782161, "grad_norm": 102.4111099243164, "learning_rate": 3.9260457585236887e-08, "loss": 9.7031, "step": 27500 }, { "epoch": 1.826459454074517, "grad_norm": 653.35791015625, "learning_rate": 3.9230622652094205e-08, "loss": 30.4375, "step": 27501 }, { "epoch": 1.8265258683668726, "grad_norm": 156.19273376464844, "learning_rate": 3.9200798832577764e-08, "loss": 14.2344, "step": 27502 }, { "epoch": 1.8265922826592282, "grad_norm": 197.1183319091797, "learning_rate": 3.917098612703218e-08, "loss": 17.7188, "step": 27503 }, { "epoch": 1.8266586969515841, "grad_norm": 214.24346923828125, "learning_rate": 3.9141184535802727e-08, "loss": 15.625, "step": 27504 }, { "epoch": 1.8267251112439395, "grad_norm": 154.57496643066406, "learning_rate": 3.911139405923414e-08, "loss": 14.8594, "step": 27505 }, { "epoch": 1.8267915255362954, "grad_norm": 575.4332275390625, "learning_rate": 3.9081614697670573e-08, "loss": 28.375, "step": 27506 }, { "epoch": 1.826857939828651, "grad_norm": 181.71527099609375, "learning_rate": 3.9051846451457095e-08, "loss": 15.1875, "step": 27507 }, { "epoch": 1.8269243541210067, "grad_norm": 316.1285705566406, "learning_rate": 3.902208932093764e-08, "loss": 16.3438, "step": 27508 }, { "epoch": 1.8269907684133626, "grad_norm": 135.9466094970703, "learning_rate": 3.89923433064564e-08, "loss": 12.6875, "step": 27509 }, { "epoch": 1.8270571827057183, "grad_norm": 216.86973571777344, "learning_rate": 3.8962608408357744e-08, "loss": 19.0469, "step": 27510 }, { "epoch": 1.827123596998074, "grad_norm": 137.03390502929688, "learning_rate": 3.8932884626985406e-08, "loss": 13.0156, "step": 27511 }, { "epoch": 1.8271900112904298, "grad_norm": 188.95367431640625, "learning_rate": 3.8903171962683225e-08, "loss": 21.6094, "step": 27512 }, { "epoch": 1.8272564255827854, "grad_norm": 183.89974975585938, "learning_rate": 3.887347041579514e-08, "loss": 14.5469, "step": 27513 }, { "epoch": 1.827322839875141, "grad_norm": 178.19400024414062, "learning_rate": 3.884377998666422e-08, "loss": 9.6406, "step": 27514 }, { "epoch": 1.827389254167497, "grad_norm": 152.6298828125, "learning_rate": 3.881410067563451e-08, "loss": 10.1875, "step": 27515 }, { "epoch": 1.8274556684598524, "grad_norm": 283.82745361328125, "learning_rate": 3.878443248304886e-08, "loss": 15.9844, "step": 27516 }, { "epoch": 1.8275220827522083, "grad_norm": 160.79965209960938, "learning_rate": 3.875477540925065e-08, "loss": 15.8594, "step": 27517 }, { "epoch": 1.827588497044564, "grad_norm": 428.6166687011719, "learning_rate": 3.872512945458295e-08, "loss": 18.625, "step": 27518 }, { "epoch": 1.8276549113369196, "grad_norm": 293.95599365234375, "learning_rate": 3.869549461938859e-08, "loss": 24.0, "step": 27519 }, { "epoch": 1.8277213256292755, "grad_norm": 207.1015625, "learning_rate": 3.866587090401052e-08, "loss": 18.4531, "step": 27520 }, { "epoch": 1.8277877399216311, "grad_norm": 298.4587707519531, "learning_rate": 3.863625830879136e-08, "loss": 16.5625, "step": 27521 }, { "epoch": 1.8278541542139868, "grad_norm": 661.1490478515625, "learning_rate": 3.860665683407338e-08, "loss": 24.5938, "step": 27522 }, { "epoch": 1.8279205685063427, "grad_norm": 129.06356811523438, "learning_rate": 3.8577066480199425e-08, "loss": 14.875, "step": 27523 }, { "epoch": 1.8279869827986983, "grad_norm": 283.9903259277344, "learning_rate": 3.854748724751156e-08, "loss": 19.4844, "step": 27524 }, { "epoch": 1.828053397091054, "grad_norm": 176.78317260742188, "learning_rate": 3.851791913635194e-08, "loss": 17.9375, "step": 27525 }, { "epoch": 1.8281198113834098, "grad_norm": 108.57228088378906, "learning_rate": 3.8488362147062525e-08, "loss": 11.5469, "step": 27526 }, { "epoch": 1.8281862256757653, "grad_norm": 229.79647827148438, "learning_rate": 3.8458816279985485e-08, "loss": 17.6094, "step": 27527 }, { "epoch": 1.8282526399681212, "grad_norm": 109.35694885253906, "learning_rate": 3.84292815354621e-08, "loss": 10.1719, "step": 27528 }, { "epoch": 1.8283190542604768, "grad_norm": 135.04888916015625, "learning_rate": 3.839975791383465e-08, "loss": 13.1875, "step": 27529 }, { "epoch": 1.8283854685528325, "grad_norm": 133.07786560058594, "learning_rate": 3.837024541544409e-08, "loss": 17.7969, "step": 27530 }, { "epoch": 1.8284518828451883, "grad_norm": 182.03924560546875, "learning_rate": 3.8340744040632035e-08, "loss": 12.9844, "step": 27531 }, { "epoch": 1.828518297137544, "grad_norm": 307.4942932128906, "learning_rate": 3.831125378973976e-08, "loss": 13.3906, "step": 27532 }, { "epoch": 1.8285847114298996, "grad_norm": 239.73460388183594, "learning_rate": 3.8281774663108444e-08, "loss": 17.6562, "step": 27533 }, { "epoch": 1.8286511257222555, "grad_norm": 229.84246826171875, "learning_rate": 3.825230666107882e-08, "loss": 15.6094, "step": 27534 }, { "epoch": 1.8287175400146112, "grad_norm": 152.291015625, "learning_rate": 3.822284978399215e-08, "loss": 16.0312, "step": 27535 }, { "epoch": 1.8287839543069668, "grad_norm": 286.4669494628906, "learning_rate": 3.8193404032188734e-08, "loss": 20.7188, "step": 27536 }, { "epoch": 1.8288503685993227, "grad_norm": 133.0933380126953, "learning_rate": 3.816396940600963e-08, "loss": 11.625, "step": 27537 }, { "epoch": 1.8289167828916781, "grad_norm": 292.1531982421875, "learning_rate": 3.813454590579501e-08, "loss": 18.4219, "step": 27538 }, { "epoch": 1.828983197184034, "grad_norm": 232.0365447998047, "learning_rate": 3.810513353188527e-08, "loss": 17.375, "step": 27539 }, { "epoch": 1.8290496114763897, "grad_norm": 135.22097778320312, "learning_rate": 3.8075732284620795e-08, "loss": 15.25, "step": 27540 }, { "epoch": 1.8291160257687453, "grad_norm": 165.8556671142578, "learning_rate": 3.804634216434144e-08, "loss": 12.7656, "step": 27541 }, { "epoch": 1.8291824400611012, "grad_norm": 181.5133056640625, "learning_rate": 3.801696317138736e-08, "loss": 13.2656, "step": 27542 }, { "epoch": 1.8292488543534569, "grad_norm": 316.39337158203125, "learning_rate": 3.7987595306098406e-08, "loss": 19.75, "step": 27543 }, { "epoch": 1.8293152686458125, "grad_norm": 355.61700439453125, "learning_rate": 3.7958238568814194e-08, "loss": 18.25, "step": 27544 }, { "epoch": 1.8293816829381684, "grad_norm": 301.1529541015625, "learning_rate": 3.792889295987423e-08, "loss": 16.2812, "step": 27545 }, { "epoch": 1.829448097230524, "grad_norm": 147.40536499023438, "learning_rate": 3.789955847961823e-08, "loss": 13.6094, "step": 27546 }, { "epoch": 1.8295145115228797, "grad_norm": 221.94143676757812, "learning_rate": 3.787023512838527e-08, "loss": 18.8281, "step": 27547 }, { "epoch": 1.8295809258152356, "grad_norm": 161.9336395263672, "learning_rate": 3.784092290651464e-08, "loss": 11.5, "step": 27548 }, { "epoch": 1.829647340107591, "grad_norm": 323.490966796875, "learning_rate": 3.7811621814345386e-08, "loss": 15.8281, "step": 27549 }, { "epoch": 1.8297137543999469, "grad_norm": 252.03273010253906, "learning_rate": 3.778233185221646e-08, "loss": 12.5469, "step": 27550 }, { "epoch": 1.8297801686923025, "grad_norm": 259.7434387207031, "learning_rate": 3.7753053020466607e-08, "loss": 17.1562, "step": 27551 }, { "epoch": 1.8298465829846582, "grad_norm": 467.0867614746094, "learning_rate": 3.772378531943477e-08, "loss": 18.0312, "step": 27552 }, { "epoch": 1.829912997277014, "grad_norm": 123.11827850341797, "learning_rate": 3.769452874945911e-08, "loss": 14.8125, "step": 27553 }, { "epoch": 1.8299794115693697, "grad_norm": 472.1348876953125, "learning_rate": 3.766528331087837e-08, "loss": 17.7812, "step": 27554 }, { "epoch": 1.8300458258617254, "grad_norm": 114.13251495361328, "learning_rate": 3.7636049004030725e-08, "loss": 17.5938, "step": 27555 }, { "epoch": 1.8301122401540812, "grad_norm": 224.32838439941406, "learning_rate": 3.760682582925423e-08, "loss": 10.2656, "step": 27556 }, { "epoch": 1.830178654446437, "grad_norm": 100.43283081054688, "learning_rate": 3.757761378688706e-08, "loss": 18.625, "step": 27557 }, { "epoch": 1.8302450687387926, "grad_norm": 235.6757049560547, "learning_rate": 3.7548412877267066e-08, "loss": 14.5312, "step": 27558 }, { "epoch": 1.8303114830311484, "grad_norm": 249.3382568359375, "learning_rate": 3.751922310073208e-08, "loss": 17.7969, "step": 27559 }, { "epoch": 1.8303778973235039, "grad_norm": 135.38973999023438, "learning_rate": 3.749004445761983e-08, "loss": 12.7969, "step": 27560 }, { "epoch": 1.8304443116158597, "grad_norm": 140.5766143798828, "learning_rate": 3.7460876948267493e-08, "loss": 11.5312, "step": 27561 }, { "epoch": 1.8305107259082154, "grad_norm": 193.52371215820312, "learning_rate": 3.743172057301303e-08, "loss": 11.5625, "step": 27562 }, { "epoch": 1.830577140200571, "grad_norm": 251.69334411621094, "learning_rate": 3.7402575332193154e-08, "loss": 17.8906, "step": 27563 }, { "epoch": 1.830643554492927, "grad_norm": 358.1869201660156, "learning_rate": 3.737344122614516e-08, "loss": 12.2969, "step": 27564 }, { "epoch": 1.8307099687852826, "grad_norm": 191.9263458251953, "learning_rate": 3.734431825520623e-08, "loss": 19.2031, "step": 27565 }, { "epoch": 1.8307763830776382, "grad_norm": 235.87466430664062, "learning_rate": 3.7315206419713196e-08, "loss": 14.9375, "step": 27566 }, { "epoch": 1.8308427973699941, "grad_norm": 181.04385375976562, "learning_rate": 3.7286105720002567e-08, "loss": 14.9375, "step": 27567 }, { "epoch": 1.8309092116623498, "grad_norm": 184.3559112548828, "learning_rate": 3.72570161564113e-08, "loss": 14.0938, "step": 27568 }, { "epoch": 1.8309756259547054, "grad_norm": 135.4034423828125, "learning_rate": 3.722793772927557e-08, "loss": 13.0156, "step": 27569 }, { "epoch": 1.8310420402470613, "grad_norm": 184.3011474609375, "learning_rate": 3.7198870438931996e-08, "loss": 17.1875, "step": 27570 }, { "epoch": 1.8311084545394167, "grad_norm": 313.56292724609375, "learning_rate": 3.7169814285716636e-08, "loss": 22.1406, "step": 27571 }, { "epoch": 1.8311748688317726, "grad_norm": 768.27294921875, "learning_rate": 3.7140769269965677e-08, "loss": 28.9219, "step": 27572 }, { "epoch": 1.8312412831241283, "grad_norm": 365.28875732421875, "learning_rate": 3.711173539201507e-08, "loss": 19.6953, "step": 27573 }, { "epoch": 1.831307697416484, "grad_norm": 256.981201171875, "learning_rate": 3.708271265220087e-08, "loss": 15.2344, "step": 27574 }, { "epoch": 1.8313741117088398, "grad_norm": 280.5467834472656, "learning_rate": 3.7053701050858256e-08, "loss": 20.7812, "step": 27575 }, { "epoch": 1.8314405260011954, "grad_norm": 179.93804931640625, "learning_rate": 3.7024700588323296e-08, "loss": 13.2031, "step": 27576 }, { "epoch": 1.831506940293551, "grad_norm": 109.57490539550781, "learning_rate": 3.6995711264931504e-08, "loss": 14.8594, "step": 27577 }, { "epoch": 1.831573354585907, "grad_norm": 212.6148223876953, "learning_rate": 3.696673308101772e-08, "loss": 16.0781, "step": 27578 }, { "epoch": 1.8316397688782626, "grad_norm": 172.0092010498047, "learning_rate": 3.6937766036917674e-08, "loss": 16.9062, "step": 27579 }, { "epoch": 1.8317061831706183, "grad_norm": 105.19711303710938, "learning_rate": 3.690881013296621e-08, "loss": 15.7969, "step": 27580 }, { "epoch": 1.8317725974629742, "grad_norm": 372.24560546875, "learning_rate": 3.687986536949816e-08, "loss": 16.1406, "step": 27581 }, { "epoch": 1.8318390117553296, "grad_norm": 499.6265563964844, "learning_rate": 3.68509317468485e-08, "loss": 14.2188, "step": 27582 }, { "epoch": 1.8319054260476855, "grad_norm": 577.37060546875, "learning_rate": 3.682200926535195e-08, "loss": 18.4531, "step": 27583 }, { "epoch": 1.8319718403400411, "grad_norm": 307.2063903808594, "learning_rate": 3.679309792534291e-08, "loss": 19.3125, "step": 27584 }, { "epoch": 1.8320382546323968, "grad_norm": 161.9479522705078, "learning_rate": 3.6764197727156e-08, "loss": 17.2031, "step": 27585 }, { "epoch": 1.8321046689247527, "grad_norm": 558.2109985351562, "learning_rate": 3.6735308671125284e-08, "loss": 22.2812, "step": 27586 }, { "epoch": 1.8321710832171083, "grad_norm": 218.49249267578125, "learning_rate": 3.670643075758517e-08, "loss": 15.8125, "step": 27587 }, { "epoch": 1.832237497509464, "grad_norm": 113.34717559814453, "learning_rate": 3.6677563986869606e-08, "loss": 11.8047, "step": 27588 }, { "epoch": 1.8323039118018198, "grad_norm": 271.9991455078125, "learning_rate": 3.664870835931244e-08, "loss": 23.75, "step": 27589 }, { "epoch": 1.8323703260941755, "grad_norm": 342.2291564941406, "learning_rate": 3.6619863875247514e-08, "loss": 19.4531, "step": 27590 }, { "epoch": 1.8324367403865311, "grad_norm": 265.12579345703125, "learning_rate": 3.659103053500856e-08, "loss": 19.6719, "step": 27591 }, { "epoch": 1.832503154678887, "grad_norm": 309.89556884765625, "learning_rate": 3.6562208338928865e-08, "loss": 20.8281, "step": 27592 }, { "epoch": 1.8325695689712425, "grad_norm": 155.7394256591797, "learning_rate": 3.6533397287342174e-08, "loss": 15.5156, "step": 27593 }, { "epoch": 1.8326359832635983, "grad_norm": 302.6685791015625, "learning_rate": 3.6504597380581534e-08, "loss": 21.5469, "step": 27594 }, { "epoch": 1.832702397555954, "grad_norm": 126.34700012207031, "learning_rate": 3.647580861898014e-08, "loss": 12.125, "step": 27595 }, { "epoch": 1.8327688118483096, "grad_norm": 200.17550659179688, "learning_rate": 3.6447031002870835e-08, "loss": 19.0938, "step": 27596 }, { "epoch": 1.8328352261406655, "grad_norm": 145.49349975585938, "learning_rate": 3.641826453258678e-08, "loss": 15.7031, "step": 27597 }, { "epoch": 1.8329016404330212, "grad_norm": 187.38169860839844, "learning_rate": 3.638950920846062e-08, "loss": 18.875, "step": 27598 }, { "epoch": 1.8329680547253768, "grad_norm": 182.05316162109375, "learning_rate": 3.636076503082508e-08, "loss": 14.9375, "step": 27599 }, { "epoch": 1.8330344690177327, "grad_norm": 281.6129150390625, "learning_rate": 3.6332032000012223e-08, "loss": 19.75, "step": 27600 }, { "epoch": 1.8331008833100884, "grad_norm": 192.95411682128906, "learning_rate": 3.6303310116355126e-08, "loss": 15.5938, "step": 27601 }, { "epoch": 1.833167297602444, "grad_norm": 157.98085021972656, "learning_rate": 3.62745993801854e-08, "loss": 14.0625, "step": 27602 }, { "epoch": 1.8332337118947999, "grad_norm": 173.205322265625, "learning_rate": 3.624589979183534e-08, "loss": 16.0938, "step": 27603 }, { "epoch": 1.8333001261871553, "grad_norm": 252.129638671875, "learning_rate": 3.6217211351637136e-08, "loss": 21.9375, "step": 27604 }, { "epoch": 1.8333665404795112, "grad_norm": 253.47238159179688, "learning_rate": 3.61885340599225e-08, "loss": 15.0938, "step": 27605 }, { "epoch": 1.8334329547718669, "grad_norm": 186.48793029785156, "learning_rate": 3.615986791702297e-08, "loss": 11.9375, "step": 27606 }, { "epoch": 1.8334993690642225, "grad_norm": 176.90289306640625, "learning_rate": 3.613121292327048e-08, "loss": 18.9688, "step": 27607 }, { "epoch": 1.8335657833565784, "grad_norm": 370.6241455078125, "learning_rate": 3.6102569078996226e-08, "loss": 12.6094, "step": 27608 }, { "epoch": 1.833632197648934, "grad_norm": 293.8852233886719, "learning_rate": 3.60739363845316e-08, "loss": 15.375, "step": 27609 }, { "epoch": 1.8336986119412897, "grad_norm": 107.3060531616211, "learning_rate": 3.60453148402079e-08, "loss": 11.9375, "step": 27610 }, { "epoch": 1.8337650262336456, "grad_norm": 196.05091857910156, "learning_rate": 3.601670444635607e-08, "loss": 17.8594, "step": 27611 }, { "epoch": 1.8338314405260012, "grad_norm": 326.8628234863281, "learning_rate": 3.598810520330719e-08, "loss": 23.2344, "step": 27612 }, { "epoch": 1.8338978548183569, "grad_norm": 355.7415771484375, "learning_rate": 3.5959517111392e-08, "loss": 11.6562, "step": 27613 }, { "epoch": 1.8339642691107128, "grad_norm": 143.1339874267578, "learning_rate": 3.593094017094134e-08, "loss": 21.125, "step": 27614 }, { "epoch": 1.8340306834030682, "grad_norm": 669.6046142578125, "learning_rate": 3.59023743822855e-08, "loss": 14.7188, "step": 27615 }, { "epoch": 1.834097097695424, "grad_norm": 190.90936279296875, "learning_rate": 3.587381974575521e-08, "loss": 16.6875, "step": 27616 }, { "epoch": 1.8341635119877797, "grad_norm": 200.90670776367188, "learning_rate": 3.5845276261680436e-08, "loss": 15.3125, "step": 27617 }, { "epoch": 1.8342299262801354, "grad_norm": 141.59600830078125, "learning_rate": 3.58167439303918e-08, "loss": 14.6406, "step": 27618 }, { "epoch": 1.8342963405724912, "grad_norm": 248.84005737304688, "learning_rate": 3.578822275221893e-08, "loss": 16.8906, "step": 27619 }, { "epoch": 1.834362754864847, "grad_norm": 191.28521728515625, "learning_rate": 3.57597127274919e-08, "loss": 11.2344, "step": 27620 }, { "epoch": 1.8344291691572026, "grad_norm": 118.88006591796875, "learning_rate": 3.5731213856540544e-08, "loss": 13.1562, "step": 27621 }, { "epoch": 1.8344955834495584, "grad_norm": 458.6921081542969, "learning_rate": 3.5702726139694385e-08, "loss": 23.2031, "step": 27622 }, { "epoch": 1.834561997741914, "grad_norm": 221.98292541503906, "learning_rate": 3.567424957728316e-08, "loss": 15.7188, "step": 27623 }, { "epoch": 1.8346284120342697, "grad_norm": 230.70535278320312, "learning_rate": 3.564578416963615e-08, "loss": 14.8594, "step": 27624 }, { "epoch": 1.8346948263266256, "grad_norm": 178.79710388183594, "learning_rate": 3.561732991708244e-08, "loss": 16.7969, "step": 27625 }, { "epoch": 1.834761240618981, "grad_norm": 159.46224975585938, "learning_rate": 3.558888681995165e-08, "loss": 14.9688, "step": 27626 }, { "epoch": 1.834827654911337, "grad_norm": 460.6955261230469, "learning_rate": 3.55604548785724e-08, "loss": 20.4531, "step": 27627 }, { "epoch": 1.8348940692036926, "grad_norm": 655.5364990234375, "learning_rate": 3.553203409327354e-08, "loss": 20.6875, "step": 27628 }, { "epoch": 1.8349604834960482, "grad_norm": 130.9761199951172, "learning_rate": 3.550362446438415e-08, "loss": 16.625, "step": 27629 }, { "epoch": 1.835026897788404, "grad_norm": 124.397705078125, "learning_rate": 3.5475225992232626e-08, "loss": 17.2969, "step": 27630 }, { "epoch": 1.8350933120807598, "grad_norm": 456.54833984375, "learning_rate": 3.5446838677147375e-08, "loss": 17.6875, "step": 27631 }, { "epoch": 1.8351597263731154, "grad_norm": 121.82394409179688, "learning_rate": 3.5418462519457124e-08, "loss": 14.2812, "step": 27632 }, { "epoch": 1.8352261406654713, "grad_norm": 90.9559555053711, "learning_rate": 3.5390097519489736e-08, "loss": 13.4219, "step": 27633 }, { "epoch": 1.835292554957827, "grad_norm": 608.4363403320312, "learning_rate": 3.5361743677573494e-08, "loss": 17.5781, "step": 27634 }, { "epoch": 1.8353589692501826, "grad_norm": 178.36431884765625, "learning_rate": 3.533340099403637e-08, "loss": 13.375, "step": 27635 }, { "epoch": 1.8354253835425385, "grad_norm": 1010.9653930664062, "learning_rate": 3.5305069469206195e-08, "loss": 14.8125, "step": 27636 }, { "epoch": 1.835491797834894, "grad_norm": 159.6234588623047, "learning_rate": 3.527674910341072e-08, "loss": 10.7969, "step": 27637 }, { "epoch": 1.8355582121272498, "grad_norm": 222.54248046875, "learning_rate": 3.524843989697757e-08, "loss": 15.625, "step": 27638 }, { "epoch": 1.8356246264196057, "grad_norm": 138.5693359375, "learning_rate": 3.522014185023403e-08, "loss": 13.75, "step": 27639 }, { "epoch": 1.835691040711961, "grad_norm": 159.69215393066406, "learning_rate": 3.519185496350774e-08, "loss": 13.8594, "step": 27640 }, { "epoch": 1.835757455004317, "grad_norm": 102.82758331298828, "learning_rate": 3.516357923712554e-08, "loss": 18.5156, "step": 27641 }, { "epoch": 1.8358238692966726, "grad_norm": 245.0314483642578, "learning_rate": 3.513531467141484e-08, "loss": 18.2188, "step": 27642 }, { "epoch": 1.8358902835890283, "grad_norm": 263.9476013183594, "learning_rate": 3.510706126670237e-08, "loss": 18.875, "step": 27643 }, { "epoch": 1.8359566978813842, "grad_norm": 253.43380737304688, "learning_rate": 3.5078819023314997e-08, "loss": 18.0781, "step": 27644 }, { "epoch": 1.8360231121737398, "grad_norm": 308.849365234375, "learning_rate": 3.505058794157956e-08, "loss": 16.5156, "step": 27645 }, { "epoch": 1.8360895264660955, "grad_norm": 160.31732177734375, "learning_rate": 3.502236802182246e-08, "loss": 20.375, "step": 27646 }, { "epoch": 1.8361559407584513, "grad_norm": 667.4771728515625, "learning_rate": 3.499415926436999e-08, "loss": 28.75, "step": 27647 }, { "epoch": 1.8362223550508068, "grad_norm": 237.2167510986328, "learning_rate": 3.496596166954891e-08, "loss": 14.9375, "step": 27648 }, { "epoch": 1.8362887693431627, "grad_norm": 179.53363037109375, "learning_rate": 3.493777523768504e-08, "loss": 19.625, "step": 27649 }, { "epoch": 1.8363551836355185, "grad_norm": 153.54196166992188, "learning_rate": 3.4909599969104365e-08, "loss": 14.1406, "step": 27650 }, { "epoch": 1.836421597927874, "grad_norm": 154.7926483154297, "learning_rate": 3.4881435864133056e-08, "loss": 13.9219, "step": 27651 }, { "epoch": 1.8364880122202298, "grad_norm": 294.6415710449219, "learning_rate": 3.485328292309675e-08, "loss": 17.75, "step": 27652 }, { "epoch": 1.8365544265125855, "grad_norm": 185.3605499267578, "learning_rate": 3.482514114632107e-08, "loss": 13.875, "step": 27653 }, { "epoch": 1.8366208408049411, "grad_norm": 331.75830078125, "learning_rate": 3.4797010534131645e-08, "loss": 15.7812, "step": 27654 }, { "epoch": 1.836687255097297, "grad_norm": 113.73580169677734, "learning_rate": 3.476889108685399e-08, "loss": 14.9219, "step": 27655 }, { "epoch": 1.8367536693896527, "grad_norm": 145.01153564453125, "learning_rate": 3.474078280481285e-08, "loss": 15.6094, "step": 27656 }, { "epoch": 1.8368200836820083, "grad_norm": 129.21656799316406, "learning_rate": 3.4712685688334075e-08, "loss": 15.625, "step": 27657 }, { "epoch": 1.8368864979743642, "grad_norm": 207.60507202148438, "learning_rate": 3.4684599737742295e-08, "loss": 17.1094, "step": 27658 }, { "epoch": 1.8369529122667196, "grad_norm": 282.7659912109375, "learning_rate": 3.465652495336224e-08, "loss": 15.2969, "step": 27659 }, { "epoch": 1.8370193265590755, "grad_norm": 205.08811950683594, "learning_rate": 3.4628461335519e-08, "loss": 16.9531, "step": 27660 }, { "epoch": 1.8370857408514314, "grad_norm": 224.0203857421875, "learning_rate": 3.460040888453697e-08, "loss": 17.0938, "step": 27661 }, { "epoch": 1.8371521551437868, "grad_norm": 382.5326232910156, "learning_rate": 3.457236760074078e-08, "loss": 17.2969, "step": 27662 }, { "epoch": 1.8372185694361427, "grad_norm": 168.85089111328125, "learning_rate": 3.454433748445484e-08, "loss": 14.5312, "step": 27663 }, { "epoch": 1.8372849837284984, "grad_norm": 207.16424560546875, "learning_rate": 3.451631853600312e-08, "loss": 17.75, "step": 27664 }, { "epoch": 1.837351398020854, "grad_norm": 119.23562622070312, "learning_rate": 3.448831075571013e-08, "loss": 10.1172, "step": 27665 }, { "epoch": 1.8374178123132099, "grad_norm": 155.9288330078125, "learning_rate": 3.446031414389938e-08, "loss": 15.3125, "step": 27666 }, { "epoch": 1.8374842266055655, "grad_norm": 109.97491455078125, "learning_rate": 3.443232870089507e-08, "loss": 12.7656, "step": 27667 }, { "epoch": 1.8375506408979212, "grad_norm": 144.72036743164062, "learning_rate": 3.4404354427020724e-08, "loss": 18.3594, "step": 27668 }, { "epoch": 1.837617055190277, "grad_norm": 156.12107849121094, "learning_rate": 3.437639132260017e-08, "loss": 17.9688, "step": 27669 }, { "epoch": 1.8376834694826325, "grad_norm": 271.3564758300781, "learning_rate": 3.434843938795651e-08, "loss": 26.8906, "step": 27670 }, { "epoch": 1.8377498837749884, "grad_norm": 374.60882568359375, "learning_rate": 3.432049862341346e-08, "loss": 14.8125, "step": 27671 }, { "epoch": 1.8378162980673443, "grad_norm": 154.59182739257812, "learning_rate": 3.429256902929389e-08, "loss": 11.875, "step": 27672 }, { "epoch": 1.8378827123596997, "grad_norm": 129.5036163330078, "learning_rate": 3.42646506059211e-08, "loss": 14.3281, "step": 27673 }, { "epoch": 1.8379491266520556, "grad_norm": 103.77871704101562, "learning_rate": 3.4236743353617923e-08, "loss": 16.7812, "step": 27674 }, { "epoch": 1.8380155409444112, "grad_norm": 248.2584991455078, "learning_rate": 3.420884727270712e-08, "loss": 25.9375, "step": 27675 }, { "epoch": 1.8380819552367669, "grad_norm": 127.01333618164062, "learning_rate": 3.418096236351164e-08, "loss": 16.5469, "step": 27676 }, { "epoch": 1.8381483695291227, "grad_norm": 168.2052764892578, "learning_rate": 3.4153088626353796e-08, "loss": 15.0625, "step": 27677 }, { "epoch": 1.8382147838214784, "grad_norm": 108.18930053710938, "learning_rate": 3.412522606155599e-08, "loss": 12.9062, "step": 27678 }, { "epoch": 1.838281198113834, "grad_norm": 127.14019775390625, "learning_rate": 3.409737466944085e-08, "loss": 12.4688, "step": 27679 }, { "epoch": 1.83834761240619, "grad_norm": 138.17454528808594, "learning_rate": 3.406953445033012e-08, "loss": 20.2188, "step": 27680 }, { "epoch": 1.8384140266985454, "grad_norm": 244.7974090576172, "learning_rate": 3.404170540454598e-08, "loss": 14.4219, "step": 27681 }, { "epoch": 1.8384804409909012, "grad_norm": 907.5635375976562, "learning_rate": 3.401388753241052e-08, "loss": 15.1719, "step": 27682 }, { "epoch": 1.8385468552832571, "grad_norm": 192.2335968017578, "learning_rate": 3.398608083424537e-08, "loss": 18.5781, "step": 27683 }, { "epoch": 1.8386132695756126, "grad_norm": 252.4705047607422, "learning_rate": 3.395828531037215e-08, "loss": 18.3906, "step": 27684 }, { "epoch": 1.8386796838679684, "grad_norm": 563.8812255859375, "learning_rate": 3.3930500961112497e-08, "loss": 21.75, "step": 27685 }, { "epoch": 1.838746098160324, "grad_norm": 429.3652648925781, "learning_rate": 3.39027277867876e-08, "loss": 11.6875, "step": 27686 }, { "epoch": 1.8388125124526797, "grad_norm": 171.4114532470703, "learning_rate": 3.387496578771898e-08, "loss": 16.8594, "step": 27687 }, { "epoch": 1.8388789267450356, "grad_norm": 960.3645629882812, "learning_rate": 3.384721496422771e-08, "loss": 15.1094, "step": 27688 }, { "epoch": 1.8389453410373913, "grad_norm": 946.123291015625, "learning_rate": 3.381947531663454e-08, "loss": 12.2188, "step": 27689 }, { "epoch": 1.839011755329747, "grad_norm": 244.1175537109375, "learning_rate": 3.379174684526076e-08, "loss": 14.3125, "step": 27690 }, { "epoch": 1.8390781696221028, "grad_norm": 146.8308563232422, "learning_rate": 3.376402955042679e-08, "loss": 13.4844, "step": 27691 }, { "epoch": 1.8391445839144582, "grad_norm": 365.0181884765625, "learning_rate": 3.373632343245347e-08, "loss": 12.9844, "step": 27692 }, { "epoch": 1.839210998206814, "grad_norm": 342.3547668457031, "learning_rate": 3.3708628491661004e-08, "loss": 18.6562, "step": 27693 }, { "epoch": 1.83927741249917, "grad_norm": 222.74026489257812, "learning_rate": 3.368094472837024e-08, "loss": 10.8906, "step": 27694 }, { "epoch": 1.8393438267915254, "grad_norm": 214.19406127929688, "learning_rate": 3.36532721429007e-08, "loss": 18.4688, "step": 27695 }, { "epoch": 1.8394102410838813, "grad_norm": 119.26560974121094, "learning_rate": 3.362561073557324e-08, "loss": 13.5781, "step": 27696 }, { "epoch": 1.839476655376237, "grad_norm": 420.0871276855469, "learning_rate": 3.3597960506707267e-08, "loss": 14.375, "step": 27697 }, { "epoch": 1.8395430696685926, "grad_norm": 111.76600646972656, "learning_rate": 3.3570321456622864e-08, "loss": 12.3125, "step": 27698 }, { "epoch": 1.8396094839609485, "grad_norm": 257.0706481933594, "learning_rate": 3.354269358563966e-08, "loss": 16.5469, "step": 27699 }, { "epoch": 1.8396758982533041, "grad_norm": 507.287841796875, "learning_rate": 3.3515076894077287e-08, "loss": 14.0, "step": 27700 }, { "epoch": 1.8397423125456598, "grad_norm": 280.42864990234375, "learning_rate": 3.348747138225527e-08, "loss": 18.3594, "step": 27701 }, { "epoch": 1.8398087268380157, "grad_norm": 180.00509643554688, "learning_rate": 3.345987705049291e-08, "loss": 18.0312, "step": 27702 }, { "epoch": 1.839875141130371, "grad_norm": 611.7348022460938, "learning_rate": 3.3432293899109177e-08, "loss": 18.0469, "step": 27703 }, { "epoch": 1.839941555422727, "grad_norm": 253.75592041015625, "learning_rate": 3.340472192842348e-08, "loss": 18.7344, "step": 27704 }, { "epoch": 1.8400079697150828, "grad_norm": 127.5250015258789, "learning_rate": 3.337716113875455e-08, "loss": 15.6562, "step": 27705 }, { "epoch": 1.8400743840074383, "grad_norm": 313.5515441894531, "learning_rate": 3.334961153042115e-08, "loss": 16.7656, "step": 27706 }, { "epoch": 1.8401407982997942, "grad_norm": 98.17146301269531, "learning_rate": 3.332207310374213e-08, "loss": 12.9375, "step": 27707 }, { "epoch": 1.8402072125921498, "grad_norm": 221.7793731689453, "learning_rate": 3.3294545859035906e-08, "loss": 16.7031, "step": 27708 }, { "epoch": 1.8402736268845055, "grad_norm": 190.95335388183594, "learning_rate": 3.3267029796620883e-08, "loss": 13.9688, "step": 27709 }, { "epoch": 1.8403400411768613, "grad_norm": 196.27542114257812, "learning_rate": 3.3239524916815584e-08, "loss": 16.5312, "step": 27710 }, { "epoch": 1.840406455469217, "grad_norm": 139.29469299316406, "learning_rate": 3.321203121993776e-08, "loss": 15.0625, "step": 27711 }, { "epoch": 1.8404728697615726, "grad_norm": 252.88612365722656, "learning_rate": 3.318454870630594e-08, "loss": 15.7969, "step": 27712 }, { "epoch": 1.8405392840539285, "grad_norm": 197.18850708007812, "learning_rate": 3.315707737623752e-08, "loss": 20.9844, "step": 27713 }, { "epoch": 1.840605698346284, "grad_norm": 277.4803466796875, "learning_rate": 3.31296172300507e-08, "loss": 17.2188, "step": 27714 }, { "epoch": 1.8406721126386398, "grad_norm": 218.07647705078125, "learning_rate": 3.310216826806278e-08, "loss": 20.0625, "step": 27715 }, { "epoch": 1.8407385269309957, "grad_norm": 187.221435546875, "learning_rate": 3.307473049059162e-08, "loss": 19.5469, "step": 27716 }, { "epoch": 1.8408049412233511, "grad_norm": 179.5660858154297, "learning_rate": 3.3047303897954184e-08, "loss": 17.0, "step": 27717 }, { "epoch": 1.840871355515707, "grad_norm": 377.8445739746094, "learning_rate": 3.301988849046822e-08, "loss": 11.8594, "step": 27718 }, { "epoch": 1.8409377698080627, "grad_norm": 352.7519226074219, "learning_rate": 3.2992484268450474e-08, "loss": 25.0, "step": 27719 }, { "epoch": 1.8410041841004183, "grad_norm": 493.9662780761719, "learning_rate": 3.2965091232218023e-08, "loss": 21.4062, "step": 27720 }, { "epoch": 1.8410705983927742, "grad_norm": 212.9531707763672, "learning_rate": 3.293770938208773e-08, "loss": 16.8438, "step": 27721 }, { "epoch": 1.8411370126851299, "grad_norm": 278.3186950683594, "learning_rate": 3.2910338718376455e-08, "loss": 22.0312, "step": 27722 }, { "epoch": 1.8412034269774855, "grad_norm": 258.385009765625, "learning_rate": 3.288297924140071e-08, "loss": 18.6406, "step": 27723 }, { "epoch": 1.8412698412698414, "grad_norm": 303.8539733886719, "learning_rate": 3.285563095147692e-08, "loss": 12.0547, "step": 27724 }, { "epoch": 1.8413362555621968, "grad_norm": 133.56053161621094, "learning_rate": 3.2828293848921604e-08, "loss": 12.0312, "step": 27725 }, { "epoch": 1.8414026698545527, "grad_norm": 412.336181640625, "learning_rate": 3.280096793405085e-08, "loss": 16.2188, "step": 27726 }, { "epoch": 1.8414690841469086, "grad_norm": 419.31494140625, "learning_rate": 3.277365320718084e-08, "loss": 19.0781, "step": 27727 }, { "epoch": 1.841535498439264, "grad_norm": 181.602783203125, "learning_rate": 3.2746349668627324e-08, "loss": 17.9844, "step": 27728 }, { "epoch": 1.8416019127316199, "grad_norm": 183.0924835205078, "learning_rate": 3.27190573187065e-08, "loss": 17.2344, "step": 27729 }, { "epoch": 1.8416683270239755, "grad_norm": 142.2228240966797, "learning_rate": 3.269177615773366e-08, "loss": 15.8594, "step": 27730 }, { "epoch": 1.8417347413163312, "grad_norm": 209.45303344726562, "learning_rate": 3.266450618602468e-08, "loss": 17.8594, "step": 27731 }, { "epoch": 1.841801155608687, "grad_norm": 246.15782165527344, "learning_rate": 3.263724740389484e-08, "loss": 19.0156, "step": 27732 }, { "epoch": 1.8418675699010427, "grad_norm": 233.1036834716797, "learning_rate": 3.260999981165957e-08, "loss": 12.5781, "step": 27733 }, { "epoch": 1.8419339841933984, "grad_norm": 243.75233459472656, "learning_rate": 3.258276340963384e-08, "loss": 13.6562, "step": 27734 }, { "epoch": 1.8420003984857543, "grad_norm": 319.94970703125, "learning_rate": 3.255553819813306e-08, "loss": 18.4219, "step": 27735 }, { "epoch": 1.8420668127781097, "grad_norm": 153.41184997558594, "learning_rate": 3.252832417747187e-08, "loss": 16.0156, "step": 27736 }, { "epoch": 1.8421332270704656, "grad_norm": 172.8212432861328, "learning_rate": 3.250112134796512e-08, "loss": 14.375, "step": 27737 }, { "epoch": 1.8421996413628214, "grad_norm": 157.69406127929688, "learning_rate": 3.247392970992757e-08, "loss": 15.1406, "step": 27738 }, { "epoch": 1.8422660556551769, "grad_norm": 195.7269744873047, "learning_rate": 3.2446749263673636e-08, "loss": 15.9141, "step": 27739 }, { "epoch": 1.8423324699475327, "grad_norm": 326.14154052734375, "learning_rate": 3.241958000951772e-08, "loss": 19.2031, "step": 27740 }, { "epoch": 1.8423988842398884, "grad_norm": 261.2200622558594, "learning_rate": 3.239242194777436e-08, "loss": 25.5938, "step": 27741 }, { "epoch": 1.842465298532244, "grad_norm": 136.2470245361328, "learning_rate": 3.2365275078757306e-08, "loss": 15.2656, "step": 27742 }, { "epoch": 1.8425317128246, "grad_norm": 401.5310974121094, "learning_rate": 3.2338139402780964e-08, "loss": 20.7812, "step": 27743 }, { "epoch": 1.8425981271169556, "grad_norm": 253.0309295654297, "learning_rate": 3.2311014920158865e-08, "loss": 24.0625, "step": 27744 }, { "epoch": 1.8426645414093112, "grad_norm": 571.7666625976562, "learning_rate": 3.228390163120509e-08, "loss": 18.5469, "step": 27745 }, { "epoch": 1.8427309557016671, "grad_norm": 1126.641845703125, "learning_rate": 3.225679953623295e-08, "loss": 22.7656, "step": 27746 }, { "epoch": 1.8427973699940225, "grad_norm": 151.5131072998047, "learning_rate": 3.2229708635556294e-08, "loss": 15.2656, "step": 27747 }, { "epoch": 1.8428637842863784, "grad_norm": 97.29241943359375, "learning_rate": 3.220262892948822e-08, "loss": 12.0312, "step": 27748 }, { "epoch": 1.8429301985787343, "grad_norm": 368.1306457519531, "learning_rate": 3.217556041834213e-08, "loss": 10.6875, "step": 27749 }, { "epoch": 1.8429966128710897, "grad_norm": 161.54275512695312, "learning_rate": 3.2148503102431e-08, "loss": 15.5156, "step": 27750 }, { "epoch": 1.8430630271634456, "grad_norm": 152.49392700195312, "learning_rate": 3.212145698206803e-08, "loss": 11.8828, "step": 27751 }, { "epoch": 1.8431294414558013, "grad_norm": 210.21585083007812, "learning_rate": 3.209442205756585e-08, "loss": 16.9844, "step": 27752 }, { "epoch": 1.843195855748157, "grad_norm": 590.2120361328125, "learning_rate": 3.206739832923721e-08, "loss": 14.6719, "step": 27753 }, { "epoch": 1.8432622700405128, "grad_norm": 269.20697021484375, "learning_rate": 3.2040385797394766e-08, "loss": 24.1875, "step": 27754 }, { "epoch": 1.8433286843328685, "grad_norm": 179.1820526123047, "learning_rate": 3.2013384462351134e-08, "loss": 18.5156, "step": 27755 }, { "epoch": 1.843395098625224, "grad_norm": 128.48953247070312, "learning_rate": 3.1986394324418297e-08, "loss": 13.5, "step": 27756 }, { "epoch": 1.84346151291758, "grad_norm": 116.4505844116211, "learning_rate": 3.1959415383908894e-08, "loss": 11.5312, "step": 27757 }, { "epoch": 1.8435279272099354, "grad_norm": 390.86798095703125, "learning_rate": 3.193244764113456e-08, "loss": 19.6719, "step": 27758 }, { "epoch": 1.8435943415022913, "grad_norm": 435.0970458984375, "learning_rate": 3.19054910964075e-08, "loss": 17.4375, "step": 27759 }, { "epoch": 1.8436607557946472, "grad_norm": 151.73443603515625, "learning_rate": 3.187854575003956e-08, "loss": 17.7969, "step": 27760 }, { "epoch": 1.8437271700870026, "grad_norm": 304.41094970703125, "learning_rate": 3.1851611602342286e-08, "loss": 17.5, "step": 27761 }, { "epoch": 1.8437935843793585, "grad_norm": 123.93978118896484, "learning_rate": 3.182468865362731e-08, "loss": 19.6562, "step": 27762 }, { "epoch": 1.8438599986717141, "grad_norm": 168.70614624023438, "learning_rate": 3.179777690420604e-08, "loss": 13.6562, "step": 27763 }, { "epoch": 1.8439264129640698, "grad_norm": 313.2351379394531, "learning_rate": 3.17708763543898e-08, "loss": 23.9531, "step": 27764 }, { "epoch": 1.8439928272564257, "grad_norm": 158.21681213378906, "learning_rate": 3.1743987004489766e-08, "loss": 12.7812, "step": 27765 }, { "epoch": 1.8440592415487813, "grad_norm": 108.87806701660156, "learning_rate": 3.171710885481704e-08, "loss": 11.2578, "step": 27766 }, { "epoch": 1.844125655841137, "grad_norm": 206.75738525390625, "learning_rate": 3.169024190568226e-08, "loss": 12.4844, "step": 27767 }, { "epoch": 1.8441920701334928, "grad_norm": 169.8123321533203, "learning_rate": 3.166338615739661e-08, "loss": 12.9844, "step": 27768 }, { "epoch": 1.8442584844258483, "grad_norm": 180.40045166015625, "learning_rate": 3.163654161027041e-08, "loss": 15.7344, "step": 27769 }, { "epoch": 1.8443248987182042, "grad_norm": 376.2169494628906, "learning_rate": 3.1609708264614287e-08, "loss": 18.0312, "step": 27770 }, { "epoch": 1.84439131301056, "grad_norm": 394.5909423828125, "learning_rate": 3.1582886120738785e-08, "loss": 17.2344, "step": 27771 }, { "epoch": 1.8444577273029155, "grad_norm": 222.8966064453125, "learning_rate": 3.1556075178953864e-08, "loss": 18.3281, "step": 27772 }, { "epoch": 1.8445241415952713, "grad_norm": 122.55640411376953, "learning_rate": 3.152927543956996e-08, "loss": 12.8281, "step": 27773 }, { "epoch": 1.844590555887627, "grad_norm": 192.45240783691406, "learning_rate": 3.1502486902896917e-08, "loss": 12.5938, "step": 27774 }, { "epoch": 1.8446569701799826, "grad_norm": 229.5560302734375, "learning_rate": 3.14757095692445e-08, "loss": 17.1719, "step": 27775 }, { "epoch": 1.8447233844723385, "grad_norm": 291.45745849609375, "learning_rate": 3.14489434389229e-08, "loss": 13.9688, "step": 27776 }, { "epoch": 1.8447897987646942, "grad_norm": 115.38225555419922, "learning_rate": 3.142218851224121e-08, "loss": 16.3125, "step": 27777 }, { "epoch": 1.8448562130570498, "grad_norm": 382.2188720703125, "learning_rate": 3.139544478950917e-08, "loss": 18.6562, "step": 27778 }, { "epoch": 1.8449226273494057, "grad_norm": 417.653564453125, "learning_rate": 3.1368712271035993e-08, "loss": 25.6875, "step": 27779 }, { "epoch": 1.8449890416417611, "grad_norm": 139.70399475097656, "learning_rate": 3.134199095713119e-08, "loss": 13.3125, "step": 27780 }, { "epoch": 1.845055455934117, "grad_norm": 165.02626037597656, "learning_rate": 3.1315280848103533e-08, "loss": 16.9375, "step": 27781 }, { "epoch": 1.845121870226473, "grad_norm": 270.7176513671875, "learning_rate": 3.1288581944262203e-08, "loss": 15.1094, "step": 27782 }, { "epoch": 1.8451882845188283, "grad_norm": 218.4941864013672, "learning_rate": 3.1261894245915964e-08, "loss": 19.8281, "step": 27783 }, { "epoch": 1.8452546988111842, "grad_norm": 3978.44287109375, "learning_rate": 3.123521775337346e-08, "loss": 12.6875, "step": 27784 }, { "epoch": 1.8453211131035399, "grad_norm": 257.7686767578125, "learning_rate": 3.120855246694343e-08, "loss": 20.1562, "step": 27785 }, { "epoch": 1.8453875273958955, "grad_norm": 156.02853393554688, "learning_rate": 3.1181898386934194e-08, "loss": 16.6875, "step": 27786 }, { "epoch": 1.8454539416882514, "grad_norm": 130.20535278320312, "learning_rate": 3.1155255513654056e-08, "loss": 10.8281, "step": 27787 }, { "epoch": 1.845520355980607, "grad_norm": 527.6923828125, "learning_rate": 3.112862384741133e-08, "loss": 17.7031, "step": 27788 }, { "epoch": 1.8455867702729627, "grad_norm": 198.22073364257812, "learning_rate": 3.1102003388513876e-08, "loss": 14.9219, "step": 27789 }, { "epoch": 1.8456531845653186, "grad_norm": 179.47093200683594, "learning_rate": 3.107539413726989e-08, "loss": 14.6875, "step": 27790 }, { "epoch": 1.8457195988576742, "grad_norm": 280.36297607421875, "learning_rate": 3.104879609398703e-08, "loss": 19.8672, "step": 27791 }, { "epoch": 1.8457860131500299, "grad_norm": 207.67269897460938, "learning_rate": 3.102220925897281e-08, "loss": 17.9688, "step": 27792 }, { "epoch": 1.8458524274423858, "grad_norm": 245.90267944335938, "learning_rate": 3.09956336325351e-08, "loss": 20.2812, "step": 27793 }, { "epoch": 1.8459188417347412, "grad_norm": 207.71620178222656, "learning_rate": 3.0969069214980995e-08, "loss": 21.5312, "step": 27794 }, { "epoch": 1.845985256027097, "grad_norm": 209.46722412109375, "learning_rate": 3.094251600661801e-08, "loss": 13.8125, "step": 27795 }, { "epoch": 1.8460516703194527, "grad_norm": 530.2205810546875, "learning_rate": 3.091597400775325e-08, "loss": 12.0312, "step": 27796 }, { "epoch": 1.8461180846118084, "grad_norm": 127.37958526611328, "learning_rate": 3.088944321869369e-08, "loss": 12.8906, "step": 27797 }, { "epoch": 1.8461844989041643, "grad_norm": 146.7894287109375, "learning_rate": 3.0862923639746073e-08, "loss": 13.3281, "step": 27798 }, { "epoch": 1.84625091319652, "grad_norm": 145.25975036621094, "learning_rate": 3.08364152712175e-08, "loss": 13.3906, "step": 27799 }, { "epoch": 1.8463173274888756, "grad_norm": 198.53321838378906, "learning_rate": 3.080991811341427e-08, "loss": 17.7969, "step": 27800 }, { "epoch": 1.8463837417812314, "grad_norm": 136.7001495361328, "learning_rate": 3.078343216664314e-08, "loss": 14.1406, "step": 27801 }, { "epoch": 1.846450156073587, "grad_norm": 251.7418975830078, "learning_rate": 3.075695743121032e-08, "loss": 14.2344, "step": 27802 }, { "epoch": 1.8465165703659427, "grad_norm": 240.28369140625, "learning_rate": 3.0730493907422214e-08, "loss": 10.0625, "step": 27803 }, { "epoch": 1.8465829846582986, "grad_norm": 269.07659912109375, "learning_rate": 3.0704041595584816e-08, "loss": 13.4219, "step": 27804 }, { "epoch": 1.846649398950654, "grad_norm": 227.73635864257812, "learning_rate": 3.0677600496004207e-08, "loss": 18.4375, "step": 27805 }, { "epoch": 1.84671581324301, "grad_norm": 251.34518432617188, "learning_rate": 3.065117060898592e-08, "loss": 18.6562, "step": 27806 }, { "epoch": 1.8467822275353656, "grad_norm": 341.9607849121094, "learning_rate": 3.062475193483616e-08, "loss": 17.7188, "step": 27807 }, { "epoch": 1.8468486418277212, "grad_norm": 259.1274108886719, "learning_rate": 3.0598344473860225e-08, "loss": 17.0156, "step": 27808 }, { "epoch": 1.8469150561200771, "grad_norm": 110.55490112304688, "learning_rate": 3.0571948226363666e-08, "loss": 14.0938, "step": 27809 }, { "epoch": 1.8469814704124328, "grad_norm": 226.3621063232422, "learning_rate": 3.054556319265178e-08, "loss": 20.4375, "step": 27810 }, { "epoch": 1.8470478847047884, "grad_norm": 292.89373779296875, "learning_rate": 3.051918937302978e-08, "loss": 14.7812, "step": 27811 }, { "epoch": 1.8471142989971443, "grad_norm": 466.7330627441406, "learning_rate": 3.049282676780274e-08, "loss": 14.125, "step": 27812 }, { "epoch": 1.8471807132895, "grad_norm": 290.9980773925781, "learning_rate": 3.046647537727576e-08, "loss": 19.8438, "step": 27813 }, { "epoch": 1.8472471275818556, "grad_norm": 253.67152404785156, "learning_rate": 3.044013520175337e-08, "loss": 19.3125, "step": 27814 }, { "epoch": 1.8473135418742115, "grad_norm": 242.00502014160156, "learning_rate": 3.041380624154055e-08, "loss": 17.8438, "step": 27815 }, { "epoch": 1.847379956166567, "grad_norm": 288.8100280761719, "learning_rate": 3.0387488496941726e-08, "loss": 14.6875, "step": 27816 }, { "epoch": 1.8474463704589228, "grad_norm": 258.47125244140625, "learning_rate": 3.036118196826121e-08, "loss": 14.375, "step": 27817 }, { "epoch": 1.8475127847512784, "grad_norm": 116.4915771484375, "learning_rate": 3.0334886655803523e-08, "loss": 12.9062, "step": 27818 }, { "epoch": 1.847579199043634, "grad_norm": 116.86028289794922, "learning_rate": 3.030860255987277e-08, "loss": 17.0469, "step": 27819 }, { "epoch": 1.84764561333599, "grad_norm": 609.5338745117188, "learning_rate": 3.028232968077271e-08, "loss": 14.5938, "step": 27820 }, { "epoch": 1.8477120276283456, "grad_norm": 168.26699829101562, "learning_rate": 3.0256068018807756e-08, "loss": 18.4375, "step": 27821 }, { "epoch": 1.8477784419207013, "grad_norm": 114.84081268310547, "learning_rate": 3.022981757428122e-08, "loss": 15.6094, "step": 27822 }, { "epoch": 1.8478448562130572, "grad_norm": 292.4281005859375, "learning_rate": 3.0203578347497094e-08, "loss": 27.5625, "step": 27823 }, { "epoch": 1.8479112705054128, "grad_norm": 338.2765808105469, "learning_rate": 3.0177350338758676e-08, "loss": 19.6875, "step": 27824 }, { "epoch": 1.8479776847977685, "grad_norm": 182.1544952392578, "learning_rate": 3.0151133548369404e-08, "loss": 16.625, "step": 27825 }, { "epoch": 1.8480440990901243, "grad_norm": 205.19009399414062, "learning_rate": 3.0124927976632575e-08, "loss": 15.8125, "step": 27826 }, { "epoch": 1.8481105133824798, "grad_norm": 269.6099548339844, "learning_rate": 3.009873362385151e-08, "loss": 13.1719, "step": 27827 }, { "epoch": 1.8481769276748357, "grad_norm": 765.0701904296875, "learning_rate": 3.0072550490328754e-08, "loss": 27.8594, "step": 27828 }, { "epoch": 1.8482433419671913, "grad_norm": 127.33287048339844, "learning_rate": 3.0046378576367716e-08, "loss": 16.7188, "step": 27829 }, { "epoch": 1.848309756259547, "grad_norm": 369.4306945800781, "learning_rate": 3.00202178822706e-08, "loss": 16.7969, "step": 27830 }, { "epoch": 1.8483761705519028, "grad_norm": 222.06826782226562, "learning_rate": 2.99940684083404e-08, "loss": 15.0625, "step": 27831 }, { "epoch": 1.8484425848442585, "grad_norm": 387.9906311035156, "learning_rate": 2.996793015487953e-08, "loss": 23.3281, "step": 27832 }, { "epoch": 1.8485089991366142, "grad_norm": 204.23069763183594, "learning_rate": 2.994180312219019e-08, "loss": 13.625, "step": 27833 }, { "epoch": 1.84857541342897, "grad_norm": 175.56246948242188, "learning_rate": 2.99156873105747e-08, "loss": 18.4219, "step": 27834 }, { "epoch": 1.8486418277213257, "grad_norm": 265.4908142089844, "learning_rate": 2.9889582720335146e-08, "loss": 18.8906, "step": 27835 }, { "epoch": 1.8487082420136813, "grad_norm": 160.98753356933594, "learning_rate": 2.986348935177363e-08, "loss": 14.3438, "step": 27836 }, { "epoch": 1.8487746563060372, "grad_norm": 137.4964141845703, "learning_rate": 2.983740720519168e-08, "loss": 15.1406, "step": 27837 }, { "epoch": 1.8488410705983926, "grad_norm": 211.61819458007812, "learning_rate": 2.981133628089139e-08, "loss": 17.0938, "step": 27838 }, { "epoch": 1.8489074848907485, "grad_norm": 221.7665557861328, "learning_rate": 2.978527657917396e-08, "loss": 13.2344, "step": 27839 }, { "epoch": 1.8489738991831042, "grad_norm": 118.68134307861328, "learning_rate": 2.975922810034115e-08, "loss": 14.1406, "step": 27840 }, { "epoch": 1.8490403134754598, "grad_norm": 305.42767333984375, "learning_rate": 2.9733190844693945e-08, "loss": 13.1406, "step": 27841 }, { "epoch": 1.8491067277678157, "grad_norm": 264.8851013183594, "learning_rate": 2.970716481253377e-08, "loss": 18.9062, "step": 27842 }, { "epoch": 1.8491731420601714, "grad_norm": 506.6617126464844, "learning_rate": 2.9681150004161603e-08, "loss": 17.0156, "step": 27843 }, { "epoch": 1.849239556352527, "grad_norm": 434.3768005371094, "learning_rate": 2.965514641987843e-08, "loss": 21.5938, "step": 27844 }, { "epoch": 1.849305970644883, "grad_norm": 530.5263061523438, "learning_rate": 2.9629154059984785e-08, "loss": 19.5938, "step": 27845 }, { "epoch": 1.8493723849372385, "grad_norm": 127.05590057373047, "learning_rate": 2.960317292478176e-08, "loss": 12.3281, "step": 27846 }, { "epoch": 1.8494387992295942, "grad_norm": 348.4676513671875, "learning_rate": 2.9577203014569674e-08, "loss": 11.9688, "step": 27847 }, { "epoch": 1.84950521352195, "grad_norm": 227.5509796142578, "learning_rate": 2.9551244329648838e-08, "loss": 15.0, "step": 27848 }, { "epoch": 1.8495716278143055, "grad_norm": 131.56483459472656, "learning_rate": 2.952529687031957e-08, "loss": 10.375, "step": 27849 }, { "epoch": 1.8496380421066614, "grad_norm": 182.20355224609375, "learning_rate": 2.9499360636882075e-08, "loss": 14.4219, "step": 27850 }, { "epoch": 1.849704456399017, "grad_norm": 252.16273498535156, "learning_rate": 2.9473435629636444e-08, "loss": 22.7969, "step": 27851 }, { "epoch": 1.8497708706913727, "grad_norm": 238.31777954101562, "learning_rate": 2.944752184888244e-08, "loss": 14.7969, "step": 27852 }, { "epoch": 1.8498372849837286, "grad_norm": 140.11419677734375, "learning_rate": 2.9421619294919708e-08, "loss": 14.5156, "step": 27853 }, { "epoch": 1.8499036992760842, "grad_norm": 119.83738708496094, "learning_rate": 2.9395727968048233e-08, "loss": 14.875, "step": 27854 }, { "epoch": 1.8499701135684399, "grad_norm": 364.1699523925781, "learning_rate": 2.936984786856722e-08, "loss": 19.0312, "step": 27855 }, { "epoch": 1.8500365278607958, "grad_norm": 172.9222869873047, "learning_rate": 2.9343978996776098e-08, "loss": 11.0703, "step": 27856 }, { "epoch": 1.8501029421531514, "grad_norm": 181.0792999267578, "learning_rate": 2.9318121352974068e-08, "loss": 15.1875, "step": 27857 }, { "epoch": 1.850169356445507, "grad_norm": 101.12670135498047, "learning_rate": 2.9292274937460447e-08, "loss": 12.9375, "step": 27858 }, { "epoch": 1.850235770737863, "grad_norm": 237.89479064941406, "learning_rate": 2.926643975053378e-08, "loss": 14.1875, "step": 27859 }, { "epoch": 1.8503021850302184, "grad_norm": 167.30931091308594, "learning_rate": 2.9240615792493373e-08, "loss": 15.125, "step": 27860 }, { "epoch": 1.8503685993225742, "grad_norm": 136.4510498046875, "learning_rate": 2.9214803063637662e-08, "loss": 9.9375, "step": 27861 }, { "epoch": 1.85043501361493, "grad_norm": 361.5093994140625, "learning_rate": 2.9189001564265402e-08, "loss": 14.6406, "step": 27862 }, { "epoch": 1.8505014279072856, "grad_norm": 234.77256774902344, "learning_rate": 2.9163211294674916e-08, "loss": 20.3125, "step": 27863 }, { "epoch": 1.8505678421996414, "grad_norm": 252.7702178955078, "learning_rate": 2.913743225516463e-08, "loss": 17.5, "step": 27864 }, { "epoch": 1.850634256491997, "grad_norm": 214.11781311035156, "learning_rate": 2.9111664446032636e-08, "loss": 19.0, "step": 27865 }, { "epoch": 1.8507006707843527, "grad_norm": 161.9912109375, "learning_rate": 2.9085907867577142e-08, "loss": 21.7969, "step": 27866 }, { "epoch": 1.8507670850767086, "grad_norm": 458.4871520996094, "learning_rate": 2.9060162520095798e-08, "loss": 16.0938, "step": 27867 }, { "epoch": 1.8508334993690643, "grad_norm": 373.9129333496094, "learning_rate": 2.9034428403886923e-08, "loss": 13.9844, "step": 27868 }, { "epoch": 1.85089991366142, "grad_norm": 191.21580505371094, "learning_rate": 2.9008705519247724e-08, "loss": 20.7031, "step": 27869 }, { "epoch": 1.8509663279537758, "grad_norm": 183.65127563476562, "learning_rate": 2.8982993866475735e-08, "loss": 15.6094, "step": 27870 }, { "epoch": 1.8510327422461312, "grad_norm": 189.39059448242188, "learning_rate": 2.8957293445868945e-08, "loss": 16.8125, "step": 27871 }, { "epoch": 1.8510991565384871, "grad_norm": 140.0579833984375, "learning_rate": 2.893160425772401e-08, "loss": 12.1406, "step": 27872 }, { "epoch": 1.8511655708308428, "grad_norm": 171.49815368652344, "learning_rate": 2.8905926302338346e-08, "loss": 12.1953, "step": 27873 }, { "epoch": 1.8512319851231984, "grad_norm": 231.6190185546875, "learning_rate": 2.8880259580008948e-08, "loss": 19.2344, "step": 27874 }, { "epoch": 1.8512983994155543, "grad_norm": 157.99034118652344, "learning_rate": 2.8854604091032797e-08, "loss": 15.1406, "step": 27875 }, { "epoch": 1.85136481370791, "grad_norm": 209.8189697265625, "learning_rate": 2.8828959835706545e-08, "loss": 17.1719, "step": 27876 }, { "epoch": 1.8514312280002656, "grad_norm": 143.6122589111328, "learning_rate": 2.8803326814326955e-08, "loss": 19.8281, "step": 27877 }, { "epoch": 1.8514976422926215, "grad_norm": 215.9734344482422, "learning_rate": 2.8777705027190234e-08, "loss": 15.875, "step": 27878 }, { "epoch": 1.8515640565849771, "grad_norm": 290.8660888671875, "learning_rate": 2.8752094474593257e-08, "loss": 18.2031, "step": 27879 }, { "epoch": 1.8516304708773328, "grad_norm": 190.6802215576172, "learning_rate": 2.8726495156831897e-08, "loss": 17.2344, "step": 27880 }, { "epoch": 1.8516968851696887, "grad_norm": 186.8647918701172, "learning_rate": 2.870090707420225e-08, "loss": 11.9219, "step": 27881 }, { "epoch": 1.851763299462044, "grad_norm": 216.71090698242188, "learning_rate": 2.8675330227000528e-08, "loss": 15.6328, "step": 27882 }, { "epoch": 1.8518297137544, "grad_norm": 231.44947814941406, "learning_rate": 2.86497646155226e-08, "loss": 15.375, "step": 27883 }, { "epoch": 1.8518961280467556, "grad_norm": 137.06365966796875, "learning_rate": 2.8624210240063894e-08, "loss": 16.6406, "step": 27884 }, { "epoch": 1.8519625423391113, "grad_norm": 122.31446838378906, "learning_rate": 2.8598667100920293e-08, "loss": 14.1719, "step": 27885 }, { "epoch": 1.8520289566314672, "grad_norm": 309.8663024902344, "learning_rate": 2.857313519838722e-08, "loss": 13.625, "step": 27886 }, { "epoch": 1.8520953709238228, "grad_norm": 152.01365661621094, "learning_rate": 2.854761453275989e-08, "loss": 15.6875, "step": 27887 }, { "epoch": 1.8521617852161785, "grad_norm": 219.92884826660156, "learning_rate": 2.8522105104333504e-08, "loss": 18.0625, "step": 27888 }, { "epoch": 1.8522281995085343, "grad_norm": 367.3368835449219, "learning_rate": 2.8496606913403276e-08, "loss": 16.1875, "step": 27889 }, { "epoch": 1.85229461380089, "grad_norm": 593.6968994140625, "learning_rate": 2.8471119960264188e-08, "loss": 28.9688, "step": 27890 }, { "epoch": 1.8523610280932457, "grad_norm": 144.11143493652344, "learning_rate": 2.8445644245210898e-08, "loss": 18.9531, "step": 27891 }, { "epoch": 1.8524274423856015, "grad_norm": 123.29937744140625, "learning_rate": 2.8420179768538056e-08, "loss": 14.5625, "step": 27892 }, { "epoch": 1.852493856677957, "grad_norm": 63.80809020996094, "learning_rate": 2.8394726530540535e-08, "loss": 13.2109, "step": 27893 }, { "epoch": 1.8525602709703128, "grad_norm": 376.1766052246094, "learning_rate": 2.836928453151255e-08, "loss": 17.75, "step": 27894 }, { "epoch": 1.8526266852626685, "grad_norm": 137.8417205810547, "learning_rate": 2.8343853771748307e-08, "loss": 19.4688, "step": 27895 }, { "epoch": 1.8526930995550241, "grad_norm": 219.9077606201172, "learning_rate": 2.8318434251542013e-08, "loss": 14.9531, "step": 27896 }, { "epoch": 1.85275951384738, "grad_norm": 183.49594116210938, "learning_rate": 2.829302597118788e-08, "loss": 19.5781, "step": 27897 }, { "epoch": 1.8528259281397357, "grad_norm": 257.43804931640625, "learning_rate": 2.826762893097967e-08, "loss": 13.3984, "step": 27898 }, { "epoch": 1.8528923424320913, "grad_norm": 73.54826354980469, "learning_rate": 2.8242243131211264e-08, "loss": 13.5547, "step": 27899 }, { "epoch": 1.8529587567244472, "grad_norm": 204.5144805908203, "learning_rate": 2.8216868572176088e-08, "loss": 14.8594, "step": 27900 }, { "epoch": 1.8530251710168029, "grad_norm": 279.1872863769531, "learning_rate": 2.8191505254167912e-08, "loss": 22.7969, "step": 27901 }, { "epoch": 1.8530915853091585, "grad_norm": 121.66496276855469, "learning_rate": 2.8166153177480056e-08, "loss": 14.8438, "step": 27902 }, { "epoch": 1.8531579996015144, "grad_norm": 118.02760314941406, "learning_rate": 2.814081234240573e-08, "loss": 12.5156, "step": 27903 }, { "epoch": 1.8532244138938698, "grad_norm": 160.76754760742188, "learning_rate": 2.811548274923803e-08, "loss": 13.4844, "step": 27904 }, { "epoch": 1.8532908281862257, "grad_norm": 117.57839965820312, "learning_rate": 2.8090164398270056e-08, "loss": 11.4062, "step": 27905 }, { "epoch": 1.8533572424785814, "grad_norm": 186.75306701660156, "learning_rate": 2.8064857289794576e-08, "loss": 16.9688, "step": 27906 }, { "epoch": 1.853423656770937, "grad_norm": 283.0330810546875, "learning_rate": 2.803956142410435e-08, "loss": 20.0625, "step": 27907 }, { "epoch": 1.8534900710632929, "grad_norm": 361.3955078125, "learning_rate": 2.8014276801492155e-08, "loss": 14.6094, "step": 27908 }, { "epoch": 1.8535564853556485, "grad_norm": 235.24729919433594, "learning_rate": 2.7989003422250078e-08, "loss": 17.6719, "step": 27909 }, { "epoch": 1.8536228996480042, "grad_norm": 230.1611328125, "learning_rate": 2.7963741286671007e-08, "loss": 22.5938, "step": 27910 }, { "epoch": 1.85368931394036, "grad_norm": 244.7154541015625, "learning_rate": 2.79384903950467e-08, "loss": 16.0625, "step": 27911 }, { "epoch": 1.8537557282327157, "grad_norm": 229.33773803710938, "learning_rate": 2.791325074766948e-08, "loss": 12.8906, "step": 27912 }, { "epoch": 1.8538221425250714, "grad_norm": 247.0680389404297, "learning_rate": 2.7888022344831118e-08, "loss": 23.7188, "step": 27913 }, { "epoch": 1.8538885568174273, "grad_norm": 756.1221923828125, "learning_rate": 2.786280518682371e-08, "loss": 14.9062, "step": 27914 }, { "epoch": 1.8539549711097827, "grad_norm": 169.93716430664062, "learning_rate": 2.7837599273938696e-08, "loss": 19.6406, "step": 27915 }, { "epoch": 1.8540213854021386, "grad_norm": 499.1666564941406, "learning_rate": 2.781240460646794e-08, "loss": 13.6875, "step": 27916 }, { "epoch": 1.8540877996944942, "grad_norm": 211.12515258789062, "learning_rate": 2.7787221184702446e-08, "loss": 17.5312, "step": 27917 }, { "epoch": 1.8541542139868499, "grad_norm": 290.5, "learning_rate": 2.7762049008933974e-08, "loss": 16.0625, "step": 27918 }, { "epoch": 1.8542206282792058, "grad_norm": 299.2926330566406, "learning_rate": 2.773688807945329e-08, "loss": 16.6406, "step": 27919 }, { "epoch": 1.8542870425715614, "grad_norm": 194.2888946533203, "learning_rate": 2.771173839655183e-08, "loss": 18.0625, "step": 27920 }, { "epoch": 1.854353456863917, "grad_norm": 255.6963653564453, "learning_rate": 2.768659996052025e-08, "loss": 15.3594, "step": 27921 }, { "epoch": 1.854419871156273, "grad_norm": 135.55409240722656, "learning_rate": 2.766147277164943e-08, "loss": 20.0469, "step": 27922 }, { "epoch": 1.8544862854486286, "grad_norm": 213.7857666015625, "learning_rate": 2.7636356830229912e-08, "loss": 15.4531, "step": 27923 }, { "epoch": 1.8545526997409842, "grad_norm": 631.2410278320312, "learning_rate": 2.7611252136552467e-08, "loss": 16.6562, "step": 27924 }, { "epoch": 1.8546191140333401, "grad_norm": 383.3766784667969, "learning_rate": 2.7586158690907302e-08, "loss": 14.2656, "step": 27925 }, { "epoch": 1.8546855283256956, "grad_norm": 183.71351623535156, "learning_rate": 2.7561076493584633e-08, "loss": 12.3906, "step": 27926 }, { "epoch": 1.8547519426180514, "grad_norm": 94.4376449584961, "learning_rate": 2.7536005544874786e-08, "loss": 13.6719, "step": 27927 }, { "epoch": 1.854818356910407, "grad_norm": 184.7463836669922, "learning_rate": 2.7510945845067635e-08, "loss": 20.4297, "step": 27928 }, { "epoch": 1.8548847712027627, "grad_norm": 175.8179473876953, "learning_rate": 2.748589739445306e-08, "loss": 12.9688, "step": 27929 }, { "epoch": 1.8549511854951186, "grad_norm": 137.24501037597656, "learning_rate": 2.7460860193321055e-08, "loss": 16.3281, "step": 27930 }, { "epoch": 1.8550175997874743, "grad_norm": 132.04061889648438, "learning_rate": 2.743583424196072e-08, "loss": 10.9375, "step": 27931 }, { "epoch": 1.85508401407983, "grad_norm": 796.1834106445312, "learning_rate": 2.7410819540662045e-08, "loss": 17.9531, "step": 27932 }, { "epoch": 1.8551504283721858, "grad_norm": 223.88906860351562, "learning_rate": 2.7385816089714132e-08, "loss": 16.9688, "step": 27933 }, { "epoch": 1.8552168426645415, "grad_norm": 137.61976623535156, "learning_rate": 2.7360823889406193e-08, "loss": 14.4844, "step": 27934 }, { "epoch": 1.855283256956897, "grad_norm": 173.04217529296875, "learning_rate": 2.7335842940027444e-08, "loss": 15.5, "step": 27935 }, { "epoch": 1.855349671249253, "grad_norm": 343.882568359375, "learning_rate": 2.7310873241866762e-08, "loss": 22.1094, "step": 27936 }, { "epoch": 1.8554160855416084, "grad_norm": 343.3634033203125, "learning_rate": 2.7285914795213026e-08, "loss": 16.5469, "step": 27937 }, { "epoch": 1.8554824998339643, "grad_norm": 256.63616943359375, "learning_rate": 2.726096760035501e-08, "loss": 15.8438, "step": 27938 }, { "epoch": 1.85554891412632, "grad_norm": 192.3213653564453, "learning_rate": 2.7236031657581038e-08, "loss": 19.9375, "step": 27939 }, { "epoch": 1.8556153284186756, "grad_norm": 356.3000183105469, "learning_rate": 2.7211106967179874e-08, "loss": 22.625, "step": 27940 }, { "epoch": 1.8556817427110315, "grad_norm": 181.8751220703125, "learning_rate": 2.7186193529439626e-08, "loss": 15.1406, "step": 27941 }, { "epoch": 1.8557481570033871, "grad_norm": 117.88665008544922, "learning_rate": 2.7161291344648506e-08, "loss": 20.9688, "step": 27942 }, { "epoch": 1.8558145712957428, "grad_norm": 373.1926574707031, "learning_rate": 2.7136400413094617e-08, "loss": 27.6094, "step": 27943 }, { "epoch": 1.8558809855880987, "grad_norm": 124.6669921875, "learning_rate": 2.7111520735065952e-08, "loss": 12.6719, "step": 27944 }, { "epoch": 1.8559473998804543, "grad_norm": 174.1172332763672, "learning_rate": 2.7086652310850055e-08, "loss": 16.7188, "step": 27945 }, { "epoch": 1.85601381417281, "grad_norm": 212.67738342285156, "learning_rate": 2.706179514073481e-08, "loss": 15.3281, "step": 27946 }, { "epoch": 1.8560802284651658, "grad_norm": 163.50840759277344, "learning_rate": 2.7036949225007876e-08, "loss": 17.1875, "step": 27947 }, { "epoch": 1.8561466427575213, "grad_norm": 166.50543212890625, "learning_rate": 2.7012114563956135e-08, "loss": 11.2812, "step": 27948 }, { "epoch": 1.8562130570498772, "grad_norm": 257.40069580078125, "learning_rate": 2.6987291157867465e-08, "loss": 17.6094, "step": 27949 }, { "epoch": 1.8562794713422328, "grad_norm": 393.919189453125, "learning_rate": 2.6962479007028637e-08, "loss": 13.625, "step": 27950 }, { "epoch": 1.8563458856345885, "grad_norm": 181.93836975097656, "learning_rate": 2.693767811172676e-08, "loss": 13.6406, "step": 27951 }, { "epoch": 1.8564122999269443, "grad_norm": 285.3083801269531, "learning_rate": 2.6912888472248706e-08, "loss": 16.3125, "step": 27952 }, { "epoch": 1.8564787142193, "grad_norm": 193.1547088623047, "learning_rate": 2.6888110088881256e-08, "loss": 12.8125, "step": 27953 }, { "epoch": 1.8565451285116557, "grad_norm": 196.5362091064453, "learning_rate": 2.6863342961911063e-08, "loss": 14.2812, "step": 27954 }, { "epoch": 1.8566115428040115, "grad_norm": 128.70004272460938, "learning_rate": 2.6838587091624676e-08, "loss": 15.0625, "step": 27955 }, { "epoch": 1.8566779570963672, "grad_norm": 242.72312927246094, "learning_rate": 2.681384247830809e-08, "loss": 12.4062, "step": 27956 }, { "epoch": 1.8567443713887228, "grad_norm": 325.62298583984375, "learning_rate": 2.6789109122247965e-08, "loss": 17.5469, "step": 27957 }, { "epoch": 1.8568107856810787, "grad_norm": 544.7413940429688, "learning_rate": 2.6764387023730295e-08, "loss": 14.7656, "step": 27958 }, { "epoch": 1.8568771999734341, "grad_norm": 205.6602783203125, "learning_rate": 2.6739676183040848e-08, "loss": 15.4219, "step": 27959 }, { "epoch": 1.85694361426579, "grad_norm": 284.1385498046875, "learning_rate": 2.6714976600465733e-08, "loss": 16.8438, "step": 27960 }, { "epoch": 1.8570100285581457, "grad_norm": 125.47586822509766, "learning_rate": 2.6690288276290383e-08, "loss": 10.6953, "step": 27961 }, { "epoch": 1.8570764428505013, "grad_norm": 593.5737915039062, "learning_rate": 2.6665611210800686e-08, "loss": 23.1484, "step": 27962 }, { "epoch": 1.8571428571428572, "grad_norm": 103.95451354980469, "learning_rate": 2.6640945404281966e-08, "loss": 16.6094, "step": 27963 }, { "epoch": 1.8572092714352129, "grad_norm": 452.014892578125, "learning_rate": 2.661629085701933e-08, "loss": 11.0469, "step": 27964 }, { "epoch": 1.8572756857275685, "grad_norm": 280.5733642578125, "learning_rate": 2.6591647569298438e-08, "loss": 13.9688, "step": 27965 }, { "epoch": 1.8573421000199244, "grad_norm": 326.90478515625, "learning_rate": 2.6567015541403836e-08, "loss": 14.0156, "step": 27966 }, { "epoch": 1.85740851431228, "grad_norm": 307.9014587402344, "learning_rate": 2.6542394773620748e-08, "loss": 28.8125, "step": 27967 }, { "epoch": 1.8574749286046357, "grad_norm": 115.64092254638672, "learning_rate": 2.6517785266233937e-08, "loss": 11.3281, "step": 27968 }, { "epoch": 1.8575413428969916, "grad_norm": 207.9487762451172, "learning_rate": 2.6493187019528074e-08, "loss": 14.1094, "step": 27969 }, { "epoch": 1.857607757189347, "grad_norm": 310.83642578125, "learning_rate": 2.646860003378748e-08, "loss": 17.5156, "step": 27970 }, { "epoch": 1.8576741714817029, "grad_norm": 202.2508544921875, "learning_rate": 2.644402430929704e-08, "loss": 17.2969, "step": 27971 }, { "epoch": 1.8577405857740585, "grad_norm": 344.79998779296875, "learning_rate": 2.641945984634053e-08, "loss": 17.0938, "step": 27972 }, { "epoch": 1.8578070000664142, "grad_norm": 205.81057739257812, "learning_rate": 2.6394906645202386e-08, "loss": 14.4688, "step": 27973 }, { "epoch": 1.85787341435877, "grad_norm": 190.49581909179688, "learning_rate": 2.6370364706166492e-08, "loss": 16.1562, "step": 27974 }, { "epoch": 1.8579398286511257, "grad_norm": 282.1241149902344, "learning_rate": 2.6345834029516845e-08, "loss": 17.0781, "step": 27975 }, { "epoch": 1.8580062429434814, "grad_norm": 165.01390075683594, "learning_rate": 2.6321314615537104e-08, "loss": 13.7969, "step": 27976 }, { "epoch": 1.8580726572358373, "grad_norm": 288.2967834472656, "learning_rate": 2.6296806464511047e-08, "loss": 17.5469, "step": 27977 }, { "epoch": 1.858139071528193, "grad_norm": 937.0618286132812, "learning_rate": 2.6272309576721885e-08, "loss": 11.7812, "step": 27978 }, { "epoch": 1.8582054858205486, "grad_norm": 221.9115447998047, "learning_rate": 2.6247823952453395e-08, "loss": 19.75, "step": 27979 }, { "epoch": 1.8582719001129044, "grad_norm": 123.62146759033203, "learning_rate": 2.6223349591988463e-08, "loss": 19.0781, "step": 27980 }, { "epoch": 1.8583383144052599, "grad_norm": 103.40072631835938, "learning_rate": 2.6198886495610196e-08, "loss": 11.1172, "step": 27981 }, { "epoch": 1.8584047286976157, "grad_norm": 136.420654296875, "learning_rate": 2.6174434663601808e-08, "loss": 16.9844, "step": 27982 }, { "epoch": 1.8584711429899714, "grad_norm": 166.7035675048828, "learning_rate": 2.6149994096246075e-08, "loss": 15.5156, "step": 27983 }, { "epoch": 1.858537557282327, "grad_norm": 243.9340057373047, "learning_rate": 2.612556479382555e-08, "loss": 20.8438, "step": 27984 }, { "epoch": 1.858603971574683, "grad_norm": 178.7204132080078, "learning_rate": 2.6101146756622894e-08, "loss": 19.5938, "step": 27985 }, { "epoch": 1.8586703858670386, "grad_norm": 113.3291244506836, "learning_rate": 2.607673998492077e-08, "loss": 17.3594, "step": 27986 }, { "epoch": 1.8587368001593942, "grad_norm": 195.32388305664062, "learning_rate": 2.6052344479001064e-08, "loss": 18.2812, "step": 27987 }, { "epoch": 1.8588032144517501, "grad_norm": 121.0368881225586, "learning_rate": 2.602796023914644e-08, "loss": 14.875, "step": 27988 }, { "epoch": 1.8588696287441058, "grad_norm": 523.91357421875, "learning_rate": 2.600358726563867e-08, "loss": 17.0625, "step": 27989 }, { "epoch": 1.8589360430364614, "grad_norm": 192.01425170898438, "learning_rate": 2.5979225558759755e-08, "loss": 18.4688, "step": 27990 }, { "epoch": 1.8590024573288173, "grad_norm": 183.6663055419922, "learning_rate": 2.5954875118791576e-08, "loss": 14.5781, "step": 27991 }, { "epoch": 1.8590688716211727, "grad_norm": 116.66888427734375, "learning_rate": 2.5930535946015576e-08, "loss": 10.7188, "step": 27992 }, { "epoch": 1.8591352859135286, "grad_norm": 413.21142578125, "learning_rate": 2.590620804071353e-08, "loss": 20.9844, "step": 27993 }, { "epoch": 1.8592017002058843, "grad_norm": 133.3406982421875, "learning_rate": 2.5881891403166988e-08, "loss": 13.7188, "step": 27994 }, { "epoch": 1.85926811449824, "grad_norm": 586.0858764648438, "learning_rate": 2.585758603365662e-08, "loss": 16.3906, "step": 27995 }, { "epoch": 1.8593345287905958, "grad_norm": 142.6236114501953, "learning_rate": 2.5833291932464307e-08, "loss": 13.0312, "step": 27996 }, { "epoch": 1.8594009430829515, "grad_norm": 364.7259521484375, "learning_rate": 2.5809009099870603e-08, "loss": 17.5938, "step": 27997 }, { "epoch": 1.859467357375307, "grad_norm": 154.68145751953125, "learning_rate": 2.5784737536156508e-08, "loss": 17.0312, "step": 27998 }, { "epoch": 1.859533771667663, "grad_norm": 597.0567626953125, "learning_rate": 2.5760477241602907e-08, "loss": 23.3594, "step": 27999 }, { "epoch": 1.8596001859600186, "grad_norm": 234.73342895507812, "learning_rate": 2.5736228216490242e-08, "loss": 13.2656, "step": 28000 }, { "epoch": 1.8596666002523743, "grad_norm": 314.77972412109375, "learning_rate": 2.5711990461099063e-08, "loss": 11.9844, "step": 28001 }, { "epoch": 1.8597330145447302, "grad_norm": 118.54358673095703, "learning_rate": 2.568776397570993e-08, "loss": 14.6875, "step": 28002 }, { "epoch": 1.8597994288370856, "grad_norm": 434.83868408203125, "learning_rate": 2.5663548760602727e-08, "loss": 11.25, "step": 28003 }, { "epoch": 1.8598658431294415, "grad_norm": 229.6840362548828, "learning_rate": 2.5639344816057895e-08, "loss": 11.6719, "step": 28004 }, { "epoch": 1.8599322574217971, "grad_norm": 355.2227478027344, "learning_rate": 2.5615152142355213e-08, "loss": 17.5469, "step": 28005 }, { "epoch": 1.8599986717141528, "grad_norm": 212.2221221923828, "learning_rate": 2.5590970739774566e-08, "loss": 14.7812, "step": 28006 }, { "epoch": 1.8600650860065087, "grad_norm": 102.3873519897461, "learning_rate": 2.556680060859573e-08, "loss": 15.0781, "step": 28007 }, { "epoch": 1.8601315002988643, "grad_norm": 137.52450561523438, "learning_rate": 2.554264174909837e-08, "loss": 12.9453, "step": 28008 }, { "epoch": 1.86019791459122, "grad_norm": 200.41143798828125, "learning_rate": 2.5518494161561598e-08, "loss": 16.1719, "step": 28009 }, { "epoch": 1.8602643288835758, "grad_norm": 304.69476318359375, "learning_rate": 2.549435784626508e-08, "loss": 15.6406, "step": 28010 }, { "epoch": 1.8603307431759315, "grad_norm": 149.4758758544922, "learning_rate": 2.5470232803487923e-08, "loss": 15.7344, "step": 28011 }, { "epoch": 1.8603971574682872, "grad_norm": 292.9798889160156, "learning_rate": 2.544611903350913e-08, "loss": 13.0, "step": 28012 }, { "epoch": 1.860463571760643, "grad_norm": 401.44970703125, "learning_rate": 2.5422016536607693e-08, "loss": 17.8281, "step": 28013 }, { "epoch": 1.8605299860529985, "grad_norm": 297.8338928222656, "learning_rate": 2.5397925313062285e-08, "loss": 13.9766, "step": 28014 }, { "epoch": 1.8605964003453543, "grad_norm": 2177.572265625, "learning_rate": 2.537384536315179e-08, "loss": 12.3281, "step": 28015 }, { "epoch": 1.86066281463771, "grad_norm": 164.18536376953125, "learning_rate": 2.5349776687154767e-08, "loss": 12.5781, "step": 28016 }, { "epoch": 1.8607292289300656, "grad_norm": 311.4625549316406, "learning_rate": 2.532571928534921e-08, "loss": 17.3281, "step": 28017 }, { "epoch": 1.8607956432224215, "grad_norm": 394.93505859375, "learning_rate": 2.5301673158013903e-08, "loss": 17.3125, "step": 28018 }, { "epoch": 1.8608620575147772, "grad_norm": 112.59237670898438, "learning_rate": 2.527763830542684e-08, "loss": 15.3906, "step": 28019 }, { "epoch": 1.8609284718071328, "grad_norm": 117.22393035888672, "learning_rate": 2.5253614727865912e-08, "loss": 13.9062, "step": 28020 }, { "epoch": 1.8609948860994887, "grad_norm": 260.20526123046875, "learning_rate": 2.522960242560912e-08, "loss": 14.3594, "step": 28021 }, { "epoch": 1.8610613003918444, "grad_norm": 457.19903564453125, "learning_rate": 2.520560139893424e-08, "loss": 12.6094, "step": 28022 }, { "epoch": 1.8611277146842, "grad_norm": 221.16098022460938, "learning_rate": 2.518161164811883e-08, "loss": 14.9062, "step": 28023 }, { "epoch": 1.861194128976556, "grad_norm": 114.1255874633789, "learning_rate": 2.515763317344044e-08, "loss": 13.1875, "step": 28024 }, { "epoch": 1.8612605432689113, "grad_norm": 217.81163024902344, "learning_rate": 2.5133665975176298e-08, "loss": 14.1094, "step": 28025 }, { "epoch": 1.8613269575612672, "grad_norm": 468.22882080078125, "learning_rate": 2.5109710053603962e-08, "loss": 19.3438, "step": 28026 }, { "epoch": 1.8613933718536229, "grad_norm": 192.5638427734375, "learning_rate": 2.5085765409000316e-08, "loss": 11.6094, "step": 28027 }, { "epoch": 1.8614597861459785, "grad_norm": 151.45022583007812, "learning_rate": 2.506183204164214e-08, "loss": 14.5, "step": 28028 }, { "epoch": 1.8615262004383344, "grad_norm": 245.18577575683594, "learning_rate": 2.5037909951806767e-08, "loss": 16.5781, "step": 28029 }, { "epoch": 1.86159261473069, "grad_norm": 154.34718322753906, "learning_rate": 2.5013999139770648e-08, "loss": 11.5078, "step": 28030 }, { "epoch": 1.8616590290230457, "grad_norm": 187.16372680664062, "learning_rate": 2.4990099605810222e-08, "loss": 15.0625, "step": 28031 }, { "epoch": 1.8617254433154016, "grad_norm": 1274.4605712890625, "learning_rate": 2.496621135020216e-08, "loss": 27.7188, "step": 28032 }, { "epoch": 1.8617918576077572, "grad_norm": 116.75674438476562, "learning_rate": 2.4942334373222906e-08, "loss": 12.875, "step": 28033 }, { "epoch": 1.8618582719001129, "grad_norm": 284.28131103515625, "learning_rate": 2.491846867514813e-08, "loss": 13.6953, "step": 28034 }, { "epoch": 1.8619246861924688, "grad_norm": 202.0127410888672, "learning_rate": 2.489461425625461e-08, "loss": 16.2812, "step": 28035 }, { "epoch": 1.8619911004848242, "grad_norm": 1807.539306640625, "learning_rate": 2.487077111681768e-08, "loss": 11.3125, "step": 28036 }, { "epoch": 1.86205751477718, "grad_norm": 471.9446105957031, "learning_rate": 2.4846939257113454e-08, "loss": 11.2656, "step": 28037 }, { "epoch": 1.8621239290695357, "grad_norm": 406.29766845703125, "learning_rate": 2.4823118677417486e-08, "loss": 17.5781, "step": 28038 }, { "epoch": 1.8621903433618914, "grad_norm": 159.23101806640625, "learning_rate": 2.479930937800534e-08, "loss": 11.5625, "step": 28039 }, { "epoch": 1.8622567576542473, "grad_norm": 183.57293701171875, "learning_rate": 2.4775511359152458e-08, "loss": 16.1562, "step": 28040 }, { "epoch": 1.862323171946603, "grad_norm": 183.462158203125, "learning_rate": 2.4751724621134283e-08, "loss": 17.4688, "step": 28041 }, { "epoch": 1.8623895862389586, "grad_norm": 330.74639892578125, "learning_rate": 2.4727949164225602e-08, "loss": 20.5312, "step": 28042 }, { "epoch": 1.8624560005313144, "grad_norm": 146.87257385253906, "learning_rate": 2.470418498870175e-08, "loss": 13.875, "step": 28043 }, { "epoch": 1.86252241482367, "grad_norm": 190.68003845214844, "learning_rate": 2.4680432094837388e-08, "loss": 11.7344, "step": 28044 }, { "epoch": 1.8625888291160257, "grad_norm": 179.84747314453125, "learning_rate": 2.4656690482907417e-08, "loss": 16.5938, "step": 28045 }, { "epoch": 1.8626552434083816, "grad_norm": 176.4897003173828, "learning_rate": 2.46329601531865e-08, "loss": 13.8281, "step": 28046 }, { "epoch": 1.862721657700737, "grad_norm": 155.6878204345703, "learning_rate": 2.4609241105948975e-08, "loss": 13.0312, "step": 28047 }, { "epoch": 1.862788071993093, "grad_norm": 150.04066467285156, "learning_rate": 2.45855333414694e-08, "loss": 15.25, "step": 28048 }, { "epoch": 1.8628544862854486, "grad_norm": 303.5935363769531, "learning_rate": 2.4561836860022e-08, "loss": 18.0312, "step": 28049 }, { "epoch": 1.8629209005778042, "grad_norm": 172.68338012695312, "learning_rate": 2.4538151661880667e-08, "loss": 18.0938, "step": 28050 }, { "epoch": 1.8629873148701601, "grad_norm": 120.55509185791016, "learning_rate": 2.4514477747319518e-08, "loss": 15.4688, "step": 28051 }, { "epoch": 1.8630537291625158, "grad_norm": 200.413330078125, "learning_rate": 2.4490815116612327e-08, "loss": 13.4531, "step": 28052 }, { "epoch": 1.8631201434548714, "grad_norm": 104.05343627929688, "learning_rate": 2.4467163770032996e-08, "loss": 14.1719, "step": 28053 }, { "epoch": 1.8631865577472273, "grad_norm": 259.1726379394531, "learning_rate": 2.4443523707854853e-08, "loss": 18.7812, "step": 28054 }, { "epoch": 1.863252972039583, "grad_norm": 296.32666015625, "learning_rate": 2.441989493035157e-08, "loss": 15.3906, "step": 28055 }, { "epoch": 1.8633193863319386, "grad_norm": 145.93385314941406, "learning_rate": 2.4396277437796487e-08, "loss": 17.7812, "step": 28056 }, { "epoch": 1.8633858006242945, "grad_norm": 254.0309600830078, "learning_rate": 2.4372671230462604e-08, "loss": 13.3906, "step": 28057 }, { "epoch": 1.86345221491665, "grad_norm": 154.264892578125, "learning_rate": 2.434907630862315e-08, "loss": 13.5938, "step": 28058 }, { "epoch": 1.8635186292090058, "grad_norm": 228.4060821533203, "learning_rate": 2.4325492672550797e-08, "loss": 19.3906, "step": 28059 }, { "epoch": 1.8635850435013614, "grad_norm": 326.8092346191406, "learning_rate": 2.4301920322518766e-08, "loss": 24.8281, "step": 28060 }, { "epoch": 1.863651457793717, "grad_norm": 120.18582916259766, "learning_rate": 2.4278359258799395e-08, "loss": 12.8359, "step": 28061 }, { "epoch": 1.863717872086073, "grad_norm": 335.2617492675781, "learning_rate": 2.425480948166536e-08, "loss": 16.7188, "step": 28062 }, { "epoch": 1.8637842863784286, "grad_norm": 238.63272094726562, "learning_rate": 2.4231270991388998e-08, "loss": 13.625, "step": 28063 }, { "epoch": 1.8638507006707843, "grad_norm": 191.89761352539062, "learning_rate": 2.4207743788242642e-08, "loss": 15.9375, "step": 28064 }, { "epoch": 1.8639171149631402, "grad_norm": 177.9340057373047, "learning_rate": 2.4184227872498407e-08, "loss": 13.2969, "step": 28065 }, { "epoch": 1.8639835292554958, "grad_norm": 286.1654357910156, "learning_rate": 2.4160723244428527e-08, "loss": 17.0, "step": 28066 }, { "epoch": 1.8640499435478515, "grad_norm": 456.86077880859375, "learning_rate": 2.4137229904304445e-08, "loss": 33.4844, "step": 28067 }, { "epoch": 1.8641163578402073, "grad_norm": 522.9984130859375, "learning_rate": 2.411374785239839e-08, "loss": 22.7188, "step": 28068 }, { "epoch": 1.8641827721325628, "grad_norm": 220.00527954101562, "learning_rate": 2.4090277088981594e-08, "loss": 15.1406, "step": 28069 }, { "epoch": 1.8642491864249187, "grad_norm": 171.10647583007812, "learning_rate": 2.406681761432583e-08, "loss": 16.4844, "step": 28070 }, { "epoch": 1.8643156007172743, "grad_norm": 243.02745056152344, "learning_rate": 2.404336942870233e-08, "loss": 12.6875, "step": 28071 }, { "epoch": 1.86438201500963, "grad_norm": 179.12667846679688, "learning_rate": 2.4019932532382436e-08, "loss": 13.0625, "step": 28072 }, { "epoch": 1.8644484293019858, "grad_norm": 136.89694213867188, "learning_rate": 2.399650692563704e-08, "loss": 17.125, "step": 28073 }, { "epoch": 1.8645148435943415, "grad_norm": 143.2327423095703, "learning_rate": 2.3973092608737365e-08, "loss": 14.4375, "step": 28074 }, { "epoch": 1.8645812578866972, "grad_norm": 310.3141784667969, "learning_rate": 2.3949689581954092e-08, "loss": 12.5781, "step": 28075 }, { "epoch": 1.864647672179053, "grad_norm": 597.3853149414062, "learning_rate": 2.3926297845557996e-08, "loss": 32.625, "step": 28076 }, { "epoch": 1.8647140864714087, "grad_norm": 219.16314697265625, "learning_rate": 2.3902917399819644e-08, "loss": 17.6719, "step": 28077 }, { "epoch": 1.8647805007637643, "grad_norm": 254.460205078125, "learning_rate": 2.3879548245009486e-08, "loss": 21.125, "step": 28078 }, { "epoch": 1.8648469150561202, "grad_norm": 127.52030944824219, "learning_rate": 2.3856190381397856e-08, "loss": 16.6094, "step": 28079 }, { "epoch": 1.8649133293484756, "grad_norm": 357.95989990234375, "learning_rate": 2.3832843809255098e-08, "loss": 16.5938, "step": 28080 }, { "epoch": 1.8649797436408315, "grad_norm": 260.23590087890625, "learning_rate": 2.3809508528850776e-08, "loss": 17.7812, "step": 28081 }, { "epoch": 1.8650461579331872, "grad_norm": 286.6656799316406, "learning_rate": 2.3786184540455445e-08, "loss": 12.5781, "step": 28082 }, { "epoch": 1.8651125722255428, "grad_norm": 211.83436584472656, "learning_rate": 2.376287184433856e-08, "loss": 21.6562, "step": 28083 }, { "epoch": 1.8651789865178987, "grad_norm": 197.61087036132812, "learning_rate": 2.3739570440769795e-08, "loss": 17.7031, "step": 28084 }, { "epoch": 1.8652454008102544, "grad_norm": 201.2660675048828, "learning_rate": 2.3716280330018712e-08, "loss": 12.8438, "step": 28085 }, { "epoch": 1.86531181510261, "grad_norm": 264.3538513183594, "learning_rate": 2.369300151235476e-08, "loss": 14.7969, "step": 28086 }, { "epoch": 1.865378229394966, "grad_norm": 280.6097106933594, "learning_rate": 2.3669733988047168e-08, "loss": 19.375, "step": 28087 }, { "epoch": 1.8654446436873215, "grad_norm": 216.76011657714844, "learning_rate": 2.364647775736528e-08, "loss": 22.2656, "step": 28088 }, { "epoch": 1.8655110579796772, "grad_norm": 212.6092071533203, "learning_rate": 2.3623232820577654e-08, "loss": 12.4062, "step": 28089 }, { "epoch": 1.865577472272033, "grad_norm": 207.21029663085938, "learning_rate": 2.3599999177953743e-08, "loss": 13.0938, "step": 28090 }, { "epoch": 1.8656438865643885, "grad_norm": 243.40577697753906, "learning_rate": 2.357677682976189e-08, "loss": 20.7188, "step": 28091 }, { "epoch": 1.8657103008567444, "grad_norm": 99.55070495605469, "learning_rate": 2.3553565776270657e-08, "loss": 11.6875, "step": 28092 }, { "epoch": 1.8657767151491, "grad_norm": 140.8425750732422, "learning_rate": 2.353036601774905e-08, "loss": 15.2656, "step": 28093 }, { "epoch": 1.8658431294414557, "grad_norm": 218.37911987304688, "learning_rate": 2.350717755446496e-08, "loss": 14.9062, "step": 28094 }, { "epoch": 1.8659095437338116, "grad_norm": 239.2484130859375, "learning_rate": 2.3484000386686744e-08, "loss": 11.0781, "step": 28095 }, { "epoch": 1.8659759580261672, "grad_norm": 331.503173828125, "learning_rate": 2.3460834514682505e-08, "loss": 12.6094, "step": 28096 }, { "epoch": 1.8660423723185229, "grad_norm": 508.7262268066406, "learning_rate": 2.3437679938720368e-08, "loss": 13.7031, "step": 28097 }, { "epoch": 1.8661087866108788, "grad_norm": 208.7892608642578, "learning_rate": 2.3414536659067786e-08, "loss": 18.5469, "step": 28098 }, { "epoch": 1.8661752009032344, "grad_norm": 171.5885772705078, "learning_rate": 2.3391404675992988e-08, "loss": 15.8594, "step": 28099 }, { "epoch": 1.86624161519559, "grad_norm": 102.15863800048828, "learning_rate": 2.3368283989763093e-08, "loss": 10.3438, "step": 28100 }, { "epoch": 1.866308029487946, "grad_norm": 173.3975830078125, "learning_rate": 2.3345174600645778e-08, "loss": 12.6875, "step": 28101 }, { "epoch": 1.8663744437803014, "grad_norm": 97.59793853759766, "learning_rate": 2.3322076508908274e-08, "loss": 19.0625, "step": 28102 }, { "epoch": 1.8664408580726572, "grad_norm": 166.52207946777344, "learning_rate": 2.3298989714817807e-08, "loss": 14.3906, "step": 28103 }, { "epoch": 1.866507272365013, "grad_norm": 260.3708801269531, "learning_rate": 2.3275914218641503e-08, "loss": 21.5156, "step": 28104 }, { "epoch": 1.8665736866573686, "grad_norm": 231.0716552734375, "learning_rate": 2.3252850020646254e-08, "loss": 23.0625, "step": 28105 }, { "epoch": 1.8666401009497244, "grad_norm": 344.1092224121094, "learning_rate": 2.3229797121098628e-08, "loss": 17.0469, "step": 28106 }, { "epoch": 1.86670651524208, "grad_norm": 89.62232208251953, "learning_rate": 2.320675552026563e-08, "loss": 12.5469, "step": 28107 }, { "epoch": 1.8667729295344357, "grad_norm": 145.23606872558594, "learning_rate": 2.318372521841361e-08, "loss": 12.5625, "step": 28108 }, { "epoch": 1.8668393438267916, "grad_norm": 120.14851379394531, "learning_rate": 2.31607062158089e-08, "loss": 13.7031, "step": 28109 }, { "epoch": 1.8669057581191473, "grad_norm": 232.4411163330078, "learning_rate": 2.3137698512717852e-08, "loss": 16.5938, "step": 28110 }, { "epoch": 1.866972172411503, "grad_norm": 282.9934997558594, "learning_rate": 2.3114702109406693e-08, "loss": 17.2031, "step": 28111 }, { "epoch": 1.8670385867038588, "grad_norm": 267.1562194824219, "learning_rate": 2.309171700614132e-08, "loss": 13.2812, "step": 28112 }, { "epoch": 1.8671050009962142, "grad_norm": 132.0327606201172, "learning_rate": 2.306874320318786e-08, "loss": 17.1094, "step": 28113 }, { "epoch": 1.8671714152885701, "grad_norm": 240.45228576660156, "learning_rate": 2.3045780700811644e-08, "loss": 23.7031, "step": 28114 }, { "epoch": 1.8672378295809258, "grad_norm": 337.23529052734375, "learning_rate": 2.3022829499278474e-08, "loss": 14.3281, "step": 28115 }, { "epoch": 1.8673042438732814, "grad_norm": 597.77783203125, "learning_rate": 2.2999889598853907e-08, "loss": 16.2188, "step": 28116 }, { "epoch": 1.8673706581656373, "grad_norm": 198.35403442382812, "learning_rate": 2.2976960999803173e-08, "loss": 14.6875, "step": 28117 }, { "epoch": 1.867437072457993, "grad_norm": 375.7310485839844, "learning_rate": 2.2954043702391625e-08, "loss": 18.1562, "step": 28118 }, { "epoch": 1.8675034867503486, "grad_norm": 372.89605712890625, "learning_rate": 2.2931137706884374e-08, "loss": 11.5938, "step": 28119 }, { "epoch": 1.8675699010427045, "grad_norm": 204.1353759765625, "learning_rate": 2.2908243013546102e-08, "loss": 10.5234, "step": 28120 }, { "epoch": 1.8676363153350601, "grad_norm": 367.74261474609375, "learning_rate": 2.2885359622642152e-08, "loss": 17.1406, "step": 28121 }, { "epoch": 1.8677027296274158, "grad_norm": 313.07110595703125, "learning_rate": 2.2862487534436758e-08, "loss": 15.8281, "step": 28122 }, { "epoch": 1.8677691439197717, "grad_norm": 197.6966094970703, "learning_rate": 2.2839626749194595e-08, "loss": 15.0938, "step": 28123 }, { "epoch": 1.867835558212127, "grad_norm": 263.9705810546875, "learning_rate": 2.281677726718023e-08, "loss": 12.2344, "step": 28124 }, { "epoch": 1.867901972504483, "grad_norm": 281.6598205566406, "learning_rate": 2.27939390886579e-08, "loss": 13.1484, "step": 28125 }, { "epoch": 1.8679683867968386, "grad_norm": 296.6639709472656, "learning_rate": 2.277111221389183e-08, "loss": 11.7656, "step": 28126 }, { "epoch": 1.8680348010891943, "grad_norm": 319.7044372558594, "learning_rate": 2.274829664314615e-08, "loss": 12.6562, "step": 28127 }, { "epoch": 1.8681012153815502, "grad_norm": 227.78599548339844, "learning_rate": 2.2725492376684308e-08, "loss": 14.7344, "step": 28128 }, { "epoch": 1.8681676296739058, "grad_norm": 171.3059844970703, "learning_rate": 2.2702699414770655e-08, "loss": 15.7031, "step": 28129 }, { "epoch": 1.8682340439662615, "grad_norm": 144.57742309570312, "learning_rate": 2.2679917757668753e-08, "loss": 10.75, "step": 28130 }, { "epoch": 1.8683004582586173, "grad_norm": 190.5488739013672, "learning_rate": 2.265714740564184e-08, "loss": 15.3594, "step": 28131 }, { "epoch": 1.868366872550973, "grad_norm": 434.3789367675781, "learning_rate": 2.2634388358953595e-08, "loss": 16.1406, "step": 28132 }, { "epoch": 1.8684332868433287, "grad_norm": 205.5074462890625, "learning_rate": 2.2611640617867024e-08, "loss": 17.7344, "step": 28133 }, { "epoch": 1.8684997011356845, "grad_norm": 127.10562133789062, "learning_rate": 2.2588904182645473e-08, "loss": 16.4531, "step": 28134 }, { "epoch": 1.86856611542804, "grad_norm": 177.90550231933594, "learning_rate": 2.2566179053551847e-08, "loss": 17.75, "step": 28135 }, { "epoch": 1.8686325297203958, "grad_norm": 168.95216369628906, "learning_rate": 2.2543465230849156e-08, "loss": 14.2344, "step": 28136 }, { "epoch": 1.8686989440127515, "grad_norm": 140.91822814941406, "learning_rate": 2.252076271479986e-08, "loss": 14.3594, "step": 28137 }, { "epoch": 1.8687653583051071, "grad_norm": 136.78778076171875, "learning_rate": 2.2498071505666848e-08, "loss": 13.1094, "step": 28138 }, { "epoch": 1.868831772597463, "grad_norm": 234.5351104736328, "learning_rate": 2.247539160371248e-08, "loss": 16.9688, "step": 28139 }, { "epoch": 1.8688981868898187, "grad_norm": 149.7211151123047, "learning_rate": 2.2452723009198983e-08, "loss": 12.8281, "step": 28140 }, { "epoch": 1.8689646011821743, "grad_norm": 226.1001434326172, "learning_rate": 2.2430065722388814e-08, "loss": 13.8203, "step": 28141 }, { "epoch": 1.8690310154745302, "grad_norm": 286.7243957519531, "learning_rate": 2.2407419743543876e-08, "loss": 15.9844, "step": 28142 }, { "epoch": 1.8690974297668859, "grad_norm": 244.46168518066406, "learning_rate": 2.2384785072926293e-08, "loss": 15.9062, "step": 28143 }, { "epoch": 1.8691638440592415, "grad_norm": 206.66860961914062, "learning_rate": 2.2362161710797743e-08, "loss": 16.3438, "step": 28144 }, { "epoch": 1.8692302583515974, "grad_norm": 119.44662475585938, "learning_rate": 2.233954965741991e-08, "loss": 12.0469, "step": 28145 }, { "epoch": 1.8692966726439528, "grad_norm": 318.5842590332031, "learning_rate": 2.2316948913054577e-08, "loss": 13.7188, "step": 28146 }, { "epoch": 1.8693630869363087, "grad_norm": 333.5596008300781, "learning_rate": 2.2294359477962878e-08, "loss": 16.5, "step": 28147 }, { "epoch": 1.8694295012286644, "grad_norm": 152.66070556640625, "learning_rate": 2.227178135240626e-08, "loss": 18.0938, "step": 28148 }, { "epoch": 1.86949591552102, "grad_norm": 529.044189453125, "learning_rate": 2.2249214536645855e-08, "loss": 24.0156, "step": 28149 }, { "epoch": 1.869562329813376, "grad_norm": 210.4412078857422, "learning_rate": 2.2226659030942784e-08, "loss": 15.2031, "step": 28150 }, { "epoch": 1.8696287441057315, "grad_norm": 223.62022399902344, "learning_rate": 2.2204114835557952e-08, "loss": 17.5, "step": 28151 }, { "epoch": 1.8696951583980872, "grad_norm": 468.47967529296875, "learning_rate": 2.2181581950752037e-08, "loss": 17.25, "step": 28152 }, { "epoch": 1.869761572690443, "grad_norm": 272.1799621582031, "learning_rate": 2.215906037678572e-08, "loss": 18.7031, "step": 28153 }, { "epoch": 1.8698279869827987, "grad_norm": 297.88037109375, "learning_rate": 2.2136550113919572e-08, "loss": 14.8281, "step": 28154 }, { "epoch": 1.8698944012751544, "grad_norm": 1934.1513671875, "learning_rate": 2.2114051162413937e-08, "loss": 15.2188, "step": 28155 }, { "epoch": 1.8699608155675103, "grad_norm": 142.61102294921875, "learning_rate": 2.2091563522529056e-08, "loss": 16.4062, "step": 28156 }, { "epoch": 1.8700272298598657, "grad_norm": 109.86335754394531, "learning_rate": 2.206908719452505e-08, "loss": 12.8594, "step": 28157 }, { "epoch": 1.8700936441522216, "grad_norm": 130.49069213867188, "learning_rate": 2.2046622178662043e-08, "loss": 18.0, "step": 28158 }, { "epoch": 1.8701600584445772, "grad_norm": 176.99301147460938, "learning_rate": 2.202416847519961e-08, "loss": 15.5, "step": 28159 }, { "epoch": 1.8702264727369329, "grad_norm": 156.1046142578125, "learning_rate": 2.2001726084397876e-08, "loss": 15.1875, "step": 28160 }, { "epoch": 1.8702928870292888, "grad_norm": 184.40594482421875, "learning_rate": 2.1979295006516184e-08, "loss": 13.1719, "step": 28161 }, { "epoch": 1.8703593013216444, "grad_norm": 477.89215087890625, "learning_rate": 2.1956875241814e-08, "loss": 16.9688, "step": 28162 }, { "epoch": 1.870425715614, "grad_norm": 350.5848388671875, "learning_rate": 2.1934466790550666e-08, "loss": 16.6875, "step": 28163 }, { "epoch": 1.870492129906356, "grad_norm": 227.9142303466797, "learning_rate": 2.1912069652985422e-08, "loss": 12.875, "step": 28164 }, { "epoch": 1.8705585441987116, "grad_norm": 428.8512268066406, "learning_rate": 2.1889683829377393e-08, "loss": 21.6562, "step": 28165 }, { "epoch": 1.8706249584910672, "grad_norm": 307.2124328613281, "learning_rate": 2.1867309319985482e-08, "loss": 17.2656, "step": 28166 }, { "epoch": 1.8706913727834231, "grad_norm": 239.04434204101562, "learning_rate": 2.1844946125068598e-08, "loss": 17.5469, "step": 28167 }, { "epoch": 1.8707577870757786, "grad_norm": 354.171630859375, "learning_rate": 2.1822594244885195e-08, "loss": 17.9844, "step": 28168 }, { "epoch": 1.8708242013681344, "grad_norm": 147.2117156982422, "learning_rate": 2.180025367969418e-08, "loss": 22.8906, "step": 28169 }, { "epoch": 1.87089061566049, "grad_norm": 214.9638671875, "learning_rate": 2.1777924429753568e-08, "loss": 14.875, "step": 28170 }, { "epoch": 1.8709570299528457, "grad_norm": 285.866943359375, "learning_rate": 2.175560649532193e-08, "loss": 23.6562, "step": 28171 }, { "epoch": 1.8710234442452016, "grad_norm": 260.95111083984375, "learning_rate": 2.1733299876657395e-08, "loss": 17.8125, "step": 28172 }, { "epoch": 1.8710898585375573, "grad_norm": 121.0927505493164, "learning_rate": 2.1711004574017866e-08, "loss": 16.0469, "step": 28173 }, { "epoch": 1.871156272829913, "grad_norm": 187.91015625, "learning_rate": 2.1688720587661357e-08, "loss": 15.75, "step": 28174 }, { "epoch": 1.8712226871222688, "grad_norm": 189.1320037841797, "learning_rate": 2.1666447917845666e-08, "loss": 11.3594, "step": 28175 }, { "epoch": 1.8712891014146245, "grad_norm": 154.55648803710938, "learning_rate": 2.164418656482825e-08, "loss": 18.6562, "step": 28176 }, { "epoch": 1.87135551570698, "grad_norm": 373.7798156738281, "learning_rate": 2.162193652886679e-08, "loss": 18.0312, "step": 28177 }, { "epoch": 1.871421929999336, "grad_norm": 875.6913452148438, "learning_rate": 2.1599697810218643e-08, "loss": 24.0938, "step": 28178 }, { "epoch": 1.8714883442916914, "grad_norm": 252.3319549560547, "learning_rate": 2.157747040914093e-08, "loss": 18.5, "step": 28179 }, { "epoch": 1.8715547585840473, "grad_norm": 101.443115234375, "learning_rate": 2.1555254325890892e-08, "loss": 13.3828, "step": 28180 }, { "epoch": 1.871621172876403, "grad_norm": 225.41844177246094, "learning_rate": 2.1533049560725436e-08, "loss": 18.3438, "step": 28181 }, { "epoch": 1.8716875871687586, "grad_norm": 157.69444274902344, "learning_rate": 2.1510856113901467e-08, "loss": 14.8438, "step": 28182 }, { "epoch": 1.8717540014611145, "grad_norm": 265.6592102050781, "learning_rate": 2.1488673985675776e-08, "loss": 19.7812, "step": 28183 }, { "epoch": 1.8718204157534701, "grad_norm": 206.39691162109375, "learning_rate": 2.1466503176304607e-08, "loss": 17.4688, "step": 28184 }, { "epoch": 1.8718868300458258, "grad_norm": 244.10231018066406, "learning_rate": 2.1444343686044975e-08, "loss": 15.7344, "step": 28185 }, { "epoch": 1.8719532443381817, "grad_norm": 159.6217041015625, "learning_rate": 2.1422195515152787e-08, "loss": 21.6328, "step": 28186 }, { "epoch": 1.8720196586305373, "grad_norm": 162.01072692871094, "learning_rate": 2.140005866388428e-08, "loss": 14.3906, "step": 28187 }, { "epoch": 1.872086072922893, "grad_norm": 879.0081176757812, "learning_rate": 2.1377933132495696e-08, "loss": 12.5625, "step": 28188 }, { "epoch": 1.8721524872152489, "grad_norm": 258.9836730957031, "learning_rate": 2.1355818921242828e-08, "loss": 16.1719, "step": 28189 }, { "epoch": 1.8722189015076043, "grad_norm": 141.5777130126953, "learning_rate": 2.1333716030381477e-08, "loss": 13.2578, "step": 28190 }, { "epoch": 1.8722853157999602, "grad_norm": 251.00082397460938, "learning_rate": 2.1311624460167545e-08, "loss": 18.5625, "step": 28191 }, { "epoch": 1.8723517300923158, "grad_norm": 101.53150939941406, "learning_rate": 2.1289544210856158e-08, "loss": 13.3125, "step": 28192 }, { "epoch": 1.8724181443846715, "grad_norm": 184.76687622070312, "learning_rate": 2.126747528270323e-08, "loss": 16.7812, "step": 28193 }, { "epoch": 1.8724845586770273, "grad_norm": 237.14309692382812, "learning_rate": 2.1245417675963662e-08, "loss": 16.8438, "step": 28194 }, { "epoch": 1.872550972969383, "grad_norm": 141.89886474609375, "learning_rate": 2.12233713908927e-08, "loss": 14.375, "step": 28195 }, { "epoch": 1.8726173872617387, "grad_norm": 280.189208984375, "learning_rate": 2.1201336427745352e-08, "loss": 32.8828, "step": 28196 }, { "epoch": 1.8726838015540945, "grad_norm": 122.32296752929688, "learning_rate": 2.117931278677665e-08, "loss": 11.9062, "step": 28197 }, { "epoch": 1.8727502158464502, "grad_norm": 224.09225463867188, "learning_rate": 2.1157300468241046e-08, "loss": 14.7031, "step": 28198 }, { "epoch": 1.8728166301388058, "grad_norm": 153.65426635742188, "learning_rate": 2.1135299472393563e-08, "loss": 12.5625, "step": 28199 }, { "epoch": 1.8728830444311617, "grad_norm": 8787.5625, "learning_rate": 2.1113309799488443e-08, "loss": 14.4219, "step": 28200 }, { "epoch": 1.8729494587235171, "grad_norm": 247.92750549316406, "learning_rate": 2.1091331449780037e-08, "loss": 14.0547, "step": 28201 }, { "epoch": 1.873015873015873, "grad_norm": 579.6483764648438, "learning_rate": 2.1069364423522583e-08, "loss": 22.9688, "step": 28202 }, { "epoch": 1.8730822873082287, "grad_norm": 190.50929260253906, "learning_rate": 2.1047408720970328e-08, "loss": 15.6094, "step": 28203 }, { "epoch": 1.8731487016005843, "grad_norm": 149.5988311767578, "learning_rate": 2.1025464342377064e-08, "loss": 14.7812, "step": 28204 }, { "epoch": 1.8732151158929402, "grad_norm": 210.82333374023438, "learning_rate": 2.100353128799681e-08, "loss": 15.9375, "step": 28205 }, { "epoch": 1.8732815301852959, "grad_norm": 223.4718017578125, "learning_rate": 2.0981609558083145e-08, "loss": 18.6562, "step": 28206 }, { "epoch": 1.8733479444776515, "grad_norm": 156.23971557617188, "learning_rate": 2.0959699152889755e-08, "loss": 15.2422, "step": 28207 }, { "epoch": 1.8734143587700074, "grad_norm": 666.7557983398438, "learning_rate": 2.0937800072669985e-08, "loss": 21.7656, "step": 28208 }, { "epoch": 1.873480773062363, "grad_norm": 116.2535400390625, "learning_rate": 2.0915912317677088e-08, "loss": 10.9531, "step": 28209 }, { "epoch": 1.8735471873547187, "grad_norm": 263.6060485839844, "learning_rate": 2.089403588816452e-08, "loss": 19.2812, "step": 28210 }, { "epoch": 1.8736136016470746, "grad_norm": 161.76248168945312, "learning_rate": 2.0872170784385078e-08, "loss": 12.3281, "step": 28211 }, { "epoch": 1.87368001593943, "grad_norm": 152.84986877441406, "learning_rate": 2.0850317006591676e-08, "loss": 16.9219, "step": 28212 }, { "epoch": 1.8737464302317859, "grad_norm": 264.11669921875, "learning_rate": 2.0828474555037333e-08, "loss": 19.4062, "step": 28213 }, { "epoch": 1.8738128445241415, "grad_norm": 75.3812255859375, "learning_rate": 2.0806643429974403e-08, "loss": 13.8125, "step": 28214 }, { "epoch": 1.8738792588164972, "grad_norm": 385.3575744628906, "learning_rate": 2.078482363165579e-08, "loss": 24.4531, "step": 28215 }, { "epoch": 1.873945673108853, "grad_norm": 172.37803649902344, "learning_rate": 2.076301516033363e-08, "loss": 15.375, "step": 28216 }, { "epoch": 1.8740120874012087, "grad_norm": 196.0188751220703, "learning_rate": 2.0741218016260164e-08, "loss": 17.875, "step": 28217 }, { "epoch": 1.8740785016935644, "grad_norm": 486.494873046875, "learning_rate": 2.0719432199687747e-08, "loss": 17.1719, "step": 28218 }, { "epoch": 1.8741449159859203, "grad_norm": 247.1370849609375, "learning_rate": 2.0697657710868068e-08, "loss": 16.9219, "step": 28219 }, { "epoch": 1.874211330278276, "grad_norm": 168.61810302734375, "learning_rate": 2.0675894550053364e-08, "loss": 13.8281, "step": 28220 }, { "epoch": 1.8742777445706316, "grad_norm": 158.26075744628906, "learning_rate": 2.0654142717495103e-08, "loss": 13.6406, "step": 28221 }, { "epoch": 1.8743441588629874, "grad_norm": 260.3311462402344, "learning_rate": 2.0632402213445088e-08, "loss": 13.7031, "step": 28222 }, { "epoch": 1.8744105731553429, "grad_norm": 189.27769470214844, "learning_rate": 2.061067303815456e-08, "loss": 14.0781, "step": 28223 }, { "epoch": 1.8744769874476988, "grad_norm": 314.1694641113281, "learning_rate": 2.0588955191875203e-08, "loss": 14.0938, "step": 28224 }, { "epoch": 1.8745434017400544, "grad_norm": 108.74877166748047, "learning_rate": 2.0567248674857928e-08, "loss": 14.5312, "step": 28225 }, { "epoch": 1.87460981603241, "grad_norm": 172.79066467285156, "learning_rate": 2.0545553487353873e-08, "loss": 21.8125, "step": 28226 }, { "epoch": 1.874676230324766, "grad_norm": 220.69000244140625, "learning_rate": 2.0523869629614165e-08, "loss": 16.3906, "step": 28227 }, { "epoch": 1.8747426446171216, "grad_norm": 266.5322265625, "learning_rate": 2.0502197101889607e-08, "loss": 17.4531, "step": 28228 }, { "epoch": 1.8748090589094772, "grad_norm": 392.6725769042969, "learning_rate": 2.0480535904430663e-08, "loss": 15.4688, "step": 28229 }, { "epoch": 1.8748754732018331, "grad_norm": 441.53033447265625, "learning_rate": 2.0458886037488244e-08, "loss": 14.7188, "step": 28230 }, { "epoch": 1.8749418874941888, "grad_norm": 407.24591064453125, "learning_rate": 2.0437247501312372e-08, "loss": 24.0, "step": 28231 }, { "epoch": 1.8750083017865444, "grad_norm": 465.0217590332031, "learning_rate": 2.0415620296153847e-08, "loss": 20.9375, "step": 28232 }, { "epoch": 1.8750747160789003, "grad_norm": 345.2637023925781, "learning_rate": 2.0394004422262357e-08, "loss": 22.875, "step": 28233 }, { "epoch": 1.8751411303712557, "grad_norm": 550.9498291015625, "learning_rate": 2.0372399879888148e-08, "loss": 18.5312, "step": 28234 }, { "epoch": 1.8752075446636116, "grad_norm": 200.2586669921875, "learning_rate": 2.035080666928124e-08, "loss": 11.4844, "step": 28235 }, { "epoch": 1.8752739589559675, "grad_norm": 358.6073913574219, "learning_rate": 2.0329224790691212e-08, "loss": 21.4219, "step": 28236 }, { "epoch": 1.875340373248323, "grad_norm": 844.9952392578125, "learning_rate": 2.0307654244367868e-08, "loss": 13.75, "step": 28237 }, { "epoch": 1.8754067875406788, "grad_norm": 318.15655517578125, "learning_rate": 2.0286095030560556e-08, "loss": 16.5469, "step": 28238 }, { "epoch": 1.8754732018330345, "grad_norm": 278.3216857910156, "learning_rate": 2.0264547149518975e-08, "loss": 18.2031, "step": 28239 }, { "epoch": 1.87553961612539, "grad_norm": 531.0392456054688, "learning_rate": 2.0243010601491918e-08, "loss": 12.2969, "step": 28240 }, { "epoch": 1.875606030417746, "grad_norm": 326.0338134765625, "learning_rate": 2.0221485386728964e-08, "loss": 15.1719, "step": 28241 }, { "epoch": 1.8756724447101016, "grad_norm": 419.3274230957031, "learning_rate": 2.019997150547881e-08, "loss": 20.6875, "step": 28242 }, { "epoch": 1.8757388590024573, "grad_norm": 138.66592407226562, "learning_rate": 2.0178468957990356e-08, "loss": 12.7656, "step": 28243 }, { "epoch": 1.8758052732948132, "grad_norm": 200.0592803955078, "learning_rate": 2.0156977744512304e-08, "loss": 18.5938, "step": 28244 }, { "epoch": 1.8758716875871686, "grad_norm": 194.2452392578125, "learning_rate": 2.013549786529345e-08, "loss": 15.5625, "step": 28245 }, { "epoch": 1.8759381018795245, "grad_norm": 136.08154296875, "learning_rate": 2.0114029320582148e-08, "loss": 14.4531, "step": 28246 }, { "epoch": 1.8760045161718804, "grad_norm": 242.62167358398438, "learning_rate": 2.009257211062676e-08, "loss": 16.3906, "step": 28247 }, { "epoch": 1.8760709304642358, "grad_norm": 241.27207946777344, "learning_rate": 2.0071126235675197e-08, "loss": 15.8125, "step": 28248 }, { "epoch": 1.8761373447565917, "grad_norm": 1507.2047119140625, "learning_rate": 2.0049691695976035e-08, "loss": 14.4219, "step": 28249 }, { "epoch": 1.8762037590489473, "grad_norm": 215.44932556152344, "learning_rate": 2.0028268491776968e-08, "loss": 18.1719, "step": 28250 }, { "epoch": 1.876270173341303, "grad_norm": 216.8155517578125, "learning_rate": 2.0006856623325685e-08, "loss": 16.6875, "step": 28251 }, { "epoch": 1.8763365876336588, "grad_norm": 273.11285400390625, "learning_rate": 1.998545609086999e-08, "loss": 19.0625, "step": 28252 }, { "epoch": 1.8764030019260145, "grad_norm": 193.63221740722656, "learning_rate": 1.996406689465746e-08, "loss": 16.1562, "step": 28253 }, { "epoch": 1.8764694162183702, "grad_norm": 272.51910400390625, "learning_rate": 1.9942689034935456e-08, "loss": 14.0938, "step": 28254 }, { "epoch": 1.876535830510726, "grad_norm": 368.515869140625, "learning_rate": 1.9921322511951445e-08, "loss": 16.7656, "step": 28255 }, { "epoch": 1.8766022448030815, "grad_norm": 604.0803833007812, "learning_rate": 1.9899967325952227e-08, "loss": 11.5078, "step": 28256 }, { "epoch": 1.8766686590954373, "grad_norm": 148.8046875, "learning_rate": 1.9878623477185164e-08, "loss": 15.8594, "step": 28257 }, { "epoch": 1.8767350733877932, "grad_norm": 127.95410919189453, "learning_rate": 1.9857290965897054e-08, "loss": 11.8672, "step": 28258 }, { "epoch": 1.8768014876801487, "grad_norm": 285.3748779296875, "learning_rate": 1.9835969792334484e-08, "loss": 15.0938, "step": 28259 }, { "epoch": 1.8768679019725045, "grad_norm": 159.90719604492188, "learning_rate": 1.981465995674425e-08, "loss": 16.4375, "step": 28260 }, { "epoch": 1.8769343162648602, "grad_norm": 86.53592681884766, "learning_rate": 1.9793361459372937e-08, "loss": 15.1562, "step": 28261 }, { "epoch": 1.8770007305572158, "grad_norm": 341.84124755859375, "learning_rate": 1.977207430046657e-08, "loss": 19.0625, "step": 28262 }, { "epoch": 1.8770671448495717, "grad_norm": 141.0252227783203, "learning_rate": 1.9750798480271835e-08, "loss": 11.25, "step": 28263 }, { "epoch": 1.8771335591419274, "grad_norm": 233.4674835205078, "learning_rate": 1.9729533999034544e-08, "loss": 19.5312, "step": 28264 }, { "epoch": 1.877199973434283, "grad_norm": 281.8678894042969, "learning_rate": 1.9708280857000715e-08, "loss": 15.8281, "step": 28265 }, { "epoch": 1.877266387726639, "grad_norm": 160.6113739013672, "learning_rate": 1.9687039054416267e-08, "loss": 16.6875, "step": 28266 }, { "epoch": 1.8773328020189943, "grad_norm": 176.12913513183594, "learning_rate": 1.966580859152678e-08, "loss": 15.1719, "step": 28267 }, { "epoch": 1.8773992163113502, "grad_norm": 262.4942932128906, "learning_rate": 1.9644589468578054e-08, "loss": 13.6562, "step": 28268 }, { "epoch": 1.877465630603706, "grad_norm": 336.32196044921875, "learning_rate": 1.9623381685815455e-08, "loss": 12.6875, "step": 28269 }, { "epoch": 1.8775320448960615, "grad_norm": 239.34169006347656, "learning_rate": 1.9602185243484003e-08, "loss": 13.4531, "step": 28270 }, { "epoch": 1.8775984591884174, "grad_norm": 198.25221252441406, "learning_rate": 1.9581000141829395e-08, "loss": 18.1094, "step": 28271 }, { "epoch": 1.877664873480773, "grad_norm": 202.6751708984375, "learning_rate": 1.955982638109632e-08, "loss": 20.0312, "step": 28272 }, { "epoch": 1.8777312877731287, "grad_norm": 371.4981384277344, "learning_rate": 1.953866396152981e-08, "loss": 14.8906, "step": 28273 }, { "epoch": 1.8777977020654846, "grad_norm": 268.8198547363281, "learning_rate": 1.9517512883374664e-08, "loss": 19.4375, "step": 28274 }, { "epoch": 1.8778641163578402, "grad_norm": 190.4718017578125, "learning_rate": 1.9496373146875577e-08, "loss": 17.3438, "step": 28275 }, { "epoch": 1.8779305306501959, "grad_norm": 134.0428009033203, "learning_rate": 1.947524475227713e-08, "loss": 10.6016, "step": 28276 }, { "epoch": 1.8779969449425518, "grad_norm": 784.2117919921875, "learning_rate": 1.9454127699823575e-08, "loss": 11.3281, "step": 28277 }, { "epoch": 1.8780633592349072, "grad_norm": 276.968017578125, "learning_rate": 1.9433021989759156e-08, "loss": 14.9219, "step": 28278 }, { "epoch": 1.878129773527263, "grad_norm": 289.50726318359375, "learning_rate": 1.941192762232824e-08, "loss": 32.3438, "step": 28279 }, { "epoch": 1.878196187819619, "grad_norm": 142.76019287109375, "learning_rate": 1.9390844597774737e-08, "loss": 13.8125, "step": 28280 }, { "epoch": 1.8782626021119744, "grad_norm": 201.01612854003906, "learning_rate": 1.9369772916342342e-08, "loss": 16.0625, "step": 28281 }, { "epoch": 1.8783290164043303, "grad_norm": 136.53347778320312, "learning_rate": 1.9348712578275194e-08, "loss": 11.8047, "step": 28282 }, { "epoch": 1.878395430696686, "grad_norm": 170.7807159423828, "learning_rate": 1.9327663583816434e-08, "loss": 14.5625, "step": 28283 }, { "epoch": 1.8784618449890416, "grad_norm": 258.89581298828125, "learning_rate": 1.9306625933209863e-08, "loss": 11.6875, "step": 28284 }, { "epoch": 1.8785282592813974, "grad_norm": 324.1020202636719, "learning_rate": 1.9285599626698733e-08, "loss": 15.2812, "step": 28285 }, { "epoch": 1.878594673573753, "grad_norm": 112.00654602050781, "learning_rate": 1.9264584664526408e-08, "loss": 13.2188, "step": 28286 }, { "epoch": 1.8786610878661087, "grad_norm": 312.9886779785156, "learning_rate": 1.9243581046935577e-08, "loss": 17.25, "step": 28287 }, { "epoch": 1.8787275021584646, "grad_norm": 341.3362731933594, "learning_rate": 1.9222588774169602e-08, "loss": 13.0, "step": 28288 }, { "epoch": 1.87879391645082, "grad_norm": 151.05906677246094, "learning_rate": 1.920160784647118e-08, "loss": 17.2969, "step": 28289 }, { "epoch": 1.878860330743176, "grad_norm": 146.31922912597656, "learning_rate": 1.9180638264083004e-08, "loss": 17.9062, "step": 28290 }, { "epoch": 1.8789267450355318, "grad_norm": 173.07667541503906, "learning_rate": 1.915968002724755e-08, "loss": 14.7656, "step": 28291 }, { "epoch": 1.8789931593278872, "grad_norm": 163.23446655273438, "learning_rate": 1.9138733136207396e-08, "loss": 12.3828, "step": 28292 }, { "epoch": 1.8790595736202431, "grad_norm": 187.95187377929688, "learning_rate": 1.9117797591204798e-08, "loss": 15.3906, "step": 28293 }, { "epoch": 1.8791259879125988, "grad_norm": 132.70819091796875, "learning_rate": 1.9096873392482005e-08, "loss": 14.75, "step": 28294 }, { "epoch": 1.8791924022049544, "grad_norm": 1182.263427734375, "learning_rate": 1.9075960540280712e-08, "loss": 17.3125, "step": 28295 }, { "epoch": 1.8792588164973103, "grad_norm": 476.6328125, "learning_rate": 1.905505903484328e-08, "loss": 13.3594, "step": 28296 }, { "epoch": 1.879325230789666, "grad_norm": 215.76051330566406, "learning_rate": 1.9034168876411184e-08, "loss": 21.625, "step": 28297 }, { "epoch": 1.8793916450820216, "grad_norm": 266.17559814453125, "learning_rate": 1.9013290065226228e-08, "loss": 16.3438, "step": 28298 }, { "epoch": 1.8794580593743775, "grad_norm": 186.91212463378906, "learning_rate": 1.899242260152978e-08, "loss": 13.3438, "step": 28299 }, { "epoch": 1.879524473666733, "grad_norm": 196.43206787109375, "learning_rate": 1.8971566485563305e-08, "loss": 13.4531, "step": 28300 }, { "epoch": 1.8795908879590888, "grad_norm": 170.76791381835938, "learning_rate": 1.8950721717568063e-08, "loss": 20.2969, "step": 28301 }, { "epoch": 1.8796573022514447, "grad_norm": 183.00022888183594, "learning_rate": 1.89298882977853e-08, "loss": 14.4688, "step": 28302 }, { "epoch": 1.8797237165438, "grad_norm": 188.2234649658203, "learning_rate": 1.890906622645583e-08, "loss": 12.9609, "step": 28303 }, { "epoch": 1.879790130836156, "grad_norm": 509.3818054199219, "learning_rate": 1.888825550382045e-08, "loss": 13.9688, "step": 28304 }, { "epoch": 1.8798565451285116, "grad_norm": 251.22422790527344, "learning_rate": 1.8867456130119973e-08, "loss": 15.4531, "step": 28305 }, { "epoch": 1.8799229594208673, "grad_norm": 170.75833129882812, "learning_rate": 1.8846668105595098e-08, "loss": 15.2188, "step": 28306 }, { "epoch": 1.8799893737132232, "grad_norm": 409.61468505859375, "learning_rate": 1.8825891430486186e-08, "loss": 12.1562, "step": 28307 }, { "epoch": 1.8800557880055788, "grad_norm": 258.3174133300781, "learning_rate": 1.8805126105033597e-08, "loss": 19.0, "step": 28308 }, { "epoch": 1.8801222022979345, "grad_norm": 757.0247192382812, "learning_rate": 1.8784372129477477e-08, "loss": 12.3438, "step": 28309 }, { "epoch": 1.8801886165902904, "grad_norm": 131.71853637695312, "learning_rate": 1.8763629504057964e-08, "loss": 14.8047, "step": 28310 }, { "epoch": 1.8802550308826458, "grad_norm": 248.2534942626953, "learning_rate": 1.874289822901498e-08, "loss": 21.1875, "step": 28311 }, { "epoch": 1.8803214451750017, "grad_norm": 152.5607452392578, "learning_rate": 1.872217830458822e-08, "loss": 16.4688, "step": 28312 }, { "epoch": 1.8803878594673575, "grad_norm": 145.58447265625, "learning_rate": 1.8701469731017604e-08, "loss": 14.0469, "step": 28313 }, { "epoch": 1.880454273759713, "grad_norm": 211.71560668945312, "learning_rate": 1.8680772508542498e-08, "loss": 20.7344, "step": 28314 }, { "epoch": 1.8805206880520688, "grad_norm": 261.4472351074219, "learning_rate": 1.8660086637402372e-08, "loss": 11.6406, "step": 28315 }, { "epoch": 1.8805871023444245, "grad_norm": 280.9984436035156, "learning_rate": 1.8639412117836373e-08, "loss": 15.8594, "step": 28316 }, { "epoch": 1.8806535166367802, "grad_norm": 148.3756866455078, "learning_rate": 1.861874895008386e-08, "loss": 15.4531, "step": 28317 }, { "epoch": 1.880719930929136, "grad_norm": 233.1880645751953, "learning_rate": 1.8598097134383762e-08, "loss": 18.2188, "step": 28318 }, { "epoch": 1.8807863452214917, "grad_norm": 87.24056243896484, "learning_rate": 1.857745667097499e-08, "loss": 13.5391, "step": 28319 }, { "epoch": 1.8808527595138473, "grad_norm": 149.6499481201172, "learning_rate": 1.8556827560096245e-08, "loss": 12.2812, "step": 28320 }, { "epoch": 1.8809191738062032, "grad_norm": 290.30206298828125, "learning_rate": 1.8536209801986225e-08, "loss": 18.4844, "step": 28321 }, { "epoch": 1.8809855880985586, "grad_norm": 170.33615112304688, "learning_rate": 1.8515603396883406e-08, "loss": 15.9219, "step": 28322 }, { "epoch": 1.8810520023909145, "grad_norm": 206.37405395507812, "learning_rate": 1.849500834502604e-08, "loss": 12.8438, "step": 28323 }, { "epoch": 1.8811184166832704, "grad_norm": 312.7946472167969, "learning_rate": 1.8474424646652497e-08, "loss": 16.5469, "step": 28324 }, { "epoch": 1.8811848309756258, "grad_norm": 143.2984619140625, "learning_rate": 1.8453852302000806e-08, "loss": 15.7344, "step": 28325 }, { "epoch": 1.8812512452679817, "grad_norm": 215.87301635742188, "learning_rate": 1.8433291311308885e-08, "loss": 11.5781, "step": 28326 }, { "epoch": 1.8813176595603374, "grad_norm": 371.6259460449219, "learning_rate": 1.841274167481488e-08, "loss": 18.4688, "step": 28327 }, { "epoch": 1.881384073852693, "grad_norm": 277.224609375, "learning_rate": 1.8392203392756044e-08, "loss": 15.1719, "step": 28328 }, { "epoch": 1.881450488145049, "grad_norm": 598.8436279296875, "learning_rate": 1.83716764653703e-08, "loss": 14.0781, "step": 28329 }, { "epoch": 1.8815169024374045, "grad_norm": 183.16232299804688, "learning_rate": 1.8351160892894902e-08, "loss": 11.9688, "step": 28330 }, { "epoch": 1.8815833167297602, "grad_norm": 243.5357208251953, "learning_rate": 1.8330656675567214e-08, "loss": 15.0469, "step": 28331 }, { "epoch": 1.881649731022116, "grad_norm": 178.3851776123047, "learning_rate": 1.831016381362438e-08, "loss": 13.9375, "step": 28332 }, { "epoch": 1.8817161453144715, "grad_norm": 130.0161590576172, "learning_rate": 1.8289682307303654e-08, "loss": 15.625, "step": 28333 }, { "epoch": 1.8817825596068274, "grad_norm": 297.2086181640625, "learning_rate": 1.8269212156841628e-08, "loss": 13.4609, "step": 28334 }, { "epoch": 1.8818489738991833, "grad_norm": 239.3963165283203, "learning_rate": 1.8248753362475445e-08, "loss": 18.2969, "step": 28335 }, { "epoch": 1.8819153881915387, "grad_norm": 110.9446792602539, "learning_rate": 1.8228305924441466e-08, "loss": 12.6562, "step": 28336 }, { "epoch": 1.8819818024838946, "grad_norm": 351.1202697753906, "learning_rate": 1.8207869842976286e-08, "loss": 17.875, "step": 28337 }, { "epoch": 1.8820482167762502, "grad_norm": 248.08016967773438, "learning_rate": 1.818744511831638e-08, "loss": 13.1797, "step": 28338 }, { "epoch": 1.8821146310686059, "grad_norm": 222.58627319335938, "learning_rate": 1.816703175069789e-08, "loss": 15.2188, "step": 28339 }, { "epoch": 1.8821810453609618, "grad_norm": 219.05242919921875, "learning_rate": 1.8146629740357076e-08, "loss": 15.5625, "step": 28340 }, { "epoch": 1.8822474596533174, "grad_norm": 160.78909301757812, "learning_rate": 1.8126239087529972e-08, "loss": 15.0781, "step": 28341 }, { "epoch": 1.882313873945673, "grad_norm": 245.1977081298828, "learning_rate": 1.810585979245216e-08, "loss": 12.6406, "step": 28342 }, { "epoch": 1.882380288238029, "grad_norm": 167.34286499023438, "learning_rate": 1.808549185535979e-08, "loss": 16.6719, "step": 28343 }, { "epoch": 1.8824467025303844, "grad_norm": 339.990478515625, "learning_rate": 1.806513527648812e-08, "loss": 17.125, "step": 28344 }, { "epoch": 1.8825131168227403, "grad_norm": 160.0294952392578, "learning_rate": 1.8044790056072734e-08, "loss": 21.5625, "step": 28345 }, { "epoch": 1.8825795311150961, "grad_norm": 184.62657165527344, "learning_rate": 1.8024456194349003e-08, "loss": 15.0, "step": 28346 }, { "epoch": 1.8826459454074516, "grad_norm": 454.7027587890625, "learning_rate": 1.800413369155218e-08, "loss": 15.9688, "step": 28347 }, { "epoch": 1.8827123596998074, "grad_norm": 342.1954650878906, "learning_rate": 1.7983822547917192e-08, "loss": 19.4062, "step": 28348 }, { "epoch": 1.882778773992163, "grad_norm": 353.3032531738281, "learning_rate": 1.7963522763679073e-08, "loss": 13.4531, "step": 28349 }, { "epoch": 1.8828451882845187, "grad_norm": 130.66188049316406, "learning_rate": 1.7943234339072744e-08, "loss": 14.25, "step": 28350 }, { "epoch": 1.8829116025768746, "grad_norm": 349.86993408203125, "learning_rate": 1.7922957274332684e-08, "loss": 22.625, "step": 28351 }, { "epoch": 1.8829780168692303, "grad_norm": 151.84967041015625, "learning_rate": 1.7902691569693706e-08, "loss": 18.0312, "step": 28352 }, { "epoch": 1.883044431161586, "grad_norm": 127.42434692382812, "learning_rate": 1.7882437225389956e-08, "loss": 12.4375, "step": 28353 }, { "epoch": 1.8831108454539418, "grad_norm": 480.7781677246094, "learning_rate": 1.7862194241655916e-08, "loss": 17.9453, "step": 28354 }, { "epoch": 1.8831772597462972, "grad_norm": 188.46023559570312, "learning_rate": 1.7841962618725615e-08, "loss": 11.8516, "step": 28355 }, { "epoch": 1.8832436740386531, "grad_norm": 139.03561401367188, "learning_rate": 1.7821742356833202e-08, "loss": 15.1406, "step": 28356 }, { "epoch": 1.883310088331009, "grad_norm": 222.26708984375, "learning_rate": 1.780153345621249e-08, "loss": 13.7188, "step": 28357 }, { "epoch": 1.8833765026233644, "grad_norm": 161.82171630859375, "learning_rate": 1.7781335917097405e-08, "loss": 15.8281, "step": 28358 }, { "epoch": 1.8834429169157203, "grad_norm": 136.8736572265625, "learning_rate": 1.7761149739721206e-08, "loss": 13.7266, "step": 28359 }, { "epoch": 1.883509331208076, "grad_norm": 334.19122314453125, "learning_rate": 1.774097492431781e-08, "loss": 18.8281, "step": 28360 }, { "epoch": 1.8835757455004316, "grad_norm": 235.68324279785156, "learning_rate": 1.7720811471120367e-08, "loss": 13.9844, "step": 28361 }, { "epoch": 1.8836421597927875, "grad_norm": 211.5272979736328, "learning_rate": 1.7700659380362138e-08, "loss": 14.625, "step": 28362 }, { "epoch": 1.8837085740851431, "grad_norm": 226.03623962402344, "learning_rate": 1.7680518652276266e-08, "loss": 19.2188, "step": 28363 }, { "epoch": 1.8837749883774988, "grad_norm": 227.65078735351562, "learning_rate": 1.7660389287095788e-08, "loss": 16.5, "step": 28364 }, { "epoch": 1.8838414026698547, "grad_norm": 366.682861328125, "learning_rate": 1.764027128505341e-08, "loss": 19.5, "step": 28365 }, { "epoch": 1.88390781696221, "grad_norm": 359.0133361816406, "learning_rate": 1.762016464638194e-08, "loss": 31.6562, "step": 28366 }, { "epoch": 1.883974231254566, "grad_norm": 164.07176208496094, "learning_rate": 1.7600069371313864e-08, "loss": 18.6875, "step": 28367 }, { "epoch": 1.8840406455469219, "grad_norm": 189.91632080078125, "learning_rate": 1.757998546008177e-08, "loss": 16.8125, "step": 28368 }, { "epoch": 1.8841070598392773, "grad_norm": 153.04983520507812, "learning_rate": 1.7559912912917918e-08, "loss": 13.625, "step": 28369 }, { "epoch": 1.8841734741316332, "grad_norm": 403.0487365722656, "learning_rate": 1.7539851730054457e-08, "loss": 14.6875, "step": 28370 }, { "epoch": 1.8842398884239888, "grad_norm": 347.7681884765625, "learning_rate": 1.751980191172342e-08, "loss": 19.6719, "step": 28371 }, { "epoch": 1.8843063027163445, "grad_norm": 86.1606216430664, "learning_rate": 1.7499763458156847e-08, "loss": 11.7656, "step": 28372 }, { "epoch": 1.8843727170087003, "grad_norm": 138.12123107910156, "learning_rate": 1.747973636958644e-08, "loss": 13.9062, "step": 28373 }, { "epoch": 1.884439131301056, "grad_norm": 214.54148864746094, "learning_rate": 1.7459720646243902e-08, "loss": 14.9688, "step": 28374 }, { "epoch": 1.8845055455934117, "grad_norm": 498.6021423339844, "learning_rate": 1.7439716288360828e-08, "loss": 16.1094, "step": 28375 }, { "epoch": 1.8845719598857675, "grad_norm": 233.9193878173828, "learning_rate": 1.7419723296168366e-08, "loss": 11.0078, "step": 28376 }, { "epoch": 1.884638374178123, "grad_norm": 228.8492889404297, "learning_rate": 1.7399741669898106e-08, "loss": 20.2969, "step": 28377 }, { "epoch": 1.8847047884704788, "grad_norm": 266.32708740234375, "learning_rate": 1.7379771409780975e-08, "loss": 23.1875, "step": 28378 }, { "epoch": 1.8847712027628347, "grad_norm": 228.0210418701172, "learning_rate": 1.735981251604801e-08, "loss": 15.3438, "step": 28379 }, { "epoch": 1.8848376170551902, "grad_norm": 229.483154296875, "learning_rate": 1.7339864988930254e-08, "loss": 18.0781, "step": 28380 }, { "epoch": 1.884904031347546, "grad_norm": 172.4936981201172, "learning_rate": 1.731992882865807e-08, "loss": 17.3125, "step": 28381 }, { "epoch": 1.8849704456399017, "grad_norm": 161.2156524658203, "learning_rate": 1.73000040354625e-08, "loss": 13.375, "step": 28382 }, { "epoch": 1.8850368599322573, "grad_norm": 147.48141479492188, "learning_rate": 1.7280090609573695e-08, "loss": 18.0781, "step": 28383 }, { "epoch": 1.8851032742246132, "grad_norm": 190.073486328125, "learning_rate": 1.7260188551222242e-08, "loss": 16.1094, "step": 28384 }, { "epoch": 1.8851696885169689, "grad_norm": 180.11837768554688, "learning_rate": 1.7240297860638185e-08, "loss": 18.4766, "step": 28385 }, { "epoch": 1.8852361028093245, "grad_norm": 252.5537109375, "learning_rate": 1.7220418538051672e-08, "loss": 14.6719, "step": 28386 }, { "epoch": 1.8853025171016804, "grad_norm": 148.41493225097656, "learning_rate": 1.720055058369263e-08, "loss": 13.9062, "step": 28387 }, { "epoch": 1.885368931394036, "grad_norm": 286.8529968261719, "learning_rate": 1.7180693997790986e-08, "loss": 14.8125, "step": 28388 }, { "epoch": 1.8854353456863917, "grad_norm": 222.2826690673828, "learning_rate": 1.7160848780576332e-08, "loss": 11.1406, "step": 28389 }, { "epoch": 1.8855017599787476, "grad_norm": 331.3114318847656, "learning_rate": 1.7141014932278153e-08, "loss": 14.9688, "step": 28390 }, { "epoch": 1.885568174271103, "grad_norm": 181.88616943359375, "learning_rate": 1.7121192453126042e-08, "loss": 15.5781, "step": 28391 }, { "epoch": 1.885634588563459, "grad_norm": 236.22715759277344, "learning_rate": 1.7101381343349154e-08, "loss": 14.0938, "step": 28392 }, { "epoch": 1.8857010028558145, "grad_norm": 133.7020263671875, "learning_rate": 1.7081581603176743e-08, "loss": 12.375, "step": 28393 }, { "epoch": 1.8857674171481702, "grad_norm": 761.673828125, "learning_rate": 1.706179323283785e-08, "loss": 14.4688, "step": 28394 }, { "epoch": 1.885833831440526, "grad_norm": 142.81593322753906, "learning_rate": 1.7042016232561296e-08, "loss": 16.9531, "step": 28395 }, { "epoch": 1.8859002457328817, "grad_norm": 206.97125244140625, "learning_rate": 1.7022250602575892e-08, "loss": 16.3594, "step": 28396 }, { "epoch": 1.8859666600252374, "grad_norm": 180.65098571777344, "learning_rate": 1.7002496343110463e-08, "loss": 12.6875, "step": 28397 }, { "epoch": 1.8860330743175933, "grad_norm": 337.6407775878906, "learning_rate": 1.698275345439304e-08, "loss": 17.1406, "step": 28398 }, { "epoch": 1.886099488609949, "grad_norm": 372.0052795410156, "learning_rate": 1.6963021936652446e-08, "loss": 15.7188, "step": 28399 }, { "epoch": 1.8861659029023046, "grad_norm": 236.26669311523438, "learning_rate": 1.6943301790116716e-08, "loss": 15.5469, "step": 28400 }, { "epoch": 1.8862323171946604, "grad_norm": 122.11125183105469, "learning_rate": 1.6923593015014002e-08, "loss": 14.1562, "step": 28401 }, { "epoch": 1.8862987314870159, "grad_norm": 163.4884490966797, "learning_rate": 1.6903895611572237e-08, "loss": 15.3438, "step": 28402 }, { "epoch": 1.8863651457793718, "grad_norm": 113.18782043457031, "learning_rate": 1.688420958001946e-08, "loss": 14.0781, "step": 28403 }, { "epoch": 1.8864315600717274, "grad_norm": 199.49578857421875, "learning_rate": 1.686453492058315e-08, "loss": 17.1406, "step": 28404 }, { "epoch": 1.886497974364083, "grad_norm": 986.1841430664062, "learning_rate": 1.6844871633491132e-08, "loss": 15.2969, "step": 28405 }, { "epoch": 1.886564388656439, "grad_norm": 152.63876342773438, "learning_rate": 1.682521971897044e-08, "loss": 14.8281, "step": 28406 }, { "epoch": 1.8866308029487946, "grad_norm": 393.5640869140625, "learning_rate": 1.6805579177248897e-08, "loss": 17.9844, "step": 28407 }, { "epoch": 1.8866972172411502, "grad_norm": 144.84800720214844, "learning_rate": 1.6785950008553428e-08, "loss": 12.4062, "step": 28408 }, { "epoch": 1.8867636315335061, "grad_norm": 148.65469360351562, "learning_rate": 1.676633221311108e-08, "loss": 16.8906, "step": 28409 }, { "epoch": 1.8868300458258618, "grad_norm": 141.5421905517578, "learning_rate": 1.6746725791148886e-08, "loss": 13.7188, "step": 28410 }, { "epoch": 1.8868964601182174, "grad_norm": 320.1067199707031, "learning_rate": 1.672713074289367e-08, "loss": 19.2812, "step": 28411 }, { "epoch": 1.8869628744105733, "grad_norm": 142.4094696044922, "learning_rate": 1.6707547068571805e-08, "loss": 13.1719, "step": 28412 }, { "epoch": 1.8870292887029287, "grad_norm": 235.45957946777344, "learning_rate": 1.668797476841033e-08, "loss": 21.625, "step": 28413 }, { "epoch": 1.8870957029952846, "grad_norm": 298.51177978515625, "learning_rate": 1.6668413842635175e-08, "loss": 16.4688, "step": 28414 }, { "epoch": 1.8871621172876403, "grad_norm": 199.5161895751953, "learning_rate": 1.6648864291472718e-08, "loss": 15.6562, "step": 28415 }, { "epoch": 1.887228531579996, "grad_norm": 275.28021240234375, "learning_rate": 1.6629326115149223e-08, "loss": 13.5625, "step": 28416 }, { "epoch": 1.8872949458723518, "grad_norm": 130.90573120117188, "learning_rate": 1.6609799313890727e-08, "loss": 14.6719, "step": 28417 }, { "epoch": 1.8873613601647075, "grad_norm": 143.07362365722656, "learning_rate": 1.659028388792294e-08, "loss": 19.6094, "step": 28418 }, { "epoch": 1.8874277744570631, "grad_norm": 239.08395385742188, "learning_rate": 1.657077983747168e-08, "loss": 14.4844, "step": 28419 }, { "epoch": 1.887494188749419, "grad_norm": 163.04319763183594, "learning_rate": 1.655128716276255e-08, "loss": 14.625, "step": 28420 }, { "epoch": 1.8875606030417746, "grad_norm": 219.2458953857422, "learning_rate": 1.6531805864021142e-08, "loss": 14.0625, "step": 28421 }, { "epoch": 1.8876270173341303, "grad_norm": 259.6915283203125, "learning_rate": 1.651233594147261e-08, "loss": 16.3438, "step": 28422 }, { "epoch": 1.8876934316264862, "grad_norm": 413.9083251953125, "learning_rate": 1.649287739534222e-08, "loss": 23.6875, "step": 28423 }, { "epoch": 1.8877598459188416, "grad_norm": 225.51515197753906, "learning_rate": 1.6473430225855234e-08, "loss": 13.4375, "step": 28424 }, { "epoch": 1.8878262602111975, "grad_norm": 301.674072265625, "learning_rate": 1.6453994433236363e-08, "loss": 21.3125, "step": 28425 }, { "epoch": 1.8878926745035531, "grad_norm": 203.09603881835938, "learning_rate": 1.6434570017710536e-08, "loss": 18.2031, "step": 28426 }, { "epoch": 1.8879590887959088, "grad_norm": 286.08843994140625, "learning_rate": 1.6415156979502463e-08, "loss": 15.7969, "step": 28427 }, { "epoch": 1.8880255030882647, "grad_norm": 423.8344421386719, "learning_rate": 1.6395755318836858e-08, "loss": 18.0781, "step": 28428 }, { "epoch": 1.8880919173806203, "grad_norm": 308.29815673828125, "learning_rate": 1.6376365035937756e-08, "loss": 10.5312, "step": 28429 }, { "epoch": 1.888158331672976, "grad_norm": 194.81268310546875, "learning_rate": 1.635698613102976e-08, "loss": 13.875, "step": 28430 }, { "epoch": 1.8882247459653319, "grad_norm": 199.78143310546875, "learning_rate": 1.6337618604336913e-08, "loss": 15.0781, "step": 28431 }, { "epoch": 1.8882911602576875, "grad_norm": 186.935302734375, "learning_rate": 1.6318262456083364e-08, "loss": 19.75, "step": 28432 }, { "epoch": 1.8883575745500432, "grad_norm": 154.0738067626953, "learning_rate": 1.629891768649283e-08, "loss": 17.6094, "step": 28433 }, { "epoch": 1.888423988842399, "grad_norm": 236.01341247558594, "learning_rate": 1.627958429578924e-08, "loss": 15.0312, "step": 28434 }, { "epoch": 1.8884904031347545, "grad_norm": 155.03768920898438, "learning_rate": 1.626026228419608e-08, "loss": 16.625, "step": 28435 }, { "epoch": 1.8885568174271103, "grad_norm": 275.2067565917969, "learning_rate": 1.6240951651937173e-08, "loss": 17.0938, "step": 28436 }, { "epoch": 1.888623231719466, "grad_norm": 166.7862091064453, "learning_rate": 1.6221652399235453e-08, "loss": 15.0, "step": 28437 }, { "epoch": 1.8886896460118217, "grad_norm": 428.2873840332031, "learning_rate": 1.6202364526314515e-08, "loss": 16.3281, "step": 28438 }, { "epoch": 1.8887560603041775, "grad_norm": 408.9044494628906, "learning_rate": 1.6183088033397184e-08, "loss": 17.6719, "step": 28439 }, { "epoch": 1.8888224745965332, "grad_norm": 177.7811279296875, "learning_rate": 1.6163822920706726e-08, "loss": 13.5547, "step": 28440 }, { "epoch": 1.8888888888888888, "grad_norm": 353.8228759765625, "learning_rate": 1.614456918846574e-08, "loss": 15.6562, "step": 28441 }, { "epoch": 1.8889553031812447, "grad_norm": 525.7887573242188, "learning_rate": 1.6125326836897158e-08, "loss": 16.7656, "step": 28442 }, { "epoch": 1.8890217174736004, "grad_norm": 187.08367919921875, "learning_rate": 1.6106095866223358e-08, "loss": 24.5, "step": 28443 }, { "epoch": 1.889088131765956, "grad_norm": 114.13835144042969, "learning_rate": 1.608687627666705e-08, "loss": 18.9062, "step": 28444 }, { "epoch": 1.889154546058312, "grad_norm": 115.83586883544922, "learning_rate": 1.6067668068450167e-08, "loss": 11.4062, "step": 28445 }, { "epoch": 1.8892209603506673, "grad_norm": 314.0089416503906, "learning_rate": 1.6048471241795425e-08, "loss": 12.375, "step": 28446 }, { "epoch": 1.8892873746430232, "grad_norm": 275.13787841796875, "learning_rate": 1.602928579692442e-08, "loss": 15.4141, "step": 28447 }, { "epoch": 1.8893537889353789, "grad_norm": 273.5812683105469, "learning_rate": 1.6010111734059195e-08, "loss": 12.1328, "step": 28448 }, { "epoch": 1.8894202032277345, "grad_norm": 360.69024658203125, "learning_rate": 1.5990949053421687e-08, "loss": 18.2188, "step": 28449 }, { "epoch": 1.8894866175200904, "grad_norm": 467.23785400390625, "learning_rate": 1.5971797755233496e-08, "loss": 16.6562, "step": 28450 }, { "epoch": 1.889553031812446, "grad_norm": 500.8082275390625, "learning_rate": 1.5952657839716e-08, "loss": 16.8906, "step": 28451 }, { "epoch": 1.8896194461048017, "grad_norm": 215.6774444580078, "learning_rate": 1.5933529307090797e-08, "loss": 18.7969, "step": 28452 }, { "epoch": 1.8896858603971576, "grad_norm": 107.8657455444336, "learning_rate": 1.591441215757916e-08, "loss": 19.0156, "step": 28453 }, { "epoch": 1.8897522746895132, "grad_norm": 186.71400451660156, "learning_rate": 1.589530639140202e-08, "loss": 11.6172, "step": 28454 }, { "epoch": 1.8898186889818689, "grad_norm": 271.6476745605469, "learning_rate": 1.5876212008780533e-08, "loss": 28.1719, "step": 28455 }, { "epoch": 1.8898851032742248, "grad_norm": 188.08250427246094, "learning_rate": 1.585712900993552e-08, "loss": 19.25, "step": 28456 }, { "epoch": 1.8899515175665802, "grad_norm": 187.2963409423828, "learning_rate": 1.5838057395087812e-08, "loss": 13.7031, "step": 28457 }, { "epoch": 1.890017931858936, "grad_norm": 180.72293090820312, "learning_rate": 1.5818997164458005e-08, "loss": 11.5312, "step": 28458 }, { "epoch": 1.8900843461512917, "grad_norm": 481.5364685058594, "learning_rate": 1.5799948318266366e-08, "loss": 21.0469, "step": 28459 }, { "epoch": 1.8901507604436474, "grad_norm": 188.69476318359375, "learning_rate": 1.5780910856733498e-08, "loss": 19.9688, "step": 28460 }, { "epoch": 1.8902171747360033, "grad_norm": 229.40182495117188, "learning_rate": 1.576188478007967e-08, "loss": 12.4375, "step": 28461 }, { "epoch": 1.890283589028359, "grad_norm": 162.6761932373047, "learning_rate": 1.5742870088524484e-08, "loss": 15.0938, "step": 28462 }, { "epoch": 1.8903500033207146, "grad_norm": 226.5873260498047, "learning_rate": 1.572386678228854e-08, "loss": 14.8906, "step": 28463 }, { "epoch": 1.8904164176130704, "grad_norm": 395.2247314453125, "learning_rate": 1.570487486159122e-08, "loss": 18.0312, "step": 28464 }, { "epoch": 1.890482831905426, "grad_norm": 851.6021118164062, "learning_rate": 1.568589432665224e-08, "loss": 17.625, "step": 28465 }, { "epoch": 1.8905492461977818, "grad_norm": 247.15386962890625, "learning_rate": 1.5666925177691303e-08, "loss": 22.1719, "step": 28466 }, { "epoch": 1.8906156604901376, "grad_norm": 202.4992218017578, "learning_rate": 1.56479674149278e-08, "loss": 13.1562, "step": 28467 }, { "epoch": 1.890682074782493, "grad_norm": 238.49594116210938, "learning_rate": 1.562902103858099e-08, "loss": 22.2188, "step": 28468 }, { "epoch": 1.890748489074849, "grad_norm": 261.1666564941406, "learning_rate": 1.561008604887015e-08, "loss": 14.7656, "step": 28469 }, { "epoch": 1.8908149033672046, "grad_norm": 259.0743408203125, "learning_rate": 1.5591162446013995e-08, "loss": 21.5, "step": 28470 }, { "epoch": 1.8908813176595602, "grad_norm": 436.3118896484375, "learning_rate": 1.5572250230231786e-08, "loss": 21.2812, "step": 28471 }, { "epoch": 1.8909477319519161, "grad_norm": 162.53623962402344, "learning_rate": 1.5553349401742134e-08, "loss": 14.2656, "step": 28472 }, { "epoch": 1.8910141462442718, "grad_norm": 205.41720581054688, "learning_rate": 1.5534459960763524e-08, "loss": 15.4375, "step": 28473 }, { "epoch": 1.8910805605366274, "grad_norm": 148.28892517089844, "learning_rate": 1.5515581907514786e-08, "loss": 11.9609, "step": 28474 }, { "epoch": 1.8911469748289833, "grad_norm": 134.8681182861328, "learning_rate": 1.5496715242214076e-08, "loss": 14.6719, "step": 28475 }, { "epoch": 1.891213389121339, "grad_norm": 194.9335479736328, "learning_rate": 1.5477859965079554e-08, "loss": 17.1875, "step": 28476 }, { "epoch": 1.8912798034136946, "grad_norm": 286.3386535644531, "learning_rate": 1.545901607632949e-08, "loss": 12.1875, "step": 28477 }, { "epoch": 1.8913462177060505, "grad_norm": 124.74164581298828, "learning_rate": 1.544018357618182e-08, "loss": 14.2344, "step": 28478 }, { "epoch": 1.891412631998406, "grad_norm": 229.80242919921875, "learning_rate": 1.542136246485437e-08, "loss": 13.5312, "step": 28479 }, { "epoch": 1.8914790462907618, "grad_norm": 459.82757568359375, "learning_rate": 1.5402552742564855e-08, "loss": 14.5625, "step": 28480 }, { "epoch": 1.8915454605831175, "grad_norm": 166.35855102539062, "learning_rate": 1.538375440953088e-08, "loss": 15.2656, "step": 28481 }, { "epoch": 1.891611874875473, "grad_norm": 171.2981719970703, "learning_rate": 1.5364967465969826e-08, "loss": 14.25, "step": 28482 }, { "epoch": 1.891678289167829, "grad_norm": 183.00225830078125, "learning_rate": 1.534619191209918e-08, "loss": 19.7188, "step": 28483 }, { "epoch": 1.8917447034601846, "grad_norm": 132.2032928466797, "learning_rate": 1.532742774813589e-08, "loss": 13.6016, "step": 28484 }, { "epoch": 1.8918111177525403, "grad_norm": 245.41038513183594, "learning_rate": 1.530867497429722e-08, "loss": 16.9688, "step": 28485 }, { "epoch": 1.8918775320448962, "grad_norm": 135.06883239746094, "learning_rate": 1.5289933590799887e-08, "loss": 16.8438, "step": 28486 }, { "epoch": 1.8919439463372518, "grad_norm": 146.7137451171875, "learning_rate": 1.5271203597860825e-08, "loss": 14.9844, "step": 28487 }, { "epoch": 1.8920103606296075, "grad_norm": 117.21281433105469, "learning_rate": 1.5252484995696645e-08, "loss": 11.6719, "step": 28488 }, { "epoch": 1.8920767749219634, "grad_norm": 128.07298278808594, "learning_rate": 1.523377778452384e-08, "loss": 15.2656, "step": 28489 }, { "epoch": 1.8921431892143188, "grad_norm": 340.9763488769531, "learning_rate": 1.5215081964558895e-08, "loss": 19.2031, "step": 28490 }, { "epoch": 1.8922096035066747, "grad_norm": 141.50491333007812, "learning_rate": 1.519639753601809e-08, "loss": 14.7188, "step": 28491 }, { "epoch": 1.8922760177990303, "grad_norm": 560.1906127929688, "learning_rate": 1.517772449911736e-08, "loss": 15.5156, "step": 28492 }, { "epoch": 1.892342432091386, "grad_norm": 133.35557556152344, "learning_rate": 1.5159062854072867e-08, "loss": 14.8438, "step": 28493 }, { "epoch": 1.8924088463837418, "grad_norm": 338.6029357910156, "learning_rate": 1.514041260110044e-08, "loss": 16.6719, "step": 28494 }, { "epoch": 1.8924752606760975, "grad_norm": 261.52545166015625, "learning_rate": 1.51217737404159e-08, "loss": 16.7188, "step": 28495 }, { "epoch": 1.8925416749684532, "grad_norm": 226.8739776611328, "learning_rate": 1.5103146272234635e-08, "loss": 20.2031, "step": 28496 }, { "epoch": 1.892608089260809, "grad_norm": 146.6625518798828, "learning_rate": 1.508453019677236e-08, "loss": 19.0781, "step": 28497 }, { "epoch": 1.8926745035531647, "grad_norm": 222.93788146972656, "learning_rate": 1.5065925514244237e-08, "loss": 13.6719, "step": 28498 }, { "epoch": 1.8927409178455203, "grad_norm": 183.01979064941406, "learning_rate": 1.5047332224865538e-08, "loss": 10.5156, "step": 28499 }, { "epoch": 1.8928073321378762, "grad_norm": 222.38966369628906, "learning_rate": 1.5028750328851424e-08, "loss": 14.8594, "step": 28500 }, { "epoch": 1.8928737464302317, "grad_norm": 477.7447204589844, "learning_rate": 1.5010179826416613e-08, "loss": 17.875, "step": 28501 }, { "epoch": 1.8929401607225875, "grad_norm": 131.92857360839844, "learning_rate": 1.4991620717776266e-08, "loss": 17.1094, "step": 28502 }, { "epoch": 1.8930065750149432, "grad_norm": 255.5330810546875, "learning_rate": 1.4973073003144767e-08, "loss": 14.0312, "step": 28503 }, { "epoch": 1.8930729893072988, "grad_norm": 204.06097412109375, "learning_rate": 1.4954536682736718e-08, "loss": 13.3594, "step": 28504 }, { "epoch": 1.8931394035996547, "grad_norm": 194.7232208251953, "learning_rate": 1.493601175676662e-08, "loss": 16.3125, "step": 28505 }, { "epoch": 1.8932058178920104, "grad_norm": 182.36036682128906, "learning_rate": 1.4917498225448632e-08, "loss": 16.5156, "step": 28506 }, { "epoch": 1.893272232184366, "grad_norm": 1206.541748046875, "learning_rate": 1.489899608899714e-08, "loss": 17.3125, "step": 28507 }, { "epoch": 1.893338646476722, "grad_norm": 209.59425354003906, "learning_rate": 1.4880505347625971e-08, "loss": 20.0625, "step": 28508 }, { "epoch": 1.8934050607690776, "grad_norm": 285.8411560058594, "learning_rate": 1.4862026001548955e-08, "loss": 18.7031, "step": 28509 }, { "epoch": 1.8934714750614332, "grad_norm": 186.04762268066406, "learning_rate": 1.4843558050980142e-08, "loss": 16.0781, "step": 28510 }, { "epoch": 1.893537889353789, "grad_norm": 183.19692993164062, "learning_rate": 1.4825101496132808e-08, "loss": 13.9141, "step": 28511 }, { "epoch": 1.8936043036461445, "grad_norm": 140.29281616210938, "learning_rate": 1.4806656337220669e-08, "loss": 13.8125, "step": 28512 }, { "epoch": 1.8936707179385004, "grad_norm": 232.94833374023438, "learning_rate": 1.478822257445711e-08, "loss": 15.125, "step": 28513 }, { "epoch": 1.893737132230856, "grad_norm": 210.6964569091797, "learning_rate": 1.4769800208055293e-08, "loss": 17.3906, "step": 28514 }, { "epoch": 1.8938035465232117, "grad_norm": 116.9620132446289, "learning_rate": 1.4751389238228163e-08, "loss": 13.4844, "step": 28515 }, { "epoch": 1.8938699608155676, "grad_norm": 226.26016235351562, "learning_rate": 1.4732989665188988e-08, "loss": 14.3125, "step": 28516 }, { "epoch": 1.8939363751079232, "grad_norm": 146.28582763671875, "learning_rate": 1.471460148915038e-08, "loss": 19.0469, "step": 28517 }, { "epoch": 1.8940027894002789, "grad_norm": 128.0049591064453, "learning_rate": 1.4696224710325056e-08, "loss": 16.2344, "step": 28518 }, { "epoch": 1.8940692036926348, "grad_norm": 452.85919189453125, "learning_rate": 1.4677859328925734e-08, "loss": 20.4219, "step": 28519 }, { "epoch": 1.8941356179849904, "grad_norm": 217.56741333007812, "learning_rate": 1.4659505345164691e-08, "loss": 14.6719, "step": 28520 }, { "epoch": 1.894202032277346, "grad_norm": 311.8575134277344, "learning_rate": 1.4641162759254422e-08, "loss": 15.7656, "step": 28521 }, { "epoch": 1.894268446569702, "grad_norm": 209.82574462890625, "learning_rate": 1.4622831571406979e-08, "loss": 10.4062, "step": 28522 }, { "epoch": 1.8943348608620574, "grad_norm": 240.59368896484375, "learning_rate": 1.4604511781834195e-08, "loss": 16.3594, "step": 28523 }, { "epoch": 1.8944012751544133, "grad_norm": 210.15093994140625, "learning_rate": 1.4586203390748452e-08, "loss": 20.4688, "step": 28524 }, { "epoch": 1.894467689446769, "grad_norm": 167.34022521972656, "learning_rate": 1.4567906398361251e-08, "loss": 18.7031, "step": 28525 }, { "epoch": 1.8945341037391246, "grad_norm": 254.40016174316406, "learning_rate": 1.45496208048842e-08, "loss": 16.6719, "step": 28526 }, { "epoch": 1.8946005180314804, "grad_norm": 113.19888305664062, "learning_rate": 1.4531346610528905e-08, "loss": 12.4453, "step": 28527 }, { "epoch": 1.894666932323836, "grad_norm": 211.89242553710938, "learning_rate": 1.4513083815506755e-08, "loss": 16.6719, "step": 28528 }, { "epoch": 1.8947333466161917, "grad_norm": 147.89056396484375, "learning_rate": 1.4494832420029023e-08, "loss": 13.3906, "step": 28529 }, { "epoch": 1.8947997609085476, "grad_norm": 253.37974548339844, "learning_rate": 1.4476592424306876e-08, "loss": 14.2031, "step": 28530 }, { "epoch": 1.8948661752009033, "grad_norm": 151.42733764648438, "learning_rate": 1.4458363828551035e-08, "loss": 13.9219, "step": 28531 }, { "epoch": 1.894932589493259, "grad_norm": 281.6352844238281, "learning_rate": 1.4440146632972661e-08, "loss": 14.125, "step": 28532 }, { "epoch": 1.8949990037856148, "grad_norm": 279.4637451171875, "learning_rate": 1.4421940837782366e-08, "loss": 20.1719, "step": 28533 }, { "epoch": 1.8950654180779702, "grad_norm": 888.9945068359375, "learning_rate": 1.4403746443190534e-08, "loss": 11.6094, "step": 28534 }, { "epoch": 1.8951318323703261, "grad_norm": 99.2033920288086, "learning_rate": 1.438556344940811e-08, "loss": 15.5469, "step": 28535 }, { "epoch": 1.8951982466626818, "grad_norm": 304.557861328125, "learning_rate": 1.4367391856645039e-08, "loss": 17.5469, "step": 28536 }, { "epoch": 1.8952646609550374, "grad_norm": 577.1751708984375, "learning_rate": 1.4349231665111594e-08, "loss": 13.7969, "step": 28537 }, { "epoch": 1.8953310752473933, "grad_norm": 352.114013671875, "learning_rate": 1.4331082875017942e-08, "loss": 13.5312, "step": 28538 }, { "epoch": 1.895397489539749, "grad_norm": 151.3284912109375, "learning_rate": 1.4312945486574025e-08, "loss": 13.3125, "step": 28539 }, { "epoch": 1.8954639038321046, "grad_norm": 181.18714904785156, "learning_rate": 1.4294819499989341e-08, "loss": 15.9844, "step": 28540 }, { "epoch": 1.8955303181244605, "grad_norm": 162.40231323242188, "learning_rate": 1.4276704915474057e-08, "loss": 14.5, "step": 28541 }, { "epoch": 1.8955967324168161, "grad_norm": 383.9144287109375, "learning_rate": 1.425860173323734e-08, "loss": 19.7422, "step": 28542 }, { "epoch": 1.8956631467091718, "grad_norm": 1177.2613525390625, "learning_rate": 1.4240509953488689e-08, "loss": 20.5, "step": 28543 }, { "epoch": 1.8957295610015277, "grad_norm": 273.0982971191406, "learning_rate": 1.4222429576437378e-08, "loss": 18.7656, "step": 28544 }, { "epoch": 1.895795975293883, "grad_norm": 139.9864501953125, "learning_rate": 1.4204360602292576e-08, "loss": 11.8906, "step": 28545 }, { "epoch": 1.895862389586239, "grad_norm": 190.16244506835938, "learning_rate": 1.4186303031263335e-08, "loss": 16.5938, "step": 28546 }, { "epoch": 1.8959288038785946, "grad_norm": 344.6815185546875, "learning_rate": 1.4168256863558493e-08, "loss": 15.1406, "step": 28547 }, { "epoch": 1.8959952181709503, "grad_norm": 345.99041748046875, "learning_rate": 1.4150222099386654e-08, "loss": 19.8438, "step": 28548 }, { "epoch": 1.8960616324633062, "grad_norm": 474.9165344238281, "learning_rate": 1.4132198738956768e-08, "loss": 20.2344, "step": 28549 }, { "epoch": 1.8961280467556618, "grad_norm": 157.99388122558594, "learning_rate": 1.4114186782477e-08, "loss": 16.0781, "step": 28550 }, { "epoch": 1.8961944610480175, "grad_norm": 332.8052978515625, "learning_rate": 1.409618623015585e-08, "loss": 21.7344, "step": 28551 }, { "epoch": 1.8962608753403734, "grad_norm": 288.7763366699219, "learning_rate": 1.4078197082201593e-08, "loss": 12.0938, "step": 28552 }, { "epoch": 1.896327289632729, "grad_norm": 201.85755920410156, "learning_rate": 1.4060219338822066e-08, "loss": 14.0469, "step": 28553 }, { "epoch": 1.8963937039250847, "grad_norm": 327.6708679199219, "learning_rate": 1.4042253000225546e-08, "loss": 15.8906, "step": 28554 }, { "epoch": 1.8964601182174405, "grad_norm": 211.7831268310547, "learning_rate": 1.4024298066619754e-08, "loss": 16.4141, "step": 28555 }, { "epoch": 1.896526532509796, "grad_norm": 512.3478393554688, "learning_rate": 1.4006354538212194e-08, "loss": 14.3594, "step": 28556 }, { "epoch": 1.8965929468021518, "grad_norm": 176.9117889404297, "learning_rate": 1.3988422415210587e-08, "loss": 18.0469, "step": 28557 }, { "epoch": 1.8966593610945075, "grad_norm": 298.46343994140625, "learning_rate": 1.3970501697822434e-08, "loss": 19.1094, "step": 28558 }, { "epoch": 1.8967257753868632, "grad_norm": 340.0863952636719, "learning_rate": 1.3952592386254791e-08, "loss": 17.0156, "step": 28559 }, { "epoch": 1.896792189679219, "grad_norm": 145.77084350585938, "learning_rate": 1.3934694480715047e-08, "loss": 14.1562, "step": 28560 }, { "epoch": 1.8968586039715747, "grad_norm": 1723.887939453125, "learning_rate": 1.3916807981410262e-08, "loss": 17.1719, "step": 28561 }, { "epoch": 1.8969250182639303, "grad_norm": 2090.325439453125, "learning_rate": 1.3898932888547044e-08, "loss": 16.4219, "step": 28562 }, { "epoch": 1.8969914325562862, "grad_norm": 147.34385681152344, "learning_rate": 1.3881069202332563e-08, "loss": 17.3438, "step": 28563 }, { "epoch": 1.8970578468486419, "grad_norm": 158.22634887695312, "learning_rate": 1.3863216922973098e-08, "loss": 14.1562, "step": 28564 }, { "epoch": 1.8971242611409975, "grad_norm": 323.8310241699219, "learning_rate": 1.3845376050675372e-08, "loss": 15.2656, "step": 28565 }, { "epoch": 1.8971906754333534, "grad_norm": 130.98638916015625, "learning_rate": 1.3827546585645555e-08, "loss": 12.0156, "step": 28566 }, { "epoch": 1.8972570897257088, "grad_norm": 144.7712860107422, "learning_rate": 1.3809728528090147e-08, "loss": 14.3906, "step": 28567 }, { "epoch": 1.8973235040180647, "grad_norm": 126.9769287109375, "learning_rate": 1.3791921878215095e-08, "loss": 12.4062, "step": 28568 }, { "epoch": 1.8973899183104204, "grad_norm": 257.45166015625, "learning_rate": 1.3774126636226346e-08, "loss": 16.8281, "step": 28569 }, { "epoch": 1.897456332602776, "grad_norm": 151.3298797607422, "learning_rate": 1.3756342802329957e-08, "loss": 15.6562, "step": 28570 }, { "epoch": 1.897522746895132, "grad_norm": 579.36328125, "learning_rate": 1.3738570376731317e-08, "loss": 17.3984, "step": 28571 }, { "epoch": 1.8975891611874875, "grad_norm": 190.6582794189453, "learning_rate": 1.3720809359636377e-08, "loss": 11.7031, "step": 28572 }, { "epoch": 1.8976555754798432, "grad_norm": 399.7127380371094, "learning_rate": 1.370305975125019e-08, "loss": 22.5781, "step": 28573 }, { "epoch": 1.897721989772199, "grad_norm": 153.795654296875, "learning_rate": 1.3685321551778484e-08, "loss": 13.5156, "step": 28574 }, { "epoch": 1.8977884040645547, "grad_norm": 152.99769592285156, "learning_rate": 1.3667594761426093e-08, "loss": 16.4219, "step": 28575 }, { "epoch": 1.8978548183569104, "grad_norm": 294.4422302246094, "learning_rate": 1.3649879380398299e-08, "loss": 21.75, "step": 28576 }, { "epoch": 1.8979212326492663, "grad_norm": 580.352783203125, "learning_rate": 1.3632175408899827e-08, "loss": 15.9375, "step": 28577 }, { "epoch": 1.8979876469416217, "grad_norm": 124.51459503173828, "learning_rate": 1.3614482847135733e-08, "loss": 14.875, "step": 28578 }, { "epoch": 1.8980540612339776, "grad_norm": 204.3888702392578, "learning_rate": 1.3596801695310411e-08, "loss": 20.75, "step": 28579 }, { "epoch": 1.8981204755263332, "grad_norm": 148.88502502441406, "learning_rate": 1.3579131953628476e-08, "loss": 18.1406, "step": 28580 }, { "epoch": 1.8981868898186889, "grad_norm": 272.09490966796875, "learning_rate": 1.3561473622294429e-08, "loss": 14.9375, "step": 28581 }, { "epoch": 1.8982533041110448, "grad_norm": 321.53570556640625, "learning_rate": 1.3543826701512329e-08, "loss": 14.4844, "step": 28582 }, { "epoch": 1.8983197184034004, "grad_norm": 264.0735168457031, "learning_rate": 1.3526191191486458e-08, "loss": 19.2656, "step": 28583 }, { "epoch": 1.898386132695756, "grad_norm": 227.3258056640625, "learning_rate": 1.3508567092420875e-08, "loss": 14.6719, "step": 28584 }, { "epoch": 1.898452546988112, "grad_norm": 418.0722351074219, "learning_rate": 1.3490954404519194e-08, "loss": 18.8125, "step": 28585 }, { "epoch": 1.8985189612804676, "grad_norm": 314.79833984375, "learning_rate": 1.3473353127985477e-08, "loss": 17.8906, "step": 28586 }, { "epoch": 1.8985853755728233, "grad_norm": 122.05550384521484, "learning_rate": 1.3455763263023e-08, "loss": 17.4375, "step": 28587 }, { "epoch": 1.8986517898651791, "grad_norm": 184.4736328125, "learning_rate": 1.3438184809835385e-08, "loss": 18.9844, "step": 28588 }, { "epoch": 1.8987182041575346, "grad_norm": 352.2445373535156, "learning_rate": 1.342061776862602e-08, "loss": 11.5312, "step": 28589 }, { "epoch": 1.8987846184498904, "grad_norm": 274.84246826171875, "learning_rate": 1.3403062139598077e-08, "loss": 19.125, "step": 28590 }, { "epoch": 1.898851032742246, "grad_norm": 596.9600830078125, "learning_rate": 1.3385517922954504e-08, "loss": 18.4219, "step": 28591 }, { "epoch": 1.8989174470346017, "grad_norm": 162.29881286621094, "learning_rate": 1.3367985118898362e-08, "loss": 14.6875, "step": 28592 }, { "epoch": 1.8989838613269576, "grad_norm": 110.2358169555664, "learning_rate": 1.3350463727632489e-08, "loss": 14.0625, "step": 28593 }, { "epoch": 1.8990502756193133, "grad_norm": 111.6072006225586, "learning_rate": 1.333295374935961e-08, "loss": 9.2656, "step": 28594 }, { "epoch": 1.899116689911669, "grad_norm": 132.4402618408203, "learning_rate": 1.331545518428201e-08, "loss": 12.7656, "step": 28595 }, { "epoch": 1.8991831042040248, "grad_norm": 266.23675537109375, "learning_rate": 1.3297968032602413e-08, "loss": 17.8125, "step": 28596 }, { "epoch": 1.8992495184963805, "grad_norm": 270.0845947265625, "learning_rate": 1.3280492294522883e-08, "loss": 16.9375, "step": 28597 }, { "epoch": 1.8993159327887361, "grad_norm": 498.4801940917969, "learning_rate": 1.3263027970245589e-08, "loss": 11.3906, "step": 28598 }, { "epoch": 1.899382347081092, "grad_norm": 168.02243041992188, "learning_rate": 1.3245575059972702e-08, "loss": 20.3438, "step": 28599 }, { "epoch": 1.8994487613734474, "grad_norm": 180.09133911132812, "learning_rate": 1.3228133563905951e-08, "loss": 15.4766, "step": 28600 }, { "epoch": 1.8995151756658033, "grad_norm": 226.85614013671875, "learning_rate": 1.3210703482247066e-08, "loss": 12.9062, "step": 28601 }, { "epoch": 1.899581589958159, "grad_norm": 218.42405700683594, "learning_rate": 1.3193284815197992e-08, "loss": 15.4531, "step": 28602 }, { "epoch": 1.8996480042505146, "grad_norm": 223.4818878173828, "learning_rate": 1.3175877562959793e-08, "loss": 18.1719, "step": 28603 }, { "epoch": 1.8997144185428705, "grad_norm": 116.48368835449219, "learning_rate": 1.3158481725733972e-08, "loss": 11.8438, "step": 28604 }, { "epoch": 1.8997808328352261, "grad_norm": 187.60585021972656, "learning_rate": 1.3141097303721927e-08, "loss": 11.6953, "step": 28605 }, { "epoch": 1.8998472471275818, "grad_norm": 432.9128723144531, "learning_rate": 1.3123724297124494e-08, "loss": 18.5781, "step": 28606 }, { "epoch": 1.8999136614199377, "grad_norm": 158.4957275390625, "learning_rate": 1.3106362706142737e-08, "loss": 13.4219, "step": 28607 }, { "epoch": 1.8999800757122933, "grad_norm": 218.08900451660156, "learning_rate": 1.3089012530977606e-08, "loss": 16.1719, "step": 28608 }, { "epoch": 1.900046490004649, "grad_norm": 319.2317810058594, "learning_rate": 1.3071673771829606e-08, "loss": 14.6562, "step": 28609 }, { "epoch": 1.9001129042970049, "grad_norm": 204.05624389648438, "learning_rate": 1.3054346428899466e-08, "loss": 24.3906, "step": 28610 }, { "epoch": 1.9001793185893603, "grad_norm": 1664.3270263671875, "learning_rate": 1.303703050238758e-08, "loss": 18.2031, "step": 28611 }, { "epoch": 1.9002457328817162, "grad_norm": 213.99143981933594, "learning_rate": 1.3019725992494013e-08, "loss": 14.625, "step": 28612 }, { "epoch": 1.9003121471740718, "grad_norm": 249.47201538085938, "learning_rate": 1.3002432899419268e-08, "loss": 19.25, "step": 28613 }, { "epoch": 1.9003785614664275, "grad_norm": 172.7863311767578, "learning_rate": 1.29851512233633e-08, "loss": 17.4531, "step": 28614 }, { "epoch": 1.9004449757587834, "grad_norm": 147.7305450439453, "learning_rate": 1.2967880964525834e-08, "loss": 15.25, "step": 28615 }, { "epoch": 1.900511390051139, "grad_norm": 96.8583755493164, "learning_rate": 1.2950622123106825e-08, "loss": 12.6406, "step": 28616 }, { "epoch": 1.9005778043434947, "grad_norm": 221.3162078857422, "learning_rate": 1.2933374699305776e-08, "loss": 16.0469, "step": 28617 }, { "epoch": 1.9006442186358505, "grad_norm": 186.48275756835938, "learning_rate": 1.291613869332242e-08, "loss": 14.1406, "step": 28618 }, { "epoch": 1.9007106329282062, "grad_norm": 207.72874450683594, "learning_rate": 1.289891410535593e-08, "loss": 15.2969, "step": 28619 }, { "epoch": 1.9007770472205618, "grad_norm": 163.43255615234375, "learning_rate": 1.2881700935605589e-08, "loss": 13.7812, "step": 28620 }, { "epoch": 1.9008434615129177, "grad_norm": 382.94537353515625, "learning_rate": 1.2864499184270461e-08, "loss": 14.2656, "step": 28621 }, { "epoch": 1.9009098758052732, "grad_norm": 571.3294677734375, "learning_rate": 1.284730885154972e-08, "loss": 30.8125, "step": 28622 }, { "epoch": 1.900976290097629, "grad_norm": 138.62118530273438, "learning_rate": 1.2830129937641987e-08, "loss": 18.5, "step": 28623 }, { "epoch": 1.9010427043899847, "grad_norm": 189.2244110107422, "learning_rate": 1.28129624427461e-08, "loss": 15.0781, "step": 28624 }, { "epoch": 1.9011091186823403, "grad_norm": 128.733154296875, "learning_rate": 1.2795806367060679e-08, "loss": 14.2812, "step": 28625 }, { "epoch": 1.9011755329746962, "grad_norm": 272.06201171875, "learning_rate": 1.2778661710784011e-08, "loss": 17.0781, "step": 28626 }, { "epoch": 1.9012419472670519, "grad_norm": 90.63809967041016, "learning_rate": 1.2761528474114603e-08, "loss": 12.4688, "step": 28627 }, { "epoch": 1.9013083615594075, "grad_norm": 158.94223022460938, "learning_rate": 1.2744406657250517e-08, "loss": 17.3125, "step": 28628 }, { "epoch": 1.9013747758517634, "grad_norm": 170.56597900390625, "learning_rate": 1.272729626038982e-08, "loss": 13.3125, "step": 28629 }, { "epoch": 1.901441190144119, "grad_norm": 168.75938415527344, "learning_rate": 1.2710197283730573e-08, "loss": 18.4219, "step": 28630 }, { "epoch": 1.9015076044364747, "grad_norm": 186.81591796875, "learning_rate": 1.2693109727470397e-08, "loss": 17.1094, "step": 28631 }, { "epoch": 1.9015740187288306, "grad_norm": 181.36866760253906, "learning_rate": 1.2676033591806911e-08, "loss": 18.9531, "step": 28632 }, { "epoch": 1.901640433021186, "grad_norm": 126.26083374023438, "learning_rate": 1.2658968876937959e-08, "loss": 15.8125, "step": 28633 }, { "epoch": 1.901706847313542, "grad_norm": 322.91558837890625, "learning_rate": 1.2641915583060492e-08, "loss": 19.4375, "step": 28634 }, { "epoch": 1.9017732616058975, "grad_norm": 145.204345703125, "learning_rate": 1.2624873710372241e-08, "loss": 14.9844, "step": 28635 }, { "epoch": 1.9018396758982532, "grad_norm": 162.47019958496094, "learning_rate": 1.260784325906994e-08, "loss": 18.2969, "step": 28636 }, { "epoch": 1.901906090190609, "grad_norm": 235.69073486328125, "learning_rate": 1.259082422935076e-08, "loss": 13.3281, "step": 28637 }, { "epoch": 1.9019725044829647, "grad_norm": 345.91265869140625, "learning_rate": 1.2573816621411549e-08, "loss": 13.3906, "step": 28638 }, { "epoch": 1.9020389187753204, "grad_norm": 102.67013549804688, "learning_rate": 1.2556820435449145e-08, "loss": 12.5859, "step": 28639 }, { "epoch": 1.9021053330676763, "grad_norm": 158.28546142578125, "learning_rate": 1.253983567165995e-08, "loss": 14.2812, "step": 28640 }, { "epoch": 1.902171747360032, "grad_norm": 186.01971435546875, "learning_rate": 1.252286233024058e-08, "loss": 18.6719, "step": 28641 }, { "epoch": 1.9022381616523876, "grad_norm": 107.18952941894531, "learning_rate": 1.2505900411387327e-08, "loss": 17.6875, "step": 28642 }, { "epoch": 1.9023045759447434, "grad_norm": 762.4367065429688, "learning_rate": 1.2488949915296254e-08, "loss": 14.5, "step": 28643 }, { "epoch": 1.9023709902370989, "grad_norm": 314.38458251953125, "learning_rate": 1.247201084216376e-08, "loss": 16.9062, "step": 28644 }, { "epoch": 1.9024374045294548, "grad_norm": 124.670654296875, "learning_rate": 1.2455083192185467e-08, "loss": 13.875, "step": 28645 }, { "epoch": 1.9025038188218104, "grad_norm": 160.47080993652344, "learning_rate": 1.2438166965557439e-08, "loss": 16.9531, "step": 28646 }, { "epoch": 1.902570233114166, "grad_norm": 185.13775634765625, "learning_rate": 1.2421262162475188e-08, "loss": 16.3906, "step": 28647 }, { "epoch": 1.902636647406522, "grad_norm": 258.87701416015625, "learning_rate": 1.2404368783134333e-08, "loss": 15.4062, "step": 28648 }, { "epoch": 1.9027030616988776, "grad_norm": 550.368896484375, "learning_rate": 1.2387486827730165e-08, "loss": 23.1094, "step": 28649 }, { "epoch": 1.9027694759912333, "grad_norm": 441.20916748046875, "learning_rate": 1.2370616296458192e-08, "loss": 12.2812, "step": 28650 }, { "epoch": 1.9028358902835891, "grad_norm": 337.1912841796875, "learning_rate": 1.2353757189513259e-08, "loss": 23.3281, "step": 28651 }, { "epoch": 1.9029023045759448, "grad_norm": 194.27999877929688, "learning_rate": 1.2336909507090765e-08, "loss": 17.5312, "step": 28652 }, { "epoch": 1.9029687188683004, "grad_norm": 179.41168212890625, "learning_rate": 1.2320073249385221e-08, "loss": 15.7969, "step": 28653 }, { "epoch": 1.9030351331606563, "grad_norm": 175.61062622070312, "learning_rate": 1.2303248416591583e-08, "loss": 18.3125, "step": 28654 }, { "epoch": 1.9031015474530117, "grad_norm": 252.76841735839844, "learning_rate": 1.2286435008904472e-08, "loss": 10.0625, "step": 28655 }, { "epoch": 1.9031679617453676, "grad_norm": 139.23703002929688, "learning_rate": 1.226963302651829e-08, "loss": 20.2578, "step": 28656 }, { "epoch": 1.9032343760377233, "grad_norm": 458.8199157714844, "learning_rate": 1.2252842469627433e-08, "loss": 13.3438, "step": 28657 }, { "epoch": 1.903300790330079, "grad_norm": 127.67459106445312, "learning_rate": 1.2236063338426195e-08, "loss": 13.8594, "step": 28658 }, { "epoch": 1.9033672046224348, "grad_norm": 190.74838256835938, "learning_rate": 1.2219295633108528e-08, "loss": 10.125, "step": 28659 }, { "epoch": 1.9034336189147905, "grad_norm": 299.4691467285156, "learning_rate": 1.2202539353868502e-08, "loss": 14.8438, "step": 28660 }, { "epoch": 1.9035000332071461, "grad_norm": 122.66451263427734, "learning_rate": 1.2185794500899848e-08, "loss": 17.6562, "step": 28661 }, { "epoch": 1.903566447499502, "grad_norm": 185.5406951904297, "learning_rate": 1.2169061074396413e-08, "loss": 14.0, "step": 28662 }, { "epoch": 1.9036328617918576, "grad_norm": 232.59019470214844, "learning_rate": 1.2152339074551599e-08, "loss": 15.0469, "step": 28663 }, { "epoch": 1.9036992760842133, "grad_norm": 106.40248107910156, "learning_rate": 1.2135628501558914e-08, "loss": 20.1875, "step": 28664 }, { "epoch": 1.9037656903765692, "grad_norm": 140.3807373046875, "learning_rate": 1.211892935561154e-08, "loss": 15.3906, "step": 28665 }, { "epoch": 1.9038321046689246, "grad_norm": 253.74996948242188, "learning_rate": 1.2102241636902877e-08, "loss": 14.125, "step": 28666 }, { "epoch": 1.9038985189612805, "grad_norm": 107.00032043457031, "learning_rate": 1.208556534562577e-08, "loss": 11.0469, "step": 28667 }, { "epoch": 1.9039649332536361, "grad_norm": 283.7550048828125, "learning_rate": 1.2068900481973288e-08, "loss": 15.6406, "step": 28668 }, { "epoch": 1.9040313475459918, "grad_norm": 277.25439453125, "learning_rate": 1.2052247046137943e-08, "loss": 17.7344, "step": 28669 }, { "epoch": 1.9040977618383477, "grad_norm": 108.02774047851562, "learning_rate": 1.203560503831258e-08, "loss": 17.7656, "step": 28670 }, { "epoch": 1.9041641761307033, "grad_norm": 162.46737670898438, "learning_rate": 1.2018974458689601e-08, "loss": 16.5938, "step": 28671 }, { "epoch": 1.904230590423059, "grad_norm": 158.20509338378906, "learning_rate": 1.200235530746152e-08, "loss": 15.2344, "step": 28672 }, { "epoch": 1.9042970047154149, "grad_norm": 175.0531463623047, "learning_rate": 1.1985747584820405e-08, "loss": 15.0469, "step": 28673 }, { "epoch": 1.9043634190077705, "grad_norm": 156.5927734375, "learning_rate": 1.1969151290958545e-08, "loss": 15.7969, "step": 28674 }, { "epoch": 1.9044298333001262, "grad_norm": 138.49484252929688, "learning_rate": 1.1952566426067679e-08, "loss": 15.6562, "step": 28675 }, { "epoch": 1.904496247592482, "grad_norm": 263.2017822265625, "learning_rate": 1.1935992990339872e-08, "loss": 17.4688, "step": 28676 }, { "epoch": 1.9045626618848375, "grad_norm": 184.90086364746094, "learning_rate": 1.1919430983966638e-08, "loss": 10.3438, "step": 28677 }, { "epoch": 1.9046290761771933, "grad_norm": 232.95791625976562, "learning_rate": 1.1902880407139714e-08, "loss": 10.3125, "step": 28678 }, { "epoch": 1.904695490469549, "grad_norm": 128.56991577148438, "learning_rate": 1.1886341260050503e-08, "loss": 16.3906, "step": 28679 }, { "epoch": 1.9047619047619047, "grad_norm": 184.69451904296875, "learning_rate": 1.1869813542890405e-08, "loss": 19.0312, "step": 28680 }, { "epoch": 1.9048283190542605, "grad_norm": 363.0787658691406, "learning_rate": 1.1853297255850491e-08, "loss": 21.6406, "step": 28681 }, { "epoch": 1.9048947333466162, "grad_norm": 808.0192260742188, "learning_rate": 1.183679239912172e-08, "loss": 21.3281, "step": 28682 }, { "epoch": 1.9049611476389718, "grad_norm": 358.3266296386719, "learning_rate": 1.182029897289527e-08, "loss": 14.5078, "step": 28683 }, { "epoch": 1.9050275619313277, "grad_norm": 86.65643310546875, "learning_rate": 1.1803816977361659e-08, "loss": 12.4688, "step": 28684 }, { "epoch": 1.9050939762236834, "grad_norm": 168.4200439453125, "learning_rate": 1.1787346412711729e-08, "loss": 19.9688, "step": 28685 }, { "epoch": 1.905160390516039, "grad_norm": 309.6970520019531, "learning_rate": 1.1770887279135888e-08, "loss": 14.5781, "step": 28686 }, { "epoch": 1.905226804808395, "grad_norm": 206.28846740722656, "learning_rate": 1.175443957682465e-08, "loss": 16.1406, "step": 28687 }, { "epoch": 1.9052932191007503, "grad_norm": 302.72589111328125, "learning_rate": 1.1738003305968191e-08, "loss": 17.6406, "step": 28688 }, { "epoch": 1.9053596333931062, "grad_norm": 231.560791015625, "learning_rate": 1.1721578466756698e-08, "loss": 17.0, "step": 28689 }, { "epoch": 1.9054260476854619, "grad_norm": 194.38710021972656, "learning_rate": 1.1705165059379907e-08, "loss": 19.7344, "step": 28690 }, { "epoch": 1.9054924619778175, "grad_norm": 246.6640167236328, "learning_rate": 1.1688763084028109e-08, "loss": 21.8906, "step": 28691 }, { "epoch": 1.9055588762701734, "grad_norm": 318.6588134765625, "learning_rate": 1.1672372540890707e-08, "loss": 15.4844, "step": 28692 }, { "epoch": 1.905625290562529, "grad_norm": 181.72512817382812, "learning_rate": 1.165599343015744e-08, "loss": 12.9688, "step": 28693 }, { "epoch": 1.9056917048548847, "grad_norm": 174.21560668945312, "learning_rate": 1.16396257520176e-08, "loss": 16.0312, "step": 28694 }, { "epoch": 1.9057581191472406, "grad_norm": 343.8806457519531, "learning_rate": 1.1623269506660815e-08, "loss": 19.5469, "step": 28695 }, { "epoch": 1.9058245334395962, "grad_norm": 159.02880859375, "learning_rate": 1.1606924694276044e-08, "loss": 20.2188, "step": 28696 }, { "epoch": 1.905890947731952, "grad_norm": 289.2335510253906, "learning_rate": 1.1590591315052467e-08, "loss": 18.9219, "step": 28697 }, { "epoch": 1.9059573620243078, "grad_norm": 573.5740966796875, "learning_rate": 1.1574269369178823e-08, "loss": 23.0625, "step": 28698 }, { "epoch": 1.9060237763166632, "grad_norm": 141.44761657714844, "learning_rate": 1.1557958856844297e-08, "loss": 19.0, "step": 28699 }, { "epoch": 1.906090190609019, "grad_norm": 249.82421875, "learning_rate": 1.1541659778237178e-08, "loss": 20.2031, "step": 28700 }, { "epoch": 1.9061566049013747, "grad_norm": 302.08795166015625, "learning_rate": 1.1525372133546207e-08, "loss": 22.1406, "step": 28701 }, { "epoch": 1.9062230191937304, "grad_norm": 250.33612060546875, "learning_rate": 1.1509095922959789e-08, "loss": 16.0625, "step": 28702 }, { "epoch": 1.9062894334860863, "grad_norm": 160.29910278320312, "learning_rate": 1.1492831146666104e-08, "loss": 27.0938, "step": 28703 }, { "epoch": 1.906355847778442, "grad_norm": 137.32789611816406, "learning_rate": 1.1476577804853339e-08, "loss": 12.4688, "step": 28704 }, { "epoch": 1.9064222620707976, "grad_norm": 400.23248291015625, "learning_rate": 1.1460335897709561e-08, "loss": 20.8438, "step": 28705 }, { "epoch": 1.9064886763631534, "grad_norm": 118.81854248046875, "learning_rate": 1.1444105425422513e-08, "loss": 18.0156, "step": 28706 }, { "epoch": 1.906555090655509, "grad_norm": 113.93714904785156, "learning_rate": 1.142788638818004e-08, "loss": 14.0156, "step": 28707 }, { "epoch": 1.9066215049478648, "grad_norm": 139.40011596679688, "learning_rate": 1.1411678786169777e-08, "loss": 12.5156, "step": 28708 }, { "epoch": 1.9066879192402206, "grad_norm": 835.8688354492188, "learning_rate": 1.1395482619579122e-08, "loss": 13.3438, "step": 28709 }, { "epoch": 1.906754333532576, "grad_norm": 232.66831970214844, "learning_rate": 1.1379297888595485e-08, "loss": 13.8906, "step": 28710 }, { "epoch": 1.906820747824932, "grad_norm": 385.7242736816406, "learning_rate": 1.1363124593406048e-08, "loss": 18.1406, "step": 28711 }, { "epoch": 1.9068871621172876, "grad_norm": 237.55760192871094, "learning_rate": 1.1346962734197885e-08, "loss": 14.1094, "step": 28712 }, { "epoch": 1.9069535764096432, "grad_norm": 278.8322448730469, "learning_rate": 1.1330812311157955e-08, "loss": 25.3438, "step": 28713 }, { "epoch": 1.9070199907019991, "grad_norm": 113.89965057373047, "learning_rate": 1.1314673324473112e-08, "loss": 18.75, "step": 28714 }, { "epoch": 1.9070864049943548, "grad_norm": 131.71774291992188, "learning_rate": 1.1298545774330092e-08, "loss": 17.1875, "step": 28715 }, { "epoch": 1.9071528192867104, "grad_norm": 384.9224548339844, "learning_rate": 1.1282429660915305e-08, "loss": 20.0625, "step": 28716 }, { "epoch": 1.9072192335790663, "grad_norm": 114.7093505859375, "learning_rate": 1.1266324984415265e-08, "loss": 14.2344, "step": 28717 }, { "epoch": 1.907285647871422, "grad_norm": 333.1054992675781, "learning_rate": 1.1250231745016159e-08, "loss": 12.7969, "step": 28718 }, { "epoch": 1.9073520621637776, "grad_norm": 169.80795288085938, "learning_rate": 1.1234149942904281e-08, "loss": 10.9062, "step": 28719 }, { "epoch": 1.9074184764561335, "grad_norm": 485.3925476074219, "learning_rate": 1.1218079578265593e-08, "loss": 25.5156, "step": 28720 }, { "epoch": 1.907484890748489, "grad_norm": 350.1080627441406, "learning_rate": 1.120202065128606e-08, "loss": 25.9375, "step": 28721 }, { "epoch": 1.9075513050408448, "grad_norm": 227.5816650390625, "learning_rate": 1.118597316215142e-08, "loss": 18.0469, "step": 28722 }, { "epoch": 1.9076177193332005, "grad_norm": 181.03353881835938, "learning_rate": 1.1169937111047078e-08, "loss": 19.6875, "step": 28723 }, { "epoch": 1.907684133625556, "grad_norm": 250.35963439941406, "learning_rate": 1.115391249815889e-08, "loss": 13.0625, "step": 28724 }, { "epoch": 1.907750547917912, "grad_norm": 174.66197204589844, "learning_rate": 1.1137899323671928e-08, "loss": 19.5156, "step": 28725 }, { "epoch": 1.9078169622102676, "grad_norm": 205.63796997070312, "learning_rate": 1.1121897587771601e-08, "loss": 13.7656, "step": 28726 }, { "epoch": 1.9078833765026233, "grad_norm": 302.6981201171875, "learning_rate": 1.1105907290642869e-08, "loss": 21.5469, "step": 28727 }, { "epoch": 1.9079497907949792, "grad_norm": 254.6483154296875, "learning_rate": 1.1089928432470807e-08, "loss": 16.9844, "step": 28728 }, { "epoch": 1.9080162050873348, "grad_norm": 1243.912841796875, "learning_rate": 1.107396101344016e-08, "loss": 16.8281, "step": 28729 }, { "epoch": 1.9080826193796905, "grad_norm": 209.28973388671875, "learning_rate": 1.1058005033735885e-08, "loss": 17.0, "step": 28730 }, { "epoch": 1.9081490336720464, "grad_norm": 139.47666931152344, "learning_rate": 1.1042060493542171e-08, "loss": 16.3594, "step": 28731 }, { "epoch": 1.9082154479644018, "grad_norm": 203.0020751953125, "learning_rate": 1.1026127393043649e-08, "loss": 28.9844, "step": 28732 }, { "epoch": 1.9082818622567577, "grad_norm": 258.1160888671875, "learning_rate": 1.1010205732424615e-08, "loss": 20.2969, "step": 28733 }, { "epoch": 1.9083482765491133, "grad_norm": 418.43389892578125, "learning_rate": 1.0994295511869257e-08, "loss": 16.8281, "step": 28734 }, { "epoch": 1.908414690841469, "grad_norm": 342.65234375, "learning_rate": 1.0978396731561645e-08, "loss": 21.375, "step": 28735 }, { "epoch": 1.9084811051338249, "grad_norm": 125.63666534423828, "learning_rate": 1.0962509391685637e-08, "loss": 17.7656, "step": 28736 }, { "epoch": 1.9085475194261805, "grad_norm": 258.1496276855469, "learning_rate": 1.0946633492424861e-08, "loss": 16.75, "step": 28737 }, { "epoch": 1.9086139337185362, "grad_norm": 131.6513214111328, "learning_rate": 1.0930769033963283e-08, "loss": 13.3594, "step": 28738 }, { "epoch": 1.908680348010892, "grad_norm": 1196.42529296875, "learning_rate": 1.09149160164842e-08, "loss": 11.8594, "step": 28739 }, { "epoch": 1.9087467623032477, "grad_norm": 133.2801055908203, "learning_rate": 1.0899074440170908e-08, "loss": 15.3438, "step": 28740 }, { "epoch": 1.9088131765956033, "grad_norm": 185.7906951904297, "learning_rate": 1.0883244305206929e-08, "loss": 14.875, "step": 28741 }, { "epoch": 1.9088795908879592, "grad_norm": 218.92465209960938, "learning_rate": 1.0867425611775117e-08, "loss": 17.4219, "step": 28742 }, { "epoch": 1.9089460051803147, "grad_norm": 222.63821411132812, "learning_rate": 1.0851618360058546e-08, "loss": 19.125, "step": 28743 }, { "epoch": 1.9090124194726705, "grad_norm": 188.5940704345703, "learning_rate": 1.0835822550240181e-08, "loss": 21.4844, "step": 28744 }, { "epoch": 1.9090788337650262, "grad_norm": 136.1032257080078, "learning_rate": 1.0820038182502656e-08, "loss": 12.9375, "step": 28745 }, { "epoch": 1.9091452480573818, "grad_norm": 195.20095825195312, "learning_rate": 1.0804265257028378e-08, "loss": 12.5938, "step": 28746 }, { "epoch": 1.9092116623497377, "grad_norm": 243.58131408691406, "learning_rate": 1.0788503774000091e-08, "loss": 14.8672, "step": 28747 }, { "epoch": 1.9092780766420934, "grad_norm": 191.2933807373047, "learning_rate": 1.077275373359987e-08, "loss": 16.6719, "step": 28748 }, { "epoch": 1.909344490934449, "grad_norm": 327.3548889160156, "learning_rate": 1.0757015136010128e-08, "loss": 17.5312, "step": 28749 }, { "epoch": 1.909410905226805, "grad_norm": 277.5547790527344, "learning_rate": 1.0741287981412716e-08, "loss": 11.8906, "step": 28750 }, { "epoch": 1.9094773195191606, "grad_norm": 237.67730712890625, "learning_rate": 1.0725572269989602e-08, "loss": 11.8672, "step": 28751 }, { "epoch": 1.9095437338115162, "grad_norm": 413.8987121582031, "learning_rate": 1.070986800192275e-08, "loss": 13.6953, "step": 28752 }, { "epoch": 1.909610148103872, "grad_norm": 133.1212921142578, "learning_rate": 1.0694175177393683e-08, "loss": 17.2422, "step": 28753 }, { "epoch": 1.9096765623962275, "grad_norm": 322.7271728515625, "learning_rate": 1.067849379658381e-08, "loss": 16.8594, "step": 28754 }, { "epoch": 1.9097429766885834, "grad_norm": 168.90902709960938, "learning_rate": 1.0662823859674763e-08, "loss": 19.4062, "step": 28755 }, { "epoch": 1.909809390980939, "grad_norm": 660.9531860351562, "learning_rate": 1.0647165366847621e-08, "loss": 14.6094, "step": 28756 }, { "epoch": 1.9098758052732947, "grad_norm": 173.54222106933594, "learning_rate": 1.0631518318283573e-08, "loss": 16.7109, "step": 28757 }, { "epoch": 1.9099422195656506, "grad_norm": 324.9059753417969, "learning_rate": 1.0615882714163692e-08, "loss": 14.2656, "step": 28758 }, { "epoch": 1.9100086338580062, "grad_norm": 133.5319366455078, "learning_rate": 1.0600258554668728e-08, "loss": 13.9688, "step": 28759 }, { "epoch": 1.9100750481503619, "grad_norm": 191.50709533691406, "learning_rate": 1.0584645839979422e-08, "loss": 13.3594, "step": 28760 }, { "epoch": 1.9101414624427178, "grad_norm": 193.70968627929688, "learning_rate": 1.0569044570276409e-08, "loss": 17.5, "step": 28761 }, { "epoch": 1.9102078767350734, "grad_norm": 279.164794921875, "learning_rate": 1.0553454745740098e-08, "loss": 20.1562, "step": 28762 }, { "epoch": 1.910274291027429, "grad_norm": 236.94247436523438, "learning_rate": 1.0537876366551013e-08, "loss": 13.9531, "step": 28763 }, { "epoch": 1.910340705319785, "grad_norm": 142.60704040527344, "learning_rate": 1.052230943288912e-08, "loss": 18.875, "step": 28764 }, { "epoch": 1.9104071196121404, "grad_norm": 255.31884765625, "learning_rate": 1.050675394493461e-08, "loss": 18.1562, "step": 28765 }, { "epoch": 1.9104735339044963, "grad_norm": 112.21440124511719, "learning_rate": 1.0491209902867448e-08, "loss": 13.0781, "step": 28766 }, { "epoch": 1.910539948196852, "grad_norm": 254.16310119628906, "learning_rate": 1.0475677306867381e-08, "loss": 15.0781, "step": 28767 }, { "epoch": 1.9106063624892076, "grad_norm": 313.8671875, "learning_rate": 1.046015615711393e-08, "loss": 15.4375, "step": 28768 }, { "epoch": 1.9106727767815634, "grad_norm": 148.01962280273438, "learning_rate": 1.0444646453786954e-08, "loss": 13.9141, "step": 28769 }, { "epoch": 1.910739191073919, "grad_norm": 161.91644287109375, "learning_rate": 1.0429148197065641e-08, "loss": 16.4062, "step": 28770 }, { "epoch": 1.9108056053662748, "grad_norm": 619.696044921875, "learning_rate": 1.041366138712929e-08, "loss": 22.0312, "step": 28771 }, { "epoch": 1.9108720196586306, "grad_norm": 176.9677734375, "learning_rate": 1.0398186024157207e-08, "loss": 17.3906, "step": 28772 }, { "epoch": 1.9109384339509863, "grad_norm": 115.7865982055664, "learning_rate": 1.0382722108328134e-08, "loss": 16.125, "step": 28773 }, { "epoch": 1.911004848243342, "grad_norm": 234.16531372070312, "learning_rate": 1.0367269639821152e-08, "loss": 14.3438, "step": 28774 }, { "epoch": 1.9110712625356978, "grad_norm": 281.93670654296875, "learning_rate": 1.0351828618814895e-08, "loss": 17.2812, "step": 28775 }, { "epoch": 1.9111376768280532, "grad_norm": 330.8829650878906, "learning_rate": 1.0336399045487997e-08, "loss": 19.1719, "step": 28776 }, { "epoch": 1.9112040911204091, "grad_norm": 158.75149536132812, "learning_rate": 1.0320980920019095e-08, "loss": 12.5391, "step": 28777 }, { "epoch": 1.9112705054127648, "grad_norm": 205.32301330566406, "learning_rate": 1.0305574242586269e-08, "loss": 13.5625, "step": 28778 }, { "epoch": 1.9113369197051204, "grad_norm": 195.64657592773438, "learning_rate": 1.0290179013367928e-08, "loss": 17.75, "step": 28779 }, { "epoch": 1.9114033339974763, "grad_norm": 131.43023681640625, "learning_rate": 1.0274795232542043e-08, "loss": 17.7969, "step": 28780 }, { "epoch": 1.911469748289832, "grad_norm": 222.0775909423828, "learning_rate": 1.0259422900286696e-08, "loss": 13.1406, "step": 28781 }, { "epoch": 1.9115361625821876, "grad_norm": 195.4894256591797, "learning_rate": 1.024406201677952e-08, "loss": 13.625, "step": 28782 }, { "epoch": 1.9116025768745435, "grad_norm": 196.08082580566406, "learning_rate": 1.0228712582198484e-08, "loss": 19.9062, "step": 28783 }, { "epoch": 1.9116689911668991, "grad_norm": 640.5206298828125, "learning_rate": 1.021337459672078e-08, "loss": 14.6797, "step": 28784 }, { "epoch": 1.9117354054592548, "grad_norm": 270.7161865234375, "learning_rate": 1.0198048060524155e-08, "loss": 21.5938, "step": 28785 }, { "epoch": 1.9118018197516107, "grad_norm": 263.8105163574219, "learning_rate": 1.018273297378569e-08, "loss": 19.2812, "step": 28786 }, { "epoch": 1.911868234043966, "grad_norm": 247.62051391601562, "learning_rate": 1.0167429336682575e-08, "loss": 15.1406, "step": 28787 }, { "epoch": 1.911934648336322, "grad_norm": 211.32867431640625, "learning_rate": 1.0152137149392004e-08, "loss": 17.2188, "step": 28788 }, { "epoch": 1.9120010626286776, "grad_norm": 198.4484405517578, "learning_rate": 1.013685641209061e-08, "loss": 16.5, "step": 28789 }, { "epoch": 1.9120674769210333, "grad_norm": 245.097900390625, "learning_rate": 1.0121587124955255e-08, "loss": 19.2344, "step": 28790 }, { "epoch": 1.9121338912133892, "grad_norm": 215.11370849609375, "learning_rate": 1.0106329288162685e-08, "loss": 15.2344, "step": 28791 }, { "epoch": 1.9122003055057448, "grad_norm": 452.5083312988281, "learning_rate": 1.0091082901889204e-08, "loss": 25.3281, "step": 28792 }, { "epoch": 1.9122667197981005, "grad_norm": 187.86529541015625, "learning_rate": 1.0075847966311223e-08, "loss": 15.3281, "step": 28793 }, { "epoch": 1.9123331340904564, "grad_norm": 4241.15966796875, "learning_rate": 1.0060624481605162e-08, "loss": 16.3359, "step": 28794 }, { "epoch": 1.912399548382812, "grad_norm": 271.1521911621094, "learning_rate": 1.0045412447946876e-08, "loss": 15.6406, "step": 28795 }, { "epoch": 1.9124659626751677, "grad_norm": 562.9198608398438, "learning_rate": 1.003021186551245e-08, "loss": 12.6797, "step": 28796 }, { "epoch": 1.9125323769675235, "grad_norm": 259.92919921875, "learning_rate": 1.0015022734477629e-08, "loss": 16.3125, "step": 28797 }, { "epoch": 1.912598791259879, "grad_norm": 330.4595031738281, "learning_rate": 9.999845055018164e-09, "loss": 15.3438, "step": 28798 }, { "epoch": 1.9126652055522348, "grad_norm": 442.1220397949219, "learning_rate": 9.984678827309689e-09, "loss": 23.3281, "step": 28799 }, { "epoch": 1.9127316198445905, "grad_norm": 135.11790466308594, "learning_rate": 9.969524051527623e-09, "loss": 15.2969, "step": 28800 }, { "epoch": 1.9127980341369462, "grad_norm": 305.1195983886719, "learning_rate": 9.954380727847045e-09, "loss": 18.7656, "step": 28801 }, { "epoch": 1.912864448429302, "grad_norm": 282.6468200683594, "learning_rate": 9.939248856443483e-09, "loss": 13.5469, "step": 28802 }, { "epoch": 1.9129308627216577, "grad_norm": 283.0414123535156, "learning_rate": 9.924128437491686e-09, "loss": 22.9531, "step": 28803 }, { "epoch": 1.9129972770140133, "grad_norm": 222.1929931640625, "learning_rate": 9.909019471166624e-09, "loss": 18.5781, "step": 28804 }, { "epoch": 1.9130636913063692, "grad_norm": 85.70726776123047, "learning_rate": 9.893921957643159e-09, "loss": 14.8594, "step": 28805 }, { "epoch": 1.9131301055987249, "grad_norm": 528.8031005859375, "learning_rate": 9.878835897095927e-09, "loss": 19.625, "step": 28806 }, { "epoch": 1.9131965198910805, "grad_norm": 356.322265625, "learning_rate": 9.863761289699235e-09, "loss": 11.1016, "step": 28807 }, { "epoch": 1.9132629341834364, "grad_norm": 165.7197265625, "learning_rate": 9.848698135627831e-09, "loss": 16.5469, "step": 28808 }, { "epoch": 1.9133293484757918, "grad_norm": 292.1413879394531, "learning_rate": 9.833646435055576e-09, "loss": 14.25, "step": 28809 }, { "epoch": 1.9133957627681477, "grad_norm": 388.7093200683594, "learning_rate": 9.818606188156776e-09, "loss": 17.3594, "step": 28810 }, { "epoch": 1.9134621770605034, "grad_norm": 252.0303192138672, "learning_rate": 9.803577395105288e-09, "loss": 20.4219, "step": 28811 }, { "epoch": 1.913528591352859, "grad_norm": 151.69528198242188, "learning_rate": 9.788560056075091e-09, "loss": 14.2812, "step": 28812 }, { "epoch": 1.913595005645215, "grad_norm": 380.54345703125, "learning_rate": 9.773554171239928e-09, "loss": 14.6406, "step": 28813 }, { "epoch": 1.9136614199375706, "grad_norm": 210.38525390625, "learning_rate": 9.75855974077322e-09, "loss": 18.3281, "step": 28814 }, { "epoch": 1.9137278342299262, "grad_norm": 106.31120300292969, "learning_rate": 9.743576764848494e-09, "loss": 15.6094, "step": 28815 }, { "epoch": 1.913794248522282, "grad_norm": 194.10838317871094, "learning_rate": 9.728605243639054e-09, "loss": 15.0, "step": 28816 }, { "epoch": 1.9138606628146377, "grad_norm": 169.8047637939453, "learning_rate": 9.713645177318097e-09, "loss": 15.2422, "step": 28817 }, { "epoch": 1.9139270771069934, "grad_norm": 153.0226593017578, "learning_rate": 9.698696566058595e-09, "loss": 13.0156, "step": 28818 }, { "epoch": 1.9139934913993493, "grad_norm": 281.0224609375, "learning_rate": 9.683759410033632e-09, "loss": 13.4531, "step": 28819 }, { "epoch": 1.9140599056917047, "grad_norm": 225.0381622314453, "learning_rate": 9.668833709415847e-09, "loss": 14.1875, "step": 28820 }, { "epoch": 1.9141263199840606, "grad_norm": 127.44159698486328, "learning_rate": 9.65391946437788e-09, "loss": 17.9688, "step": 28821 }, { "epoch": 1.9141927342764162, "grad_norm": 219.97039794921875, "learning_rate": 9.63901667509237e-09, "loss": 16.4219, "step": 28822 }, { "epoch": 1.9142591485687719, "grad_norm": 130.93582153320312, "learning_rate": 9.624125341731625e-09, "loss": 14.4297, "step": 28823 }, { "epoch": 1.9143255628611278, "grad_norm": 374.8971252441406, "learning_rate": 9.609245464467953e-09, "loss": 18.0312, "step": 28824 }, { "epoch": 1.9143919771534834, "grad_norm": 345.3998107910156, "learning_rate": 9.594377043473434e-09, "loss": 16.0156, "step": 28825 }, { "epoch": 1.914458391445839, "grad_norm": 663.6966552734375, "learning_rate": 9.579520078919934e-09, "loss": 18.1172, "step": 28826 }, { "epoch": 1.914524805738195, "grad_norm": 167.07603454589844, "learning_rate": 9.564674570979537e-09, "loss": 13.8125, "step": 28827 }, { "epoch": 1.9145912200305506, "grad_norm": 148.7765350341797, "learning_rate": 9.549840519823881e-09, "loss": 15.1562, "step": 28828 }, { "epoch": 1.9146576343229063, "grad_norm": 190.59701538085938, "learning_rate": 9.5350179256245e-09, "loss": 15.6094, "step": 28829 }, { "epoch": 1.9147240486152621, "grad_norm": 310.0860595703125, "learning_rate": 9.52020678855292e-09, "loss": 17.2969, "step": 28830 }, { "epoch": 1.9147904629076176, "grad_norm": 512.402099609375, "learning_rate": 9.50540710878056e-09, "loss": 23.4141, "step": 28831 }, { "epoch": 1.9148568771999734, "grad_norm": 185.14544677734375, "learning_rate": 9.490618886478285e-09, "loss": 11.8594, "step": 28832 }, { "epoch": 1.914923291492329, "grad_norm": 248.8162841796875, "learning_rate": 9.475842121817512e-09, "loss": 18.5156, "step": 28833 }, { "epoch": 1.9149897057846847, "grad_norm": 148.09164428710938, "learning_rate": 9.461076814968994e-09, "loss": 16.0469, "step": 28834 }, { "epoch": 1.9150561200770406, "grad_norm": 214.33737182617188, "learning_rate": 9.446322966103481e-09, "loss": 18.5938, "step": 28835 }, { "epoch": 1.9151225343693963, "grad_norm": 156.18307495117188, "learning_rate": 9.43158057539184e-09, "loss": 16.0469, "step": 28836 }, { "epoch": 1.915188948661752, "grad_norm": 334.20941162109375, "learning_rate": 9.416849643004376e-09, "loss": 14.6094, "step": 28837 }, { "epoch": 1.9152553629541078, "grad_norm": 230.00918579101562, "learning_rate": 9.402130169111733e-09, "loss": 16.7656, "step": 28838 }, { "epoch": 1.9153217772464635, "grad_norm": 140.06484985351562, "learning_rate": 9.387422153883995e-09, "loss": 15.1562, "step": 28839 }, { "epoch": 1.9153881915388191, "grad_norm": 188.707275390625, "learning_rate": 9.372725597491137e-09, "loss": 13.4375, "step": 28840 }, { "epoch": 1.915454605831175, "grad_norm": 285.0670166015625, "learning_rate": 9.358040500103581e-09, "loss": 16.7969, "step": 28841 }, { "epoch": 1.9155210201235304, "grad_norm": 191.97303771972656, "learning_rate": 9.343366861890966e-09, "loss": 13.9688, "step": 28842 }, { "epoch": 1.9155874344158863, "grad_norm": 159.235107421875, "learning_rate": 9.328704683022937e-09, "loss": 11.3281, "step": 28843 }, { "epoch": 1.9156538487082422, "grad_norm": 222.344482421875, "learning_rate": 9.314053963669244e-09, "loss": 19.9219, "step": 28844 }, { "epoch": 1.9157202630005976, "grad_norm": 106.2016830444336, "learning_rate": 9.299414703999308e-09, "loss": 16.8594, "step": 28845 }, { "epoch": 1.9157866772929535, "grad_norm": 403.8282470703125, "learning_rate": 9.28478690418244e-09, "loss": 16.3281, "step": 28846 }, { "epoch": 1.9158530915853091, "grad_norm": 195.7682647705078, "learning_rate": 9.270170564388058e-09, "loss": 12.5469, "step": 28847 }, { "epoch": 1.9159195058776648, "grad_norm": 358.3421936035156, "learning_rate": 9.255565684784806e-09, "loss": 15.75, "step": 28848 }, { "epoch": 1.9159859201700207, "grad_norm": 283.43255615234375, "learning_rate": 9.240972265541991e-09, "loss": 15.4062, "step": 28849 }, { "epoch": 1.9160523344623763, "grad_norm": 415.39447021484375, "learning_rate": 9.226390306828369e-09, "loss": 18.5625, "step": 28850 }, { "epoch": 1.916118748754732, "grad_norm": 279.9032897949219, "learning_rate": 9.211819808812472e-09, "loss": 18.6562, "step": 28851 }, { "epoch": 1.9161851630470879, "grad_norm": 130.03826904296875, "learning_rate": 9.197260771662829e-09, "loss": 18.2656, "step": 28852 }, { "epoch": 1.9162515773394433, "grad_norm": 169.21389770507812, "learning_rate": 9.182713195548086e-09, "loss": 15.1875, "step": 28853 }, { "epoch": 1.9163179916317992, "grad_norm": 210.515380859375, "learning_rate": 9.16817708063633e-09, "loss": 23.1719, "step": 28854 }, { "epoch": 1.916384405924155, "grad_norm": 353.5343322753906, "learning_rate": 9.153652427095759e-09, "loss": 23.375, "step": 28855 }, { "epoch": 1.9164508202165105, "grad_norm": 154.9173583984375, "learning_rate": 9.139139235094462e-09, "loss": 23.9688, "step": 28856 }, { "epoch": 1.9165172345088664, "grad_norm": 341.99505615234375, "learning_rate": 9.124637504800082e-09, "loss": 14.7031, "step": 28857 }, { "epoch": 1.916583648801222, "grad_norm": 135.25440979003906, "learning_rate": 9.110147236380594e-09, "loss": 16.1094, "step": 28858 }, { "epoch": 1.9166500630935777, "grad_norm": 269.7969665527344, "learning_rate": 9.095668430003534e-09, "loss": 17.125, "step": 28859 }, { "epoch": 1.9167164773859335, "grad_norm": 264.17864990234375, "learning_rate": 9.081201085836431e-09, "loss": 12.9922, "step": 28860 }, { "epoch": 1.9167828916782892, "grad_norm": 229.08143615722656, "learning_rate": 9.0667452040466e-09, "loss": 16.2812, "step": 28861 }, { "epoch": 1.9168493059706448, "grad_norm": 325.94976806640625, "learning_rate": 9.052300784801237e-09, "loss": 11.0156, "step": 28862 }, { "epoch": 1.9169157202630007, "grad_norm": 179.87925720214844, "learning_rate": 9.037867828267432e-09, "loss": 13.6016, "step": 28863 }, { "epoch": 1.9169821345553562, "grad_norm": 105.87964630126953, "learning_rate": 9.023446334612161e-09, "loss": 18.2812, "step": 28864 }, { "epoch": 1.917048548847712, "grad_norm": 350.3820495605469, "learning_rate": 9.009036304002182e-09, "loss": 12.6562, "step": 28865 }, { "epoch": 1.917114963140068, "grad_norm": 320.9119567871094, "learning_rate": 8.994637736604361e-09, "loss": 16.5, "step": 28866 }, { "epoch": 1.9171813774324233, "grad_norm": 415.32855224609375, "learning_rate": 8.980250632585007e-09, "loss": 17.2969, "step": 28867 }, { "epoch": 1.9172477917247792, "grad_norm": 176.66085815429688, "learning_rate": 8.965874992110656e-09, "loss": 13.7344, "step": 28868 }, { "epoch": 1.9173142060171349, "grad_norm": 596.277099609375, "learning_rate": 8.951510815347618e-09, "loss": 13.3438, "step": 28869 }, { "epoch": 1.9173806203094905, "grad_norm": 180.74530029296875, "learning_rate": 8.937158102462095e-09, "loss": 18.7812, "step": 28870 }, { "epoch": 1.9174470346018464, "grad_norm": 265.31439208984375, "learning_rate": 8.922816853619953e-09, "loss": 12.7344, "step": 28871 }, { "epoch": 1.917513448894202, "grad_norm": 244.55201721191406, "learning_rate": 8.90848706898728e-09, "loss": 15.75, "step": 28872 }, { "epoch": 1.9175798631865577, "grad_norm": 301.6509094238281, "learning_rate": 8.894168748729725e-09, "loss": 18.2031, "step": 28873 }, { "epoch": 1.9176462774789136, "grad_norm": 330.61798095703125, "learning_rate": 8.879861893012818e-09, "loss": 17.625, "step": 28874 }, { "epoch": 1.917712691771269, "grad_norm": 215.6647186279297, "learning_rate": 8.86556650200232e-09, "loss": 17.2812, "step": 28875 }, { "epoch": 1.917779106063625, "grad_norm": 379.0773620605469, "learning_rate": 8.851282575863316e-09, "loss": 16.6094, "step": 28876 }, { "epoch": 1.9178455203559808, "grad_norm": 116.42048645019531, "learning_rate": 8.837010114761122e-09, "loss": 14.7031, "step": 28877 }, { "epoch": 1.9179119346483362, "grad_norm": 437.7364501953125, "learning_rate": 8.822749118860939e-09, "loss": 16.2969, "step": 28878 }, { "epoch": 1.917978348940692, "grad_norm": 97.30752563476562, "learning_rate": 8.808499588327523e-09, "loss": 10.9844, "step": 28879 }, { "epoch": 1.9180447632330477, "grad_norm": 363.3397521972656, "learning_rate": 8.794261523325964e-09, "loss": 17.9375, "step": 28880 }, { "epoch": 1.9181111775254034, "grad_norm": 173.31039428710938, "learning_rate": 8.780034924020797e-09, "loss": 16.125, "step": 28881 }, { "epoch": 1.9181775918177593, "grad_norm": 241.64134216308594, "learning_rate": 8.765819790576556e-09, "loss": 14.1562, "step": 28882 }, { "epoch": 1.918244006110115, "grad_norm": 196.56570434570312, "learning_rate": 8.75161612315778e-09, "loss": 21.1562, "step": 28883 }, { "epoch": 1.9183104204024706, "grad_norm": 357.8627624511719, "learning_rate": 8.737423921928666e-09, "loss": 16.2812, "step": 28884 }, { "epoch": 1.9183768346948264, "grad_norm": 184.51925659179688, "learning_rate": 8.72324318705342e-09, "loss": 14.625, "step": 28885 }, { "epoch": 1.9184432489871819, "grad_norm": 215.0825958251953, "learning_rate": 8.709073918696241e-09, "loss": 15.2344, "step": 28886 }, { "epoch": 1.9185096632795378, "grad_norm": 676.824462890625, "learning_rate": 8.694916117020779e-09, "loss": 15.4844, "step": 28887 }, { "epoch": 1.9185760775718936, "grad_norm": 141.24317932128906, "learning_rate": 8.6807697821909e-09, "loss": 14.375, "step": 28888 }, { "epoch": 1.918642491864249, "grad_norm": 320.3020324707031, "learning_rate": 8.666634914370362e-09, "loss": 13.375, "step": 28889 }, { "epoch": 1.918708906156605, "grad_norm": 418.23321533203125, "learning_rate": 8.652511513722372e-09, "loss": 14.5469, "step": 28890 }, { "epoch": 1.9187753204489606, "grad_norm": 307.9825744628906, "learning_rate": 8.63839958041057e-09, "loss": 19.5312, "step": 28891 }, { "epoch": 1.9188417347413163, "grad_norm": 208.18898010253906, "learning_rate": 8.624299114598166e-09, "loss": 13.1875, "step": 28892 }, { "epoch": 1.9189081490336721, "grad_norm": 210.5719451904297, "learning_rate": 8.610210116448136e-09, "loss": 17.7422, "step": 28893 }, { "epoch": 1.9189745633260278, "grad_norm": 645.7066650390625, "learning_rate": 8.596132586123573e-09, "loss": 12.3594, "step": 28894 }, { "epoch": 1.9190409776183834, "grad_norm": 520.1358032226562, "learning_rate": 8.582066523787235e-09, "loss": 13.9062, "step": 28895 }, { "epoch": 1.9191073919107393, "grad_norm": 187.17904663085938, "learning_rate": 8.568011929601882e-09, "loss": 12.3438, "step": 28896 }, { "epoch": 1.9191738062030947, "grad_norm": 138.82350158691406, "learning_rate": 8.553968803730161e-09, "loss": 15.1094, "step": 28897 }, { "epoch": 1.9192402204954506, "grad_norm": 412.5465393066406, "learning_rate": 8.539937146334274e-09, "loss": 11.125, "step": 28898 }, { "epoch": 1.9193066347878065, "grad_norm": 241.21377563476562, "learning_rate": 8.525916957576762e-09, "loss": 13.4219, "step": 28899 }, { "epoch": 1.919373049080162, "grad_norm": 367.12713623046875, "learning_rate": 8.511908237619714e-09, "loss": 16.7344, "step": 28900 }, { "epoch": 1.9194394633725178, "grad_norm": 119.63156127929688, "learning_rate": 8.497910986625223e-09, "loss": 11.8984, "step": 28901 }, { "epoch": 1.9195058776648735, "grad_norm": 212.8458709716797, "learning_rate": 8.48392520475516e-09, "loss": 15.3906, "step": 28902 }, { "epoch": 1.9195722919572291, "grad_norm": 138.13522338867188, "learning_rate": 8.469950892171285e-09, "loss": 14.1094, "step": 28903 }, { "epoch": 1.919638706249585, "grad_norm": 146.1986083984375, "learning_rate": 8.455988049035246e-09, "loss": 17.75, "step": 28904 }, { "epoch": 1.9197051205419406, "grad_norm": 252.77842712402344, "learning_rate": 8.442036675508579e-09, "loss": 19.2578, "step": 28905 }, { "epoch": 1.9197715348342963, "grad_norm": 135.52667236328125, "learning_rate": 8.428096771752712e-09, "loss": 18.25, "step": 28906 }, { "epoch": 1.9198379491266522, "grad_norm": 244.80335998535156, "learning_rate": 8.41416833792885e-09, "loss": 17.1562, "step": 28907 }, { "epoch": 1.9199043634190076, "grad_norm": 418.6618347167969, "learning_rate": 8.400251374198086e-09, "loss": 18.2109, "step": 28908 }, { "epoch": 1.9199707777113635, "grad_norm": 185.47251892089844, "learning_rate": 8.386345880721402e-09, "loss": 15.7656, "step": 28909 }, { "epoch": 1.9200371920037194, "grad_norm": 160.45286560058594, "learning_rate": 8.372451857659667e-09, "loss": 14.2031, "step": 28910 }, { "epoch": 1.9201036062960748, "grad_norm": 127.39112091064453, "learning_rate": 8.358569305173646e-09, "loss": 11.4688, "step": 28911 }, { "epoch": 1.9201700205884307, "grad_norm": 107.36515045166016, "learning_rate": 8.344698223423873e-09, "loss": 15.1406, "step": 28912 }, { "epoch": 1.9202364348807863, "grad_norm": 209.2504425048828, "learning_rate": 8.330838612570778e-09, "loss": 13.8438, "step": 28913 }, { "epoch": 1.920302849173142, "grad_norm": 277.3270568847656, "learning_rate": 8.316990472774787e-09, "loss": 12.1953, "step": 28914 }, { "epoch": 1.9203692634654979, "grad_norm": 188.43800354003906, "learning_rate": 8.303153804195995e-09, "loss": 12.9688, "step": 28915 }, { "epoch": 1.9204356777578535, "grad_norm": 326.0628662109375, "learning_rate": 8.289328606994384e-09, "loss": 14.5781, "step": 28916 }, { "epoch": 1.9205020920502092, "grad_norm": 168.21475219726562, "learning_rate": 8.275514881330047e-09, "loss": 11.125, "step": 28917 }, { "epoch": 1.920568506342565, "grad_norm": 504.11907958984375, "learning_rate": 8.261712627362638e-09, "loss": 15.0, "step": 28918 }, { "epoch": 1.9206349206349205, "grad_norm": 336.1625671386719, "learning_rate": 8.247921845251915e-09, "loss": 14.4375, "step": 28919 }, { "epoch": 1.9207013349272763, "grad_norm": 430.7225646972656, "learning_rate": 8.234142535157418e-09, "loss": 14.3125, "step": 28920 }, { "epoch": 1.9207677492196322, "grad_norm": 447.9360046386719, "learning_rate": 8.220374697238352e-09, "loss": 17.5312, "step": 28921 }, { "epoch": 1.9208341635119877, "grad_norm": 217.92030334472656, "learning_rate": 8.206618331654147e-09, "loss": 21.4375, "step": 28922 }, { "epoch": 1.9209005778043435, "grad_norm": 167.15313720703125, "learning_rate": 8.192873438563897e-09, "loss": 16.4219, "step": 28923 }, { "epoch": 1.9209669920966992, "grad_norm": 259.1160583496094, "learning_rate": 8.179140018126584e-09, "loss": 15.3438, "step": 28924 }, { "epoch": 1.9210334063890548, "grad_norm": 138.2694091796875, "learning_rate": 8.165418070501085e-09, "loss": 10.9531, "step": 28925 }, { "epoch": 1.9210998206814107, "grad_norm": 208.85574340820312, "learning_rate": 8.151707595845936e-09, "loss": 18.75, "step": 28926 }, { "epoch": 1.9211662349737664, "grad_norm": 215.13587951660156, "learning_rate": 8.138008594320122e-09, "loss": 18.6875, "step": 28927 }, { "epoch": 1.921232649266122, "grad_norm": 136.5631561279297, "learning_rate": 8.12432106608174e-09, "loss": 13.9219, "step": 28928 }, { "epoch": 1.921299063558478, "grad_norm": 323.768798828125, "learning_rate": 8.11064501128933e-09, "loss": 14.875, "step": 28929 }, { "epoch": 1.9213654778508333, "grad_norm": 123.66876983642578, "learning_rate": 8.096980430100985e-09, "loss": 17.0312, "step": 28930 }, { "epoch": 1.9214318921431892, "grad_norm": 176.63864135742188, "learning_rate": 8.083327322674804e-09, "loss": 14.0781, "step": 28931 }, { "epoch": 1.921498306435545, "grad_norm": 313.5412902832031, "learning_rate": 8.069685689168771e-09, "loss": 18.3594, "step": 28932 }, { "epoch": 1.9215647207279005, "grad_norm": 136.21983337402344, "learning_rate": 8.056055529740646e-09, "loss": 14.5391, "step": 28933 }, { "epoch": 1.9216311350202564, "grad_norm": 236.587158203125, "learning_rate": 8.042436844547973e-09, "loss": 15.9844, "step": 28934 }, { "epoch": 1.921697549312612, "grad_norm": 262.97723388671875, "learning_rate": 8.028829633748513e-09, "loss": 14.0625, "step": 28935 }, { "epoch": 1.9217639636049677, "grad_norm": 164.58087158203125, "learning_rate": 8.015233897499474e-09, "loss": 17.9531, "step": 28936 }, { "epoch": 1.9218303778973236, "grad_norm": 354.5811767578125, "learning_rate": 8.001649635958173e-09, "loss": 19.2656, "step": 28937 }, { "epoch": 1.9218967921896792, "grad_norm": 217.86004638671875, "learning_rate": 7.988076849281823e-09, "loss": 13.3594, "step": 28938 }, { "epoch": 1.921963206482035, "grad_norm": 556.5390625, "learning_rate": 7.974515537627402e-09, "loss": 17.3594, "step": 28939 }, { "epoch": 1.9220296207743908, "grad_norm": 126.08838653564453, "learning_rate": 7.960965701151679e-09, "loss": 17.4688, "step": 28940 }, { "epoch": 1.9220960350667462, "grad_norm": 608.93359375, "learning_rate": 7.947427340011526e-09, "loss": 22.2656, "step": 28941 }, { "epoch": 1.922162449359102, "grad_norm": 122.51346588134766, "learning_rate": 7.933900454363484e-09, "loss": 12.375, "step": 28942 }, { "epoch": 1.922228863651458, "grad_norm": 285.3244934082031, "learning_rate": 7.920385044363987e-09, "loss": 16.3438, "step": 28943 }, { "epoch": 1.9222952779438134, "grad_norm": 190.5166015625, "learning_rate": 7.90688111016946e-09, "loss": 23.2969, "step": 28944 }, { "epoch": 1.9223616922361693, "grad_norm": 252.9131317138672, "learning_rate": 7.893388651936005e-09, "loss": 14.8125, "step": 28945 }, { "epoch": 1.922428106528525, "grad_norm": 163.05667114257812, "learning_rate": 7.879907669819719e-09, "loss": 16.4531, "step": 28946 }, { "epoch": 1.9224945208208806, "grad_norm": 250.71983337402344, "learning_rate": 7.866438163976585e-09, "loss": 13.1016, "step": 28947 }, { "epoch": 1.9225609351132364, "grad_norm": 344.1650695800781, "learning_rate": 7.85298013456248e-09, "loss": 19.9219, "step": 28948 }, { "epoch": 1.922627349405592, "grad_norm": 145.3589324951172, "learning_rate": 7.839533581732948e-09, "loss": 16.375, "step": 28949 }, { "epoch": 1.9226937636979478, "grad_norm": 253.57110595703125, "learning_rate": 7.826098505643642e-09, "loss": 17.9062, "step": 28950 }, { "epoch": 1.9227601779903036, "grad_norm": 441.7061767578125, "learning_rate": 7.812674906449768e-09, "loss": 18.8125, "step": 28951 }, { "epoch": 1.922826592282659, "grad_norm": 229.74835205078125, "learning_rate": 7.79926278430687e-09, "loss": 15.6562, "step": 28952 }, { "epoch": 1.922893006575015, "grad_norm": 558.7726440429688, "learning_rate": 7.785862139369825e-09, "loss": 23.5781, "step": 28953 }, { "epoch": 1.9229594208673708, "grad_norm": 410.15435791015625, "learning_rate": 7.772472971793841e-09, "loss": 17.2344, "step": 28954 }, { "epoch": 1.9230258351597262, "grad_norm": 183.63201904296875, "learning_rate": 7.759095281733796e-09, "loss": 12.9844, "step": 28955 }, { "epoch": 1.9230922494520821, "grad_norm": 194.258544921875, "learning_rate": 7.745729069344453e-09, "loss": 19.5781, "step": 28956 }, { "epoch": 1.9231586637444378, "grad_norm": 677.7096557617188, "learning_rate": 7.732374334780134e-09, "loss": 15.7969, "step": 28957 }, { "epoch": 1.9232250780367934, "grad_norm": 168.2091827392578, "learning_rate": 7.719031078195715e-09, "loss": 16.5156, "step": 28958 }, { "epoch": 1.9232914923291493, "grad_norm": 234.19549560546875, "learning_rate": 7.705699299745294e-09, "loss": 17.375, "step": 28959 }, { "epoch": 1.923357906621505, "grad_norm": 208.34991455078125, "learning_rate": 7.692378999583083e-09, "loss": 18.6094, "step": 28960 }, { "epoch": 1.9234243209138606, "grad_norm": 228.4475860595703, "learning_rate": 7.67907017786329e-09, "loss": 15.0156, "step": 28961 }, { "epoch": 1.9234907352062165, "grad_norm": 226.3255615234375, "learning_rate": 7.665772834739903e-09, "loss": 19.2812, "step": 28962 }, { "epoch": 1.923557149498572, "grad_norm": 1219.5169677734375, "learning_rate": 7.652486970366467e-09, "loss": 21.7344, "step": 28963 }, { "epoch": 1.9236235637909278, "grad_norm": 204.73533630371094, "learning_rate": 7.639212584897081e-09, "loss": 12.2031, "step": 28964 }, { "epoch": 1.9236899780832837, "grad_norm": 177.3224639892578, "learning_rate": 7.625949678484844e-09, "loss": 12.5312, "step": 28965 }, { "epoch": 1.9237563923756391, "grad_norm": 190.82192993164062, "learning_rate": 7.612698251283411e-09, "loss": 21.625, "step": 28966 }, { "epoch": 1.923822806667995, "grad_norm": 157.50450134277344, "learning_rate": 7.599458303446105e-09, "loss": 12.3594, "step": 28967 }, { "epoch": 1.9238892209603506, "grad_norm": 448.5962829589844, "learning_rate": 7.586229835126024e-09, "loss": 23.1406, "step": 28968 }, { "epoch": 1.9239556352527063, "grad_norm": 233.77769470214844, "learning_rate": 7.573012846476157e-09, "loss": 12.5625, "step": 28969 }, { "epoch": 1.9240220495450622, "grad_norm": 695.0130004882812, "learning_rate": 7.559807337649381e-09, "loss": 17.5, "step": 28970 }, { "epoch": 1.9240884638374178, "grad_norm": 393.17169189453125, "learning_rate": 7.546613308798466e-09, "loss": 18.2812, "step": 28971 }, { "epoch": 1.9241548781297735, "grad_norm": 171.39334106445312, "learning_rate": 7.533430760076176e-09, "loss": 11.6562, "step": 28972 }, { "epoch": 1.9242212924221294, "grad_norm": 123.61903381347656, "learning_rate": 7.520259691634612e-09, "loss": 14.8906, "step": 28973 }, { "epoch": 1.9242877067144848, "grad_norm": 280.6957092285156, "learning_rate": 7.50710010362654e-09, "loss": 20.6406, "step": 28974 }, { "epoch": 1.9243541210068407, "grad_norm": 263.9081726074219, "learning_rate": 7.493951996204173e-09, "loss": 26.0469, "step": 28975 }, { "epoch": 1.9244205352991965, "grad_norm": 124.64488983154297, "learning_rate": 7.480815369519278e-09, "loss": 13.8594, "step": 28976 }, { "epoch": 1.924486949591552, "grad_norm": 163.49166870117188, "learning_rate": 7.467690223724066e-09, "loss": 14.4219, "step": 28977 }, { "epoch": 1.9245533638839079, "grad_norm": 120.36660766601562, "learning_rate": 7.454576558970194e-09, "loss": 14.3125, "step": 28978 }, { "epoch": 1.9246197781762635, "grad_norm": 256.2149353027344, "learning_rate": 7.44147437540954e-09, "loss": 16.5, "step": 28979 }, { "epoch": 1.9246861924686192, "grad_norm": 225.50917053222656, "learning_rate": 7.428383673193539e-09, "loss": 13.5938, "step": 28980 }, { "epoch": 1.924752606760975, "grad_norm": 167.98794555664062, "learning_rate": 7.415304452473625e-09, "loss": 17.3125, "step": 28981 }, { "epoch": 1.9248190210533307, "grad_norm": 136.27198791503906, "learning_rate": 7.402236713401122e-09, "loss": 17.2031, "step": 28982 }, { "epoch": 1.9248854353456863, "grad_norm": 275.81036376953125, "learning_rate": 7.389180456127242e-09, "loss": 15.1406, "step": 28983 }, { "epoch": 1.9249518496380422, "grad_norm": 102.75628662109375, "learning_rate": 7.376135680802864e-09, "loss": 13.4062, "step": 28984 }, { "epoch": 1.9250182639303979, "grad_norm": 164.83534240722656, "learning_rate": 7.363102387578979e-09, "loss": 16.5625, "step": 28985 }, { "epoch": 1.9250846782227535, "grad_norm": 240.2567901611328, "learning_rate": 7.350080576606354e-09, "loss": 14.9844, "step": 28986 }, { "epoch": 1.9251510925151094, "grad_norm": 433.16064453125, "learning_rate": 7.3370702480356486e-09, "loss": 20.125, "step": 28987 }, { "epoch": 1.9252175068074648, "grad_norm": 184.779052734375, "learning_rate": 7.324071402017295e-09, "loss": 17.2031, "step": 28988 }, { "epoch": 1.9252839210998207, "grad_norm": 355.0743103027344, "learning_rate": 7.311084038701621e-09, "loss": 14.1875, "step": 28989 }, { "epoch": 1.9253503353921764, "grad_norm": 263.4293518066406, "learning_rate": 7.298108158238947e-09, "loss": 14.6875, "step": 28990 }, { "epoch": 1.925416749684532, "grad_norm": 131.43453979492188, "learning_rate": 7.2851437607793776e-09, "loss": 12.3125, "step": 28991 }, { "epoch": 1.925483163976888, "grad_norm": 379.9932861328125, "learning_rate": 7.272190846472792e-09, "loss": 15.8438, "step": 28992 }, { "epoch": 1.9255495782692436, "grad_norm": 201.11647033691406, "learning_rate": 7.25924941546896e-09, "loss": 16.6875, "step": 28993 }, { "epoch": 1.9256159925615992, "grad_norm": 152.76622009277344, "learning_rate": 7.246319467917761e-09, "loss": 15.25, "step": 28994 }, { "epoch": 1.925682406853955, "grad_norm": 271.0065002441406, "learning_rate": 7.233401003968742e-09, "loss": 12.8125, "step": 28995 }, { "epoch": 1.9257488211463107, "grad_norm": 228.9629364013672, "learning_rate": 7.220494023771228e-09, "loss": 18.1719, "step": 28996 }, { "epoch": 1.9258152354386664, "grad_norm": 192.4558563232422, "learning_rate": 7.207598527474656e-09, "loss": 14.4219, "step": 28997 }, { "epoch": 1.9258816497310223, "grad_norm": 259.9837646484375, "learning_rate": 7.194714515228018e-09, "loss": 18.8438, "step": 28998 }, { "epoch": 1.9259480640233777, "grad_norm": 109.02729797363281, "learning_rate": 7.181841987180415e-09, "loss": 12.6406, "step": 28999 }, { "epoch": 1.9260144783157336, "grad_norm": 4013.60693359375, "learning_rate": 7.16898094348084e-09, "loss": 16.0312, "step": 29000 }, { "epoch": 1.9260808926080892, "grad_norm": 264.6934509277344, "learning_rate": 7.156131384277953e-09, "loss": 17.2188, "step": 29001 }, { "epoch": 1.9261473069004449, "grad_norm": 231.1672821044922, "learning_rate": 7.143293309720411e-09, "loss": 12.4531, "step": 29002 }, { "epoch": 1.9262137211928008, "grad_norm": 206.86471557617188, "learning_rate": 7.130466719956763e-09, "loss": 15.0938, "step": 29003 }, { "epoch": 1.9262801354851564, "grad_norm": 180.68359375, "learning_rate": 7.117651615135334e-09, "loss": 21.1562, "step": 29004 }, { "epoch": 1.926346549777512, "grad_norm": 393.352294921875, "learning_rate": 7.10484799540445e-09, "loss": 18.7656, "step": 29005 }, { "epoch": 1.926412964069868, "grad_norm": 219.03916931152344, "learning_rate": 7.092055860912105e-09, "loss": 14.1953, "step": 29006 }, { "epoch": 1.9264793783622236, "grad_norm": 238.08901977539062, "learning_rate": 7.07927521180629e-09, "loss": 14.8281, "step": 29007 }, { "epoch": 1.9265457926545793, "grad_norm": 378.1111755371094, "learning_rate": 7.066506048234888e-09, "loss": 17.3125, "step": 29008 }, { "epoch": 1.9266122069469351, "grad_norm": 196.67039489746094, "learning_rate": 7.053748370345558e-09, "loss": 16.2188, "step": 29009 }, { "epoch": 1.9266786212392906, "grad_norm": 168.8986358642578, "learning_rate": 7.0410021782859595e-09, "loss": 16.5312, "step": 29010 }, { "epoch": 1.9267450355316464, "grad_norm": 308.7221374511719, "learning_rate": 7.02826747220342e-09, "loss": 15.5469, "step": 29011 }, { "epoch": 1.926811449824002, "grad_norm": 181.05943298339844, "learning_rate": 7.015544252245375e-09, "loss": 10.5156, "step": 29012 }, { "epoch": 1.9268778641163578, "grad_norm": 587.3541870117188, "learning_rate": 7.002832518558821e-09, "loss": 16.2031, "step": 29013 }, { "epoch": 1.9269442784087136, "grad_norm": 93.33770751953125, "learning_rate": 6.990132271291082e-09, "loss": 13.9219, "step": 29014 }, { "epoch": 1.9270106927010693, "grad_norm": 404.8390197753906, "learning_rate": 6.977443510588709e-09, "loss": 18.5938, "step": 29015 }, { "epoch": 1.927077106993425, "grad_norm": 196.49002075195312, "learning_rate": 6.964766236598696e-09, "loss": 13.6406, "step": 29016 }, { "epoch": 1.9271435212857808, "grad_norm": 234.06805419921875, "learning_rate": 6.9521004494677016e-09, "loss": 14.375, "step": 29017 }, { "epoch": 1.9272099355781365, "grad_norm": 345.1189880371094, "learning_rate": 6.939446149342165e-09, "loss": 18.3281, "step": 29018 }, { "epoch": 1.9272763498704921, "grad_norm": 243.01690673828125, "learning_rate": 6.926803336368414e-09, "loss": 18.375, "step": 29019 }, { "epoch": 1.927342764162848, "grad_norm": 323.33905029296875, "learning_rate": 6.914172010692887e-09, "loss": 21.8906, "step": 29020 }, { "epoch": 1.9274091784552034, "grad_norm": 295.3172607421875, "learning_rate": 6.9015521724614665e-09, "loss": 12.6562, "step": 29021 }, { "epoch": 1.9274755927475593, "grad_norm": 125.54110717773438, "learning_rate": 6.88894382182037e-09, "loss": 13.0625, "step": 29022 }, { "epoch": 1.927542007039915, "grad_norm": 335.57275390625, "learning_rate": 6.876346958915258e-09, "loss": 14.4062, "step": 29023 }, { "epoch": 1.9276084213322706, "grad_norm": 123.16934204101562, "learning_rate": 6.863761583891792e-09, "loss": 14.5469, "step": 29024 }, { "epoch": 1.9276748356246265, "grad_norm": 160.27305603027344, "learning_rate": 6.851187696895744e-09, "loss": 21.7969, "step": 29025 }, { "epoch": 1.9277412499169821, "grad_norm": 155.79689025878906, "learning_rate": 6.838625298072442e-09, "loss": 15.625, "step": 29026 }, { "epoch": 1.9278076642093378, "grad_norm": 152.7315216064453, "learning_rate": 6.826074387567216e-09, "loss": 14.8594, "step": 29027 }, { "epoch": 1.9278740785016937, "grad_norm": 480.8712158203125, "learning_rate": 6.8135349655253915e-09, "loss": 13.9062, "step": 29028 }, { "epoch": 1.9279404927940493, "grad_norm": 193.32687377929688, "learning_rate": 6.801007032091744e-09, "loss": 23.5781, "step": 29029 }, { "epoch": 1.928006907086405, "grad_norm": 153.86343383789062, "learning_rate": 6.7884905874113775e-09, "loss": 14.5078, "step": 29030 }, { "epoch": 1.9280733213787609, "grad_norm": 304.5033264160156, "learning_rate": 6.7759856316290665e-09, "loss": 14.4844, "step": 29031 }, { "epoch": 1.9281397356711163, "grad_norm": 209.5664520263672, "learning_rate": 6.763492164889472e-09, "loss": 13.0312, "step": 29032 }, { "epoch": 1.9282061499634722, "grad_norm": 107.58078002929688, "learning_rate": 6.7510101873369256e-09, "loss": 14.5312, "step": 29033 }, { "epoch": 1.9282725642558278, "grad_norm": 145.98208618164062, "learning_rate": 6.738539699116086e-09, "loss": 13.1641, "step": 29034 }, { "epoch": 1.9283389785481835, "grad_norm": 216.7095489501953, "learning_rate": 6.726080700370951e-09, "loss": 11.7578, "step": 29035 }, { "epoch": 1.9284053928405394, "grad_norm": 227.94305419921875, "learning_rate": 6.713633191245849e-09, "loss": 15.5312, "step": 29036 }, { "epoch": 1.928471807132895, "grad_norm": 197.5819854736328, "learning_rate": 6.701197171884665e-09, "loss": 19.2656, "step": 29037 }, { "epoch": 1.9285382214252507, "grad_norm": 207.6213836669922, "learning_rate": 6.688772642431284e-09, "loss": 17.2188, "step": 29038 }, { "epoch": 1.9286046357176065, "grad_norm": 288.4000549316406, "learning_rate": 6.676359603029369e-09, "loss": 13.125, "step": 29039 }, { "epoch": 1.9286710500099622, "grad_norm": 251.32725524902344, "learning_rate": 6.663958053822472e-09, "loss": 17.3281, "step": 29040 }, { "epoch": 1.9287374643023178, "grad_norm": 258.25115966796875, "learning_rate": 6.651567994954255e-09, "loss": 14.7812, "step": 29041 }, { "epoch": 1.9288038785946737, "grad_norm": 152.872802734375, "learning_rate": 6.639189426567826e-09, "loss": 15.2344, "step": 29042 }, { "epoch": 1.9288702928870292, "grad_norm": 239.71197509765625, "learning_rate": 6.626822348806405e-09, "loss": 20.5312, "step": 29043 }, { "epoch": 1.928936707179385, "grad_norm": 94.43968963623047, "learning_rate": 6.614466761813209e-09, "loss": 10.5781, "step": 29044 }, { "epoch": 1.9290031214717407, "grad_norm": 270.33587646484375, "learning_rate": 6.6021226657310136e-09, "loss": 23.6094, "step": 29045 }, { "epoch": 1.9290695357640963, "grad_norm": 141.18003845214844, "learning_rate": 6.589790060702594e-09, "loss": 11.4219, "step": 29046 }, { "epoch": 1.9291359500564522, "grad_norm": 206.78196716308594, "learning_rate": 6.5774689468706125e-09, "loss": 14.7656, "step": 29047 }, { "epoch": 1.9292023643488079, "grad_norm": 107.96134185791016, "learning_rate": 6.565159324377734e-09, "loss": 13.4375, "step": 29048 }, { "epoch": 1.9292687786411635, "grad_norm": 252.3624725341797, "learning_rate": 6.552861193366177e-09, "loss": 14.7031, "step": 29049 }, { "epoch": 1.9293351929335194, "grad_norm": 122.93614196777344, "learning_rate": 6.540574553978273e-09, "loss": 13.0, "step": 29050 }, { "epoch": 1.929401607225875, "grad_norm": 109.54667663574219, "learning_rate": 6.52829940635613e-09, "loss": 15.625, "step": 29051 }, { "epoch": 1.9294680215182307, "grad_norm": 288.96978759765625, "learning_rate": 6.516035750641747e-09, "loss": 20.2031, "step": 29052 }, { "epoch": 1.9295344358105866, "grad_norm": 244.69334411621094, "learning_rate": 6.503783586977008e-09, "loss": 16.8125, "step": 29053 }, { "epoch": 1.929600850102942, "grad_norm": 256.5703125, "learning_rate": 6.4915429155035784e-09, "loss": 18.0781, "step": 29054 }, { "epoch": 1.929667264395298, "grad_norm": 126.322998046875, "learning_rate": 6.479313736363235e-09, "loss": 13.2344, "step": 29055 }, { "epoch": 1.9297336786876536, "grad_norm": 96.41287231445312, "learning_rate": 6.467096049697196e-09, "loss": 11.0938, "step": 29056 }, { "epoch": 1.9298000929800092, "grad_norm": 247.49850463867188, "learning_rate": 6.454889855646795e-09, "loss": 17.0625, "step": 29057 }, { "epoch": 1.929866507272365, "grad_norm": 276.04986572265625, "learning_rate": 6.442695154353472e-09, "loss": 16.2656, "step": 29058 }, { "epoch": 1.9299329215647207, "grad_norm": 227.14637756347656, "learning_rate": 6.430511945958006e-09, "loss": 13.3125, "step": 29059 }, { "epoch": 1.9299993358570764, "grad_norm": 174.7765350341797, "learning_rate": 6.418340230601505e-09, "loss": 17.1172, "step": 29060 }, { "epoch": 1.9300657501494323, "grad_norm": 242.845458984375, "learning_rate": 6.406180008424744e-09, "loss": 16.875, "step": 29061 }, { "epoch": 1.930132164441788, "grad_norm": 258.55230712890625, "learning_rate": 6.394031279568279e-09, "loss": 20.75, "step": 29062 }, { "epoch": 1.9301985787341436, "grad_norm": 227.5668487548828, "learning_rate": 6.3818940441727754e-09, "loss": 19.5156, "step": 29063 }, { "epoch": 1.9302649930264995, "grad_norm": 238.40496826171875, "learning_rate": 6.369768302378564e-09, "loss": 13.3594, "step": 29064 }, { "epoch": 1.9303314073188549, "grad_norm": 188.6115264892578, "learning_rate": 6.357654054325867e-09, "loss": 13.3594, "step": 29065 }, { "epoch": 1.9303978216112108, "grad_norm": 543.2522583007812, "learning_rate": 6.3455513001549056e-09, "loss": 19.0625, "step": 29066 }, { "epoch": 1.9304642359035664, "grad_norm": 663.6454467773438, "learning_rate": 6.3334600400056785e-09, "loss": 14.9844, "step": 29067 }, { "epoch": 1.930530650195922, "grad_norm": 174.24168395996094, "learning_rate": 6.321380274017962e-09, "loss": 13.5938, "step": 29068 }, { "epoch": 1.930597064488278, "grad_norm": 254.2684326171875, "learning_rate": 6.309312002331535e-09, "loss": 17.5781, "step": 29069 }, { "epoch": 1.9306634787806336, "grad_norm": 222.01268005371094, "learning_rate": 6.2972552250859515e-09, "loss": 13.8984, "step": 29070 }, { "epoch": 1.9307298930729893, "grad_norm": 204.44590759277344, "learning_rate": 6.285209942420766e-09, "loss": 16.4688, "step": 29071 }, { "epoch": 1.9307963073653451, "grad_norm": 219.1608428955078, "learning_rate": 6.273176154475202e-09, "loss": 20.8281, "step": 29072 }, { "epoch": 1.9308627216577008, "grad_norm": 590.6981201171875, "learning_rate": 6.2611538613885904e-09, "loss": 19.8516, "step": 29073 }, { "epoch": 1.9309291359500564, "grad_norm": 166.19290161132812, "learning_rate": 6.249143063299822e-09, "loss": 17.0625, "step": 29074 }, { "epoch": 1.9309955502424123, "grad_norm": 144.32464599609375, "learning_rate": 6.237143760348118e-09, "loss": 12.2656, "step": 29075 }, { "epoch": 1.9310619645347677, "grad_norm": 162.38253784179688, "learning_rate": 6.225155952671924e-09, "loss": 12.8594, "step": 29076 }, { "epoch": 1.9311283788271236, "grad_norm": 211.17495727539062, "learning_rate": 6.213179640410126e-09, "loss": 15.7188, "step": 29077 }, { "epoch": 1.9311947931194793, "grad_norm": 278.856201171875, "learning_rate": 6.201214823701284e-09, "loss": 13.5, "step": 29078 }, { "epoch": 1.931261207411835, "grad_norm": 581.1757202148438, "learning_rate": 6.189261502683618e-09, "loss": 18.8438, "step": 29079 }, { "epoch": 1.9313276217041908, "grad_norm": 1079.1373291015625, "learning_rate": 6.1773196774955735e-09, "loss": 13.3438, "step": 29080 }, { "epoch": 1.9313940359965465, "grad_norm": 265.3814697265625, "learning_rate": 6.165389348275262e-09, "loss": 15.2031, "step": 29081 }, { "epoch": 1.9314604502889021, "grad_norm": 152.69070434570312, "learning_rate": 6.1534705151605745e-09, "loss": 10.5938, "step": 29082 }, { "epoch": 1.931526864581258, "grad_norm": 178.52317810058594, "learning_rate": 6.141563178289622e-09, "loss": 15.5547, "step": 29083 }, { "epoch": 1.9315932788736137, "grad_norm": 274.0249328613281, "learning_rate": 6.129667337799849e-09, "loss": 14.6875, "step": 29084 }, { "epoch": 1.9316596931659693, "grad_norm": 153.6446990966797, "learning_rate": 6.117782993828924e-09, "loss": 14.6562, "step": 29085 }, { "epoch": 1.9317261074583252, "grad_norm": 156.6508331298828, "learning_rate": 6.105910146514404e-09, "loss": 11.8281, "step": 29086 }, { "epoch": 1.9317925217506806, "grad_norm": 346.92767333984375, "learning_rate": 6.094048795993622e-09, "loss": 17.0, "step": 29087 }, { "epoch": 1.9318589360430365, "grad_norm": 176.41807556152344, "learning_rate": 6.0821989424038044e-09, "loss": 12.3203, "step": 29088 }, { "epoch": 1.9319253503353921, "grad_norm": 354.47991943359375, "learning_rate": 6.070360585881951e-09, "loss": 16.3125, "step": 29089 }, { "epoch": 1.9319917646277478, "grad_norm": 357.6184387207031, "learning_rate": 6.058533726565063e-09, "loss": 19.7188, "step": 29090 }, { "epoch": 1.9320581789201037, "grad_norm": 349.0919494628906, "learning_rate": 6.04671836458992e-09, "loss": 15.9297, "step": 29091 }, { "epoch": 1.9321245932124593, "grad_norm": 1077.99169921875, "learning_rate": 6.034914500093191e-09, "loss": 15.1172, "step": 29092 }, { "epoch": 1.932191007504815, "grad_norm": 413.2481994628906, "learning_rate": 6.023122133211323e-09, "loss": 23.6719, "step": 29093 }, { "epoch": 1.9322574217971709, "grad_norm": 417.87322998046875, "learning_rate": 6.011341264080983e-09, "loss": 22.2031, "step": 29094 }, { "epoch": 1.9323238360895265, "grad_norm": 364.3816833496094, "learning_rate": 5.9995718928381735e-09, "loss": 12.1875, "step": 29095 }, { "epoch": 1.9323902503818822, "grad_norm": 210.21778869628906, "learning_rate": 5.987814019619119e-09, "loss": 15.625, "step": 29096 }, { "epoch": 1.932456664674238, "grad_norm": 166.16427612304688, "learning_rate": 5.976067644559934e-09, "loss": 15.8438, "step": 29097 }, { "epoch": 1.9325230789665935, "grad_norm": 333.8625183105469, "learning_rate": 5.964332767796399e-09, "loss": 15.4688, "step": 29098 }, { "epoch": 1.9325894932589494, "grad_norm": 162.81423950195312, "learning_rate": 5.952609389464181e-09, "loss": 15.875, "step": 29099 }, { "epoch": 1.932655907551305, "grad_norm": 202.4944305419922, "learning_rate": 5.940897509699061e-09, "loss": 16.4688, "step": 29100 }, { "epoch": 1.9327223218436607, "grad_norm": 209.23834228515625, "learning_rate": 5.929197128636376e-09, "loss": 15.7188, "step": 29101 }, { "epoch": 1.9327887361360165, "grad_norm": 151.1124267578125, "learning_rate": 5.917508246411573e-09, "loss": 13.7109, "step": 29102 }, { "epoch": 1.9328551504283722, "grad_norm": 176.95716857910156, "learning_rate": 5.905830863159878e-09, "loss": 13.1875, "step": 29103 }, { "epoch": 1.9329215647207278, "grad_norm": 400.3356628417969, "learning_rate": 5.894164979016181e-09, "loss": 16.0625, "step": 29104 }, { "epoch": 1.9329879790130837, "grad_norm": 298.5389709472656, "learning_rate": 5.8825105941155976e-09, "loss": 14.8281, "step": 29105 }, { "epoch": 1.9330543933054394, "grad_norm": 215.8662872314453, "learning_rate": 5.870867708592908e-09, "loss": 19.9375, "step": 29106 }, { "epoch": 1.933120807597795, "grad_norm": 430.4334716796875, "learning_rate": 5.859236322582672e-09, "loss": 17.8125, "step": 29107 }, { "epoch": 1.933187221890151, "grad_norm": 611.421142578125, "learning_rate": 5.847616436219671e-09, "loss": 18.4375, "step": 29108 }, { "epoch": 1.9332536361825063, "grad_norm": 253.3699493408203, "learning_rate": 5.83600804963813e-09, "loss": 15.3438, "step": 29109 }, { "epoch": 1.9333200504748622, "grad_norm": 387.5494384765625, "learning_rate": 5.824411162972387e-09, "loss": 17.5156, "step": 29110 }, { "epoch": 1.9333864647672179, "grad_norm": 378.7625732421875, "learning_rate": 5.8128257763565555e-09, "loss": 16.8906, "step": 29111 }, { "epoch": 1.9334528790595735, "grad_norm": 547.156005859375, "learning_rate": 5.801251889924641e-09, "loss": 17.2031, "step": 29112 }, { "epoch": 1.9335192933519294, "grad_norm": 137.98458862304688, "learning_rate": 5.789689503810646e-09, "loss": 12.3594, "step": 29113 }, { "epoch": 1.933585707644285, "grad_norm": 146.921142578125, "learning_rate": 5.7781386181482426e-09, "loss": 16.1875, "step": 29114 }, { "epoch": 1.9336521219366407, "grad_norm": 196.85592651367188, "learning_rate": 5.766599233070879e-09, "loss": 13.1562, "step": 29115 }, { "epoch": 1.9337185362289966, "grad_norm": 106.71289825439453, "learning_rate": 5.755071348712337e-09, "loss": 15.9219, "step": 29116 }, { "epoch": 1.9337849505213522, "grad_norm": 328.6888122558594, "learning_rate": 5.743554965205732e-09, "loss": 15.1719, "step": 29117 }, { "epoch": 1.933851364813708, "grad_norm": 489.193603515625, "learning_rate": 5.732050082684403e-09, "loss": 15.0156, "step": 29118 }, { "epoch": 1.9339177791060638, "grad_norm": 233.8013153076172, "learning_rate": 5.720556701281354e-09, "loss": 12.9062, "step": 29119 }, { "epoch": 1.9339841933984192, "grad_norm": 144.41490173339844, "learning_rate": 5.709074821129589e-09, "loss": 13.0625, "step": 29120 }, { "epoch": 1.934050607690775, "grad_norm": 176.13845825195312, "learning_rate": 5.697604442361892e-09, "loss": 12.2344, "step": 29121 }, { "epoch": 1.9341170219831307, "grad_norm": 255.85316467285156, "learning_rate": 5.6861455651109334e-09, "loss": 15.7969, "step": 29122 }, { "epoch": 1.9341834362754864, "grad_norm": 261.1791687011719, "learning_rate": 5.674698189509386e-09, "loss": 12.4844, "step": 29123 }, { "epoch": 1.9342498505678423, "grad_norm": 136.73081970214844, "learning_rate": 5.663262315689476e-09, "loss": 16.0, "step": 29124 }, { "epoch": 1.934316264860198, "grad_norm": 272.322265625, "learning_rate": 5.651837943783655e-09, "loss": 19.2969, "step": 29125 }, { "epoch": 1.9343826791525536, "grad_norm": 240.3119354248047, "learning_rate": 5.640425073924038e-09, "loss": 18.4219, "step": 29126 }, { "epoch": 1.9344490934449095, "grad_norm": 367.91943359375, "learning_rate": 5.629023706242519e-09, "loss": 14.7969, "step": 29127 }, { "epoch": 1.934515507737265, "grad_norm": 153.31077575683594, "learning_rate": 5.617633840870994e-09, "loss": 16.2031, "step": 29128 }, { "epoch": 1.9345819220296208, "grad_norm": 142.981689453125, "learning_rate": 5.606255477941357e-09, "loss": 17.3438, "step": 29129 }, { "epoch": 1.9346483363219766, "grad_norm": 119.46373748779297, "learning_rate": 5.59488861758517e-09, "loss": 14.7812, "step": 29130 }, { "epoch": 1.934714750614332, "grad_norm": 127.3603515625, "learning_rate": 5.583533259933992e-09, "loss": 15.6562, "step": 29131 }, { "epoch": 1.934781164906688, "grad_norm": 141.41200256347656, "learning_rate": 5.572189405118944e-09, "loss": 14.7812, "step": 29132 }, { "epoch": 1.9348475791990436, "grad_norm": 610.8541259765625, "learning_rate": 5.560857053271473e-09, "loss": 24.1562, "step": 29133 }, { "epoch": 1.9349139934913993, "grad_norm": 313.1514892578125, "learning_rate": 5.549536204522587e-09, "loss": 12.5156, "step": 29134 }, { "epoch": 1.9349804077837551, "grad_norm": 212.57984924316406, "learning_rate": 5.538226859003181e-09, "loss": 15.375, "step": 29135 }, { "epoch": 1.9350468220761108, "grad_norm": 96.86891174316406, "learning_rate": 5.52692901684415e-09, "loss": 14.7031, "step": 29136 }, { "epoch": 1.9351132363684664, "grad_norm": 108.53472900390625, "learning_rate": 5.515642678176169e-09, "loss": 11.4844, "step": 29137 }, { "epoch": 1.9351796506608223, "grad_norm": 136.2747802734375, "learning_rate": 5.504367843129687e-09, "loss": 17.0781, "step": 29138 }, { "epoch": 1.935246064953178, "grad_norm": 1566.807373046875, "learning_rate": 5.493104511835378e-09, "loss": 15.5469, "step": 29139 }, { "epoch": 1.9353124792455336, "grad_norm": 224.08973693847656, "learning_rate": 5.4818526844232495e-09, "loss": 16.8438, "step": 29140 }, { "epoch": 1.9353788935378895, "grad_norm": 178.30401611328125, "learning_rate": 5.470612361023641e-09, "loss": 16.5938, "step": 29141 }, { "epoch": 1.935445307830245, "grad_norm": 139.86404418945312, "learning_rate": 5.4593835417664494e-09, "loss": 17.1094, "step": 29142 }, { "epoch": 1.9355117221226008, "grad_norm": 185.2494659423828, "learning_rate": 5.448166226781681e-09, "loss": 15.5781, "step": 29143 }, { "epoch": 1.9355781364149565, "grad_norm": 155.10147094726562, "learning_rate": 5.43696041619901e-09, "loss": 18.2188, "step": 29144 }, { "epoch": 1.9356445507073121, "grad_norm": 230.7150115966797, "learning_rate": 5.4257661101481115e-09, "loss": 15.0391, "step": 29145 }, { "epoch": 1.935710964999668, "grad_norm": 337.6761474609375, "learning_rate": 5.414583308758325e-09, "loss": 19.6719, "step": 29146 }, { "epoch": 1.9357773792920236, "grad_norm": 262.8506774902344, "learning_rate": 5.403412012159325e-09, "loss": 17.7188, "step": 29147 }, { "epoch": 1.9358437935843793, "grad_norm": 400.5506286621094, "learning_rate": 5.392252220480009e-09, "loss": 14.8047, "step": 29148 }, { "epoch": 1.9359102078767352, "grad_norm": 247.7913055419922, "learning_rate": 5.381103933849607e-09, "loss": 11.375, "step": 29149 }, { "epoch": 1.9359766221690908, "grad_norm": 233.41012573242188, "learning_rate": 5.369967152397126e-09, "loss": 9.3281, "step": 29150 }, { "epoch": 1.9360430364614465, "grad_norm": 191.80535888671875, "learning_rate": 5.358841876251241e-09, "loss": 15.5156, "step": 29151 }, { "epoch": 1.9361094507538024, "grad_norm": 322.54949951171875, "learning_rate": 5.347728105540739e-09, "loss": 21.0, "step": 29152 }, { "epoch": 1.9361758650461578, "grad_norm": 119.2567138671875, "learning_rate": 5.336625840394293e-09, "loss": 15.75, "step": 29153 }, { "epoch": 1.9362422793385137, "grad_norm": 377.4454650878906, "learning_rate": 5.325535080940025e-09, "loss": 18.7969, "step": 29154 }, { "epoch": 1.9363086936308693, "grad_norm": 107.33785247802734, "learning_rate": 5.314455827306608e-09, "loss": 14.2812, "step": 29155 }, { "epoch": 1.936375107923225, "grad_norm": 116.28313446044922, "learning_rate": 5.303388079621829e-09, "loss": 14.5, "step": 29156 }, { "epoch": 1.9364415222155809, "grad_norm": 110.9703140258789, "learning_rate": 5.292331838013919e-09, "loss": 14.4688, "step": 29157 }, { "epoch": 1.9365079365079365, "grad_norm": 205.4779052734375, "learning_rate": 5.281287102610776e-09, "loss": 16.2656, "step": 29158 }, { "epoch": 1.9365743508002922, "grad_norm": 369.08740234375, "learning_rate": 5.2702538735400755e-09, "loss": 16.0781, "step": 29159 }, { "epoch": 1.936640765092648, "grad_norm": 174.99452209472656, "learning_rate": 5.2592321509294935e-09, "loss": 14.9062, "step": 29160 }, { "epoch": 1.9367071793850037, "grad_norm": 174.08607482910156, "learning_rate": 5.248221934906483e-09, "loss": 16.0781, "step": 29161 }, { "epoch": 1.9367735936773594, "grad_norm": 281.5416564941406, "learning_rate": 5.237223225598497e-09, "loss": 13.8906, "step": 29162 }, { "epoch": 1.9368400079697152, "grad_norm": 723.3052368164062, "learning_rate": 5.2262360231326575e-09, "loss": 24.6562, "step": 29163 }, { "epoch": 1.9369064222620707, "grad_norm": 182.94117736816406, "learning_rate": 5.215260327636195e-09, "loss": 16.0781, "step": 29164 }, { "epoch": 1.9369728365544265, "grad_norm": 316.7005310058594, "learning_rate": 5.204296139235786e-09, "loss": 20.2031, "step": 29165 }, { "epoch": 1.9370392508467822, "grad_norm": 449.5566101074219, "learning_rate": 5.193343458058663e-09, "loss": 14.0781, "step": 29166 }, { "epoch": 1.9371056651391378, "grad_norm": 214.14205932617188, "learning_rate": 5.182402284231169e-09, "loss": 14.4375, "step": 29167 }, { "epoch": 1.9371720794314937, "grad_norm": 160.44580078125, "learning_rate": 5.171472617879979e-09, "loss": 14.875, "step": 29168 }, { "epoch": 1.9372384937238494, "grad_norm": 257.6488037109375, "learning_rate": 5.16055445913155e-09, "loss": 14.3594, "step": 29169 }, { "epoch": 1.937304908016205, "grad_norm": 174.54452514648438, "learning_rate": 5.149647808112334e-09, "loss": 15.9688, "step": 29170 }, { "epoch": 1.937371322308561, "grad_norm": 182.16455078125, "learning_rate": 5.138752664948121e-09, "loss": 16.5469, "step": 29171 }, { "epoch": 1.9374377366009166, "grad_norm": 146.5081024169922, "learning_rate": 5.127869029765252e-09, "loss": 18.2969, "step": 29172 }, { "epoch": 1.9375041508932722, "grad_norm": 326.768310546875, "learning_rate": 5.1169969026895185e-09, "loss": 17.9531, "step": 29173 }, { "epoch": 1.937570565185628, "grad_norm": 271.5592346191406, "learning_rate": 5.106136283846707e-09, "loss": 13.1484, "step": 29174 }, { "epoch": 1.9376369794779835, "grad_norm": 189.34646606445312, "learning_rate": 5.095287173362384e-09, "loss": 15.2656, "step": 29175 }, { "epoch": 1.9377033937703394, "grad_norm": 286.9380798339844, "learning_rate": 5.084449571362004e-09, "loss": 20.7969, "step": 29176 }, { "epoch": 1.937769808062695, "grad_norm": 713.3358764648438, "learning_rate": 5.073623477971134e-09, "loss": 19.7344, "step": 29177 }, { "epoch": 1.9378362223550507, "grad_norm": 226.42581176757812, "learning_rate": 5.062808893314785e-09, "loss": 10.1797, "step": 29178 }, { "epoch": 1.9379026366474066, "grad_norm": 146.837158203125, "learning_rate": 5.05200581751819e-09, "loss": 16.7031, "step": 29179 }, { "epoch": 1.9379690509397622, "grad_norm": 143.51541137695312, "learning_rate": 5.04121425070636e-09, "loss": 12.2188, "step": 29180 }, { "epoch": 1.938035465232118, "grad_norm": 284.4769592285156, "learning_rate": 5.030434193003863e-09, "loss": 14.9062, "step": 29181 }, { "epoch": 1.9381018795244738, "grad_norm": 169.017333984375, "learning_rate": 5.019665644535709e-09, "loss": 13.2344, "step": 29182 }, { "epoch": 1.9381682938168294, "grad_norm": 251.04306030273438, "learning_rate": 5.008908605426243e-09, "loss": 16.4531, "step": 29183 }, { "epoch": 1.938234708109185, "grad_norm": 951.5508422851562, "learning_rate": 4.998163075800032e-09, "loss": 19.5, "step": 29184 }, { "epoch": 1.938301122401541, "grad_norm": 323.8304443359375, "learning_rate": 4.9874290557813114e-09, "loss": 10.25, "step": 29185 }, { "epoch": 1.9383675366938964, "grad_norm": 175.1061553955078, "learning_rate": 4.976706545494314e-09, "loss": 18.5312, "step": 29186 }, { "epoch": 1.9384339509862523, "grad_norm": 471.0245666503906, "learning_rate": 4.9659955450628284e-09, "loss": 24.5938, "step": 29187 }, { "epoch": 1.938500365278608, "grad_norm": 681.8290405273438, "learning_rate": 4.9552960546110915e-09, "loss": 14.2969, "step": 29188 }, { "epoch": 1.9385667795709636, "grad_norm": 126.30872344970703, "learning_rate": 4.944608074262669e-09, "loss": 13.4219, "step": 29189 }, { "epoch": 1.9386331938633194, "grad_norm": 414.6378173828125, "learning_rate": 4.9339316041412394e-09, "loss": 20.4531, "step": 29190 }, { "epoch": 1.938699608155675, "grad_norm": 184.5866241455078, "learning_rate": 4.923266644370261e-09, "loss": 15.5312, "step": 29191 }, { "epoch": 1.9387660224480308, "grad_norm": 166.85023498535156, "learning_rate": 4.9126131950733005e-09, "loss": 12.4141, "step": 29192 }, { "epoch": 1.9388324367403866, "grad_norm": 428.7266845703125, "learning_rate": 4.901971256373261e-09, "loss": 14.3906, "step": 29193 }, { "epoch": 1.9388988510327423, "grad_norm": 179.19178771972656, "learning_rate": 4.8913408283934866e-09, "loss": 14.3438, "step": 29194 }, { "epoch": 1.938965265325098, "grad_norm": 373.1227111816406, "learning_rate": 4.880721911256769e-09, "loss": 16.3281, "step": 29195 }, { "epoch": 1.9390316796174538, "grad_norm": 226.12705993652344, "learning_rate": 4.87011450508612e-09, "loss": 19.4844, "step": 29196 }, { "epoch": 1.9390980939098093, "grad_norm": 743.4547729492188, "learning_rate": 4.859518610004221e-09, "loss": 17.0, "step": 29197 }, { "epoch": 1.9391645082021651, "grad_norm": 179.82481384277344, "learning_rate": 4.848934226133527e-09, "loss": 15.4844, "step": 29198 }, { "epoch": 1.9392309224945208, "grad_norm": 214.22119140625, "learning_rate": 4.838361353596499e-09, "loss": 13.9688, "step": 29199 }, { "epoch": 1.9392973367868764, "grad_norm": 128.89280700683594, "learning_rate": 4.82779999251548e-09, "loss": 13.6562, "step": 29200 }, { "epoch": 1.9393637510792323, "grad_norm": 228.318359375, "learning_rate": 4.817250143012597e-09, "loss": 19.3438, "step": 29201 }, { "epoch": 1.939430165371588, "grad_norm": 197.9193878173828, "learning_rate": 4.806711805209862e-09, "loss": 17.5938, "step": 29202 }, { "epoch": 1.9394965796639436, "grad_norm": 257.35272216796875, "learning_rate": 4.7961849792294e-09, "loss": 12.5469, "step": 29203 }, { "epoch": 1.9395629939562995, "grad_norm": 394.7881164550781, "learning_rate": 4.785669665192671e-09, "loss": 15.0625, "step": 29204 }, { "epoch": 1.9396294082486552, "grad_norm": 117.58777618408203, "learning_rate": 4.775165863221464e-09, "loss": 14.8125, "step": 29205 }, { "epoch": 1.9396958225410108, "grad_norm": 126.95933532714844, "learning_rate": 4.764673573437239e-09, "loss": 18.5781, "step": 29206 }, { "epoch": 1.9397622368333667, "grad_norm": 183.35406494140625, "learning_rate": 4.754192795961454e-09, "loss": 13.8906, "step": 29207 }, { "epoch": 1.9398286511257221, "grad_norm": 136.18600463867188, "learning_rate": 4.743723530915233e-09, "loss": 13.7109, "step": 29208 }, { "epoch": 1.939895065418078, "grad_norm": 191.06723022460938, "learning_rate": 4.733265778419704e-09, "loss": 10.7344, "step": 29209 }, { "epoch": 1.9399614797104336, "grad_norm": 376.9507141113281, "learning_rate": 4.722819538595879e-09, "loss": 14.7188, "step": 29210 }, { "epoch": 1.9400278940027893, "grad_norm": 212.0674591064453, "learning_rate": 4.712384811564551e-09, "loss": 26.2188, "step": 29211 }, { "epoch": 1.9400943082951452, "grad_norm": 165.50148010253906, "learning_rate": 4.701961597446402e-09, "loss": 13.7969, "step": 29212 }, { "epoch": 1.9401607225875008, "grad_norm": 123.4425277709961, "learning_rate": 4.691549896362112e-09, "loss": 14.8906, "step": 29213 }, { "epoch": 1.9402271368798565, "grad_norm": 206.15628051757812, "learning_rate": 4.681149708432031e-09, "loss": 20.2188, "step": 29214 }, { "epoch": 1.9402935511722124, "grad_norm": 130.65313720703125, "learning_rate": 4.670761033776393e-09, "loss": 15.2344, "step": 29215 }, { "epoch": 1.940359965464568, "grad_norm": 181.67059326171875, "learning_rate": 4.660383872515549e-09, "loss": 18.2344, "step": 29216 }, { "epoch": 1.9404263797569237, "grad_norm": 165.41946411132812, "learning_rate": 4.6500182247694034e-09, "loss": 16.5625, "step": 29217 }, { "epoch": 1.9404927940492795, "grad_norm": 215.78970336914062, "learning_rate": 4.639664090657858e-09, "loss": 20.5625, "step": 29218 }, { "epoch": 1.940559208341635, "grad_norm": 137.16165161132812, "learning_rate": 4.6293214703008175e-09, "loss": 11.7188, "step": 29219 }, { "epoch": 1.9406256226339909, "grad_norm": 257.75140380859375, "learning_rate": 4.618990363817743e-09, "loss": 18.3438, "step": 29220 }, { "epoch": 1.9406920369263465, "grad_norm": 774.369873046875, "learning_rate": 4.6086707713283155e-09, "loss": 22.5781, "step": 29221 }, { "epoch": 1.9407584512187022, "grad_norm": 134.314453125, "learning_rate": 4.5983626929516625e-09, "loss": 14.3594, "step": 29222 }, { "epoch": 1.940824865511058, "grad_norm": 181.60440063476562, "learning_rate": 4.588066128807244e-09, "loss": 12.3906, "step": 29223 }, { "epoch": 1.9408912798034137, "grad_norm": 841.3139038085938, "learning_rate": 4.577781079014076e-09, "loss": 25.0, "step": 29224 }, { "epoch": 1.9409576940957693, "grad_norm": 352.0540466308594, "learning_rate": 4.567507543691174e-09, "loss": 16.2344, "step": 29225 }, { "epoch": 1.9410241083881252, "grad_norm": 219.0392303466797, "learning_rate": 4.557245522957331e-09, "loss": 15.1562, "step": 29226 }, { "epoch": 1.9410905226804809, "grad_norm": 369.05731201171875, "learning_rate": 4.5469950169313434e-09, "loss": 19.625, "step": 29227 }, { "epoch": 1.9411569369728365, "grad_norm": 193.94317626953125, "learning_rate": 4.5367560257315585e-09, "loss": 13.5, "step": 29228 }, { "epoch": 1.9412233512651924, "grad_norm": 314.22625732421875, "learning_rate": 4.52652854947666e-09, "loss": 24.5625, "step": 29229 }, { "epoch": 1.9412897655575478, "grad_norm": 233.81895446777344, "learning_rate": 4.516312588284777e-09, "loss": 18.1094, "step": 29230 }, { "epoch": 1.9413561798499037, "grad_norm": 145.1153564453125, "learning_rate": 4.506108142274256e-09, "loss": 17.2031, "step": 29231 }, { "epoch": 1.9414225941422594, "grad_norm": 115.98136901855469, "learning_rate": 4.495915211563006e-09, "loss": 11.3828, "step": 29232 }, { "epoch": 1.941489008434615, "grad_norm": 376.0218200683594, "learning_rate": 4.4857337962689315e-09, "loss": 13.4062, "step": 29233 }, { "epoch": 1.941555422726971, "grad_norm": 165.8031005859375, "learning_rate": 4.475563896509826e-09, "loss": 16.0469, "step": 29234 }, { "epoch": 1.9416218370193266, "grad_norm": 227.1307830810547, "learning_rate": 4.465405512403375e-09, "loss": 27.9531, "step": 29235 }, { "epoch": 1.9416882513116822, "grad_norm": 195.99771118164062, "learning_rate": 4.45525864406715e-09, "loss": 12.6094, "step": 29236 }, { "epoch": 1.941754665604038, "grad_norm": 203.60659790039062, "learning_rate": 4.4451232916182805e-09, "loss": 10.3594, "step": 29237 }, { "epoch": 1.9418210798963937, "grad_norm": 286.4190979003906, "learning_rate": 4.434999455174227e-09, "loss": 17.0, "step": 29238 }, { "epoch": 1.9418874941887494, "grad_norm": 313.8025817871094, "learning_rate": 4.424887134852006e-09, "loss": 22.4844, "step": 29239 }, { "epoch": 1.9419539084811053, "grad_norm": 152.85748291015625, "learning_rate": 4.414786330768639e-09, "loss": 16.2969, "step": 29240 }, { "epoch": 1.9420203227734607, "grad_norm": 150.31982421875, "learning_rate": 4.404697043040917e-09, "loss": 11.5078, "step": 29241 }, { "epoch": 1.9420867370658166, "grad_norm": 242.6800537109375, "learning_rate": 4.394619271785638e-09, "loss": 13.7812, "step": 29242 }, { "epoch": 1.9421531513581722, "grad_norm": 267.87548828125, "learning_rate": 4.384553017119153e-09, "loss": 19.7188, "step": 29243 }, { "epoch": 1.942219565650528, "grad_norm": 93.41510009765625, "learning_rate": 4.3744982791581455e-09, "loss": 13.5312, "step": 29244 }, { "epoch": 1.9422859799428838, "grad_norm": 113.19566345214844, "learning_rate": 4.3644550580188565e-09, "loss": 16.2031, "step": 29245 }, { "epoch": 1.9423523942352394, "grad_norm": 197.9187469482422, "learning_rate": 4.354423353817416e-09, "loss": 14.6875, "step": 29246 }, { "epoch": 1.942418808527595, "grad_norm": 243.8917236328125, "learning_rate": 4.344403166669841e-09, "loss": 20.5312, "step": 29247 }, { "epoch": 1.942485222819951, "grad_norm": 264.8394470214844, "learning_rate": 4.3343944966920395e-09, "loss": 14.4297, "step": 29248 }, { "epoch": 1.9425516371123066, "grad_norm": 128.99436950683594, "learning_rate": 4.324397343999919e-09, "loss": 16.25, "step": 29249 }, { "epoch": 1.9426180514046623, "grad_norm": 197.3368377685547, "learning_rate": 4.314411708709053e-09, "loss": 15.0312, "step": 29250 }, { "epoch": 1.9426844656970181, "grad_norm": 133.21917724609375, "learning_rate": 4.304437590934906e-09, "loss": 15.2344, "step": 29251 }, { "epoch": 1.9427508799893736, "grad_norm": 166.44906616210938, "learning_rate": 4.294474990792829e-09, "loss": 12.8438, "step": 29252 }, { "epoch": 1.9428172942817294, "grad_norm": 274.0035095214844, "learning_rate": 4.284523908398174e-09, "loss": 12.6719, "step": 29253 }, { "epoch": 1.942883708574085, "grad_norm": 202.09286499023438, "learning_rate": 4.27458434386585e-09, "loss": 19.5625, "step": 29254 }, { "epoch": 1.9429501228664408, "grad_norm": 341.1129455566406, "learning_rate": 4.264656297311098e-09, "loss": 27.375, "step": 29255 }, { "epoch": 1.9430165371587966, "grad_norm": 115.1923828125, "learning_rate": 4.254739768848603e-09, "loss": 13.3438, "step": 29256 }, { "epoch": 1.9430829514511523, "grad_norm": 215.90280151367188, "learning_rate": 4.244834758593052e-09, "loss": 23.9844, "step": 29257 }, { "epoch": 1.943149365743508, "grad_norm": 246.46673583984375, "learning_rate": 4.234941266659131e-09, "loss": 18.2344, "step": 29258 }, { "epoch": 1.9432157800358638, "grad_norm": 438.54168701171875, "learning_rate": 4.225059293161193e-09, "loss": 26.625, "step": 29259 }, { "epoch": 1.9432821943282195, "grad_norm": 91.08480834960938, "learning_rate": 4.21518883821359e-09, "loss": 16.0938, "step": 29260 }, { "epoch": 1.9433486086205751, "grad_norm": 122.17262268066406, "learning_rate": 4.205329901930455e-09, "loss": 13.7656, "step": 29261 }, { "epoch": 1.943415022912931, "grad_norm": 523.2134399414062, "learning_rate": 4.195482484425805e-09, "loss": 16.75, "step": 29262 }, { "epoch": 1.9434814372052864, "grad_norm": 137.3948974609375, "learning_rate": 4.185646585813662e-09, "loss": 16.4844, "step": 29263 }, { "epoch": 1.9435478514976423, "grad_norm": 346.5079345703125, "learning_rate": 4.1758222062077134e-09, "loss": 20.9219, "step": 29264 }, { "epoch": 1.943614265789998, "grad_norm": 952.7716064453125, "learning_rate": 4.1660093457216436e-09, "loss": 16.5, "step": 29265 }, { "epoch": 1.9436806800823536, "grad_norm": 470.6528015136719, "learning_rate": 4.15620800446892e-09, "loss": 19.3438, "step": 29266 }, { "epoch": 1.9437470943747095, "grad_norm": 185.9984893798828, "learning_rate": 4.146418182563005e-09, "loss": 23.7969, "step": 29267 }, { "epoch": 1.9438135086670651, "grad_norm": 600.38720703125, "learning_rate": 4.136639880117032e-09, "loss": 14.6719, "step": 29268 }, { "epoch": 1.9438799229594208, "grad_norm": 225.9159698486328, "learning_rate": 4.126873097244132e-09, "loss": 14.9844, "step": 29269 }, { "epoch": 1.9439463372517767, "grad_norm": 116.85958862304688, "learning_rate": 4.117117834057437e-09, "loss": 12.3438, "step": 29270 }, { "epoch": 1.9440127515441323, "grad_norm": 331.2992858886719, "learning_rate": 4.107374090669524e-09, "loss": 13.7812, "step": 29271 }, { "epoch": 1.944079165836488, "grad_norm": 239.33221435546875, "learning_rate": 4.097641867193302e-09, "loss": 18.5469, "step": 29272 }, { "epoch": 1.9441455801288439, "grad_norm": 253.32778930664062, "learning_rate": 4.087921163741348e-09, "loss": 15.7656, "step": 29273 }, { "epoch": 1.9442119944211993, "grad_norm": 483.9154357910156, "learning_rate": 4.078211980425905e-09, "loss": 24.1719, "step": 29274 }, { "epoch": 1.9442784087135552, "grad_norm": 409.5130615234375, "learning_rate": 4.0685143173595504e-09, "loss": 18.2656, "step": 29275 }, { "epoch": 1.9443448230059108, "grad_norm": 149.65692138671875, "learning_rate": 4.058828174654305e-09, "loss": 13.7969, "step": 29276 }, { "epoch": 1.9444112372982665, "grad_norm": 153.70513916015625, "learning_rate": 4.0491535524221905e-09, "loss": 16.8594, "step": 29277 }, { "epoch": 1.9444776515906224, "grad_norm": 378.1625671386719, "learning_rate": 4.039490450775229e-09, "loss": 12.0938, "step": 29278 }, { "epoch": 1.944544065882978, "grad_norm": 228.96376037597656, "learning_rate": 4.029838869825108e-09, "loss": 15.1406, "step": 29279 }, { "epoch": 1.9446104801753337, "grad_norm": 1067.608642578125, "learning_rate": 4.0201988096835175e-09, "loss": 18.5, "step": 29280 }, { "epoch": 1.9446768944676895, "grad_norm": 231.3382110595703, "learning_rate": 4.010570270462033e-09, "loss": 17.2656, "step": 29281 }, { "epoch": 1.9447433087600452, "grad_norm": 664.2277221679688, "learning_rate": 4.000953252271788e-09, "loss": 15.375, "step": 29282 }, { "epoch": 1.9448097230524009, "grad_norm": 166.05252075195312, "learning_rate": 3.99134775522425e-09, "loss": 13.6172, "step": 29283 }, { "epoch": 1.9448761373447567, "grad_norm": 181.58746337890625, "learning_rate": 3.9817537794305524e-09, "loss": 17.5625, "step": 29284 }, { "epoch": 1.9449425516371122, "grad_norm": 300.0530700683594, "learning_rate": 3.972171325001494e-09, "loss": 14.7656, "step": 29285 }, { "epoch": 1.945008965929468, "grad_norm": 224.4257354736328, "learning_rate": 3.962600392047988e-09, "loss": 11.4453, "step": 29286 }, { "epoch": 1.9450753802218237, "grad_norm": 363.42523193359375, "learning_rate": 3.9530409806808326e-09, "loss": 22.3125, "step": 29287 }, { "epoch": 1.9451417945141793, "grad_norm": 165.8362274169922, "learning_rate": 3.943493091010608e-09, "loss": 13.2969, "step": 29288 }, { "epoch": 1.9452082088065352, "grad_norm": 167.9617462158203, "learning_rate": 3.9339567231476685e-09, "loss": 13.0469, "step": 29289 }, { "epoch": 1.9452746230988909, "grad_norm": 121.00296783447266, "learning_rate": 3.924431877202261e-09, "loss": 14.2969, "step": 29290 }, { "epoch": 1.9453410373912465, "grad_norm": 117.074462890625, "learning_rate": 3.91491855328474e-09, "loss": 11.3438, "step": 29291 }, { "epoch": 1.9454074516836024, "grad_norm": 161.0843963623047, "learning_rate": 3.905416751505019e-09, "loss": 13.9688, "step": 29292 }, { "epoch": 1.945473865975958, "grad_norm": 273.5976257324219, "learning_rate": 3.89592647197301e-09, "loss": 17.7031, "step": 29293 }, { "epoch": 1.9455402802683137, "grad_norm": 219.58236694335938, "learning_rate": 3.886447714798624e-09, "loss": 13.9688, "step": 29294 }, { "epoch": 1.9456066945606696, "grad_norm": 284.70587158203125, "learning_rate": 3.876980480091441e-09, "loss": 19.8594, "step": 29295 }, { "epoch": 1.945673108853025, "grad_norm": 171.54034423828125, "learning_rate": 3.867524767960817e-09, "loss": 16.625, "step": 29296 }, { "epoch": 1.945739523145381, "grad_norm": 308.1536560058594, "learning_rate": 3.8580805785164426e-09, "loss": 17.2969, "step": 29297 }, { "epoch": 1.9458059374377366, "grad_norm": 373.0201721191406, "learning_rate": 3.848647911867231e-09, "loss": 17.0625, "step": 29298 }, { "epoch": 1.9458723517300922, "grad_norm": 296.4899597167969, "learning_rate": 3.839226768122539e-09, "loss": 15.5, "step": 29299 }, { "epoch": 1.945938766022448, "grad_norm": 197.4220733642578, "learning_rate": 3.829817147391168e-09, "loss": 18.4375, "step": 29300 }, { "epoch": 1.9460051803148037, "grad_norm": 506.755126953125, "learning_rate": 3.820419049782142e-09, "loss": 16.0469, "step": 29301 }, { "epoch": 1.9460715946071594, "grad_norm": 350.74261474609375, "learning_rate": 3.8110324754040415e-09, "loss": 15.5234, "step": 29302 }, { "epoch": 1.9461380088995153, "grad_norm": 234.6397705078125, "learning_rate": 3.801657424365445e-09, "loss": 16.6562, "step": 29303 }, { "epoch": 1.946204423191871, "grad_norm": 378.99017333984375, "learning_rate": 3.792293896774823e-09, "loss": 15.375, "step": 29304 }, { "epoch": 1.9462708374842266, "grad_norm": 123.46613311767578, "learning_rate": 3.782941892740532e-09, "loss": 12.7812, "step": 29305 }, { "epoch": 1.9463372517765825, "grad_norm": 93.50418090820312, "learning_rate": 3.773601412370819e-09, "loss": 11.2656, "step": 29306 }, { "epoch": 1.9464036660689379, "grad_norm": 210.11314392089844, "learning_rate": 3.764272455773487e-09, "loss": 20.1406, "step": 29307 }, { "epoch": 1.9464700803612938, "grad_norm": 195.8797149658203, "learning_rate": 3.754955023056672e-09, "loss": 16.1562, "step": 29308 }, { "epoch": 1.9465364946536494, "grad_norm": 206.75473022460938, "learning_rate": 3.745649114328064e-09, "loss": 16.9375, "step": 29309 }, { "epoch": 1.946602908946005, "grad_norm": 169.01425170898438, "learning_rate": 3.7363547296952456e-09, "loss": 14.0469, "step": 29310 }, { "epoch": 1.946669323238361, "grad_norm": 195.2554168701172, "learning_rate": 3.7270718692657964e-09, "loss": 15.1562, "step": 29311 }, { "epoch": 1.9467357375307166, "grad_norm": 149.7541046142578, "learning_rate": 3.7178005331471863e-09, "loss": 12.2656, "step": 29312 }, { "epoch": 1.9468021518230723, "grad_norm": 806.6689453125, "learning_rate": 3.7085407214464403e-09, "loss": 14.3594, "step": 29313 }, { "epoch": 1.9468685661154281, "grad_norm": 243.35987854003906, "learning_rate": 3.6992924342709174e-09, "loss": 14.8125, "step": 29314 }, { "epoch": 1.9469349804077838, "grad_norm": 173.84373474121094, "learning_rate": 3.690055671727421e-09, "loss": 17.2344, "step": 29315 }, { "epoch": 1.9470013947001394, "grad_norm": 319.7431640625, "learning_rate": 3.680830433922977e-09, "loss": 17.7969, "step": 29316 }, { "epoch": 1.9470678089924953, "grad_norm": 374.4164123535156, "learning_rate": 3.6716167209640547e-09, "loss": 11.4609, "step": 29317 }, { "epoch": 1.9471342232848508, "grad_norm": 209.19102478027344, "learning_rate": 3.6624145329573474e-09, "loss": 18.5625, "step": 29318 }, { "epoch": 1.9472006375772066, "grad_norm": 249.3365936279297, "learning_rate": 3.6532238700093252e-09, "loss": 17.0312, "step": 29319 }, { "epoch": 1.9472670518695623, "grad_norm": 538.1400756835938, "learning_rate": 3.644044732226348e-09, "loss": 22.4531, "step": 29320 }, { "epoch": 1.947333466161918, "grad_norm": 205.1349334716797, "learning_rate": 3.6348771197144413e-09, "loss": 18.2656, "step": 29321 }, { "epoch": 1.9473998804542738, "grad_norm": 166.6787109375, "learning_rate": 3.6257210325797427e-09, "loss": 14.3906, "step": 29322 }, { "epoch": 1.9474662947466295, "grad_norm": 247.2721405029297, "learning_rate": 3.6165764709281675e-09, "loss": 16.375, "step": 29323 }, { "epoch": 1.9475327090389851, "grad_norm": 216.4747314453125, "learning_rate": 3.60744343486552e-09, "loss": 19.4219, "step": 29324 }, { "epoch": 1.947599123331341, "grad_norm": 144.54331970214844, "learning_rate": 3.598321924497383e-09, "loss": 17.0, "step": 29325 }, { "epoch": 1.9476655376236967, "grad_norm": 240.81675720214844, "learning_rate": 3.5892119399293375e-09, "loss": 18.4844, "step": 29326 }, { "epoch": 1.9477319519160523, "grad_norm": 179.69882202148438, "learning_rate": 3.5801134812667445e-09, "loss": 13.7188, "step": 29327 }, { "epoch": 1.9477983662084082, "grad_norm": 153.35910034179688, "learning_rate": 3.571026548614853e-09, "loss": 10.3984, "step": 29328 }, { "epoch": 1.9478647805007636, "grad_norm": 167.95384216308594, "learning_rate": 3.5619511420785785e-09, "loss": 14.8125, "step": 29329 }, { "epoch": 1.9479311947931195, "grad_norm": 112.28236389160156, "learning_rate": 3.5528872617632822e-09, "loss": 14.9844, "step": 29330 }, { "epoch": 1.9479976090854751, "grad_norm": 173.22235107421875, "learning_rate": 3.5438349077734352e-09, "loss": 14.2812, "step": 29331 }, { "epoch": 1.9480640233778308, "grad_norm": 154.9235382080078, "learning_rate": 3.534794080214065e-09, "loss": 14.25, "step": 29332 }, { "epoch": 1.9481304376701867, "grad_norm": 458.0746765136719, "learning_rate": 3.525764779189533e-09, "loss": 17.9219, "step": 29333 }, { "epoch": 1.9481968519625423, "grad_norm": 244.28916931152344, "learning_rate": 3.5167470048043102e-09, "loss": 13.1719, "step": 29334 }, { "epoch": 1.948263266254898, "grad_norm": 144.05087280273438, "learning_rate": 3.5077407571627583e-09, "loss": 16.2344, "step": 29335 }, { "epoch": 1.9483296805472539, "grad_norm": 111.8706283569336, "learning_rate": 3.498746036369127e-09, "loss": 11.2812, "step": 29336 }, { "epoch": 1.9483960948396095, "grad_norm": 444.472412109375, "learning_rate": 3.489762842527333e-09, "loss": 12.9375, "step": 29337 }, { "epoch": 1.9484625091319652, "grad_norm": 186.29542541503906, "learning_rate": 3.4807911757414044e-09, "loss": 14.4375, "step": 29338 }, { "epoch": 1.948528923424321, "grad_norm": 193.41436767578125, "learning_rate": 3.471831036114925e-09, "loss": 17.8125, "step": 29339 }, { "epoch": 1.9485953377166765, "grad_norm": 340.242919921875, "learning_rate": 3.4628824237518116e-09, "loss": 17.6562, "step": 29340 }, { "epoch": 1.9486617520090324, "grad_norm": 174.27415466308594, "learning_rate": 3.4539453387553152e-09, "loss": 14.2656, "step": 29341 }, { "epoch": 1.948728166301388, "grad_norm": 255.09104919433594, "learning_rate": 3.445019781229019e-09, "loss": 16.6875, "step": 29342 }, { "epoch": 1.9487945805937437, "grad_norm": 393.0544128417969, "learning_rate": 3.4361057512760637e-09, "loss": 25.0312, "step": 29343 }, { "epoch": 1.9488609948860995, "grad_norm": 114.85626220703125, "learning_rate": 3.4272032489995884e-09, "loss": 17.3125, "step": 29344 }, { "epoch": 1.9489274091784552, "grad_norm": 195.33328247070312, "learning_rate": 3.4183122745026215e-09, "loss": 15.5312, "step": 29345 }, { "epoch": 1.9489938234708108, "grad_norm": 256.66156005859375, "learning_rate": 3.409432827887859e-09, "loss": 21.4375, "step": 29346 }, { "epoch": 1.9490602377631667, "grad_norm": 200.7644805908203, "learning_rate": 3.400564909258219e-09, "loss": 11.1016, "step": 29347 }, { "epoch": 1.9491266520555224, "grad_norm": 177.94204711914062, "learning_rate": 3.391708518716063e-09, "loss": 14.6875, "step": 29348 }, { "epoch": 1.949193066347878, "grad_norm": 204.22308349609375, "learning_rate": 3.382863656363977e-09, "loss": 13.8438, "step": 29349 }, { "epoch": 1.949259480640234, "grad_norm": 198.7569580078125, "learning_rate": 3.374030322304322e-09, "loss": 18.5312, "step": 29350 }, { "epoch": 1.9493258949325893, "grad_norm": 124.82353210449219, "learning_rate": 3.3652085166391286e-09, "loss": 16.7188, "step": 29351 }, { "epoch": 1.9493923092249452, "grad_norm": 220.74415588378906, "learning_rate": 3.356398239470426e-09, "loss": 14.7656, "step": 29352 }, { "epoch": 1.9494587235173009, "grad_norm": 199.61056518554688, "learning_rate": 3.3475994909003546e-09, "loss": 15.8906, "step": 29353 }, { "epoch": 1.9495251378096565, "grad_norm": 263.8713073730469, "learning_rate": 3.3388122710305e-09, "loss": 24.7031, "step": 29354 }, { "epoch": 1.9495915521020124, "grad_norm": 293.7528381347656, "learning_rate": 3.3300365799625584e-09, "loss": 17.5938, "step": 29355 }, { "epoch": 1.949657966394368, "grad_norm": 1102.552978515625, "learning_rate": 3.3212724177980043e-09, "loss": 18.2188, "step": 29356 }, { "epoch": 1.9497243806867237, "grad_norm": 238.06817626953125, "learning_rate": 3.312519784638312e-09, "loss": 14.6719, "step": 29357 }, { "epoch": 1.9497907949790796, "grad_norm": 208.01695251464844, "learning_rate": 3.3037786805846234e-09, "loss": 15.6016, "step": 29358 }, { "epoch": 1.9498572092714352, "grad_norm": 236.88339233398438, "learning_rate": 3.29504910573819e-09, "loss": 15.6719, "step": 29359 }, { "epoch": 1.949923623563791, "grad_norm": 198.30821228027344, "learning_rate": 3.2863310601997095e-09, "loss": 15.0781, "step": 29360 }, { "epoch": 1.9499900378561468, "grad_norm": 117.38085174560547, "learning_rate": 3.277624544070323e-09, "loss": 13.375, "step": 29361 }, { "epoch": 1.9500564521485022, "grad_norm": 136.6082763671875, "learning_rate": 3.268929557450617e-09, "loss": 13.7031, "step": 29362 }, { "epoch": 1.950122866440858, "grad_norm": 259.5372009277344, "learning_rate": 3.2602461004412886e-09, "loss": 13.75, "step": 29363 }, { "epoch": 1.9501892807332137, "grad_norm": 272.6490173339844, "learning_rate": 3.2515741731425906e-09, "loss": 20.0312, "step": 29364 }, { "epoch": 1.9502556950255694, "grad_norm": 194.80569458007812, "learning_rate": 3.2429137756548875e-09, "loss": 19.2344, "step": 29365 }, { "epoch": 1.9503221093179253, "grad_norm": 242.41587829589844, "learning_rate": 3.2342649080784323e-09, "loss": 19.4375, "step": 29366 }, { "epoch": 1.950388523610281, "grad_norm": 119.66153717041016, "learning_rate": 3.2256275705132564e-09, "loss": 14.4688, "step": 29367 }, { "epoch": 1.9504549379026366, "grad_norm": 597.0059204101562, "learning_rate": 3.217001763059168e-09, "loss": 14.1875, "step": 29368 }, { "epoch": 1.9505213521949925, "grad_norm": 165.0343780517578, "learning_rate": 3.2083874858160887e-09, "loss": 13.5312, "step": 29369 }, { "epoch": 1.950587766487348, "grad_norm": 158.41261291503906, "learning_rate": 3.1997847388836043e-09, "loss": 17.25, "step": 29370 }, { "epoch": 1.9506541807797038, "grad_norm": 407.3310852050781, "learning_rate": 3.1911935223611908e-09, "loss": 19.4219, "step": 29371 }, { "epoch": 1.9507205950720596, "grad_norm": 524.2645874023438, "learning_rate": 3.1826138363483245e-09, "loss": 18.9375, "step": 29372 }, { "epoch": 1.950787009364415, "grad_norm": 164.08460998535156, "learning_rate": 3.1740456809441485e-09, "loss": 16.375, "step": 29373 }, { "epoch": 1.950853423656771, "grad_norm": 125.17808532714844, "learning_rate": 3.1654890562476944e-09, "loss": 13.9688, "step": 29374 }, { "epoch": 1.9509198379491266, "grad_norm": 169.3071746826172, "learning_rate": 3.156943962358216e-09, "loss": 12.7969, "step": 29375 }, { "epoch": 1.9509862522414823, "grad_norm": 235.26441955566406, "learning_rate": 3.1484103993743016e-09, "loss": 20.6172, "step": 29376 }, { "epoch": 1.9510526665338381, "grad_norm": 200.7100372314453, "learning_rate": 3.1398883673948716e-09, "loss": 20.5625, "step": 29377 }, { "epoch": 1.9511190808261938, "grad_norm": 155.13568115234375, "learning_rate": 3.131377866518292e-09, "loss": 9.2891, "step": 29378 }, { "epoch": 1.9511854951185494, "grad_norm": 163.439697265625, "learning_rate": 3.1228788968431507e-09, "loss": 11.9062, "step": 29379 }, { "epoch": 1.9512519094109053, "grad_norm": 346.315673828125, "learning_rate": 3.1143914584677024e-09, "loss": 20.7656, "step": 29380 }, { "epoch": 1.951318323703261, "grad_norm": 96.87432098388672, "learning_rate": 3.1059155514900904e-09, "loss": 13.625, "step": 29381 }, { "epoch": 1.9513847379956166, "grad_norm": 175.55477905273438, "learning_rate": 3.0974511760084587e-09, "loss": 16.0781, "step": 29382 }, { "epoch": 1.9514511522879725, "grad_norm": 2469.767333984375, "learning_rate": 3.0889983321206183e-09, "loss": 16.0625, "step": 29383 }, { "epoch": 1.951517566580328, "grad_norm": 338.80657958984375, "learning_rate": 3.080557019924379e-09, "loss": 17.3438, "step": 29384 }, { "epoch": 1.9515839808726838, "grad_norm": 134.37319946289062, "learning_rate": 3.0721272395174414e-09, "loss": 17.2812, "step": 29385 }, { "epoch": 1.9516503951650395, "grad_norm": 260.7613830566406, "learning_rate": 3.0637089909972823e-09, "loss": 14.0312, "step": 29386 }, { "epoch": 1.9517168094573951, "grad_norm": 307.250732421875, "learning_rate": 3.0553022744612687e-09, "loss": 15.875, "step": 29387 }, { "epoch": 1.951783223749751, "grad_norm": 380.2984619140625, "learning_rate": 3.046907090006656e-09, "loss": 20.7812, "step": 29388 }, { "epoch": 1.9518496380421066, "grad_norm": 215.288818359375, "learning_rate": 3.038523437730589e-09, "loss": 17.1562, "step": 29389 }, { "epoch": 1.9519160523344623, "grad_norm": 173.30979919433594, "learning_rate": 3.030151317729879e-09, "loss": 19.2344, "step": 29390 }, { "epoch": 1.9519824666268182, "grad_norm": 307.5077209472656, "learning_rate": 3.0217907301015587e-09, "loss": 16.1094, "step": 29391 }, { "epoch": 1.9520488809191738, "grad_norm": 197.24913024902344, "learning_rate": 3.01344167494233e-09, "loss": 17.4375, "step": 29392 }, { "epoch": 1.9521152952115295, "grad_norm": 84.88787078857422, "learning_rate": 3.005104152348559e-09, "loss": 12.3828, "step": 29393 }, { "epoch": 1.9521817095038854, "grad_norm": 198.50579833984375, "learning_rate": 2.996778162416946e-09, "loss": 17.6719, "step": 29394 }, { "epoch": 1.9522481237962408, "grad_norm": 265.8388977050781, "learning_rate": 2.9884637052437487e-09, "loss": 17.3281, "step": 29395 }, { "epoch": 1.9523145380885967, "grad_norm": 185.9082489013672, "learning_rate": 2.980160780925001e-09, "loss": 11.1172, "step": 29396 }, { "epoch": 1.9523809523809523, "grad_norm": 246.39093017578125, "learning_rate": 2.971869389556847e-09, "loss": 11.0625, "step": 29397 }, { "epoch": 1.952447366673308, "grad_norm": 388.44903564453125, "learning_rate": 2.963589531235322e-09, "loss": 23.2812, "step": 29398 }, { "epoch": 1.9525137809656639, "grad_norm": 129.9491424560547, "learning_rate": 2.955321206055905e-09, "loss": 15.4375, "step": 29399 }, { "epoch": 1.9525801952580195, "grad_norm": 146.1909637451172, "learning_rate": 2.9470644141145196e-09, "loss": 17.4062, "step": 29400 }, { "epoch": 1.9526466095503752, "grad_norm": 346.9869079589844, "learning_rate": 2.9388191555064224e-09, "loss": 12.7031, "step": 29401 }, { "epoch": 1.952713023842731, "grad_norm": 115.15128326416016, "learning_rate": 2.9305854303272037e-09, "loss": 16.375, "step": 29402 }, { "epoch": 1.9527794381350867, "grad_norm": 383.7506103515625, "learning_rate": 2.9223632386720097e-09, "loss": 15.2344, "step": 29403 }, { "epoch": 1.9528458524274424, "grad_norm": 320.72613525390625, "learning_rate": 2.9141525806359867e-09, "loss": 15.3516, "step": 29404 }, { "epoch": 1.9529122667197982, "grad_norm": 147.74351501464844, "learning_rate": 2.9059534563141696e-09, "loss": 13.0156, "step": 29405 }, { "epoch": 1.9529786810121537, "grad_norm": 183.39175415039062, "learning_rate": 2.8977658658012606e-09, "loss": 15.625, "step": 29406 }, { "epoch": 1.9530450953045095, "grad_norm": 493.7867736816406, "learning_rate": 2.889589809191961e-09, "loss": 15.8594, "step": 29407 }, { "epoch": 1.9531115095968652, "grad_norm": 181.56935119628906, "learning_rate": 2.881425286581085e-09, "loss": 15.75, "step": 29408 }, { "epoch": 1.9531779238892208, "grad_norm": 156.9471435546875, "learning_rate": 2.873272298062779e-09, "loss": 11.7188, "step": 29409 }, { "epoch": 1.9532443381815767, "grad_norm": 746.5023193359375, "learning_rate": 2.8651308437315226e-09, "loss": 14.375, "step": 29410 }, { "epoch": 1.9533107524739324, "grad_norm": 147.6060333251953, "learning_rate": 2.857000923681352e-09, "loss": 14.0, "step": 29411 }, { "epoch": 1.953377166766288, "grad_norm": 229.1919708251953, "learning_rate": 2.8488825380065253e-09, "loss": 22.9844, "step": 29412 }, { "epoch": 1.953443581058644, "grad_norm": 88.46127319335938, "learning_rate": 2.840775686800856e-09, "loss": 12.1562, "step": 29413 }, { "epoch": 1.9535099953509996, "grad_norm": 172.0115203857422, "learning_rate": 2.832680370158047e-09, "loss": 18.3438, "step": 29414 }, { "epoch": 1.9535764096433552, "grad_norm": 155.39599609375, "learning_rate": 2.8245965881718017e-09, "loss": 15.4062, "step": 29415 }, { "epoch": 1.953642823935711, "grad_norm": 383.5508117675781, "learning_rate": 2.8165243409356e-09, "loss": 27.3281, "step": 29416 }, { "epoch": 1.9537092382280665, "grad_norm": 377.9912109375, "learning_rate": 2.808463628542812e-09, "loss": 13.7812, "step": 29417 }, { "epoch": 1.9537756525204224, "grad_norm": 142.00253295898438, "learning_rate": 2.8004144510866966e-09, "loss": 14.2812, "step": 29418 }, { "epoch": 1.953842066812778, "grad_norm": 158.20285034179688, "learning_rate": 2.792376808660513e-09, "loss": 12.4297, "step": 29419 }, { "epoch": 1.9539084811051337, "grad_norm": 265.78253173828125, "learning_rate": 2.7843507013569633e-09, "loss": 16.1719, "step": 29420 }, { "epoch": 1.9539748953974896, "grad_norm": 249.238525390625, "learning_rate": 2.7763361292690857e-09, "loss": 21.9219, "step": 29421 }, { "epoch": 1.9540413096898452, "grad_norm": 171.89288330078125, "learning_rate": 2.768333092489472e-09, "loss": 17.125, "step": 29422 }, { "epoch": 1.954107723982201, "grad_norm": 295.15911865234375, "learning_rate": 2.7603415911107152e-09, "loss": 14.7656, "step": 29423 }, { "epoch": 1.9541741382745568, "grad_norm": 147.49839782714844, "learning_rate": 2.7523616252252966e-09, "loss": 21.8438, "step": 29424 }, { "epoch": 1.9542405525669124, "grad_norm": 106.68130493164062, "learning_rate": 2.7443931949255874e-09, "loss": 13.5938, "step": 29425 }, { "epoch": 1.954306966859268, "grad_norm": 220.54750061035156, "learning_rate": 2.736436300303624e-09, "loss": 13.5938, "step": 29426 }, { "epoch": 1.954373381151624, "grad_norm": 127.48216247558594, "learning_rate": 2.7284909414515556e-09, "loss": 12.625, "step": 29427 }, { "epoch": 1.9544397954439794, "grad_norm": 174.20028686523438, "learning_rate": 2.7205571184611975e-09, "loss": 13.3281, "step": 29428 }, { "epoch": 1.9545062097363353, "grad_norm": 218.87686157226562, "learning_rate": 2.712634831424365e-09, "loss": 16.8594, "step": 29429 }, { "epoch": 1.954572624028691, "grad_norm": 231.8466033935547, "learning_rate": 2.7047240804327632e-09, "loss": 19.4453, "step": 29430 }, { "epoch": 1.9546390383210466, "grad_norm": 283.1824035644531, "learning_rate": 2.6968248655777628e-09, "loss": 17.5625, "step": 29431 }, { "epoch": 1.9547054526134024, "grad_norm": 176.3333740234375, "learning_rate": 2.688937186950846e-09, "loss": 15.4062, "step": 29432 }, { "epoch": 1.954771866905758, "grad_norm": 192.1396484375, "learning_rate": 2.6810610446432737e-09, "loss": 16.1719, "step": 29433 }, { "epoch": 1.9548382811981138, "grad_norm": 325.4783020019531, "learning_rate": 2.6731964387460836e-09, "loss": 17.3438, "step": 29434 }, { "epoch": 1.9549046954904696, "grad_norm": 209.275390625, "learning_rate": 2.6653433693502035e-09, "loss": 16.3906, "step": 29435 }, { "epoch": 1.9549711097828253, "grad_norm": 140.7949676513672, "learning_rate": 2.6575018365465605e-09, "loss": 16.7969, "step": 29436 }, { "epoch": 1.955037524075181, "grad_norm": 151.0329132080078, "learning_rate": 2.64967184042586e-09, "loss": 15.4531, "step": 29437 }, { "epoch": 1.9551039383675368, "grad_norm": 640.3980102539062, "learning_rate": 2.641853381078585e-09, "loss": 25.9375, "step": 29438 }, { "epoch": 1.9551703526598923, "grad_norm": 173.85475158691406, "learning_rate": 2.6340464585953294e-09, "loss": 13.3125, "step": 29439 }, { "epoch": 1.9552367669522481, "grad_norm": 167.9897003173828, "learning_rate": 2.6262510730662437e-09, "loss": 17.7969, "step": 29440 }, { "epoch": 1.955303181244604, "grad_norm": 176.72021484375, "learning_rate": 2.6184672245815888e-09, "loss": 15.1562, "step": 29441 }, { "epoch": 1.9553695955369594, "grad_norm": 148.2406463623047, "learning_rate": 2.6106949132314038e-09, "loss": 14.5938, "step": 29442 }, { "epoch": 1.9554360098293153, "grad_norm": 430.668212890625, "learning_rate": 2.602934139105506e-09, "loss": 18.2031, "step": 29443 }, { "epoch": 1.955502424121671, "grad_norm": 393.6514587402344, "learning_rate": 2.595184902293823e-09, "loss": 13.8594, "step": 29444 }, { "epoch": 1.9555688384140266, "grad_norm": 315.9309997558594, "learning_rate": 2.5874472028859506e-09, "loss": 17.0312, "step": 29445 }, { "epoch": 1.9556352527063825, "grad_norm": 271.9769592285156, "learning_rate": 2.57972104097115e-09, "loss": 15.8594, "step": 29446 }, { "epoch": 1.9557016669987382, "grad_norm": 212.5292205810547, "learning_rate": 2.5720064166391274e-09, "loss": 15.1406, "step": 29447 }, { "epoch": 1.9557680812910938, "grad_norm": 202.64500427246094, "learning_rate": 2.564303329979034e-09, "loss": 14.9531, "step": 29448 }, { "epoch": 1.9558344955834497, "grad_norm": 469.6265563964844, "learning_rate": 2.5566117810797983e-09, "loss": 18.1875, "step": 29449 }, { "epoch": 1.9559009098758051, "grad_norm": 110.90234375, "learning_rate": 2.5489317700305714e-09, "loss": 11.1016, "step": 29450 }, { "epoch": 1.955967324168161, "grad_norm": 194.20526123046875, "learning_rate": 2.541263296920171e-09, "loss": 17.2969, "step": 29451 }, { "epoch": 1.9560337384605169, "grad_norm": 107.68568420410156, "learning_rate": 2.5336063618371928e-09, "loss": 12.9062, "step": 29452 }, { "epoch": 1.9561001527528723, "grad_norm": 122.6291732788086, "learning_rate": 2.525960964870344e-09, "loss": 14.6406, "step": 29453 }, { "epoch": 1.9561665670452282, "grad_norm": 386.5921325683594, "learning_rate": 2.518327106107998e-09, "loss": 23.5156, "step": 29454 }, { "epoch": 1.9562329813375838, "grad_norm": 152.53094482421875, "learning_rate": 2.5107047856384177e-09, "loss": 14.9609, "step": 29455 }, { "epoch": 1.9562993956299395, "grad_norm": 163.4402313232422, "learning_rate": 2.503094003549866e-09, "loss": 15.4062, "step": 29456 }, { "epoch": 1.9563658099222954, "grad_norm": 480.5977783203125, "learning_rate": 2.495494759930272e-09, "loss": 23.3125, "step": 29457 }, { "epoch": 1.956432224214651, "grad_norm": 259.3358154296875, "learning_rate": 2.4879070548676773e-09, "loss": 14.8125, "step": 29458 }, { "epoch": 1.9564986385070067, "grad_norm": 103.90694427490234, "learning_rate": 2.4803308884497886e-09, "loss": 12.3281, "step": 29459 }, { "epoch": 1.9565650527993625, "grad_norm": 231.7584991455078, "learning_rate": 2.472766260764203e-09, "loss": 19.5781, "step": 29460 }, { "epoch": 1.956631467091718, "grad_norm": 108.50116729736328, "learning_rate": 2.465213171898406e-09, "loss": 13.3828, "step": 29461 }, { "epoch": 1.9566978813840739, "grad_norm": 161.95599365234375, "learning_rate": 2.457671621939883e-09, "loss": 20.1875, "step": 29462 }, { "epoch": 1.9567642956764297, "grad_norm": 401.43328857421875, "learning_rate": 2.4501416109756756e-09, "loss": 14.7812, "step": 29463 }, { "epoch": 1.9568307099687852, "grad_norm": 176.18565368652344, "learning_rate": 2.4426231390931585e-09, "loss": 15.0938, "step": 29464 }, { "epoch": 1.956897124261141, "grad_norm": 342.41558837890625, "learning_rate": 2.4351162063790397e-09, "loss": 18.0156, "step": 29465 }, { "epoch": 1.9569635385534967, "grad_norm": 142.47483825683594, "learning_rate": 2.427620812920361e-09, "loss": 16.0469, "step": 29466 }, { "epoch": 1.9570299528458523, "grad_norm": 238.2351531982422, "learning_rate": 2.420136958803609e-09, "loss": 19.1562, "step": 29467 }, { "epoch": 1.9570963671382082, "grad_norm": 130.0062255859375, "learning_rate": 2.412664644115603e-09, "loss": 15.7656, "step": 29468 }, { "epoch": 1.9571627814305639, "grad_norm": 119.11810302734375, "learning_rate": 2.4052038689426068e-09, "loss": 15.8906, "step": 29469 }, { "epoch": 1.9572291957229195, "grad_norm": 341.5417175292969, "learning_rate": 2.3977546333709964e-09, "loss": 12.2344, "step": 29470 }, { "epoch": 1.9572956100152754, "grad_norm": 274.9103698730469, "learning_rate": 2.3903169374868136e-09, "loss": 17.8125, "step": 29471 }, { "epoch": 1.9573620243076308, "grad_norm": 126.68231201171875, "learning_rate": 2.3828907813762122e-09, "loss": 12.1406, "step": 29472 }, { "epoch": 1.9574284385999867, "grad_norm": 283.1101379394531, "learning_rate": 2.375476165125123e-09, "loss": 15.625, "step": 29473 }, { "epoch": 1.9574948528923426, "grad_norm": 245.00482177734375, "learning_rate": 2.368073088819256e-09, "loss": 11.3125, "step": 29474 }, { "epoch": 1.957561267184698, "grad_norm": 446.33197021484375, "learning_rate": 2.360681552544208e-09, "loss": 17.3594, "step": 29475 }, { "epoch": 1.957627681477054, "grad_norm": 132.48989868164062, "learning_rate": 2.3533015563855785e-09, "loss": 11.2188, "step": 29476 }, { "epoch": 1.9576940957694096, "grad_norm": 153.48036193847656, "learning_rate": 2.345933100428632e-09, "loss": 12.5469, "step": 29477 }, { "epoch": 1.9577605100617652, "grad_norm": 152.6505126953125, "learning_rate": 2.3385761847587448e-09, "loss": 14.0312, "step": 29478 }, { "epoch": 1.957826924354121, "grad_norm": 188.92469787597656, "learning_rate": 2.331230809460738e-09, "loss": 12.5, "step": 29479 }, { "epoch": 1.9578933386464767, "grad_norm": 234.02151489257812, "learning_rate": 2.3238969746199875e-09, "loss": 13.9062, "step": 29480 }, { "epoch": 1.9579597529388324, "grad_norm": 1797.5465087890625, "learning_rate": 2.316574680320982e-09, "loss": 16.6875, "step": 29481 }, { "epoch": 1.9580261672311883, "grad_norm": 236.44309997558594, "learning_rate": 2.3092639266485416e-09, "loss": 22.7656, "step": 29482 }, { "epoch": 1.9580925815235437, "grad_norm": 238.3513641357422, "learning_rate": 2.3019647136872654e-09, "loss": 13.6406, "step": 29483 }, { "epoch": 1.9581589958158996, "grad_norm": 188.26454162597656, "learning_rate": 2.294677041521531e-09, "loss": 16.3906, "step": 29484 }, { "epoch": 1.9582254101082555, "grad_norm": 236.4562225341797, "learning_rate": 2.287400910235715e-09, "loss": 13.1875, "step": 29485 }, { "epoch": 1.958291824400611, "grad_norm": 238.9822540283203, "learning_rate": 2.2801363199138612e-09, "loss": 16.25, "step": 29486 }, { "epoch": 1.9583582386929668, "grad_norm": 175.9106903076172, "learning_rate": 2.2728832706401245e-09, "loss": 16.5156, "step": 29487 }, { "epoch": 1.9584246529853224, "grad_norm": 162.69154357910156, "learning_rate": 2.265641762498327e-09, "loss": 17.4844, "step": 29488 }, { "epoch": 1.958491067277678, "grad_norm": 221.90396118164062, "learning_rate": 2.2584117955722904e-09, "loss": 14.0469, "step": 29489 }, { "epoch": 1.958557481570034, "grad_norm": 2913.594970703125, "learning_rate": 2.251193369945503e-09, "loss": 11.1797, "step": 29490 }, { "epoch": 1.9586238958623896, "grad_norm": 259.8468322753906, "learning_rate": 2.2439864857016764e-09, "loss": 18.1875, "step": 29491 }, { "epoch": 1.9586903101547453, "grad_norm": 325.16802978515625, "learning_rate": 2.236791142924077e-09, "loss": 13.875, "step": 29492 }, { "epoch": 1.9587567244471011, "grad_norm": 811.398193359375, "learning_rate": 2.2296073416958603e-09, "loss": 14.7812, "step": 29493 }, { "epoch": 1.9588231387394566, "grad_norm": 402.92144775390625, "learning_rate": 2.2224350821002935e-09, "loss": 22.4688, "step": 29494 }, { "epoch": 1.9588895530318124, "grad_norm": 306.0969543457031, "learning_rate": 2.215274364220199e-09, "loss": 11.6562, "step": 29495 }, { "epoch": 1.9589559673241683, "grad_norm": 246.74298095703125, "learning_rate": 2.2081251881383988e-09, "loss": 12.7656, "step": 29496 }, { "epoch": 1.9590223816165238, "grad_norm": 161.33187866210938, "learning_rate": 2.2009875539378275e-09, "loss": 20.2969, "step": 29497 }, { "epoch": 1.9590887959088796, "grad_norm": 249.91297912597656, "learning_rate": 2.1938614617007524e-09, "loss": 19.3594, "step": 29498 }, { "epoch": 1.9591552102012353, "grad_norm": 224.83592224121094, "learning_rate": 2.1867469115096627e-09, "loss": 13.6406, "step": 29499 }, { "epoch": 1.959221624493591, "grad_norm": 121.40872192382812, "learning_rate": 2.179643903446937e-09, "loss": 20.75, "step": 29500 }, { "epoch": 1.9592880387859468, "grad_norm": 105.03131866455078, "learning_rate": 2.1725524375947325e-09, "loss": 10.25, "step": 29501 }, { "epoch": 1.9593544530783025, "grad_norm": 121.65867614746094, "learning_rate": 2.165472514035094e-09, "loss": 15.1719, "step": 29502 }, { "epoch": 1.9594208673706581, "grad_norm": 952.0779418945312, "learning_rate": 2.158404132849956e-09, "loss": 13.0781, "step": 29503 }, { "epoch": 1.959487281663014, "grad_norm": 175.4353790283203, "learning_rate": 2.1513472941209197e-09, "loss": 19.5469, "step": 29504 }, { "epoch": 1.9595536959553694, "grad_norm": 158.9816131591797, "learning_rate": 2.1443019979296984e-09, "loss": 15.6562, "step": 29505 }, { "epoch": 1.9596201102477253, "grad_norm": 225.99102783203125, "learning_rate": 2.1372682443577815e-09, "loss": 10.1484, "step": 29506 }, { "epoch": 1.9596865245400812, "grad_norm": 405.8930358886719, "learning_rate": 2.130246033486549e-09, "loss": 18.2031, "step": 29507 }, { "epoch": 1.9597529388324366, "grad_norm": 173.6669158935547, "learning_rate": 2.123235365397269e-09, "loss": 13.6484, "step": 29508 }, { "epoch": 1.9598193531247925, "grad_norm": 286.34552001953125, "learning_rate": 2.1162362401709877e-09, "loss": 14.3906, "step": 29509 }, { "epoch": 1.9598857674171482, "grad_norm": 398.88287353515625, "learning_rate": 2.1092486578885294e-09, "loss": 16.6562, "step": 29510 }, { "epoch": 1.9599521817095038, "grad_norm": 191.43536376953125, "learning_rate": 2.1022726186310514e-09, "loss": 15.6094, "step": 29511 }, { "epoch": 1.9600185960018597, "grad_norm": 226.410888671875, "learning_rate": 2.0953081224790448e-09, "loss": 16.7031, "step": 29512 }, { "epoch": 1.9600850102942153, "grad_norm": 166.79095458984375, "learning_rate": 2.088355169513001e-09, "loss": 16.7344, "step": 29513 }, { "epoch": 1.960151424586571, "grad_norm": 223.452392578125, "learning_rate": 2.0814137598135216e-09, "loss": 19.1094, "step": 29514 }, { "epoch": 1.9602178388789269, "grad_norm": 88.93529510498047, "learning_rate": 2.0744838934607655e-09, "loss": 15.125, "step": 29515 }, { "epoch": 1.9602842531712823, "grad_norm": 224.97727966308594, "learning_rate": 2.0675655705348906e-09, "loss": 15.7656, "step": 29516 }, { "epoch": 1.9603506674636382, "grad_norm": 254.2862548828125, "learning_rate": 2.0606587911160544e-09, "loss": 12.7188, "step": 29517 }, { "epoch": 1.960417081755994, "grad_norm": 167.98255920410156, "learning_rate": 2.0537635552840827e-09, "loss": 13.6094, "step": 29518 }, { "epoch": 1.9604834960483495, "grad_norm": 422.9147644042969, "learning_rate": 2.046879863118689e-09, "loss": 21.9688, "step": 29519 }, { "epoch": 1.9605499103407054, "grad_norm": 116.32144165039062, "learning_rate": 2.0400077146994767e-09, "loss": 11.0156, "step": 29520 }, { "epoch": 1.960616324633061, "grad_norm": 445.0595397949219, "learning_rate": 2.0331471101061594e-09, "loss": 22.25, "step": 29521 }, { "epoch": 1.9606827389254167, "grad_norm": 231.14288330078125, "learning_rate": 2.026298049417785e-09, "loss": 16.5781, "step": 29522 }, { "epoch": 1.9607491532177725, "grad_norm": 271.4096374511719, "learning_rate": 2.019460532713846e-09, "loss": 18.8281, "step": 29523 }, { "epoch": 1.9608155675101282, "grad_norm": 150.29739379882812, "learning_rate": 2.0126345600731674e-09, "loss": 12.6719, "step": 29524 }, { "epoch": 1.9608819818024839, "grad_norm": 238.14059448242188, "learning_rate": 2.00582013157502e-09, "loss": 22.75, "step": 29525 }, { "epoch": 1.9609483960948397, "grad_norm": 217.7808837890625, "learning_rate": 1.999017247298007e-09, "loss": 10.2188, "step": 29526 }, { "epoch": 1.9610148103871952, "grad_norm": 1003.514892578125, "learning_rate": 1.9922259073208435e-09, "loss": 14.5781, "step": 29527 }, { "epoch": 1.961081224679551, "grad_norm": 351.924072265625, "learning_rate": 1.9854461117221332e-09, "loss": 18.4062, "step": 29528 }, { "epoch": 1.961147638971907, "grad_norm": 263.4153747558594, "learning_rate": 1.978677860580369e-09, "loss": 27.6875, "step": 29529 }, { "epoch": 1.9612140532642623, "grad_norm": 310.37567138671875, "learning_rate": 1.9719211539737102e-09, "loss": 17.2188, "step": 29530 }, { "epoch": 1.9612804675566182, "grad_norm": 122.52149200439453, "learning_rate": 1.965175991980317e-09, "loss": 13.5312, "step": 29531 }, { "epoch": 1.9613468818489739, "grad_norm": 1517.8753662109375, "learning_rate": 1.9584423746783484e-09, "loss": 21.5156, "step": 29532 }, { "epoch": 1.9614132961413295, "grad_norm": 348.7450256347656, "learning_rate": 1.9517203021456317e-09, "loss": 15.5, "step": 29533 }, { "epoch": 1.9614797104336854, "grad_norm": 513.8043823242188, "learning_rate": 1.9450097744598825e-09, "loss": 12.7969, "step": 29534 }, { "epoch": 1.961546124726041, "grad_norm": 1171.53076171875, "learning_rate": 1.9383107916987052e-09, "loss": 16.1875, "step": 29535 }, { "epoch": 1.9616125390183967, "grad_norm": 307.93450927734375, "learning_rate": 1.9316233539397043e-09, "loss": 15.5469, "step": 29536 }, { "epoch": 1.9616789533107526, "grad_norm": 112.24999237060547, "learning_rate": 1.9249474612600404e-09, "loss": 15.4844, "step": 29537 }, { "epoch": 1.961745367603108, "grad_norm": 178.5769500732422, "learning_rate": 1.9182831137372067e-09, "loss": 12.7031, "step": 29538 }, { "epoch": 1.961811781895464, "grad_norm": 364.9347229003906, "learning_rate": 1.9116303114480312e-09, "loss": 19.2812, "step": 29539 }, { "epoch": 1.9618781961878198, "grad_norm": 131.95657348632812, "learning_rate": 1.9049890544696743e-09, "loss": 12.9062, "step": 29540 }, { "epoch": 1.9619446104801752, "grad_norm": 174.37246704101562, "learning_rate": 1.8983593428788524e-09, "loss": 15.25, "step": 29541 }, { "epoch": 1.962011024772531, "grad_norm": 311.0539245605469, "learning_rate": 1.891741176752282e-09, "loss": 12.6406, "step": 29542 }, { "epoch": 1.9620774390648867, "grad_norm": 352.98760986328125, "learning_rate": 1.885134556166457e-09, "loss": 20.4062, "step": 29543 }, { "epoch": 1.9621438533572424, "grad_norm": 181.91162109375, "learning_rate": 1.878539481197872e-09, "loss": 16.1875, "step": 29544 }, { "epoch": 1.9622102676495983, "grad_norm": 263.3121337890625, "learning_rate": 1.871955951922799e-09, "loss": 16.0469, "step": 29545 }, { "epoch": 1.962276681941954, "grad_norm": 96.75019836425781, "learning_rate": 1.8653839684174e-09, "loss": 12.3672, "step": 29546 }, { "epoch": 1.9623430962343096, "grad_norm": 284.0802917480469, "learning_rate": 1.858823530757614e-09, "loss": 14.5156, "step": 29547 }, { "epoch": 1.9624095105266655, "grad_norm": 167.96055603027344, "learning_rate": 1.852274639019491e-09, "loss": 11.1328, "step": 29548 }, { "epoch": 1.962475924819021, "grad_norm": 147.8052520751953, "learning_rate": 1.8457372932786375e-09, "loss": 16.625, "step": 29549 }, { "epoch": 1.9625423391113768, "grad_norm": 290.5067138671875, "learning_rate": 1.8392114936107705e-09, "loss": 15.8906, "step": 29550 }, { "epoch": 1.9626087534037326, "grad_norm": 741.5702514648438, "learning_rate": 1.8326972400912744e-09, "loss": 14.7344, "step": 29551 }, { "epoch": 1.962675167696088, "grad_norm": 127.9774169921875, "learning_rate": 1.826194532795533e-09, "loss": 12.7188, "step": 29552 }, { "epoch": 1.962741581988444, "grad_norm": 304.6509094238281, "learning_rate": 1.8197033717988198e-09, "loss": 20.1562, "step": 29553 }, { "epoch": 1.9628079962807996, "grad_norm": 280.4635925292969, "learning_rate": 1.8132237571762964e-09, "loss": 17.1406, "step": 29554 }, { "epoch": 1.9628744105731553, "grad_norm": 303.53790283203125, "learning_rate": 1.8067556890026815e-09, "loss": 14.4531, "step": 29555 }, { "epoch": 1.9629408248655111, "grad_norm": 333.7122497558594, "learning_rate": 1.8002991673530255e-09, "loss": 16.5781, "step": 29556 }, { "epoch": 1.9630072391578668, "grad_norm": 258.6501770019531, "learning_rate": 1.7938541923018247e-09, "loss": 12.4531, "step": 29557 }, { "epoch": 1.9630736534502224, "grad_norm": 161.37713623046875, "learning_rate": 1.787420763923797e-09, "loss": 14.125, "step": 29558 }, { "epoch": 1.9631400677425783, "grad_norm": 201.2755126953125, "learning_rate": 1.7809988822932165e-09, "loss": 13.6094, "step": 29559 }, { "epoch": 1.9632064820349338, "grad_norm": 479.13385009765625, "learning_rate": 1.774588547484468e-09, "loss": 14.5312, "step": 29560 }, { "epoch": 1.9632728963272896, "grad_norm": 329.59991455078125, "learning_rate": 1.7681897595716034e-09, "loss": 13.4062, "step": 29561 }, { "epoch": 1.9633393106196455, "grad_norm": 172.28717041015625, "learning_rate": 1.7618025186287855e-09, "loss": 12.7734, "step": 29562 }, { "epoch": 1.963405724912001, "grad_norm": 271.1436462402344, "learning_rate": 1.7554268247298443e-09, "loss": 14.6094, "step": 29563 }, { "epoch": 1.9634721392043568, "grad_norm": 173.3595733642578, "learning_rate": 1.7490626779484985e-09, "loss": 13.8125, "step": 29564 }, { "epoch": 1.9635385534967125, "grad_norm": 270.91796875, "learning_rate": 1.7427100783582448e-09, "loss": 28.5312, "step": 29565 }, { "epoch": 1.9636049677890681, "grad_norm": 136.9093475341797, "learning_rate": 1.7363690260328024e-09, "loss": 17.0, "step": 29566 }, { "epoch": 1.963671382081424, "grad_norm": 173.21766662597656, "learning_rate": 1.7300395210454455e-09, "loss": 14.9688, "step": 29567 }, { "epoch": 1.9637377963737797, "grad_norm": 650.3052368164062, "learning_rate": 1.723721563469227e-09, "loss": 21.7188, "step": 29568 }, { "epoch": 1.9638042106661353, "grad_norm": 138.5858612060547, "learning_rate": 1.7174151533775327e-09, "loss": 15.7969, "step": 29569 }, { "epoch": 1.9638706249584912, "grad_norm": 160.33316040039062, "learning_rate": 1.7111202908429711e-09, "loss": 17.0156, "step": 29570 }, { "epoch": 1.9639370392508466, "grad_norm": 267.5471496582031, "learning_rate": 1.704836975938595e-09, "loss": 16.2344, "step": 29571 }, { "epoch": 1.9640034535432025, "grad_norm": 230.8921356201172, "learning_rate": 1.6985652087370129e-09, "loss": 13.8125, "step": 29572 }, { "epoch": 1.9640698678355584, "grad_norm": 535.6858520507812, "learning_rate": 1.6923049893108332e-09, "loss": 20.4062, "step": 29573 }, { "epoch": 1.9641362821279138, "grad_norm": 415.6338806152344, "learning_rate": 1.6860563177323317e-09, "loss": 15.6016, "step": 29574 }, { "epoch": 1.9642026964202697, "grad_norm": 204.50035095214844, "learning_rate": 1.679819194073895e-09, "loss": 20.4844, "step": 29575 }, { "epoch": 1.9642691107126253, "grad_norm": 199.04312133789062, "learning_rate": 1.673593618407576e-09, "loss": 15.0625, "step": 29576 }, { "epoch": 1.964335525004981, "grad_norm": 253.74758911132812, "learning_rate": 1.6673795908055398e-09, "loss": 15.5, "step": 29577 }, { "epoch": 1.9644019392973369, "grad_norm": 194.68812561035156, "learning_rate": 1.6611771113396178e-09, "loss": 13.125, "step": 29578 }, { "epoch": 1.9644683535896925, "grad_norm": 275.71502685546875, "learning_rate": 1.654986180081419e-09, "loss": 15.9844, "step": 29579 }, { "epoch": 1.9645347678820482, "grad_norm": 166.1497039794922, "learning_rate": 1.6488067971027753e-09, "loss": 14.2656, "step": 29580 }, { "epoch": 1.964601182174404, "grad_norm": 127.90506744384766, "learning_rate": 1.6426389624749626e-09, "loss": 12.8438, "step": 29581 }, { "epoch": 1.9646675964667595, "grad_norm": 393.0592346191406, "learning_rate": 1.6364826762693684e-09, "loss": 16.1094, "step": 29582 }, { "epoch": 1.9647340107591154, "grad_norm": 125.07003021240234, "learning_rate": 1.6303379385573802e-09, "loss": 12.9219, "step": 29583 }, { "epoch": 1.9648004250514712, "grad_norm": 175.32965087890625, "learning_rate": 1.624204749409941e-09, "loss": 15.1719, "step": 29584 }, { "epoch": 1.9648668393438267, "grad_norm": 288.4268798828125, "learning_rate": 1.618083108897994e-09, "loss": 20.0156, "step": 29585 }, { "epoch": 1.9649332536361825, "grad_norm": 230.2587432861328, "learning_rate": 1.6119730170922608e-09, "loss": 23.5, "step": 29586 }, { "epoch": 1.9649996679285382, "grad_norm": 184.58792114257812, "learning_rate": 1.6058744740636842e-09, "loss": 15.5312, "step": 29587 }, { "epoch": 1.9650660822208939, "grad_norm": 177.1698455810547, "learning_rate": 1.5997874798824306e-09, "loss": 18.5781, "step": 29588 }, { "epoch": 1.9651324965132497, "grad_norm": 172.49014282226562, "learning_rate": 1.5937120346193322e-09, "loss": 13.8125, "step": 29589 }, { "epoch": 1.9651989108056054, "grad_norm": 5485.07373046875, "learning_rate": 1.587648138344333e-09, "loss": 18.4688, "step": 29590 }, { "epoch": 1.965265325097961, "grad_norm": 255.1709747314453, "learning_rate": 1.58159579112771e-09, "loss": 18.6406, "step": 29591 }, { "epoch": 1.965331739390317, "grad_norm": 106.7894287109375, "learning_rate": 1.5755549930395185e-09, "loss": 14.5625, "step": 29592 }, { "epoch": 1.9653981536826726, "grad_norm": 233.0265655517578, "learning_rate": 1.5695257441495914e-09, "loss": 13.9062, "step": 29593 }, { "epoch": 1.9654645679750282, "grad_norm": 288.5845031738281, "learning_rate": 1.5635080445276505e-09, "loss": 17.125, "step": 29594 }, { "epoch": 1.965530982267384, "grad_norm": 172.99464416503906, "learning_rate": 1.557501894243307e-09, "loss": 17.1562, "step": 29595 }, { "epoch": 1.9655973965597395, "grad_norm": 497.0601806640625, "learning_rate": 1.5515072933659502e-09, "loss": 20.875, "step": 29596 }, { "epoch": 1.9656638108520954, "grad_norm": 296.15008544921875, "learning_rate": 1.5455242419651904e-09, "loss": 25.6875, "step": 29597 }, { "epoch": 1.965730225144451, "grad_norm": 216.78904724121094, "learning_rate": 1.5395527401099728e-09, "loss": 18.6719, "step": 29598 }, { "epoch": 1.9657966394368067, "grad_norm": 170.248779296875, "learning_rate": 1.5335927878694643e-09, "loss": 17.5625, "step": 29599 }, { "epoch": 1.9658630537291626, "grad_norm": 259.7982482910156, "learning_rate": 1.5276443853126097e-09, "loss": 11.5625, "step": 29600 }, { "epoch": 1.9659294680215182, "grad_norm": 392.3541259765625, "learning_rate": 1.5217075325082429e-09, "loss": 13.0469, "step": 29601 }, { "epoch": 1.965995882313874, "grad_norm": 1517.2711181640625, "learning_rate": 1.5157822295249755e-09, "loss": 14.5469, "step": 29602 }, { "epoch": 1.9660622966062298, "grad_norm": 215.00494384765625, "learning_rate": 1.5098684764313085e-09, "loss": 15.1016, "step": 29603 }, { "epoch": 1.9661287108985854, "grad_norm": 247.59548950195312, "learning_rate": 1.5039662732958536e-09, "loss": 17.9375, "step": 29604 }, { "epoch": 1.966195125190941, "grad_norm": 197.2353973388672, "learning_rate": 1.4980756201866674e-09, "loss": 19.0781, "step": 29605 }, { "epoch": 1.966261539483297, "grad_norm": 174.93679809570312, "learning_rate": 1.4921965171720286e-09, "loss": 14.75, "step": 29606 }, { "epoch": 1.9663279537756524, "grad_norm": 97.28263854980469, "learning_rate": 1.4863289643198828e-09, "loss": 14.6875, "step": 29607 }, { "epoch": 1.9663943680680083, "grad_norm": 310.38397216796875, "learning_rate": 1.4804729616980648e-09, "loss": 23.2812, "step": 29608 }, { "epoch": 1.966460782360364, "grad_norm": 205.66299438476562, "learning_rate": 1.4746285093744094e-09, "loss": 18.6875, "step": 29609 }, { "epoch": 1.9665271966527196, "grad_norm": 436.177978515625, "learning_rate": 1.4687956074164176e-09, "loss": 11.4688, "step": 29610 }, { "epoch": 1.9665936109450755, "grad_norm": 245.18057250976562, "learning_rate": 1.4629742558915913e-09, "loss": 15.7578, "step": 29611 }, { "epoch": 1.966660025237431, "grad_norm": 328.13934326171875, "learning_rate": 1.4571644548673211e-09, "loss": 15.1719, "step": 29612 }, { "epoch": 1.9667264395297868, "grad_norm": 381.16802978515625, "learning_rate": 1.4513662044107755e-09, "loss": 16.6719, "step": 29613 }, { "epoch": 1.9667928538221426, "grad_norm": 178.6610870361328, "learning_rate": 1.445579504589123e-09, "loss": 17.1875, "step": 29614 }, { "epoch": 1.9668592681144983, "grad_norm": 305.910400390625, "learning_rate": 1.4398043554690875e-09, "loss": 17.875, "step": 29615 }, { "epoch": 1.966925682406854, "grad_norm": 380.6836853027344, "learning_rate": 1.4340407571176162e-09, "loss": 17.1406, "step": 29616 }, { "epoch": 1.9669920966992098, "grad_norm": 563.4966430664062, "learning_rate": 1.4282887096014328e-09, "loss": 19.5938, "step": 29617 }, { "epoch": 1.9670585109915653, "grad_norm": 215.3492431640625, "learning_rate": 1.4225482129869292e-09, "loss": 12.3516, "step": 29618 }, { "epoch": 1.9671249252839211, "grad_norm": 312.81610107421875, "learning_rate": 1.4168192673404966e-09, "loss": 15.2969, "step": 29619 }, { "epoch": 1.9671913395762768, "grad_norm": 103.75074005126953, "learning_rate": 1.4111018727286373e-09, "loss": 11.1719, "step": 29620 }, { "epoch": 1.9672577538686324, "grad_norm": 139.881103515625, "learning_rate": 1.4053960292171873e-09, "loss": 19.2812, "step": 29621 }, { "epoch": 1.9673241681609883, "grad_norm": 316.6572570800781, "learning_rate": 1.3997017368724273e-09, "loss": 16.7969, "step": 29622 }, { "epoch": 1.967390582453344, "grad_norm": 105.73473358154297, "learning_rate": 1.3940189957599712e-09, "loss": 13.9062, "step": 29623 }, { "epoch": 1.9674569967456996, "grad_norm": 204.0854949951172, "learning_rate": 1.3883478059457666e-09, "loss": 21.375, "step": 29624 }, { "epoch": 1.9675234110380555, "grad_norm": 455.07672119140625, "learning_rate": 1.3826881674953162e-09, "loss": 15.6641, "step": 29625 }, { "epoch": 1.9675898253304112, "grad_norm": 603.8046875, "learning_rate": 1.3770400804740123e-09, "loss": 16.0938, "step": 29626 }, { "epoch": 1.9676562396227668, "grad_norm": 401.5160827636719, "learning_rate": 1.3714035449473582e-09, "loss": 16.0781, "step": 29627 }, { "epoch": 1.9677226539151227, "grad_norm": 140.93899536132812, "learning_rate": 1.365778560980413e-09, "loss": 17.5156, "step": 29628 }, { "epoch": 1.9677890682074781, "grad_norm": 443.7251892089844, "learning_rate": 1.3601651286383464e-09, "loss": 17.9375, "step": 29629 }, { "epoch": 1.967855482499834, "grad_norm": 305.2982177734375, "learning_rate": 1.3545632479859959e-09, "loss": 17.7969, "step": 29630 }, { "epoch": 1.9679218967921897, "grad_norm": 306.6945495605469, "learning_rate": 1.3489729190881981e-09, "loss": 18.2656, "step": 29631 }, { "epoch": 1.9679883110845453, "grad_norm": 110.75240325927734, "learning_rate": 1.3433941420096794e-09, "loss": 16.0938, "step": 29632 }, { "epoch": 1.9680547253769012, "grad_norm": 184.07789611816406, "learning_rate": 1.3378269168148325e-09, "loss": 13.3906, "step": 29633 }, { "epoch": 1.9681211396692568, "grad_norm": 340.3381652832031, "learning_rate": 1.3322712435682726e-09, "loss": 22.6562, "step": 29634 }, { "epoch": 1.9681875539616125, "grad_norm": 328.2636413574219, "learning_rate": 1.3267271223340593e-09, "loss": 15.0, "step": 29635 }, { "epoch": 1.9682539682539684, "grad_norm": 150.44253540039062, "learning_rate": 1.3211945531763635e-09, "loss": 15.6094, "step": 29636 }, { "epoch": 1.968320382546324, "grad_norm": 369.2940979003906, "learning_rate": 1.3156735361592452e-09, "loss": 13.8906, "step": 29637 }, { "epoch": 1.9683867968386797, "grad_norm": 218.06253051757812, "learning_rate": 1.3101640713465422e-09, "loss": 18.7812, "step": 29638 }, { "epoch": 1.9684532111310356, "grad_norm": 224.14930725097656, "learning_rate": 1.3046661588019814e-09, "loss": 15.0938, "step": 29639 }, { "epoch": 1.968519625423391, "grad_norm": 253.28604125976562, "learning_rate": 1.2991797985891783e-09, "loss": 17.4375, "step": 29640 }, { "epoch": 1.9685860397157469, "grad_norm": 202.54747009277344, "learning_rate": 1.293704990771527e-09, "loss": 12.4844, "step": 29641 }, { "epoch": 1.9686524540081025, "grad_norm": 159.644775390625, "learning_rate": 1.288241735412532e-09, "loss": 12.3203, "step": 29642 }, { "epoch": 1.9687188683004582, "grad_norm": 276.2588195800781, "learning_rate": 1.282790032575143e-09, "loss": 15.8281, "step": 29643 }, { "epoch": 1.968785282592814, "grad_norm": 141.6810760498047, "learning_rate": 1.2773498823226425e-09, "loss": 15.4688, "step": 29644 }, { "epoch": 1.9688516968851697, "grad_norm": 443.3586730957031, "learning_rate": 1.2719212847177585e-09, "loss": 15.6562, "step": 29645 }, { "epoch": 1.9689181111775254, "grad_norm": 182.1769256591797, "learning_rate": 1.2665042398234406e-09, "loss": 15.1875, "step": 29646 }, { "epoch": 1.9689845254698812, "grad_norm": 276.1295471191406, "learning_rate": 1.261098747702305e-09, "loss": 17.0938, "step": 29647 }, { "epoch": 1.9690509397622369, "grad_norm": 263.5727233886719, "learning_rate": 1.2557048084168576e-09, "loss": 15.0938, "step": 29648 }, { "epoch": 1.9691173540545925, "grad_norm": 440.6484375, "learning_rate": 1.2503224220294927e-09, "loss": 17.5, "step": 29649 }, { "epoch": 1.9691837683469484, "grad_norm": 106.81942749023438, "learning_rate": 1.2449515886024943e-09, "loss": 13.4375, "step": 29650 }, { "epoch": 1.9692501826393038, "grad_norm": 623.833984375, "learning_rate": 1.2395923081980342e-09, "loss": 21.5781, "step": 29651 }, { "epoch": 1.9693165969316597, "grad_norm": 189.65293884277344, "learning_rate": 1.2342445808780632e-09, "loss": 20.4844, "step": 29652 }, { "epoch": 1.9693830112240154, "grad_norm": 163.69100952148438, "learning_rate": 1.2289084067043098e-09, "loss": 15.3594, "step": 29653 }, { "epoch": 1.969449425516371, "grad_norm": 119.17556762695312, "learning_rate": 1.2235837857387244e-09, "loss": 16.3438, "step": 29654 }, { "epoch": 1.969515839808727, "grad_norm": 158.1358184814453, "learning_rate": 1.2182707180428132e-09, "loss": 21.0938, "step": 29655 }, { "epoch": 1.9695822541010826, "grad_norm": 221.06503295898438, "learning_rate": 1.2129692036778604e-09, "loss": 14.8594, "step": 29656 }, { "epoch": 1.9696486683934382, "grad_norm": 157.40545654296875, "learning_rate": 1.2076792427054839e-09, "loss": 18.25, "step": 29657 }, { "epoch": 1.969715082685794, "grad_norm": 203.28683471679688, "learning_rate": 1.2024008351867453e-09, "loss": 17.2656, "step": 29658 }, { "epoch": 1.9697814969781497, "grad_norm": 329.77880859375, "learning_rate": 1.1971339811825965e-09, "loss": 11.6328, "step": 29659 }, { "epoch": 1.9698479112705054, "grad_norm": 100.92428588867188, "learning_rate": 1.1918786807540993e-09, "loss": 14.5938, "step": 29660 }, { "epoch": 1.9699143255628613, "grad_norm": 233.10760498046875, "learning_rate": 1.1866349339620941e-09, "loss": 15.0312, "step": 29661 }, { "epoch": 1.9699807398552167, "grad_norm": 167.26229858398438, "learning_rate": 1.1814027408670878e-09, "loss": 16.2656, "step": 29662 }, { "epoch": 1.9700471541475726, "grad_norm": 493.063720703125, "learning_rate": 1.176182101529588e-09, "loss": 21.8281, "step": 29663 }, { "epoch": 1.9701135684399282, "grad_norm": 311.91900634765625, "learning_rate": 1.1709730160101017e-09, "loss": 19.2031, "step": 29664 }, { "epoch": 1.970179982732284, "grad_norm": 102.73912048339844, "learning_rate": 1.1657754843689139e-09, "loss": 16.4531, "step": 29665 }, { "epoch": 1.9702463970246398, "grad_norm": 458.0368957519531, "learning_rate": 1.1605895066659765e-09, "loss": 15.1562, "step": 29666 }, { "epoch": 1.9703128113169954, "grad_norm": 150.06884765625, "learning_rate": 1.1554150829614639e-09, "loss": 19.2188, "step": 29667 }, { "epoch": 1.970379225609351, "grad_norm": 425.1878356933594, "learning_rate": 1.150252213314995e-09, "loss": 17.7188, "step": 29668 }, { "epoch": 1.970445639901707, "grad_norm": 135.87989807128906, "learning_rate": 1.1451008977866328e-09, "loss": 14.0469, "step": 29669 }, { "epoch": 1.9705120541940626, "grad_norm": 273.2561340332031, "learning_rate": 1.1399611364356632e-09, "loss": 18.1562, "step": 29670 }, { "epoch": 1.9705784684864183, "grad_norm": 246.91693115234375, "learning_rate": 1.1348329293215942e-09, "loss": 12.8125, "step": 29671 }, { "epoch": 1.9706448827787741, "grad_norm": 395.043212890625, "learning_rate": 1.1297162765039336e-09, "loss": 17.5938, "step": 29672 }, { "epoch": 1.9707112970711296, "grad_norm": 165.4005584716797, "learning_rate": 1.1246111780416346e-09, "loss": 15.9844, "step": 29673 }, { "epoch": 1.9707777113634855, "grad_norm": 186.133056640625, "learning_rate": 1.1195176339938717e-09, "loss": 16.7656, "step": 29674 }, { "epoch": 1.970844125655841, "grad_norm": 131.44613647460938, "learning_rate": 1.1144356444195978e-09, "loss": 11.75, "step": 29675 }, { "epoch": 1.9709105399481968, "grad_norm": 149.2247772216797, "learning_rate": 1.109365209377433e-09, "loss": 14.2344, "step": 29676 }, { "epoch": 1.9709769542405526, "grad_norm": 234.51129150390625, "learning_rate": 1.104306328926219e-09, "loss": 17.4062, "step": 29677 }, { "epoch": 1.9710433685329083, "grad_norm": 152.07041931152344, "learning_rate": 1.099259003124353e-09, "loss": 14.5938, "step": 29678 }, { "epoch": 1.971109782825264, "grad_norm": 119.69378662109375, "learning_rate": 1.0942232320303447e-09, "loss": 15.0078, "step": 29679 }, { "epoch": 1.9711761971176198, "grad_norm": 281.2864074707031, "learning_rate": 1.089199015702258e-09, "loss": 15.6094, "step": 29680 }, { "epoch": 1.9712426114099755, "grad_norm": 161.8865509033203, "learning_rate": 1.0841863541983798e-09, "loss": 15.375, "step": 29681 }, { "epoch": 1.9713090257023311, "grad_norm": 200.67068481445312, "learning_rate": 1.079185247576553e-09, "loss": 14.25, "step": 29682 }, { "epoch": 1.971375439994687, "grad_norm": 251.97769165039062, "learning_rate": 1.0741956958947307e-09, "loss": 16.1562, "step": 29683 }, { "epoch": 1.9714418542870424, "grad_norm": 99.0555419921875, "learning_rate": 1.069217699210645e-09, "loss": 12.9531, "step": 29684 }, { "epoch": 1.9715082685793983, "grad_norm": 116.45386505126953, "learning_rate": 1.0642512575818053e-09, "loss": 12.1094, "step": 29685 }, { "epoch": 1.971574682871754, "grad_norm": 141.69921875, "learning_rate": 1.05929637106561e-09, "loss": 18.0, "step": 29686 }, { "epoch": 1.9716410971641096, "grad_norm": 413.7723693847656, "learning_rate": 1.0543530397195687e-09, "loss": 19.7812, "step": 29687 }, { "epoch": 1.9717075114564655, "grad_norm": 312.0832824707031, "learning_rate": 1.049421263600747e-09, "loss": 13.5938, "step": 29688 }, { "epoch": 1.9717739257488212, "grad_norm": 786.630859375, "learning_rate": 1.0445010427660994e-09, "loss": 12.2266, "step": 29689 }, { "epoch": 1.9718403400411768, "grad_norm": 134.34849548339844, "learning_rate": 1.0395923772726912e-09, "loss": 14.8281, "step": 29690 }, { "epoch": 1.9719067543335327, "grad_norm": 220.46592712402344, "learning_rate": 1.0346952671772546e-09, "loss": 13.0469, "step": 29691 }, { "epoch": 1.9719731686258883, "grad_norm": 103.00798797607422, "learning_rate": 1.0298097125364113e-09, "loss": 11.5156, "step": 29692 }, { "epoch": 1.972039582918244, "grad_norm": 140.06314086914062, "learning_rate": 1.0249357134066717e-09, "loss": 13.6562, "step": 29693 }, { "epoch": 1.9721059972105999, "grad_norm": 202.66696166992188, "learning_rate": 1.0200732698444347e-09, "loss": 15.9375, "step": 29694 }, { "epoch": 1.9721724115029553, "grad_norm": 121.56024169921875, "learning_rate": 1.0152223819059891e-09, "loss": 14.3906, "step": 29695 }, { "epoch": 1.9722388257953112, "grad_norm": 182.7086181640625, "learning_rate": 1.01038304964729e-09, "loss": 19.1094, "step": 29696 }, { "epoch": 1.9723052400876668, "grad_norm": 159.2163543701172, "learning_rate": 1.0055552731245143e-09, "loss": 14.6172, "step": 29697 }, { "epoch": 1.9723716543800225, "grad_norm": 194.45933532714844, "learning_rate": 1.0007390523935066e-09, "loss": 13.6875, "step": 29698 }, { "epoch": 1.9724380686723784, "grad_norm": 276.00396728515625, "learning_rate": 9.959343875097781e-10, "loss": 14.5469, "step": 29699 }, { "epoch": 1.972504482964734, "grad_norm": 525.480224609375, "learning_rate": 9.911412785290618e-10, "loss": 20.0781, "step": 29700 }, { "epoch": 1.9725708972570897, "grad_norm": 319.8211669921875, "learning_rate": 9.86359725506758e-10, "loss": 18.0938, "step": 29701 }, { "epoch": 1.9726373115494455, "grad_norm": 134.9473419189453, "learning_rate": 9.815897284981555e-10, "loss": 17.5625, "step": 29702 }, { "epoch": 1.9727037258418012, "grad_norm": 303.2142028808594, "learning_rate": 9.768312875584329e-10, "loss": 16.3594, "step": 29703 }, { "epoch": 1.9727701401341569, "grad_norm": 260.0734558105469, "learning_rate": 9.720844027426567e-10, "loss": 14.0312, "step": 29704 }, { "epoch": 1.9728365544265127, "grad_norm": 216.14134216308594, "learning_rate": 9.673490741057833e-10, "loss": 23.9375, "step": 29705 }, { "epoch": 1.9729029687188682, "grad_norm": 223.25559997558594, "learning_rate": 9.626253017025465e-10, "loss": 16.7344, "step": 29706 }, { "epoch": 1.972969383011224, "grad_norm": 166.59130859375, "learning_rate": 9.579130855875694e-10, "loss": 15.9375, "step": 29707 }, { "epoch": 1.9730357973035797, "grad_norm": 148.2534942626953, "learning_rate": 9.53212425815364e-10, "loss": 16.9844, "step": 29708 }, { "epoch": 1.9731022115959354, "grad_norm": 264.4779052734375, "learning_rate": 9.485233224403311e-10, "loss": 18.9375, "step": 29709 }, { "epoch": 1.9731686258882912, "grad_norm": 112.54449462890625, "learning_rate": 9.438457755166497e-10, "loss": 17.3438, "step": 29710 }, { "epoch": 1.9732350401806469, "grad_norm": 282.12890625, "learning_rate": 9.391797850984984e-10, "loss": 16.1719, "step": 29711 }, { "epoch": 1.9733014544730025, "grad_norm": 281.7425537109375, "learning_rate": 9.345253512398344e-10, "loss": 22.2812, "step": 29712 }, { "epoch": 1.9733678687653584, "grad_norm": 248.4547882080078, "learning_rate": 9.298824739945033e-10, "loss": 13.3281, "step": 29713 }, { "epoch": 1.973434283057714, "grad_norm": 92.49491882324219, "learning_rate": 9.252511534161289e-10, "loss": 14.7031, "step": 29714 }, { "epoch": 1.9735006973500697, "grad_norm": 180.79859924316406, "learning_rate": 9.206313895584461e-10, "loss": 12.6719, "step": 29715 }, { "epoch": 1.9735671116424256, "grad_norm": 158.7040557861328, "learning_rate": 9.160231824747455e-10, "loss": 13.0625, "step": 29716 }, { "epoch": 1.973633525934781, "grad_norm": 270.01031494140625, "learning_rate": 9.114265322184289e-10, "loss": 16.3125, "step": 29717 }, { "epoch": 1.973699940227137, "grad_norm": 202.60853576660156, "learning_rate": 9.068414388425649e-10, "loss": 21.0156, "step": 29718 }, { "epoch": 1.9737663545194926, "grad_norm": 155.4619598388672, "learning_rate": 9.022679024002222e-10, "loss": 11.5, "step": 29719 }, { "epoch": 1.9738327688118482, "grad_norm": 163.380615234375, "learning_rate": 8.977059229443584e-10, "loss": 20.6875, "step": 29720 }, { "epoch": 1.973899183104204, "grad_norm": 556.7127685546875, "learning_rate": 8.931555005277091e-10, "loss": 15.8281, "step": 29721 }, { "epoch": 1.9739655973965597, "grad_norm": 202.20884704589844, "learning_rate": 8.8861663520301e-10, "loss": 19.1562, "step": 29722 }, { "epoch": 1.9740320116889154, "grad_norm": 208.710693359375, "learning_rate": 8.840893270225524e-10, "loss": 14.2656, "step": 29723 }, { "epoch": 1.9740984259812713, "grad_norm": 160.9514923095703, "learning_rate": 8.7957357603885e-10, "loss": 14.0625, "step": 29724 }, { "epoch": 1.974164840273627, "grad_norm": 142.59181213378906, "learning_rate": 8.750693823041944e-10, "loss": 12.7188, "step": 29725 }, { "epoch": 1.9742312545659826, "grad_norm": 162.33399963378906, "learning_rate": 8.705767458705438e-10, "loss": 13.3594, "step": 29726 }, { "epoch": 1.9742976688583385, "grad_norm": 1040.5975341796875, "learning_rate": 8.660956667899677e-10, "loss": 14.125, "step": 29727 }, { "epoch": 1.974364083150694, "grad_norm": 140.22909545898438, "learning_rate": 8.616261451142027e-10, "loss": 14.0469, "step": 29728 }, { "epoch": 1.9744304974430498, "grad_norm": 296.9455871582031, "learning_rate": 8.571681808950959e-10, "loss": 13.9766, "step": 29729 }, { "epoch": 1.9744969117354054, "grad_norm": 162.1430206298828, "learning_rate": 8.527217741840509e-10, "loss": 10.875, "step": 29730 }, { "epoch": 1.974563326027761, "grad_norm": 107.60700225830078, "learning_rate": 8.482869250325819e-10, "loss": 13.0, "step": 29731 }, { "epoch": 1.974629740320117, "grad_norm": 226.56417846679688, "learning_rate": 8.438636334919813e-10, "loss": 23.2969, "step": 29732 }, { "epoch": 1.9746961546124726, "grad_norm": 296.99945068359375, "learning_rate": 8.394518996135413e-10, "loss": 25.9609, "step": 29733 }, { "epoch": 1.9747625689048283, "grad_norm": 299.460205078125, "learning_rate": 8.350517234479992e-10, "loss": 19.1406, "step": 29734 }, { "epoch": 1.9748289831971841, "grad_norm": 163.3943328857422, "learning_rate": 8.306631050465362e-10, "loss": 14.4531, "step": 29735 }, { "epoch": 1.9748953974895398, "grad_norm": 195.4482879638672, "learning_rate": 8.262860444597786e-10, "loss": 18.7188, "step": 29736 }, { "epoch": 1.9749618117818954, "grad_norm": 141.47679138183594, "learning_rate": 8.219205417384634e-10, "loss": 14.9141, "step": 29737 }, { "epoch": 1.9750282260742513, "grad_norm": 207.18992614746094, "learning_rate": 8.175665969328837e-10, "loss": 17.6094, "step": 29738 }, { "epoch": 1.9750946403666068, "grad_norm": 134.29441833496094, "learning_rate": 8.132242100935549e-10, "loss": 10.2188, "step": 29739 }, { "epoch": 1.9751610546589626, "grad_norm": 136.0496368408203, "learning_rate": 8.088933812707699e-10, "loss": 15.2031, "step": 29740 }, { "epoch": 1.9752274689513183, "grad_norm": 496.50390625, "learning_rate": 8.045741105144887e-10, "loss": 14.8906, "step": 29741 }, { "epoch": 1.975293883243674, "grad_norm": 623.2554931640625, "learning_rate": 8.002663978746716e-10, "loss": 18.7188, "step": 29742 }, { "epoch": 1.9753602975360298, "grad_norm": 592.4150390625, "learning_rate": 7.959702434012783e-10, "loss": 23.75, "step": 29743 }, { "epoch": 1.9754267118283855, "grad_norm": 128.0477752685547, "learning_rate": 7.91685647143936e-10, "loss": 13.1406, "step": 29744 }, { "epoch": 1.9754931261207411, "grad_norm": 265.7501525878906, "learning_rate": 7.874126091521605e-10, "loss": 14.2188, "step": 29745 }, { "epoch": 1.975559540413097, "grad_norm": 140.09947204589844, "learning_rate": 7.831511294753568e-10, "loss": 14.6875, "step": 29746 }, { "epoch": 1.9756259547054527, "grad_norm": 193.74468994140625, "learning_rate": 7.789012081629299e-10, "loss": 16.8203, "step": 29747 }, { "epoch": 1.9756923689978083, "grad_norm": 306.9519348144531, "learning_rate": 7.746628452640625e-10, "loss": 15.2188, "step": 29748 }, { "epoch": 1.9757587832901642, "grad_norm": 216.0952606201172, "learning_rate": 7.704360408276045e-10, "loss": 15.2344, "step": 29749 }, { "epoch": 1.9758251975825196, "grad_norm": 324.646484375, "learning_rate": 7.662207949026278e-10, "loss": 17.3906, "step": 29750 }, { "epoch": 1.9758916118748755, "grad_norm": 113.21682739257812, "learning_rate": 7.620171075377602e-10, "loss": 11.9297, "step": 29751 }, { "epoch": 1.9759580261672312, "grad_norm": 127.92350006103516, "learning_rate": 7.578249787817403e-10, "loss": 9.7656, "step": 29752 }, { "epoch": 1.9760244404595868, "grad_norm": 176.5972137451172, "learning_rate": 7.536444086829741e-10, "loss": 15.3047, "step": 29753 }, { "epoch": 1.9760908547519427, "grad_norm": 214.00845336914062, "learning_rate": 7.494753972898671e-10, "loss": 20.9688, "step": 29754 }, { "epoch": 1.9761572690442983, "grad_norm": 186.13818359375, "learning_rate": 7.453179446506031e-10, "loss": 14.9219, "step": 29755 }, { "epoch": 1.976223683336654, "grad_norm": 326.03411865234375, "learning_rate": 7.411720508132546e-10, "loss": 21.6562, "step": 29756 }, { "epoch": 1.9762900976290099, "grad_norm": 349.634521484375, "learning_rate": 7.370377158258945e-10, "loss": 18.2031, "step": 29757 }, { "epoch": 1.9763565119213655, "grad_norm": 221.56146240234375, "learning_rate": 7.329149397362621e-10, "loss": 17.7656, "step": 29758 }, { "epoch": 1.9764229262137212, "grad_norm": 368.5390625, "learning_rate": 7.288037225919863e-10, "loss": 22.3594, "step": 29759 }, { "epoch": 1.976489340506077, "grad_norm": 205.24891662597656, "learning_rate": 7.247040644408065e-10, "loss": 18.3594, "step": 29760 }, { "epoch": 1.9765557547984325, "grad_norm": 273.21875, "learning_rate": 7.206159653300181e-10, "loss": 16.875, "step": 29761 }, { "epoch": 1.9766221690907884, "grad_norm": 544.265869140625, "learning_rate": 7.165394253069168e-10, "loss": 19.9531, "step": 29762 }, { "epoch": 1.976688583383144, "grad_norm": 197.1372528076172, "learning_rate": 7.12474444418576e-10, "loss": 16.1406, "step": 29763 }, { "epoch": 1.9767549976754997, "grad_norm": 126.99818420410156, "learning_rate": 7.084210227121801e-10, "loss": 15.3281, "step": 29764 }, { "epoch": 1.9768214119678555, "grad_norm": 84.53312683105469, "learning_rate": 7.043791602345805e-10, "loss": 11.8906, "step": 29765 }, { "epoch": 1.9768878262602112, "grad_norm": 315.4937744140625, "learning_rate": 7.003488570325177e-10, "loss": 16.5, "step": 29766 }, { "epoch": 1.9769542405525669, "grad_norm": 152.70382690429688, "learning_rate": 6.963301131525101e-10, "loss": 18.5, "step": 29767 }, { "epoch": 1.9770206548449227, "grad_norm": 176.66526794433594, "learning_rate": 6.923229286410759e-10, "loss": 14.2656, "step": 29768 }, { "epoch": 1.9770870691372784, "grad_norm": 167.37265014648438, "learning_rate": 6.883273035447334e-10, "loss": 21.4688, "step": 29769 }, { "epoch": 1.977153483429634, "grad_norm": 239.67025756835938, "learning_rate": 6.84343237909557e-10, "loss": 16.5469, "step": 29770 }, { "epoch": 1.97721989772199, "grad_norm": 285.3998107910156, "learning_rate": 6.803707317816209e-10, "loss": 14.6641, "step": 29771 }, { "epoch": 1.9772863120143453, "grad_norm": 498.0498352050781, "learning_rate": 6.764097852069994e-10, "loss": 15.2344, "step": 29772 }, { "epoch": 1.9773527263067012, "grad_norm": 249.08462524414062, "learning_rate": 6.724603982313226e-10, "loss": 16.1719, "step": 29773 }, { "epoch": 1.9774191405990569, "grad_norm": 287.8838195800781, "learning_rate": 6.685225709004427e-10, "loss": 30.0312, "step": 29774 }, { "epoch": 1.9774855548914125, "grad_norm": 421.45172119140625, "learning_rate": 6.645963032597679e-10, "loss": 17.4531, "step": 29775 }, { "epoch": 1.9775519691837684, "grad_norm": 241.1865234375, "learning_rate": 6.606815953549283e-10, "loss": 20.4062, "step": 29776 }, { "epoch": 1.977618383476124, "grad_norm": 186.27700805664062, "learning_rate": 6.567784472308879e-10, "loss": 16.8516, "step": 29777 }, { "epoch": 1.9776847977684797, "grad_norm": 240.7480010986328, "learning_rate": 6.52886858933166e-10, "loss": 19.8438, "step": 29778 }, { "epoch": 1.9777512120608356, "grad_norm": 272.66729736328125, "learning_rate": 6.490068305065044e-10, "loss": 16.6406, "step": 29779 }, { "epoch": 1.9778176263531912, "grad_norm": 156.7595977783203, "learning_rate": 6.451383619958672e-10, "loss": 20.0625, "step": 29780 }, { "epoch": 1.977884040645547, "grad_norm": 283.1149597167969, "learning_rate": 6.412814534459965e-10, "loss": 18.1719, "step": 29781 }, { "epoch": 1.9779504549379028, "grad_norm": 565.9223022460938, "learning_rate": 6.374361049015232e-10, "loss": 20.7031, "step": 29782 }, { "epoch": 1.9780168692302582, "grad_norm": 438.42694091796875, "learning_rate": 6.336023164069671e-10, "loss": 14.9844, "step": 29783 }, { "epoch": 1.978083283522614, "grad_norm": 116.80072784423828, "learning_rate": 6.297800880066262e-10, "loss": 17.2188, "step": 29784 }, { "epoch": 1.9781496978149697, "grad_norm": 202.77752685546875, "learning_rate": 6.259694197446875e-10, "loss": 16.4688, "step": 29785 }, { "epoch": 1.9782161121073254, "grad_norm": 265.8988037109375, "learning_rate": 6.221703116652266e-10, "loss": 13.9453, "step": 29786 }, { "epoch": 1.9782825263996813, "grad_norm": 173.74942016601562, "learning_rate": 6.183827638123196e-10, "loss": 13.3125, "step": 29787 }, { "epoch": 1.978348940692037, "grad_norm": 153.3989715576172, "learning_rate": 6.146067762295981e-10, "loss": 14.5, "step": 29788 }, { "epoch": 1.9784153549843926, "grad_norm": 135.48614501953125, "learning_rate": 6.10842348960916e-10, "loss": 12.8672, "step": 29789 }, { "epoch": 1.9784817692767485, "grad_norm": 304.128173828125, "learning_rate": 6.07089482049683e-10, "loss": 20.2344, "step": 29790 }, { "epoch": 1.9785481835691041, "grad_norm": 247.57757568359375, "learning_rate": 6.033481755393089e-10, "loss": 16.1094, "step": 29791 }, { "epoch": 1.9786145978614598, "grad_norm": 222.4123992919922, "learning_rate": 5.996184294732032e-10, "loss": 14.3438, "step": 29792 }, { "epoch": 1.9786810121538156, "grad_norm": 288.6795654296875, "learning_rate": 5.959002438944427e-10, "loss": 14.3281, "step": 29793 }, { "epoch": 1.978747426446171, "grad_norm": 146.43511962890625, "learning_rate": 5.92193618845882e-10, "loss": 12.5, "step": 29794 }, { "epoch": 1.978813840738527, "grad_norm": 127.07250213623047, "learning_rate": 5.884985543705978e-10, "loss": 14.2188, "step": 29795 }, { "epoch": 1.9788802550308826, "grad_norm": 234.90953063964844, "learning_rate": 5.848150505113336e-10, "loss": 18.7188, "step": 29796 }, { "epoch": 1.9789466693232383, "grad_norm": 480.6916809082031, "learning_rate": 5.81143107310611e-10, "loss": 22.4688, "step": 29797 }, { "epoch": 1.9790130836155941, "grad_norm": 503.5009765625, "learning_rate": 5.774827248108404e-10, "loss": 32.4375, "step": 29798 }, { "epoch": 1.9790794979079498, "grad_norm": 184.35211181640625, "learning_rate": 5.738339030545436e-10, "loss": 20.8438, "step": 29799 }, { "epoch": 1.9791459122003054, "grad_norm": 263.80206298828125, "learning_rate": 5.701966420837978e-10, "loss": 14.4688, "step": 29800 }, { "epoch": 1.9792123264926613, "grad_norm": 224.44705200195312, "learning_rate": 5.665709419406806e-10, "loss": 21.9219, "step": 29801 }, { "epoch": 1.979278740785017, "grad_norm": 276.55438232421875, "learning_rate": 5.629568026671583e-10, "loss": 19.7969, "step": 29802 }, { "epoch": 1.9793451550773726, "grad_norm": 642.7906494140625, "learning_rate": 5.593542243051974e-10, "loss": 15.8281, "step": 29803 }, { "epoch": 1.9794115693697285, "grad_norm": 1865.0863037109375, "learning_rate": 5.557632068960982e-10, "loss": 17.4062, "step": 29804 }, { "epoch": 1.979477983662084, "grad_norm": 168.29054260253906, "learning_rate": 5.521837504817162e-10, "loss": 15.2812, "step": 29805 }, { "epoch": 1.9795443979544398, "grad_norm": 202.3062744140625, "learning_rate": 5.486158551034626e-10, "loss": 16.5781, "step": 29806 }, { "epoch": 1.9796108122467955, "grad_norm": 268.8985900878906, "learning_rate": 5.450595208023045e-10, "loss": 19.5625, "step": 29807 }, { "epoch": 1.9796772265391511, "grad_norm": 194.00953674316406, "learning_rate": 5.415147476197646e-10, "loss": 17.2344, "step": 29808 }, { "epoch": 1.979743640831507, "grad_norm": 124.49293518066406, "learning_rate": 5.379815355965877e-10, "loss": 11.5391, "step": 29809 }, { "epoch": 1.9798100551238627, "grad_norm": 278.8968505859375, "learning_rate": 5.344598847736304e-10, "loss": 14.4688, "step": 29810 }, { "epoch": 1.9798764694162183, "grad_norm": 319.10015869140625, "learning_rate": 5.309497951918596e-10, "loss": 21.8594, "step": 29811 }, { "epoch": 1.9799428837085742, "grad_norm": 211.49195861816406, "learning_rate": 5.274512668915765e-10, "loss": 13.2969, "step": 29812 }, { "epoch": 1.9800092980009298, "grad_norm": 168.16470336914062, "learning_rate": 5.239642999135263e-10, "loss": 17.4219, "step": 29813 }, { "epoch": 1.9800757122932855, "grad_norm": 272.63836669921875, "learning_rate": 5.204888942978991e-10, "loss": 9.8984, "step": 29814 }, { "epoch": 1.9801421265856414, "grad_norm": 195.16336059570312, "learning_rate": 5.17025050084885e-10, "loss": 15.8438, "step": 29815 }, { "epoch": 1.9802085408779968, "grad_norm": 174.69436645507812, "learning_rate": 5.135727673146739e-10, "loss": 16.1875, "step": 29816 }, { "epoch": 1.9802749551703527, "grad_norm": 263.72021484375, "learning_rate": 5.10132046027123e-10, "loss": 16.6562, "step": 29817 }, { "epoch": 1.9803413694627083, "grad_norm": 258.0528564453125, "learning_rate": 5.067028862618672e-10, "loss": 17.8125, "step": 29818 }, { "epoch": 1.980407783755064, "grad_norm": 189.0381622314453, "learning_rate": 5.032852880588745e-10, "loss": 18.4375, "step": 29819 }, { "epoch": 1.9804741980474199, "grad_norm": 168.54527282714844, "learning_rate": 4.998792514575578e-10, "loss": 12.6719, "step": 29820 }, { "epoch": 1.9805406123397755, "grad_norm": 404.89306640625, "learning_rate": 4.964847764972191e-10, "loss": 18.8438, "step": 29821 }, { "epoch": 1.9806070266321312, "grad_norm": 103.920166015625, "learning_rate": 4.931018632171602e-10, "loss": 10.1484, "step": 29822 }, { "epoch": 1.980673440924487, "grad_norm": 230.0148468017578, "learning_rate": 4.89730511656572e-10, "loss": 22.9531, "step": 29823 }, { "epoch": 1.9807398552168427, "grad_norm": 108.60342407226562, "learning_rate": 4.863707218545343e-10, "loss": 11.6719, "step": 29824 }, { "epoch": 1.9808062695091984, "grad_norm": 138.26341247558594, "learning_rate": 4.830224938496829e-10, "loss": 17.0, "step": 29825 }, { "epoch": 1.9808726838015542, "grad_norm": 147.06509399414062, "learning_rate": 4.796858276809867e-10, "loss": 14.8906, "step": 29826 }, { "epoch": 1.9809390980939097, "grad_norm": 107.0012435913086, "learning_rate": 4.763607233868594e-10, "loss": 12.5312, "step": 29827 }, { "epoch": 1.9810055123862655, "grad_norm": 327.39788818359375, "learning_rate": 4.730471810058257e-10, "loss": 12.7109, "step": 29828 }, { "epoch": 1.9810719266786212, "grad_norm": 228.94268798828125, "learning_rate": 4.697452005761881e-10, "loss": 16.5781, "step": 29829 }, { "epoch": 1.9811383409709769, "grad_norm": 329.33624267578125, "learning_rate": 4.664547821362496e-10, "loss": 24.1875, "step": 29830 }, { "epoch": 1.9812047552633327, "grad_norm": 198.74522399902344, "learning_rate": 4.631759257239798e-10, "loss": 17.6406, "step": 29831 }, { "epoch": 1.9812711695556884, "grad_norm": 177.89027404785156, "learning_rate": 4.599086313772371e-10, "loss": 12.6719, "step": 29832 }, { "epoch": 1.981337583848044, "grad_norm": 340.6739196777344, "learning_rate": 4.566528991339913e-10, "loss": 21.3438, "step": 29833 }, { "epoch": 1.9814039981404, "grad_norm": 235.02552795410156, "learning_rate": 4.534087290318789e-10, "loss": 12.0156, "step": 29834 }, { "epoch": 1.9814704124327556, "grad_norm": 870.64697265625, "learning_rate": 4.5017612110820336e-10, "loss": 14.9844, "step": 29835 }, { "epoch": 1.9815368267251112, "grad_norm": 4155.22119140625, "learning_rate": 4.469550754007123e-10, "loss": 18.9844, "step": 29836 }, { "epoch": 1.981603241017467, "grad_norm": 132.82498168945312, "learning_rate": 4.4374559194626514e-10, "loss": 15.3906, "step": 29837 }, { "epoch": 1.9816696553098225, "grad_norm": 186.4306182861328, "learning_rate": 4.405476707822764e-10, "loss": 14.3906, "step": 29838 }, { "epoch": 1.9817360696021784, "grad_norm": 177.99742126464844, "learning_rate": 4.3736131194571644e-10, "loss": 14.7969, "step": 29839 }, { "epoch": 1.981802483894534, "grad_norm": 172.49160766601562, "learning_rate": 4.3418651547333375e-10, "loss": 12.9609, "step": 29840 }, { "epoch": 1.9818688981868897, "grad_norm": 229.1228485107422, "learning_rate": 4.310232814018766e-10, "loss": 16.7344, "step": 29841 }, { "epoch": 1.9819353124792456, "grad_norm": 167.55645751953125, "learning_rate": 4.2787160976798243e-10, "loss": 14.5625, "step": 29842 }, { "epoch": 1.9820017267716012, "grad_norm": 110.16545867919922, "learning_rate": 4.247315006080665e-10, "loss": 18.5469, "step": 29843 }, { "epoch": 1.982068141063957, "grad_norm": 276.54376220703125, "learning_rate": 4.2160295395843315e-10, "loss": 17.0156, "step": 29844 }, { "epoch": 1.9821345553563128, "grad_norm": 175.5704345703125, "learning_rate": 4.1848596985538666e-10, "loss": 18.5625, "step": 29845 }, { "epoch": 1.9822009696486684, "grad_norm": 176.56578063964844, "learning_rate": 4.1538054833489823e-10, "loss": 16.5781, "step": 29846 }, { "epoch": 1.982267383941024, "grad_norm": 138.8981170654297, "learning_rate": 4.1228668943282807e-10, "loss": 13.3594, "step": 29847 }, { "epoch": 1.98233379823338, "grad_norm": 112.1473617553711, "learning_rate": 4.092043931850364e-10, "loss": 12.75, "step": 29848 }, { "epoch": 1.9824002125257354, "grad_norm": 234.12469482421875, "learning_rate": 4.061336596271614e-10, "loss": 19.75, "step": 29849 }, { "epoch": 1.9824666268180913, "grad_norm": 123.95975494384766, "learning_rate": 4.030744887948412e-10, "loss": 13.9688, "step": 29850 }, { "epoch": 1.982533041110447, "grad_norm": 126.93898010253906, "learning_rate": 4.0002688072326984e-10, "loss": 14.8438, "step": 29851 }, { "epoch": 1.9825994554028026, "grad_norm": 477.4247741699219, "learning_rate": 3.969908354477525e-10, "loss": 24.6562, "step": 29852 }, { "epoch": 1.9826658696951585, "grad_norm": 537.604736328125, "learning_rate": 3.9396635300348315e-10, "loss": 21.2188, "step": 29853 }, { "epoch": 1.982732283987514, "grad_norm": 349.3736267089844, "learning_rate": 3.9095343342543387e-10, "loss": 16.375, "step": 29854 }, { "epoch": 1.9827986982798698, "grad_norm": 318.0660400390625, "learning_rate": 3.8795207674846566e-10, "loss": 17.125, "step": 29855 }, { "epoch": 1.9828651125722256, "grad_norm": 185.01463317871094, "learning_rate": 3.8496228300721745e-10, "loss": 11.4844, "step": 29856 }, { "epoch": 1.9829315268645813, "grad_norm": 325.8623046875, "learning_rate": 3.8198405223621724e-10, "loss": 20.0938, "step": 29857 }, { "epoch": 1.982997941156937, "grad_norm": 220.39878845214844, "learning_rate": 3.790173844701039e-10, "loss": 18.0312, "step": 29858 }, { "epoch": 1.9830643554492928, "grad_norm": 129.12135314941406, "learning_rate": 3.7606227974307234e-10, "loss": 11.9375, "step": 29859 }, { "epoch": 1.9831307697416483, "grad_norm": 163.5359344482422, "learning_rate": 3.731187380893175e-10, "loss": 16.5, "step": 29860 }, { "epoch": 1.9831971840340041, "grad_norm": 294.3202209472656, "learning_rate": 3.7018675954303413e-10, "loss": 17.8125, "step": 29861 }, { "epoch": 1.9832635983263598, "grad_norm": 199.28826904296875, "learning_rate": 3.672663441378621e-10, "loss": 14.5312, "step": 29862 }, { "epoch": 1.9833300126187154, "grad_norm": 131.56982421875, "learning_rate": 3.643574919077741e-10, "loss": 16.8281, "step": 29863 }, { "epoch": 1.9833964269110713, "grad_norm": 170.70083618164062, "learning_rate": 3.6146020288641e-10, "loss": 11.5938, "step": 29864 }, { "epoch": 1.983462841203427, "grad_norm": 239.52362060546875, "learning_rate": 3.5857447710718746e-10, "loss": 16.8438, "step": 29865 }, { "epoch": 1.9835292554957826, "grad_norm": 245.67770385742188, "learning_rate": 3.5570031460363524e-10, "loss": 14.8281, "step": 29866 }, { "epoch": 1.9835956697881385, "grad_norm": 134.83526611328125, "learning_rate": 3.52837715408838e-10, "loss": 12.2188, "step": 29867 }, { "epoch": 1.9836620840804942, "grad_norm": 267.01763916015625, "learning_rate": 3.4998667955599136e-10, "loss": 12.8281, "step": 29868 }, { "epoch": 1.9837284983728498, "grad_norm": 259.1098327636719, "learning_rate": 3.4714720707818003e-10, "loss": 14.0625, "step": 29869 }, { "epoch": 1.9837949126652057, "grad_norm": 304.1251525878906, "learning_rate": 3.443192980080445e-10, "loss": 17.9609, "step": 29870 }, { "epoch": 1.9838613269575611, "grad_norm": 182.8317108154297, "learning_rate": 3.415029523783364e-10, "loss": 15.8438, "step": 29871 }, { "epoch": 1.983927741249917, "grad_norm": 118.45099639892578, "learning_rate": 3.3869817022180723e-10, "loss": 13.7969, "step": 29872 }, { "epoch": 1.9839941555422727, "grad_norm": 139.4284210205078, "learning_rate": 3.3590495157065357e-10, "loss": 14.2969, "step": 29873 }, { "epoch": 1.9840605698346283, "grad_norm": 112.32474517822266, "learning_rate": 3.331232964574049e-10, "loss": 11.5938, "step": 29874 }, { "epoch": 1.9841269841269842, "grad_norm": 176.92791748046875, "learning_rate": 3.303532049141466e-10, "loss": 12.8438, "step": 29875 }, { "epoch": 1.9841933984193398, "grad_norm": 134.31039428710938, "learning_rate": 3.2759467697285326e-10, "loss": 12.5391, "step": 29876 }, { "epoch": 1.9842598127116955, "grad_norm": 124.68353271484375, "learning_rate": 3.2484771266549916e-10, "loss": 15.6719, "step": 29877 }, { "epoch": 1.9843262270040514, "grad_norm": 197.86849975585938, "learning_rate": 3.2211231202394773e-10, "loss": 11.0781, "step": 29878 }, { "epoch": 1.984392641296407, "grad_norm": 291.0882873535156, "learning_rate": 3.193884750796183e-10, "loss": 15.9062, "step": 29879 }, { "epoch": 1.9844590555887627, "grad_norm": 142.427490234375, "learning_rate": 3.1667620186426323e-10, "loss": 13.5781, "step": 29880 }, { "epoch": 1.9845254698811186, "grad_norm": 225.04232788085938, "learning_rate": 3.1397549240919084e-10, "loss": 17.0156, "step": 29881 }, { "epoch": 1.984591884173474, "grad_norm": 90.91323852539062, "learning_rate": 3.112863467454874e-10, "loss": 11.2812, "step": 29882 }, { "epoch": 1.9846582984658299, "grad_norm": 231.72731018066406, "learning_rate": 3.0860876490435007e-10, "loss": 17.0312, "step": 29883 }, { "epoch": 1.9847247127581855, "grad_norm": 240.09799194335938, "learning_rate": 3.059427469168652e-10, "loss": 19.8125, "step": 29884 }, { "epoch": 1.9847911270505412, "grad_norm": 180.5756378173828, "learning_rate": 3.032882928137859e-10, "loss": 22.3594, "step": 29885 }, { "epoch": 1.984857541342897, "grad_norm": 399.3248291015625, "learning_rate": 3.0064540262575434e-10, "loss": 14.4375, "step": 29886 }, { "epoch": 1.9849239556352527, "grad_norm": 834.6359252929688, "learning_rate": 2.980140763834127e-10, "loss": 13.6406, "step": 29887 }, { "epoch": 1.9849903699276084, "grad_norm": 693.7579345703125, "learning_rate": 2.953943141171811e-10, "loss": 24.9062, "step": 29888 }, { "epoch": 1.9850567842199642, "grad_norm": 166.82386779785156, "learning_rate": 2.9278611585736856e-10, "loss": 13.3125, "step": 29889 }, { "epoch": 1.9851231985123199, "grad_norm": 106.73294067382812, "learning_rate": 2.9018948163417323e-10, "loss": 13.3984, "step": 29890 }, { "epoch": 1.9851896128046755, "grad_norm": 213.91195678710938, "learning_rate": 2.876044114775711e-10, "loss": 18.6094, "step": 29891 }, { "epoch": 1.9852560270970314, "grad_norm": 182.1944122314453, "learning_rate": 2.8503090541742713e-10, "loss": 13.3125, "step": 29892 }, { "epoch": 1.9853224413893868, "grad_norm": 98.42205810546875, "learning_rate": 2.824689634836064e-10, "loss": 10.6406, "step": 29893 }, { "epoch": 1.9853888556817427, "grad_norm": 199.6141357421875, "learning_rate": 2.799185857057518e-10, "loss": 16.6562, "step": 29894 }, { "epoch": 1.9854552699740984, "grad_norm": 247.87730407714844, "learning_rate": 2.7737977211339527e-10, "loss": 13.3281, "step": 29895 }, { "epoch": 1.985521684266454, "grad_norm": 127.2840576171875, "learning_rate": 2.748525227357357e-10, "loss": 14.0156, "step": 29896 }, { "epoch": 1.98558809855881, "grad_norm": 221.5677032470703, "learning_rate": 2.723368376020829e-10, "loss": 15.2969, "step": 29897 }, { "epoch": 1.9856545128511656, "grad_norm": 172.166259765625, "learning_rate": 2.698327167416359e-10, "loss": 13.8281, "step": 29898 }, { "epoch": 1.9857209271435212, "grad_norm": 116.47233581542969, "learning_rate": 2.6734016018326033e-10, "loss": 16.3672, "step": 29899 }, { "epoch": 1.985787341435877, "grad_norm": 400.99737548828125, "learning_rate": 2.648591679558221e-10, "loss": 16.8594, "step": 29900 }, { "epoch": 1.9858537557282328, "grad_norm": 268.9792785644531, "learning_rate": 2.623897400880759e-10, "loss": 16.125, "step": 29901 }, { "epoch": 1.9859201700205884, "grad_norm": 811.6617431640625, "learning_rate": 2.5993187660844353e-10, "loss": 21.125, "step": 29902 }, { "epoch": 1.9859865843129443, "grad_norm": 271.1747741699219, "learning_rate": 2.574855775454576e-10, "loss": 15.9531, "step": 29903 }, { "epoch": 1.9860529986052997, "grad_norm": 331.6934814453125, "learning_rate": 2.5505084292731793e-10, "loss": 11.6953, "step": 29904 }, { "epoch": 1.9861194128976556, "grad_norm": 166.11083984375, "learning_rate": 2.526276727823351e-10, "loss": 15.8125, "step": 29905 }, { "epoch": 1.9861858271900112, "grad_norm": 623.02099609375, "learning_rate": 2.5021606713848676e-10, "loss": 13.0156, "step": 29906 }, { "epoch": 1.986252241482367, "grad_norm": 116.65702819824219, "learning_rate": 2.478160260236395e-10, "loss": 13.4688, "step": 29907 }, { "epoch": 1.9863186557747228, "grad_norm": 134.13446044921875, "learning_rate": 2.454275494655489e-10, "loss": 17.75, "step": 29908 }, { "epoch": 1.9863850700670784, "grad_norm": 169.13548278808594, "learning_rate": 2.430506374918595e-10, "loss": 20.1875, "step": 29909 }, { "epoch": 1.986451484359434, "grad_norm": 184.9854736328125, "learning_rate": 2.4068529013010484e-10, "loss": 14.5469, "step": 29910 }, { "epoch": 1.98651789865179, "grad_norm": 589.900390625, "learning_rate": 2.3833150740759647e-10, "loss": 20.8281, "step": 29911 }, { "epoch": 1.9865843129441456, "grad_norm": 271.15313720703125, "learning_rate": 2.3598928935153473e-10, "loss": 19.9375, "step": 29912 }, { "epoch": 1.9866507272365013, "grad_norm": 432.9671936035156, "learning_rate": 2.3365863598900916e-10, "loss": 17.875, "step": 29913 }, { "epoch": 1.9867171415288571, "grad_norm": 320.51513671875, "learning_rate": 2.3133954734710913e-10, "loss": 14.4375, "step": 29914 }, { "epoch": 1.9867835558212126, "grad_norm": 230.63282775878906, "learning_rate": 2.290320234525911e-10, "loss": 16.1719, "step": 29915 }, { "epoch": 1.9868499701135685, "grad_norm": 166.96041870117188, "learning_rate": 2.2673606433210036e-10, "loss": 18.2969, "step": 29916 }, { "epoch": 1.986916384405924, "grad_norm": 261.3675842285156, "learning_rate": 2.2445167001217124e-10, "loss": 17.7656, "step": 29917 }, { "epoch": 1.9869827986982798, "grad_norm": 218.9579315185547, "learning_rate": 2.2217884051933811e-10, "loss": 16.0469, "step": 29918 }, { "epoch": 1.9870492129906356, "grad_norm": 187.9188232421875, "learning_rate": 2.199175758798022e-10, "loss": 14.6406, "step": 29919 }, { "epoch": 1.9871156272829913, "grad_norm": 127.87073516845703, "learning_rate": 2.176678761197648e-10, "loss": 19.1562, "step": 29920 }, { "epoch": 1.987182041575347, "grad_norm": 230.6216278076172, "learning_rate": 2.154297412653161e-10, "loss": 19.5781, "step": 29921 }, { "epoch": 1.9872484558677028, "grad_norm": 220.03536987304688, "learning_rate": 2.132031713421023e-10, "loss": 18.4062, "step": 29922 }, { "epoch": 1.9873148701600585, "grad_norm": 67.25418853759766, "learning_rate": 2.1098816637621363e-10, "loss": 9.2656, "step": 29923 }, { "epoch": 1.9873812844524141, "grad_norm": 106.00582122802734, "learning_rate": 2.087847263929632e-10, "loss": 12.6094, "step": 29924 }, { "epoch": 1.98744769874477, "grad_norm": 137.56761169433594, "learning_rate": 2.0659285141810811e-10, "loss": 15.0859, "step": 29925 }, { "epoch": 1.9875141130371254, "grad_norm": 247.46702575683594, "learning_rate": 2.0441254147673948e-10, "loss": 15.7969, "step": 29926 }, { "epoch": 1.9875805273294813, "grad_norm": 324.0851135253906, "learning_rate": 2.0224379659428136e-10, "loss": 16.7031, "step": 29927 }, { "epoch": 1.987646941621837, "grad_norm": 184.6156463623047, "learning_rate": 2.0008661679582485e-10, "loss": 20.2812, "step": 29928 }, { "epoch": 1.9877133559141926, "grad_norm": 267.61749267578125, "learning_rate": 1.9794100210612785e-10, "loss": 15.3438, "step": 29929 }, { "epoch": 1.9877797702065485, "grad_norm": 163.73043823242188, "learning_rate": 1.9580695255017043e-10, "loss": 13.5938, "step": 29930 }, { "epoch": 1.9878461844989042, "grad_norm": 405.6534729003906, "learning_rate": 1.9368446815259954e-10, "loss": 17.7656, "step": 29931 }, { "epoch": 1.9879125987912598, "grad_norm": 253.04071044921875, "learning_rate": 1.9157354893795108e-10, "loss": 19.5, "step": 29932 }, { "epoch": 1.9879790130836157, "grad_norm": 162.55186462402344, "learning_rate": 1.8947419493064997e-10, "loss": 14.875, "step": 29933 }, { "epoch": 1.9880454273759713, "grad_norm": 113.68008422851562, "learning_rate": 1.8738640615501012e-10, "loss": 12.4844, "step": 29934 }, { "epoch": 1.988111841668327, "grad_norm": 527.1334228515625, "learning_rate": 1.8531018263523435e-10, "loss": 23.9219, "step": 29935 }, { "epoch": 1.9881782559606829, "grad_norm": 192.7169189453125, "learning_rate": 1.8324552439519248e-10, "loss": 17.3594, "step": 29936 }, { "epoch": 1.9882446702530383, "grad_norm": 205.72683715820312, "learning_rate": 1.8119243145886532e-10, "loss": 18.2188, "step": 29937 }, { "epoch": 1.9883110845453942, "grad_norm": 159.3927459716797, "learning_rate": 1.791509038499006e-10, "loss": 14.7969, "step": 29938 }, { "epoch": 1.9883774988377498, "grad_norm": 108.60848999023438, "learning_rate": 1.7712094159205716e-10, "loss": 10.7266, "step": 29939 }, { "epoch": 1.9884439131301055, "grad_norm": 111.69609832763672, "learning_rate": 1.751025447086496e-10, "loss": 12.75, "step": 29940 }, { "epoch": 1.9885103274224614, "grad_norm": 147.16259765625, "learning_rate": 1.730957132232147e-10, "loss": 15.2344, "step": 29941 }, { "epoch": 1.988576741714817, "grad_norm": 166.3317413330078, "learning_rate": 1.7110044715884508e-10, "loss": 15.7656, "step": 29942 }, { "epoch": 1.9886431560071727, "grad_norm": 201.69464111328125, "learning_rate": 1.6911674653874442e-10, "loss": 18.6719, "step": 29943 }, { "epoch": 1.9887095702995286, "grad_norm": 206.49713134765625, "learning_rate": 1.6714461138567227e-10, "loss": 20.375, "step": 29944 }, { "epoch": 1.9887759845918842, "grad_norm": 291.03033447265625, "learning_rate": 1.6518404172249922e-10, "loss": 22.0312, "step": 29945 }, { "epoch": 1.9888423988842399, "grad_norm": 136.8979949951172, "learning_rate": 1.6323503757198487e-10, "loss": 15.4844, "step": 29946 }, { "epoch": 1.9889088131765957, "grad_norm": 203.56076049804688, "learning_rate": 1.6129759895666671e-10, "loss": 18.6094, "step": 29947 }, { "epoch": 1.9889752274689512, "grad_norm": 150.86968994140625, "learning_rate": 1.5937172589886028e-10, "loss": 17.7812, "step": 29948 }, { "epoch": 1.989041641761307, "grad_norm": 261.69683837890625, "learning_rate": 1.5745741842099204e-10, "loss": 16.4375, "step": 29949 }, { "epoch": 1.9891080560536627, "grad_norm": 175.0885467529297, "learning_rate": 1.5555467654504438e-10, "loss": 14.3594, "step": 29950 }, { "epoch": 1.9891744703460184, "grad_norm": 273.9044189453125, "learning_rate": 1.5366350029311082e-10, "loss": 20.0, "step": 29951 }, { "epoch": 1.9892408846383742, "grad_norm": 179.33648681640625, "learning_rate": 1.5178388968695166e-10, "loss": 14.625, "step": 29952 }, { "epoch": 1.9893072989307299, "grad_norm": 120.07548522949219, "learning_rate": 1.4991584474854936e-10, "loss": 12.7344, "step": 29953 }, { "epoch": 1.9893737132230855, "grad_norm": 222.59307861328125, "learning_rate": 1.480593654993312e-10, "loss": 13.9062, "step": 29954 }, { "epoch": 1.9894401275154414, "grad_norm": 185.36378479003906, "learning_rate": 1.462144519608355e-10, "loss": 22.6406, "step": 29955 }, { "epoch": 1.989506541807797, "grad_norm": 365.5715637207031, "learning_rate": 1.4438110415437854e-10, "loss": 14.3594, "step": 29956 }, { "epoch": 1.9895729561001527, "grad_norm": 183.08180236816406, "learning_rate": 1.425593221010546e-10, "loss": 14.4844, "step": 29957 }, { "epoch": 1.9896393703925086, "grad_norm": 105.84103393554688, "learning_rate": 1.4074910582217992e-10, "loss": 12.0859, "step": 29958 }, { "epoch": 1.989705784684864, "grad_norm": 143.8919677734375, "learning_rate": 1.3895045533851568e-10, "loss": 9.75, "step": 29959 }, { "epoch": 1.98977219897722, "grad_norm": 119.35423278808594, "learning_rate": 1.371633706709341e-10, "loss": 15.7812, "step": 29960 }, { "epoch": 1.9898386132695756, "grad_norm": 301.5553283691406, "learning_rate": 1.353878518401963e-10, "loss": 14.9531, "step": 29961 }, { "epoch": 1.9899050275619312, "grad_norm": 656.846435546875, "learning_rate": 1.336238988666194e-10, "loss": 13.0469, "step": 29962 }, { "epoch": 1.989971441854287, "grad_norm": 304.03692626953125, "learning_rate": 1.3187151177074252e-10, "loss": 11.4062, "step": 29963 }, { "epoch": 1.9900378561466427, "grad_norm": 161.9090118408203, "learning_rate": 1.301306905727717e-10, "loss": 13.1094, "step": 29964 }, { "epoch": 1.9901042704389984, "grad_norm": 449.6712951660156, "learning_rate": 1.2840143529291303e-10, "loss": 18.1016, "step": 29965 }, { "epoch": 1.9901706847313543, "grad_norm": 150.27108764648438, "learning_rate": 1.266837459512615e-10, "loss": 14.5625, "step": 29966 }, { "epoch": 1.99023709902371, "grad_norm": 120.91302490234375, "learning_rate": 1.2497762256746813e-10, "loss": 12.4375, "step": 29967 }, { "epoch": 1.9903035133160656, "grad_norm": 248.95741271972656, "learning_rate": 1.2328306516140586e-10, "loss": 14.4375, "step": 29968 }, { "epoch": 1.9903699276084215, "grad_norm": 152.1917724609375, "learning_rate": 1.216000737526146e-10, "loss": 18.375, "step": 29969 }, { "epoch": 1.990436341900777, "grad_norm": 294.568359375, "learning_rate": 1.1992864836063432e-10, "loss": 19.8594, "step": 29970 }, { "epoch": 1.9905027561931328, "grad_norm": 281.943603515625, "learning_rate": 1.1826878900467185e-10, "loss": 22.4531, "step": 29971 }, { "epoch": 1.9905691704854884, "grad_norm": 183.90377807617188, "learning_rate": 1.1662049570404507e-10, "loss": 13.2656, "step": 29972 }, { "epoch": 1.990635584777844, "grad_norm": 187.79061889648438, "learning_rate": 1.1498376847784985e-10, "loss": 16.4688, "step": 29973 }, { "epoch": 1.9907019990702, "grad_norm": 491.22607421875, "learning_rate": 1.1335860734484892e-10, "loss": 22.5469, "step": 29974 }, { "epoch": 1.9907684133625556, "grad_norm": 142.4912109375, "learning_rate": 1.1174501232402711e-10, "loss": 15.125, "step": 29975 }, { "epoch": 1.9908348276549113, "grad_norm": 157.02210998535156, "learning_rate": 1.1014298343403616e-10, "loss": 16.9531, "step": 29976 }, { "epoch": 1.9909012419472671, "grad_norm": 114.38025665283203, "learning_rate": 1.0855252069319476e-10, "loss": 16.6406, "step": 29977 }, { "epoch": 1.9909676562396228, "grad_norm": 277.26031494140625, "learning_rate": 1.0697362412015464e-10, "loss": 19.2188, "step": 29978 }, { "epoch": 1.9910340705319785, "grad_norm": 142.1007537841797, "learning_rate": 1.0540629373301246e-10, "loss": 16.625, "step": 29979 }, { "epoch": 1.9911004848243343, "grad_norm": 191.74017333984375, "learning_rate": 1.0385052954997587e-10, "loss": 15.3906, "step": 29980 }, { "epoch": 1.9911668991166898, "grad_norm": 126.9088363647461, "learning_rate": 1.0230633158903046e-10, "loss": 12.8281, "step": 29981 }, { "epoch": 1.9912333134090456, "grad_norm": 122.10250091552734, "learning_rate": 1.0077369986805084e-10, "loss": 12.1328, "step": 29982 }, { "epoch": 1.9912997277014013, "grad_norm": 216.67098999023438, "learning_rate": 9.925263440468956e-11, "loss": 16.1406, "step": 29983 }, { "epoch": 1.991366141993757, "grad_norm": 187.6599884033203, "learning_rate": 9.774313521659915e-11, "loss": 14.5312, "step": 29984 }, { "epoch": 1.9914325562861128, "grad_norm": 277.2764892578125, "learning_rate": 9.624520232132117e-11, "loss": 18.4219, "step": 29985 }, { "epoch": 1.9914989705784685, "grad_norm": 393.8785400390625, "learning_rate": 9.4758835735953e-11, "loss": 16.4375, "step": 29986 }, { "epoch": 1.9915653848708241, "grad_norm": 175.2695770263672, "learning_rate": 9.328403547792518e-11, "loss": 18.375, "step": 29987 }, { "epoch": 1.99163179916318, "grad_norm": 126.26898193359375, "learning_rate": 9.182080156422411e-11, "loss": 15.0, "step": 29988 }, { "epoch": 1.9916982134555357, "grad_norm": 108.49217224121094, "learning_rate": 9.036913401183622e-11, "loss": 13.5625, "step": 29989 }, { "epoch": 1.9917646277478913, "grad_norm": 878.5018920898438, "learning_rate": 8.89290328373038e-11, "loss": 34.7969, "step": 29990 }, { "epoch": 1.9918310420402472, "grad_norm": 526.5514526367188, "learning_rate": 8.750049805761328e-11, "loss": 14.4375, "step": 29991 }, { "epoch": 1.9918974563326026, "grad_norm": 231.69056701660156, "learning_rate": 8.608352968908494e-11, "loss": 22.125, "step": 29992 }, { "epoch": 1.9919638706249585, "grad_norm": 337.9718017578125, "learning_rate": 8.467812774815008e-11, "loss": 11.8516, "step": 29993 }, { "epoch": 1.9920302849173142, "grad_norm": 250.8922119140625, "learning_rate": 8.328429225101797e-11, "loss": 17.0938, "step": 29994 }, { "epoch": 1.9920966992096698, "grad_norm": 722.409912109375, "learning_rate": 8.190202321389782e-11, "loss": 21.6719, "step": 29995 }, { "epoch": 1.9921631135020257, "grad_norm": 286.5731506347656, "learning_rate": 8.05313206527769e-11, "loss": 22.7188, "step": 29996 }, { "epoch": 1.9922295277943813, "grad_norm": 362.6443176269531, "learning_rate": 7.917218458342034e-11, "loss": 18.4375, "step": 29997 }, { "epoch": 1.992295942086737, "grad_norm": 343.5428771972656, "learning_rate": 7.782461502170434e-11, "loss": 20.5625, "step": 29998 }, { "epoch": 1.9923623563790929, "grad_norm": 216.9970703125, "learning_rate": 7.648861198306101e-11, "loss": 20.1875, "step": 29999 }, { "epoch": 1.9924287706714485, "grad_norm": 300.6775207519531, "learning_rate": 7.516417548303344e-11, "loss": 19.4219, "step": 30000 }, { "epoch": 1.9924951849638042, "grad_norm": 157.7637939453125, "learning_rate": 7.385130553694274e-11, "loss": 14.375, "step": 30001 }, { "epoch": 1.99256159925616, "grad_norm": 140.23614501953125, "learning_rate": 7.255000215988794e-11, "loss": 18.9219, "step": 30002 }, { "epoch": 1.9926280135485155, "grad_norm": 125.28079986572266, "learning_rate": 7.126026536707908e-11, "loss": 11.3906, "step": 30003 }, { "epoch": 1.9926944278408714, "grad_norm": 165.0611114501953, "learning_rate": 6.998209517328213e-11, "loss": 17.0469, "step": 30004 }, { "epoch": 1.992760842133227, "grad_norm": 166.6775665283203, "learning_rate": 6.871549159337408e-11, "loss": 13.1719, "step": 30005 }, { "epoch": 1.9928272564255827, "grad_norm": 307.5706481933594, "learning_rate": 6.746045464189887e-11, "loss": 20.2188, "step": 30006 }, { "epoch": 1.9928936707179385, "grad_norm": 127.92391204833984, "learning_rate": 6.621698433351141e-11, "loss": 14.2031, "step": 30007 }, { "epoch": 1.9929600850102942, "grad_norm": 260.78143310546875, "learning_rate": 6.498508068242259e-11, "loss": 18.4844, "step": 30008 }, { "epoch": 1.9930264993026499, "grad_norm": 159.80711364746094, "learning_rate": 6.376474370306529e-11, "loss": 15.0938, "step": 30009 }, { "epoch": 1.9930929135950057, "grad_norm": 376.61187744140625, "learning_rate": 6.255597340953933e-11, "loss": 24.375, "step": 30010 }, { "epoch": 1.9931593278873614, "grad_norm": 236.62493896484375, "learning_rate": 6.135876981572252e-11, "loss": 16.25, "step": 30011 }, { "epoch": 1.993225742179717, "grad_norm": 509.9814758300781, "learning_rate": 6.017313293549265e-11, "loss": 27.125, "step": 30012 }, { "epoch": 1.993292156472073, "grad_norm": 232.89414978027344, "learning_rate": 5.899906278261646e-11, "loss": 16.5781, "step": 30013 }, { "epoch": 1.9933585707644284, "grad_norm": 189.2486114501953, "learning_rate": 5.783655937052767e-11, "loss": 15.4062, "step": 30014 }, { "epoch": 1.9934249850567842, "grad_norm": 146.9263916015625, "learning_rate": 5.6685622712882017e-11, "loss": 14.5938, "step": 30015 }, { "epoch": 1.9934913993491399, "grad_norm": 295.8692932128906, "learning_rate": 5.554625282289116e-11, "loss": 18.7344, "step": 30016 }, { "epoch": 1.9935578136414955, "grad_norm": 323.6199035644531, "learning_rate": 5.4418449713766746e-11, "loss": 17.0781, "step": 30017 }, { "epoch": 1.9936242279338514, "grad_norm": 385.4278869628906, "learning_rate": 5.330221339849839e-11, "loss": 12.0938, "step": 30018 }, { "epoch": 1.993690642226207, "grad_norm": 198.72927856445312, "learning_rate": 5.219754389007569e-11, "loss": 17.3906, "step": 30019 }, { "epoch": 1.9937570565185627, "grad_norm": 125.474609375, "learning_rate": 5.1104441201155204e-11, "loss": 12.9688, "step": 30020 }, { "epoch": 1.9938234708109186, "grad_norm": 324.7491455078125, "learning_rate": 5.002290534450449e-11, "loss": 17.6094, "step": 30021 }, { "epoch": 1.9938898851032743, "grad_norm": 186.8865966796875, "learning_rate": 4.895293633255804e-11, "loss": 15.7812, "step": 30022 }, { "epoch": 1.99395629939563, "grad_norm": 499.3489685058594, "learning_rate": 4.7894534177750356e-11, "loss": 13.3125, "step": 30023 }, { "epoch": 1.9940227136879858, "grad_norm": 158.61338806152344, "learning_rate": 4.6847698892182876e-11, "loss": 18.3594, "step": 30024 }, { "epoch": 1.9940891279803412, "grad_norm": 288.5595703125, "learning_rate": 4.5812430488179064e-11, "loss": 13.9844, "step": 30025 }, { "epoch": 1.994155542272697, "grad_norm": 304.6450500488281, "learning_rate": 4.478872897761832e-11, "loss": 30.4688, "step": 30026 }, { "epoch": 1.9942219565650527, "grad_norm": 157.18646240234375, "learning_rate": 4.377659437226899e-11, "loss": 11.4844, "step": 30027 }, { "epoch": 1.9942883708574084, "grad_norm": 302.5985107421875, "learning_rate": 4.2776026683899456e-11, "loss": 13.2031, "step": 30028 }, { "epoch": 1.9943547851497643, "grad_norm": 190.78138732910156, "learning_rate": 4.1787025924167055e-11, "loss": 18.4141, "step": 30029 }, { "epoch": 1.99442119944212, "grad_norm": 78.53704071044922, "learning_rate": 4.080959210439605e-11, "loss": 14.9531, "step": 30030 }, { "epoch": 1.9944876137344756, "grad_norm": 131.81503295898438, "learning_rate": 3.984372523591073e-11, "loss": 14.9844, "step": 30031 }, { "epoch": 1.9945540280268315, "grad_norm": 253.13548278808594, "learning_rate": 3.888942532992434e-11, "loss": 15.6094, "step": 30032 }, { "epoch": 1.9946204423191871, "grad_norm": 253.72711181640625, "learning_rate": 3.794669239753912e-11, "loss": 16.9219, "step": 30033 }, { "epoch": 1.9946868566115428, "grad_norm": 297.0831604003906, "learning_rate": 3.701552644941319e-11, "loss": 14.0625, "step": 30034 }, { "epoch": 1.9947532709038986, "grad_norm": 238.4644775390625, "learning_rate": 3.609592749653778e-11, "loss": 13.5469, "step": 30035 }, { "epoch": 1.994819685196254, "grad_norm": 218.01980590820312, "learning_rate": 3.518789554957102e-11, "loss": 15.8125, "step": 30036 }, { "epoch": 1.99488609948861, "grad_norm": 240.4785614013672, "learning_rate": 3.429143061883799e-11, "loss": 14.1719, "step": 30037 }, { "epoch": 1.9949525137809656, "grad_norm": 193.0302276611328, "learning_rate": 3.34065327148858e-11, "loss": 13.0469, "step": 30038 }, { "epoch": 1.9950189280733213, "grad_norm": 426.85284423828125, "learning_rate": 3.253320184781749e-11, "loss": 17.8906, "step": 30039 }, { "epoch": 1.9950853423656771, "grad_norm": 164.64747619628906, "learning_rate": 3.167143802784711e-11, "loss": 11.6875, "step": 30040 }, { "epoch": 1.9951517566580328, "grad_norm": 133.66131591796875, "learning_rate": 3.082124126474461e-11, "loss": 14.375, "step": 30041 }, { "epoch": 1.9952181709503884, "grad_norm": 174.97264099121094, "learning_rate": 2.998261156861304e-11, "loss": 15.7656, "step": 30042 }, { "epoch": 1.9952845852427443, "grad_norm": 282.6871032714844, "learning_rate": 2.91555489490003e-11, "loss": 13.7812, "step": 30043 }, { "epoch": 1.9953509995351, "grad_norm": 90.52983856201172, "learning_rate": 2.8340053415565336e-11, "loss": 12.8906, "step": 30044 }, { "epoch": 1.9954174138274556, "grad_norm": 152.02090454101562, "learning_rate": 2.7536124977634023e-11, "loss": 18.2969, "step": 30045 }, { "epoch": 1.9954838281198115, "grad_norm": 98.28018951416016, "learning_rate": 2.6743763644532235e-11, "loss": 14.7969, "step": 30046 }, { "epoch": 1.995550242412167, "grad_norm": 231.12831115722656, "learning_rate": 2.5962969425363802e-11, "loss": 15.8125, "step": 30047 }, { "epoch": 1.9956166567045228, "grad_norm": 292.8406066894531, "learning_rate": 2.519374232934357e-11, "loss": 20.5781, "step": 30048 }, { "epoch": 1.9956830709968787, "grad_norm": 118.61425018310547, "learning_rate": 2.4436082365242306e-11, "loss": 18.5625, "step": 30049 }, { "epoch": 1.9957494852892341, "grad_norm": 190.3134307861328, "learning_rate": 2.3689989541830767e-11, "loss": 21.6719, "step": 30050 }, { "epoch": 1.99581589958159, "grad_norm": 220.93618774414062, "learning_rate": 2.29554638677687e-11, "loss": 18.7031, "step": 30051 }, { "epoch": 1.9958823138739457, "grad_norm": 216.31509399414062, "learning_rate": 2.2232505351493792e-11, "loss": 14.5156, "step": 30052 }, { "epoch": 1.9959487281663013, "grad_norm": 125.1325454711914, "learning_rate": 2.152111400144374e-11, "loss": 11.4531, "step": 30053 }, { "epoch": 1.9960151424586572, "grad_norm": 131.24136352539062, "learning_rate": 2.0821289825834198e-11, "loss": 13.2188, "step": 30054 }, { "epoch": 1.9960815567510128, "grad_norm": 207.88711547851562, "learning_rate": 2.013303283265877e-11, "loss": 15.4375, "step": 30055 }, { "epoch": 1.9961479710433685, "grad_norm": 355.3081970214844, "learning_rate": 1.9456343030022083e-11, "loss": 17.6406, "step": 30056 }, { "epoch": 1.9962143853357244, "grad_norm": 309.7239990234375, "learning_rate": 1.879122042580672e-11, "loss": 17.1406, "step": 30057 }, { "epoch": 1.9962807996280798, "grad_norm": 1517.9454345703125, "learning_rate": 1.8137665027451175e-11, "loss": 13.7812, "step": 30058 }, { "epoch": 1.9963472139204357, "grad_norm": 238.537841796875, "learning_rate": 1.7495676842727013e-11, "loss": 11.5156, "step": 30059 }, { "epoch": 1.9964136282127916, "grad_norm": 125.39132690429688, "learning_rate": 1.6865255878850682e-11, "loss": 12.1562, "step": 30060 }, { "epoch": 1.996480042505147, "grad_norm": 208.11204528808594, "learning_rate": 1.6246402143371696e-11, "loss": 15.4844, "step": 30061 }, { "epoch": 1.9965464567975029, "grad_norm": 219.97132873535156, "learning_rate": 1.5639115643395485e-11, "loss": 14.9062, "step": 30062 }, { "epoch": 1.9966128710898585, "grad_norm": 216.57174682617188, "learning_rate": 1.504339638580543e-11, "loss": 16.9844, "step": 30063 }, { "epoch": 1.9966792853822142, "grad_norm": 217.37123107910156, "learning_rate": 1.4459244377484914e-11, "loss": 16.5312, "step": 30064 }, { "epoch": 1.99674569967457, "grad_norm": 267.8753662109375, "learning_rate": 1.3886659625428344e-11, "loss": 20.3438, "step": 30065 }, { "epoch": 1.9968121139669257, "grad_norm": 291.8421325683594, "learning_rate": 1.3325642135963988e-11, "loss": 13.875, "step": 30066 }, { "epoch": 1.9968785282592814, "grad_norm": 1075.6263427734375, "learning_rate": 1.277619191586421e-11, "loss": 12.4375, "step": 30067 }, { "epoch": 1.9969449425516372, "grad_norm": 253.87428283691406, "learning_rate": 1.2238308971235234e-11, "loss": 22.5781, "step": 30068 }, { "epoch": 1.9970113568439927, "grad_norm": 501.6405944824219, "learning_rate": 1.1711993308405332e-11, "loss": 28.3906, "step": 30069 }, { "epoch": 1.9970777711363485, "grad_norm": 176.54837036132812, "learning_rate": 1.1197244933591755e-11, "loss": 16.375, "step": 30070 }, { "epoch": 1.9971441854287044, "grad_norm": 187.9347686767578, "learning_rate": 1.0694063852456637e-11, "loss": 16.2344, "step": 30071 }, { "epoch": 1.9972105997210599, "grad_norm": 133.01271057128906, "learning_rate": 1.0202450071106205e-11, "loss": 11.9844, "step": 30072 }, { "epoch": 1.9972770140134157, "grad_norm": 138.1533203125, "learning_rate": 9.722403595091578e-12, "loss": 20.8125, "step": 30073 }, { "epoch": 1.9973434283057714, "grad_norm": 722.3855590820312, "learning_rate": 9.253924429963866e-12, "loss": 16.0938, "step": 30074 }, { "epoch": 1.997409842598127, "grad_norm": 110.22797393798828, "learning_rate": 8.797012581052143e-12, "loss": 19.8125, "step": 30075 }, { "epoch": 1.997476256890483, "grad_norm": 310.5642395019531, "learning_rate": 8.351668053907524e-12, "loss": 16.4219, "step": 30076 }, { "epoch": 1.9975426711828386, "grad_norm": 226.766845703125, "learning_rate": 7.917890853414988e-12, "loss": 20.0781, "step": 30077 }, { "epoch": 1.9976090854751942, "grad_norm": 212.2895965576172, "learning_rate": 7.495680984792585e-12, "loss": 15.25, "step": 30078 }, { "epoch": 1.99767549976755, "grad_norm": 408.9681091308594, "learning_rate": 7.085038452814274e-12, "loss": 32.9531, "step": 30079 }, { "epoch": 1.9977419140599055, "grad_norm": 139.8256378173828, "learning_rate": 6.6859632621429906e-12, "loss": 13.4375, "step": 30080 }, { "epoch": 1.9978083283522614, "grad_norm": 140.266845703125, "learning_rate": 6.298455417552695e-12, "loss": 18.5, "step": 30081 }, { "epoch": 1.9978747426446173, "grad_norm": 189.7626495361328, "learning_rate": 5.922514923595301e-12, "loss": 13.1094, "step": 30082 }, { "epoch": 1.9979411569369727, "grad_norm": 111.49302673339844, "learning_rate": 5.558141784378634e-12, "loss": 19.4375, "step": 30083 }, { "epoch": 1.9980075712293286, "grad_norm": 561.5444946289062, "learning_rate": 5.2053360042325635e-12, "loss": 12.0469, "step": 30084 }, { "epoch": 1.9980739855216842, "grad_norm": 325.1975402832031, "learning_rate": 4.864097587264915e-12, "loss": 16.0781, "step": 30085 }, { "epoch": 1.99814039981404, "grad_norm": 141.12908935546875, "learning_rate": 4.534426537361468e-12, "loss": 10.3281, "step": 30086 }, { "epoch": 1.9982068141063958, "grad_norm": 243.6167449951172, "learning_rate": 4.216322858408005e-12, "loss": 22.4844, "step": 30087 }, { "epoch": 1.9982732283987514, "grad_norm": 363.7413330078125, "learning_rate": 3.909786554068262e-12, "loss": 18.0312, "step": 30088 }, { "epoch": 1.998339642691107, "grad_norm": 423.06610107421875, "learning_rate": 3.614817627783928e-12, "loss": 17.0781, "step": 30089 }, { "epoch": 1.998406056983463, "grad_norm": 1151.6209716796875, "learning_rate": 3.331416083218741e-12, "loss": 17.7344, "step": 30090 }, { "epoch": 1.9984724712758184, "grad_norm": 498.8965148925781, "learning_rate": 3.0595819233703024e-12, "loss": 19.7656, "step": 30091 }, { "epoch": 1.9985388855681743, "grad_norm": 308.4645080566406, "learning_rate": 2.799315151458259e-12, "loss": 14.1875, "step": 30092 }, { "epoch": 1.9986052998605301, "grad_norm": 247.805419921875, "learning_rate": 2.550615770591236e-12, "loss": 15.9141, "step": 30093 }, { "epoch": 1.9986717141528856, "grad_norm": 240.67893981933594, "learning_rate": 2.3134837835447896e-12, "loss": 21.6094, "step": 30094 }, { "epoch": 1.9987381284452415, "grad_norm": 339.8260192871094, "learning_rate": 2.0879191932055007e-12, "loss": 17.3906, "step": 30095 }, { "epoch": 1.9988045427375971, "grad_norm": 198.2056884765625, "learning_rate": 1.87392200201586e-12, "loss": 17.2969, "step": 30096 }, { "epoch": 1.9988709570299528, "grad_norm": 156.77854919433594, "learning_rate": 1.6714922125293795e-12, "loss": 15.9219, "step": 30097 }, { "epoch": 1.9989373713223086, "grad_norm": 209.61834716796875, "learning_rate": 1.4806298269665062e-12, "loss": 11.4375, "step": 30098 }, { "epoch": 1.9990037856146643, "grad_norm": 201.14707946777344, "learning_rate": 1.3013348477697305e-12, "loss": 16.9688, "step": 30099 }, { "epoch": 1.99907019990702, "grad_norm": 263.0876159667969, "learning_rate": 1.1336072768264314e-12, "loss": 23.75, "step": 30100 }, { "epoch": 1.9991366141993758, "grad_norm": 177.28713989257812, "learning_rate": 9.77447116246033e-13, "loss": 13.9062, "step": 30101 }, { "epoch": 1.9992030284917313, "grad_norm": 254.5480194091797, "learning_rate": 8.32854367582847e-13, "loss": 13.0312, "step": 30102 }, { "epoch": 1.9992694427840871, "grad_norm": 272.5587158203125, "learning_rate": 6.998290327242528e-13, "loss": 16.4375, "step": 30103 }, { "epoch": 1.999335857076443, "grad_norm": 791.290283203125, "learning_rate": 5.783711131135404e-13, "loss": 20.6094, "step": 30104 }, { "epoch": 1.9994022713687984, "grad_norm": 385.17633056640625, "learning_rate": 4.684806101939998e-13, "loss": 15.1094, "step": 30105 }, { "epoch": 1.9994686856611543, "grad_norm": 230.87022399902344, "learning_rate": 3.7015752529789836e-13, "loss": 15.75, "step": 30106 }, { "epoch": 1.99953509995351, "grad_norm": 146.02281188964844, "learning_rate": 2.8340185942443696e-13, "loss": 18.5, "step": 30107 }, { "epoch": 1.9996015142458656, "grad_norm": 203.42735290527344, "learning_rate": 2.0821361357281632e-13, "loss": 15.3594, "step": 30108 }, { "epoch": 1.9996679285382215, "grad_norm": 261.6709289550781, "learning_rate": 1.4459278874223713e-13, "loss": 16.4688, "step": 30109 }, { "epoch": 1.9997343428305772, "grad_norm": 145.9993438720703, "learning_rate": 9.253938559883323e-14, "loss": 12.2969, "step": 30110 }, { "epoch": 1.9998007571229328, "grad_norm": 146.69334411621094, "learning_rate": 5.205340469771613e-14, "loss": 16.9062, "step": 30111 }, { "epoch": 1.9998671714152887, "grad_norm": 598.2527465820312, "learning_rate": 2.313484670501964e-14, "loss": 22.8125, "step": 30112 }, { "epoch": 1.9999335857076441, "grad_norm": 290.93682861328125, "learning_rate": 5.783711731766061e-15, "loss": 23.5, "step": 30113 }, { "epoch": 2.0, "grad_norm": 207.208984375, "learning_rate": 0.0, "loss": 15.5156, "step": 30114 }, { "epoch": 2.0, "step": 30114, "total_flos": 4.176436712014086e+18, "train_loss": 18.515504883525935, "train_runtime": 59634.7241, "train_samples_per_second": 4.04, "train_steps_per_second": 0.505 } ], "logging_steps": 1.0, "max_steps": 30114, "num_input_tokens_seen": 0, "num_train_epochs": 2, "save_steps": 1000, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 4.176436712014086e+18, "train_batch_size": 2, "trial_name": null, "trial_params": null }