{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.9999435442895049, "eval_steps": 500, "global_step": 13284, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 7.527428066015545e-05, "grad_norm": 0.0, "learning_rate": 0.0, "loss": 11.3058, "step": 1 }, { "epoch": 0.0001505485613203109, "grad_norm": 0.0, "learning_rate": 0.0, "loss": 11.2167, "step": 2 }, { "epoch": 0.0002258228419804663, "grad_norm": 0.0, "learning_rate": 0.0, "loss": 11.0158, "step": 3 }, { "epoch": 0.0003010971226406218, "grad_norm": 0.0, "learning_rate": 0.0, "loss": 10.9581, "step": 4 }, { "epoch": 0.00037637140330077723, "grad_norm": 0.0, "learning_rate": 0.0, "loss": 11.0368, "step": 5 }, { "epoch": 0.0004516456839609326, "grad_norm": 101.22171020507812, "learning_rate": 2.506265664160401e-07, "loss": 11.0195, "step": 6 }, { "epoch": 0.0005269199646210881, "grad_norm": 101.00434875488281, "learning_rate": 5.012531328320802e-07, "loss": 10.8547, "step": 7 }, { "epoch": 0.0006021942452812436, "grad_norm": 101.00434875488281, "learning_rate": 5.012531328320802e-07, "loss": 11.2062, "step": 8 }, { "epoch": 0.000677468525941399, "grad_norm": 100.9647216796875, "learning_rate": 7.518796992481203e-07, "loss": 11.0756, "step": 9 }, { "epoch": 0.0007527428066015545, "grad_norm": 98.53367614746094, "learning_rate": 1.0025062656641603e-06, "loss": 10.9468, "step": 10 }, { "epoch": 0.0008280170872617098, "grad_norm": 103.55094146728516, "learning_rate": 1.2531328320802005e-06, "loss": 10.6497, "step": 11 }, { "epoch": 0.0009032913679218653, "grad_norm": 101.98058319091797, "learning_rate": 1.5037593984962406e-06, "loss": 11.0655, "step": 12 }, { "epoch": 0.0009785656485820208, "grad_norm": 99.74136352539062, "learning_rate": 1.7543859649122807e-06, "loss": 11.0013, "step": 13 }, { "epoch": 0.0010538399292421763, "grad_norm": 102.27731323242188, "learning_rate": 2.0050125313283207e-06, "loss": 11.1922, "step": 14 }, { "epoch": 0.0011291142099023317, "grad_norm": 103.02958679199219, "learning_rate": 2.255639097744361e-06, "loss": 10.9353, "step": 15 }, { "epoch": 0.0012043884905624871, "grad_norm": 102.86016082763672, "learning_rate": 2.506265664160401e-06, "loss": 10.8083, "step": 16 }, { "epoch": 0.0012796627712226426, "grad_norm": 99.78475952148438, "learning_rate": 2.7568922305764413e-06, "loss": 10.789, "step": 17 }, { "epoch": 0.001354937051882798, "grad_norm": 97.88682556152344, "learning_rate": 3.007518796992481e-06, "loss": 10.6574, "step": 18 }, { "epoch": 0.0014302113325429535, "grad_norm": 94.35863494873047, "learning_rate": 3.258145363408521e-06, "loss": 10.2311, "step": 19 }, { "epoch": 0.001505485613203109, "grad_norm": 97.56143951416016, "learning_rate": 3.5087719298245615e-06, "loss": 10.3029, "step": 20 }, { "epoch": 0.0015807598938632642, "grad_norm": 93.58026123046875, "learning_rate": 3.7593984962406014e-06, "loss": 10.1692, "step": 21 }, { "epoch": 0.0016560341745234196, "grad_norm": 94.10589599609375, "learning_rate": 4.010025062656641e-06, "loss": 9.9597, "step": 22 }, { "epoch": 0.001731308455183575, "grad_norm": 91.92644500732422, "learning_rate": 4.260651629072682e-06, "loss": 9.6604, "step": 23 }, { "epoch": 0.0018065827358437305, "grad_norm": 91.24312591552734, "learning_rate": 4.511278195488722e-06, "loss": 9.8507, "step": 24 }, { "epoch": 0.001881857016503886, "grad_norm": 91.24312591552734, "learning_rate": 4.511278195488722e-06, "loss": 9.0144, "step": 25 }, { "epoch": 0.0019571312971640416, "grad_norm": 91.44805145263672, "learning_rate": 4.7619047619047615e-06, "loss": 9.6564, "step": 26 }, { "epoch": 0.002032405577824197, "grad_norm": 88.8994140625, "learning_rate": 5.012531328320802e-06, "loss": 9.2601, "step": 27 }, { "epoch": 0.0021076798584843525, "grad_norm": 85.07931518554688, "learning_rate": 5.263157894736842e-06, "loss": 9.1414, "step": 28 }, { "epoch": 0.002182954139144508, "grad_norm": 91.28398132324219, "learning_rate": 5.5137844611528826e-06, "loss": 9.0763, "step": 29 }, { "epoch": 0.0022582284198046634, "grad_norm": 82.28803253173828, "learning_rate": 5.764411027568922e-06, "loss": 8.6692, "step": 30 }, { "epoch": 0.002333502700464819, "grad_norm": 80.47472381591797, "learning_rate": 6.015037593984962e-06, "loss": 8.3735, "step": 31 }, { "epoch": 0.0024087769811249743, "grad_norm": 80.88319396972656, "learning_rate": 6.265664160401003e-06, "loss": 7.9273, "step": 32 }, { "epoch": 0.0024840512617851297, "grad_norm": 77.2528076171875, "learning_rate": 6.516290726817042e-06, "loss": 8.8538, "step": 33 }, { "epoch": 0.002559325542445285, "grad_norm": 80.05760192871094, "learning_rate": 6.766917293233083e-06, "loss": 8.0052, "step": 34 }, { "epoch": 0.0026345998231054406, "grad_norm": 77.98321533203125, "learning_rate": 7.017543859649123e-06, "loss": 7.9527, "step": 35 }, { "epoch": 0.002709874103765596, "grad_norm": 76.9040756225586, "learning_rate": 7.2681704260651625e-06, "loss": 7.5808, "step": 36 }, { "epoch": 0.0027851483844257515, "grad_norm": 74.9203109741211, "learning_rate": 7.518796992481203e-06, "loss": 7.2887, "step": 37 }, { "epoch": 0.002860422665085907, "grad_norm": 71.68769836425781, "learning_rate": 7.769423558897243e-06, "loss": 7.2107, "step": 38 }, { "epoch": 0.0029356969457460624, "grad_norm": 72.97769927978516, "learning_rate": 8.020050125313283e-06, "loss": 6.7856, "step": 39 }, { "epoch": 0.003010971226406218, "grad_norm": 67.87655639648438, "learning_rate": 8.270676691729324e-06, "loss": 7.0611, "step": 40 }, { "epoch": 0.0030862455070663733, "grad_norm": 70.81007385253906, "learning_rate": 8.521303258145363e-06, "loss": 6.8754, "step": 41 }, { "epoch": 0.0031615197877265283, "grad_norm": 65.32673645019531, "learning_rate": 8.771929824561403e-06, "loss": 6.6728, "step": 42 }, { "epoch": 0.0032367940683866838, "grad_norm": 61.19737243652344, "learning_rate": 9.022556390977444e-06, "loss": 6.531, "step": 43 }, { "epoch": 0.0033120683490468392, "grad_norm": 59.31134033203125, "learning_rate": 9.273182957393484e-06, "loss": 6.202, "step": 44 }, { "epoch": 0.0033873426297069947, "grad_norm": 63.7019157409668, "learning_rate": 9.523809523809523e-06, "loss": 6.1788, "step": 45 }, { "epoch": 0.00346261691036715, "grad_norm": 56.24123764038086, "learning_rate": 9.774436090225564e-06, "loss": 6.3614, "step": 46 }, { "epoch": 0.0035378911910273056, "grad_norm": 54.28101348876953, "learning_rate": 1.0025062656641604e-05, "loss": 5.5329, "step": 47 }, { "epoch": 0.003613165471687461, "grad_norm": 54.67324447631836, "learning_rate": 1.0275689223057643e-05, "loss": 5.4588, "step": 48 }, { "epoch": 0.0036884397523476165, "grad_norm": 55.15860366821289, "learning_rate": 1.0526315789473684e-05, "loss": 5.4524, "step": 49 }, { "epoch": 0.003763714033007772, "grad_norm": 51.7996711730957, "learning_rate": 1.0776942355889724e-05, "loss": 5.0809, "step": 50 }, { "epoch": 0.0038389883136679273, "grad_norm": 50.0368537902832, "learning_rate": 1.1027568922305765e-05, "loss": 4.9969, "step": 51 }, { "epoch": 0.003914262594328083, "grad_norm": 51.43173599243164, "learning_rate": 1.1278195488721805e-05, "loss": 4.9582, "step": 52 }, { "epoch": 0.003989536874988238, "grad_norm": 48.52267837524414, "learning_rate": 1.1528822055137844e-05, "loss": 5.2144, "step": 53 }, { "epoch": 0.004064811155648394, "grad_norm": 45.86253356933594, "learning_rate": 1.1779448621553885e-05, "loss": 4.963, "step": 54 }, { "epoch": 0.004140085436308549, "grad_norm": 46.87076187133789, "learning_rate": 1.2030075187969925e-05, "loss": 4.8069, "step": 55 }, { "epoch": 0.004215359716968705, "grad_norm": 43.505149841308594, "learning_rate": 1.2280701754385964e-05, "loss": 4.4433, "step": 56 }, { "epoch": 0.00429063399762886, "grad_norm": 42.096683502197266, "learning_rate": 1.2531328320802006e-05, "loss": 4.5141, "step": 57 }, { "epoch": 0.004365908278289016, "grad_norm": 41.87490463256836, "learning_rate": 1.2781954887218045e-05, "loss": 4.4574, "step": 58 }, { "epoch": 0.004441182558949171, "grad_norm": 40.193180084228516, "learning_rate": 1.3032581453634085e-05, "loss": 3.9994, "step": 59 }, { "epoch": 0.004516456839609327, "grad_norm": 42.521270751953125, "learning_rate": 1.3283208020050126e-05, "loss": 4.0917, "step": 60 }, { "epoch": 0.004591731120269482, "grad_norm": 37.18898010253906, "learning_rate": 1.3533834586466165e-05, "loss": 4.3226, "step": 61 }, { "epoch": 0.004667005400929638, "grad_norm": 36.022308349609375, "learning_rate": 1.3784461152882205e-05, "loss": 4.0419, "step": 62 }, { "epoch": 0.004742279681589793, "grad_norm": 30.037837982177734, "learning_rate": 1.4035087719298246e-05, "loss": 4.033, "step": 63 }, { "epoch": 0.004817553962249949, "grad_norm": 30.106727600097656, "learning_rate": 1.4285714285714285e-05, "loss": 3.7006, "step": 64 }, { "epoch": 0.004892828242910104, "grad_norm": 27.95155143737793, "learning_rate": 1.4536340852130325e-05, "loss": 4.0383, "step": 65 }, { "epoch": 0.0049681025235702595, "grad_norm": 31.840641021728516, "learning_rate": 1.4786967418546366e-05, "loss": 3.7578, "step": 66 }, { "epoch": 0.0050433768042304145, "grad_norm": 26.964170455932617, "learning_rate": 1.5037593984962406e-05, "loss": 3.8204, "step": 67 }, { "epoch": 0.00511865108489057, "grad_norm": 28.508447647094727, "learning_rate": 1.5288220551378447e-05, "loss": 4.0668, "step": 68 }, { "epoch": 0.005193925365550725, "grad_norm": 27.509992599487305, "learning_rate": 1.5538847117794486e-05, "loss": 3.3308, "step": 69 }, { "epoch": 0.005269199646210881, "grad_norm": 27.621871948242188, "learning_rate": 1.5789473684210526e-05, "loss": 3.569, "step": 70 }, { "epoch": 0.005344473926871036, "grad_norm": 26.775339126586914, "learning_rate": 1.6040100250626565e-05, "loss": 3.6699, "step": 71 }, { "epoch": 0.005419748207531192, "grad_norm": 27.39186668395996, "learning_rate": 1.6290726817042605e-05, "loss": 3.4824, "step": 72 }, { "epoch": 0.005495022488191347, "grad_norm": 26.232194900512695, "learning_rate": 1.6541353383458648e-05, "loss": 3.3197, "step": 73 }, { "epoch": 0.005570296768851503, "grad_norm": 26.70931053161621, "learning_rate": 1.6791979949874687e-05, "loss": 3.3354, "step": 74 }, { "epoch": 0.005645571049511658, "grad_norm": 28.280254364013672, "learning_rate": 1.7042606516290727e-05, "loss": 3.1711, "step": 75 }, { "epoch": 0.005720845330171814, "grad_norm": 25.6878662109375, "learning_rate": 1.7293233082706766e-05, "loss": 3.3231, "step": 76 }, { "epoch": 0.005796119610831969, "grad_norm": 26.246875762939453, "learning_rate": 1.7543859649122806e-05, "loss": 3.171, "step": 77 }, { "epoch": 0.005871393891492125, "grad_norm": 26.477882385253906, "learning_rate": 1.779448621553885e-05, "loss": 2.823, "step": 78 }, { "epoch": 0.00594666817215228, "grad_norm": 24.533912658691406, "learning_rate": 1.8045112781954888e-05, "loss": 3.3136, "step": 79 }, { "epoch": 0.006021942452812436, "grad_norm": 22.89409065246582, "learning_rate": 1.8295739348370928e-05, "loss": 2.9865, "step": 80 }, { "epoch": 0.006097216733472591, "grad_norm": 23.302785873413086, "learning_rate": 1.8546365914786967e-05, "loss": 2.8005, "step": 81 }, { "epoch": 0.006172491014132747, "grad_norm": 24.13874626159668, "learning_rate": 1.8796992481203007e-05, "loss": 2.8054, "step": 82 }, { "epoch": 0.006247765294792902, "grad_norm": 21.652523040771484, "learning_rate": 1.9047619047619046e-05, "loss": 3.3006, "step": 83 }, { "epoch": 0.006323039575453057, "grad_norm": 23.442100524902344, "learning_rate": 1.929824561403509e-05, "loss": 3.1333, "step": 84 }, { "epoch": 0.0063983138561132125, "grad_norm": 20.342926025390625, "learning_rate": 1.954887218045113e-05, "loss": 3.1004, "step": 85 }, { "epoch": 0.0064735881367733675, "grad_norm": 21.158906936645508, "learning_rate": 1.9799498746867168e-05, "loss": 2.8283, "step": 86 }, { "epoch": 0.006548862417433523, "grad_norm": 21.205425262451172, "learning_rate": 2.0050125313283208e-05, "loss": 2.8407, "step": 87 }, { "epoch": 0.0066241366980936784, "grad_norm": 16.539318084716797, "learning_rate": 2.0300751879699247e-05, "loss": 2.992, "step": 88 }, { "epoch": 0.006699410978753834, "grad_norm": 15.958436012268066, "learning_rate": 2.0551378446115287e-05, "loss": 2.7528, "step": 89 }, { "epoch": 0.006774685259413989, "grad_norm": 16.10943603515625, "learning_rate": 2.080200501253133e-05, "loss": 2.9206, "step": 90 }, { "epoch": 0.006849959540074145, "grad_norm": 13.623184204101562, "learning_rate": 2.105263157894737e-05, "loss": 2.8539, "step": 91 }, { "epoch": 0.0069252338207343, "grad_norm": 13.710868835449219, "learning_rate": 2.130325814536341e-05, "loss": 2.8377, "step": 92 }, { "epoch": 0.007000508101394456, "grad_norm": 14.358534812927246, "learning_rate": 2.1553884711779448e-05, "loss": 2.971, "step": 93 }, { "epoch": 0.007075782382054611, "grad_norm": 13.918020248413086, "learning_rate": 2.1804511278195487e-05, "loss": 2.6541, "step": 94 }, { "epoch": 0.007151056662714767, "grad_norm": 12.453470230102539, "learning_rate": 2.205513784461153e-05, "loss": 2.9524, "step": 95 }, { "epoch": 0.007226330943374922, "grad_norm": 10.925068855285645, "learning_rate": 2.230576441102757e-05, "loss": 2.789, "step": 96 }, { "epoch": 0.007301605224035078, "grad_norm": 11.36191177368164, "learning_rate": 2.255639097744361e-05, "loss": 2.7477, "step": 97 }, { "epoch": 0.007376879504695233, "grad_norm": 9.909281730651855, "learning_rate": 2.280701754385965e-05, "loss": 2.981, "step": 98 }, { "epoch": 0.007452153785355389, "grad_norm": 11.188608169555664, "learning_rate": 2.3057644110275688e-05, "loss": 2.6559, "step": 99 }, { "epoch": 0.007527428066015544, "grad_norm": 12.857466697692871, "learning_rate": 2.3308270676691728e-05, "loss": 2.6402, "step": 100 }, { "epoch": 0.0076027023466757, "grad_norm": 12.022490501403809, "learning_rate": 2.355889724310777e-05, "loss": 2.5045, "step": 101 }, { "epoch": 0.007677976627335855, "grad_norm": 9.543107986450195, "learning_rate": 2.380952380952381e-05, "loss": 2.876, "step": 102 }, { "epoch": 0.007753250907996011, "grad_norm": 11.801715850830078, "learning_rate": 2.406015037593985e-05, "loss": 2.9806, "step": 103 }, { "epoch": 0.007828525188656166, "grad_norm": 13.703426361083984, "learning_rate": 2.431077694235589e-05, "loss": 2.642, "step": 104 }, { "epoch": 0.007903799469316321, "grad_norm": 11.499256134033203, "learning_rate": 2.456140350877193e-05, "loss": 2.8738, "step": 105 }, { "epoch": 0.007979073749976476, "grad_norm": 12.320425987243652, "learning_rate": 2.4812030075187968e-05, "loss": 2.8757, "step": 106 }, { "epoch": 0.008054348030636631, "grad_norm": 14.50576114654541, "learning_rate": 2.506265664160401e-05, "loss": 2.8903, "step": 107 }, { "epoch": 0.008129622311296788, "grad_norm": 11.007486343383789, "learning_rate": 2.531328320802005e-05, "loss": 2.7056, "step": 108 }, { "epoch": 0.008204896591956943, "grad_norm": 11.293839454650879, "learning_rate": 2.556390977443609e-05, "loss": 2.6704, "step": 109 }, { "epoch": 0.008280170872617098, "grad_norm": 8.651300430297852, "learning_rate": 2.581453634085213e-05, "loss": 2.6453, "step": 110 }, { "epoch": 0.008355445153277253, "grad_norm": 12.535172462463379, "learning_rate": 2.606516290726817e-05, "loss": 2.7358, "step": 111 }, { "epoch": 0.00843071943393741, "grad_norm": 8.519335746765137, "learning_rate": 2.6315789473684212e-05, "loss": 2.9246, "step": 112 }, { "epoch": 0.008505993714597565, "grad_norm": 9.05390739440918, "learning_rate": 2.656641604010025e-05, "loss": 2.8547, "step": 113 }, { "epoch": 0.00858126799525772, "grad_norm": 11.525436401367188, "learning_rate": 2.681704260651629e-05, "loss": 2.6, "step": 114 }, { "epoch": 0.008656542275917875, "grad_norm": 10.789581298828125, "learning_rate": 2.706766917293233e-05, "loss": 2.6484, "step": 115 }, { "epoch": 0.008731816556578032, "grad_norm": 12.913826942443848, "learning_rate": 2.731829573934837e-05, "loss": 2.8547, "step": 116 }, { "epoch": 0.008807090837238187, "grad_norm": 11.780866622924805, "learning_rate": 2.756892230576441e-05, "loss": 2.6838, "step": 117 }, { "epoch": 0.008882365117898342, "grad_norm": 8.270967483520508, "learning_rate": 2.7819548872180452e-05, "loss": 2.5647, "step": 118 }, { "epoch": 0.008957639398558497, "grad_norm": 7.76688814163208, "learning_rate": 2.8070175438596492e-05, "loss": 2.686, "step": 119 }, { "epoch": 0.009032913679218654, "grad_norm": 9.221709251403809, "learning_rate": 2.832080200501253e-05, "loss": 2.9965, "step": 120 }, { "epoch": 0.009108187959878809, "grad_norm": 11.874032974243164, "learning_rate": 2.857142857142857e-05, "loss": 3.1813, "step": 121 }, { "epoch": 0.009183462240538964, "grad_norm": 12.074503898620605, "learning_rate": 2.882205513784461e-05, "loss": 3.005, "step": 122 }, { "epoch": 0.009258736521199119, "grad_norm": 12.125741004943848, "learning_rate": 2.907268170426065e-05, "loss": 3.0457, "step": 123 }, { "epoch": 0.009334010801859275, "grad_norm": 13.402585983276367, "learning_rate": 2.9323308270676693e-05, "loss": 2.9726, "step": 124 }, { "epoch": 0.00940928508251943, "grad_norm": 7.172392845153809, "learning_rate": 2.9573934837092732e-05, "loss": 2.773, "step": 125 }, { "epoch": 0.009484559363179585, "grad_norm": 8.764741897583008, "learning_rate": 2.9824561403508772e-05, "loss": 2.3307, "step": 126 }, { "epoch": 0.00955983364383974, "grad_norm": 10.728087425231934, "learning_rate": 3.007518796992481e-05, "loss": 2.9004, "step": 127 }, { "epoch": 0.009635107924499897, "grad_norm": 9.693150520324707, "learning_rate": 3.032581453634085e-05, "loss": 2.942, "step": 128 }, { "epoch": 0.009710382205160052, "grad_norm": 11.793756484985352, "learning_rate": 3.0576441102756894e-05, "loss": 2.7575, "step": 129 }, { "epoch": 0.009785656485820207, "grad_norm": 11.24767017364502, "learning_rate": 3.082706766917293e-05, "loss": 2.6284, "step": 130 }, { "epoch": 0.009860930766480362, "grad_norm": 9.41115665435791, "learning_rate": 3.107769423558897e-05, "loss": 2.8344, "step": 131 }, { "epoch": 0.009936205047140519, "grad_norm": 10.862749099731445, "learning_rate": 3.132832080200501e-05, "loss": 2.4733, "step": 132 }, { "epoch": 0.010011479327800674, "grad_norm": 10.923384666442871, "learning_rate": 3.157894736842105e-05, "loss": 2.4103, "step": 133 }, { "epoch": 0.010086753608460829, "grad_norm": 10.988842010498047, "learning_rate": 3.182957393483709e-05, "loss": 2.599, "step": 134 }, { "epoch": 0.010162027889120984, "grad_norm": 10.643759727478027, "learning_rate": 3.208020050125313e-05, "loss": 3.0312, "step": 135 }, { "epoch": 0.01023730216978114, "grad_norm": 9.693270683288574, "learning_rate": 3.233082706766917e-05, "loss": 2.7399, "step": 136 }, { "epoch": 0.010312576450441296, "grad_norm": 10.290359497070312, "learning_rate": 3.258145363408521e-05, "loss": 2.8732, "step": 137 }, { "epoch": 0.01038785073110145, "grad_norm": 12.482595443725586, "learning_rate": 3.2832080200501256e-05, "loss": 2.8354, "step": 138 }, { "epoch": 0.010463125011761606, "grad_norm": 9.462326049804688, "learning_rate": 3.3082706766917295e-05, "loss": 2.6542, "step": 139 }, { "epoch": 0.010538399292421763, "grad_norm": 10.323150634765625, "learning_rate": 3.3333333333333335e-05, "loss": 2.433, "step": 140 }, { "epoch": 0.010613673573081918, "grad_norm": 10.48134708404541, "learning_rate": 3.3583959899749374e-05, "loss": 2.6072, "step": 141 }, { "epoch": 0.010688947853742073, "grad_norm": 14.575450897216797, "learning_rate": 3.3834586466165414e-05, "loss": 2.7018, "step": 142 }, { "epoch": 0.010764222134402228, "grad_norm": 10.16996955871582, "learning_rate": 3.4085213032581453e-05, "loss": 2.5096, "step": 143 }, { "epoch": 0.010839496415062384, "grad_norm": 14.478660583496094, "learning_rate": 3.433583959899749e-05, "loss": 3.074, "step": 144 }, { "epoch": 0.01091477069572254, "grad_norm": 11.558063507080078, "learning_rate": 3.458646616541353e-05, "loss": 2.9335, "step": 145 }, { "epoch": 0.010990044976382694, "grad_norm": 12.873056411743164, "learning_rate": 3.483709273182957e-05, "loss": 2.7112, "step": 146 }, { "epoch": 0.01106531925704285, "grad_norm": 10.242127418518066, "learning_rate": 3.508771929824561e-05, "loss": 2.8176, "step": 147 }, { "epoch": 0.011140593537703006, "grad_norm": 9.07247257232666, "learning_rate": 3.533834586466165e-05, "loss": 2.6257, "step": 148 }, { "epoch": 0.011215867818363161, "grad_norm": 11.077366828918457, "learning_rate": 3.55889724310777e-05, "loss": 2.4143, "step": 149 }, { "epoch": 0.011291142099023316, "grad_norm": 10.589329719543457, "learning_rate": 3.583959899749374e-05, "loss": 2.7472, "step": 150 }, { "epoch": 0.011366416379683471, "grad_norm": 9.62755012512207, "learning_rate": 3.6090225563909776e-05, "loss": 2.802, "step": 151 }, { "epoch": 0.011441690660343628, "grad_norm": 8.788288116455078, "learning_rate": 3.6340852130325816e-05, "loss": 2.4896, "step": 152 }, { "epoch": 0.011516964941003783, "grad_norm": 10.032755851745605, "learning_rate": 3.6591478696741855e-05, "loss": 2.5718, "step": 153 }, { "epoch": 0.011592239221663938, "grad_norm": 9.229397773742676, "learning_rate": 3.6842105263157895e-05, "loss": 2.7517, "step": 154 }, { "epoch": 0.011667513502324093, "grad_norm": 8.434051513671875, "learning_rate": 3.7092731829573934e-05, "loss": 2.8381, "step": 155 }, { "epoch": 0.01174278778298425, "grad_norm": 9.783924102783203, "learning_rate": 3.7343358395989974e-05, "loss": 2.7413, "step": 156 }, { "epoch": 0.011818062063644405, "grad_norm": 9.114348411560059, "learning_rate": 3.759398496240601e-05, "loss": 2.7403, "step": 157 }, { "epoch": 0.01189333634430456, "grad_norm": 12.778593063354492, "learning_rate": 3.784461152882205e-05, "loss": 2.7325, "step": 158 }, { "epoch": 0.011968610624964715, "grad_norm": 8.32952880859375, "learning_rate": 3.809523809523809e-05, "loss": 2.8156, "step": 159 }, { "epoch": 0.012043884905624871, "grad_norm": 12.271199226379395, "learning_rate": 3.834586466165413e-05, "loss": 2.7965, "step": 160 }, { "epoch": 0.012119159186285026, "grad_norm": 11.134658813476562, "learning_rate": 3.859649122807018e-05, "loss": 2.5964, "step": 161 }, { "epoch": 0.012194433466945181, "grad_norm": 9.325178146362305, "learning_rate": 3.884711779448622e-05, "loss": 2.6213, "step": 162 }, { "epoch": 0.012269707747605337, "grad_norm": 12.493197441101074, "learning_rate": 3.909774436090226e-05, "loss": 2.4914, "step": 163 }, { "epoch": 0.012344982028265493, "grad_norm": 9.506828308105469, "learning_rate": 3.9348370927318297e-05, "loss": 2.5746, "step": 164 }, { "epoch": 0.012420256308925648, "grad_norm": 10.369872093200684, "learning_rate": 3.9598997493734336e-05, "loss": 2.7721, "step": 165 }, { "epoch": 0.012495530589585803, "grad_norm": 10.101872444152832, "learning_rate": 3.9849624060150376e-05, "loss": 2.5736, "step": 166 }, { "epoch": 0.012570804870245958, "grad_norm": 17.092546463012695, "learning_rate": 4.0100250626566415e-05, "loss": 2.598, "step": 167 }, { "epoch": 0.012646079150906113, "grad_norm": 11.833017349243164, "learning_rate": 4.0350877192982455e-05, "loss": 2.6022, "step": 168 }, { "epoch": 0.01272135343156627, "grad_norm": 7.747100830078125, "learning_rate": 4.0601503759398494e-05, "loss": 2.6336, "step": 169 }, { "epoch": 0.012796627712226425, "grad_norm": 9.564743995666504, "learning_rate": 4.0852130325814534e-05, "loss": 2.5899, "step": 170 }, { "epoch": 0.01287190199288658, "grad_norm": 9.064987182617188, "learning_rate": 4.110275689223057e-05, "loss": 2.7125, "step": 171 }, { "epoch": 0.012947176273546735, "grad_norm": 10.471091270446777, "learning_rate": 4.135338345864662e-05, "loss": 2.4207, "step": 172 }, { "epoch": 0.013022450554206892, "grad_norm": 10.878631591796875, "learning_rate": 4.160401002506266e-05, "loss": 2.6163, "step": 173 }, { "epoch": 0.013097724834867047, "grad_norm": 8.91076946258545, "learning_rate": 4.18546365914787e-05, "loss": 2.4706, "step": 174 }, { "epoch": 0.013172999115527202, "grad_norm": 9.40295124053955, "learning_rate": 4.210526315789474e-05, "loss": 2.6412, "step": 175 }, { "epoch": 0.013248273396187357, "grad_norm": 9.639464378356934, "learning_rate": 4.235588972431078e-05, "loss": 2.4719, "step": 176 }, { "epoch": 0.013323547676847514, "grad_norm": 7.9790825843811035, "learning_rate": 4.260651629072682e-05, "loss": 3.1457, "step": 177 }, { "epoch": 0.013398821957507669, "grad_norm": 8.203713417053223, "learning_rate": 4.2857142857142856e-05, "loss": 2.6524, "step": 178 }, { "epoch": 0.013474096238167824, "grad_norm": 9.808259010314941, "learning_rate": 4.3107769423558896e-05, "loss": 2.5888, "step": 179 }, { "epoch": 0.013549370518827979, "grad_norm": 7.837316513061523, "learning_rate": 4.3358395989974935e-05, "loss": 2.6199, "step": 180 }, { "epoch": 0.013624644799488135, "grad_norm": 10.121833801269531, "learning_rate": 4.3609022556390975e-05, "loss": 2.5769, "step": 181 }, { "epoch": 0.01369991908014829, "grad_norm": 7.981498718261719, "learning_rate": 4.3859649122807014e-05, "loss": 2.7497, "step": 182 }, { "epoch": 0.013775193360808445, "grad_norm": 8.599218368530273, "learning_rate": 4.411027568922306e-05, "loss": 2.9126, "step": 183 }, { "epoch": 0.0138504676414686, "grad_norm": 5.973028659820557, "learning_rate": 4.43609022556391e-05, "loss": 2.8026, "step": 184 }, { "epoch": 0.013925741922128757, "grad_norm": 9.95548152923584, "learning_rate": 4.461152882205514e-05, "loss": 2.5834, "step": 185 }, { "epoch": 0.014001016202788912, "grad_norm": 6.851253986358643, "learning_rate": 4.486215538847118e-05, "loss": 2.7454, "step": 186 }, { "epoch": 0.014076290483449067, "grad_norm": 10.43259048461914, "learning_rate": 4.511278195488722e-05, "loss": 2.5623, "step": 187 }, { "epoch": 0.014151564764109222, "grad_norm": 7.847858905792236, "learning_rate": 4.536340852130326e-05, "loss": 2.6478, "step": 188 }, { "epoch": 0.014226839044769379, "grad_norm": 9.846750259399414, "learning_rate": 4.56140350877193e-05, "loss": 2.7244, "step": 189 }, { "epoch": 0.014302113325429534, "grad_norm": 9.090900421142578, "learning_rate": 4.586466165413534e-05, "loss": 2.7087, "step": 190 }, { "epoch": 0.014377387606089689, "grad_norm": 13.029911994934082, "learning_rate": 4.6115288220551377e-05, "loss": 2.6135, "step": 191 }, { "epoch": 0.014452661886749844, "grad_norm": 10.575517654418945, "learning_rate": 4.6365914786967416e-05, "loss": 2.6086, "step": 192 }, { "epoch": 0.01452793616741, "grad_norm": 11.485733985900879, "learning_rate": 4.6616541353383456e-05, "loss": 2.9375, "step": 193 }, { "epoch": 0.014603210448070156, "grad_norm": 7.707833290100098, "learning_rate": 4.6867167919799495e-05, "loss": 2.6695, "step": 194 }, { "epoch": 0.01467848472873031, "grad_norm": 7.827512264251709, "learning_rate": 4.711779448621554e-05, "loss": 2.5223, "step": 195 }, { "epoch": 0.014753759009390466, "grad_norm": 7.267850875854492, "learning_rate": 4.736842105263158e-05, "loss": 2.4527, "step": 196 }, { "epoch": 0.014829033290050623, "grad_norm": 7.232391357421875, "learning_rate": 4.761904761904762e-05, "loss": 2.5753, "step": 197 }, { "epoch": 0.014904307570710778, "grad_norm": 12.583212852478027, "learning_rate": 4.786967418546366e-05, "loss": 2.8173, "step": 198 }, { "epoch": 0.014979581851370933, "grad_norm": 10.620282173156738, "learning_rate": 4.81203007518797e-05, "loss": 2.6762, "step": 199 }, { "epoch": 0.015054856132031088, "grad_norm": 9.189391136169434, "learning_rate": 4.837092731829574e-05, "loss": 2.7449, "step": 200 }, { "epoch": 0.015130130412691244, "grad_norm": 8.411532402038574, "learning_rate": 4.862155388471178e-05, "loss": 2.4211, "step": 201 }, { "epoch": 0.0152054046933514, "grad_norm": 12.243152618408203, "learning_rate": 4.887218045112782e-05, "loss": 2.7006, "step": 202 }, { "epoch": 0.015280678974011554, "grad_norm": 8.701164245605469, "learning_rate": 4.912280701754386e-05, "loss": 2.5448, "step": 203 }, { "epoch": 0.01535595325467171, "grad_norm": 10.85342788696289, "learning_rate": 4.93734335839599e-05, "loss": 3.0048, "step": 204 }, { "epoch": 0.015431227535331866, "grad_norm": 10.478662490844727, "learning_rate": 4.9624060150375936e-05, "loss": 2.5254, "step": 205 }, { "epoch": 0.015506501815992021, "grad_norm": 11.290699005126953, "learning_rate": 4.987468671679198e-05, "loss": 2.478, "step": 206 }, { "epoch": 0.015581776096652176, "grad_norm": 8.114274024963379, "learning_rate": 5.012531328320802e-05, "loss": 2.565, "step": 207 }, { "epoch": 0.015657050377312333, "grad_norm": 9.683647155761719, "learning_rate": 5.037593984962407e-05, "loss": 2.3578, "step": 208 }, { "epoch": 0.015732324657972488, "grad_norm": 9.260629653930664, "learning_rate": 5.06265664160401e-05, "loss": 2.6074, "step": 209 }, { "epoch": 0.015807598938632643, "grad_norm": 10.380516052246094, "learning_rate": 5.087719298245615e-05, "loss": 2.6925, "step": 210 }, { "epoch": 0.015882873219292798, "grad_norm": 9.815593719482422, "learning_rate": 5.112781954887218e-05, "loss": 2.3392, "step": 211 }, { "epoch": 0.015958147499952953, "grad_norm": 9.420886993408203, "learning_rate": 5.1378446115288226e-05, "loss": 2.8057, "step": 212 }, { "epoch": 0.016033421780613108, "grad_norm": 11.434077262878418, "learning_rate": 5.162907268170426e-05, "loss": 2.5541, "step": 213 }, { "epoch": 0.016108696061273263, "grad_norm": 8.445125579833984, "learning_rate": 5.1879699248120305e-05, "loss": 2.4893, "step": 214 }, { "epoch": 0.01618397034193342, "grad_norm": 6.411423206329346, "learning_rate": 5.213032581453634e-05, "loss": 2.6418, "step": 215 }, { "epoch": 0.016259244622593576, "grad_norm": 8.493948936462402, "learning_rate": 5.2380952380952384e-05, "loss": 2.9823, "step": 216 }, { "epoch": 0.01633451890325373, "grad_norm": 7.110140323638916, "learning_rate": 5.2631578947368424e-05, "loss": 2.7274, "step": 217 }, { "epoch": 0.016409793183913886, "grad_norm": 10.618717193603516, "learning_rate": 5.2882205513784463e-05, "loss": 2.7715, "step": 218 }, { "epoch": 0.01648506746457404, "grad_norm": 6.806361198425293, "learning_rate": 5.31328320802005e-05, "loss": 2.7524, "step": 219 }, { "epoch": 0.016560341745234197, "grad_norm": 11.365694046020508, "learning_rate": 5.338345864661655e-05, "loss": 2.6783, "step": 220 }, { "epoch": 0.01663561602589435, "grad_norm": 8.204473495483398, "learning_rate": 5.363408521303258e-05, "loss": 2.779, "step": 221 }, { "epoch": 0.016710890306554507, "grad_norm": 7.241642475128174, "learning_rate": 5.388471177944863e-05, "loss": 2.4447, "step": 222 }, { "epoch": 0.016786164587214665, "grad_norm": 8.607502937316895, "learning_rate": 5.413533834586466e-05, "loss": 2.674, "step": 223 }, { "epoch": 0.01686143886787482, "grad_norm": 11.882096290588379, "learning_rate": 5.438596491228071e-05, "loss": 2.5251, "step": 224 }, { "epoch": 0.016936713148534975, "grad_norm": 7.958731174468994, "learning_rate": 5.463659147869674e-05, "loss": 2.6136, "step": 225 }, { "epoch": 0.01701198742919513, "grad_norm": 9.484212875366211, "learning_rate": 5.4887218045112786e-05, "loss": 2.6626, "step": 226 }, { "epoch": 0.017087261709855285, "grad_norm": 7.906175136566162, "learning_rate": 5.513784461152882e-05, "loss": 2.8029, "step": 227 }, { "epoch": 0.01716253599051544, "grad_norm": 8.0032377243042, "learning_rate": 5.5388471177944865e-05, "loss": 2.646, "step": 228 }, { "epoch": 0.017237810271175595, "grad_norm": 9.094749450683594, "learning_rate": 5.5639097744360905e-05, "loss": 2.5874, "step": 229 }, { "epoch": 0.01731308455183575, "grad_norm": 10.920360565185547, "learning_rate": 5.5889724310776944e-05, "loss": 2.7446, "step": 230 }, { "epoch": 0.01738835883249591, "grad_norm": 7.0707573890686035, "learning_rate": 5.6140350877192984e-05, "loss": 2.3312, "step": 231 }, { "epoch": 0.017463633113156064, "grad_norm": 8.162520408630371, "learning_rate": 5.639097744360903e-05, "loss": 2.7798, "step": 232 }, { "epoch": 0.01753890739381622, "grad_norm": 7.946226596832275, "learning_rate": 5.664160401002506e-05, "loss": 2.6714, "step": 233 }, { "epoch": 0.017614181674476374, "grad_norm": 6.753323554992676, "learning_rate": 5.689223057644111e-05, "loss": 2.5322, "step": 234 }, { "epoch": 0.01768945595513653, "grad_norm": 8.795600891113281, "learning_rate": 5.714285714285714e-05, "loss": 2.6777, "step": 235 }, { "epoch": 0.017764730235796684, "grad_norm": 7.2303786277771, "learning_rate": 5.739348370927319e-05, "loss": 2.5091, "step": 236 }, { "epoch": 0.01784000451645684, "grad_norm": 8.029302597045898, "learning_rate": 5.764411027568922e-05, "loss": 3.0638, "step": 237 }, { "epoch": 0.017915278797116994, "grad_norm": 7.138152599334717, "learning_rate": 5.789473684210527e-05, "loss": 2.4998, "step": 238 }, { "epoch": 0.017990553077777152, "grad_norm": 8.6770601272583, "learning_rate": 5.81453634085213e-05, "loss": 2.6019, "step": 239 }, { "epoch": 0.018065827358437307, "grad_norm": 9.012789726257324, "learning_rate": 5.8395989974937346e-05, "loss": 2.4086, "step": 240 }, { "epoch": 0.018141101639097462, "grad_norm": 7.267442226409912, "learning_rate": 5.8646616541353386e-05, "loss": 2.6218, "step": 241 }, { "epoch": 0.018216375919757617, "grad_norm": 11.098615646362305, "learning_rate": 5.889724310776943e-05, "loss": 2.6493, "step": 242 }, { "epoch": 0.018291650200417772, "grad_norm": 8.830709457397461, "learning_rate": 5.9147869674185465e-05, "loss": 3.1504, "step": 243 }, { "epoch": 0.018366924481077927, "grad_norm": 7.9406609535217285, "learning_rate": 5.939849624060151e-05, "loss": 2.4455, "step": 244 }, { "epoch": 0.018442198761738082, "grad_norm": 7.834328651428223, "learning_rate": 5.9649122807017544e-05, "loss": 2.7414, "step": 245 }, { "epoch": 0.018517473042398237, "grad_norm": 10.794530868530273, "learning_rate": 5.989974937343359e-05, "loss": 2.5237, "step": 246 }, { "epoch": 0.018592747323058396, "grad_norm": 10.709030151367188, "learning_rate": 6.015037593984962e-05, "loss": 2.4774, "step": 247 }, { "epoch": 0.01866802160371855, "grad_norm": 10.267542839050293, "learning_rate": 6.040100250626567e-05, "loss": 2.6907, "step": 248 }, { "epoch": 0.018743295884378706, "grad_norm": 8.143807411193848, "learning_rate": 6.06516290726817e-05, "loss": 2.5935, "step": 249 }, { "epoch": 0.01881857016503886, "grad_norm": 10.164816856384277, "learning_rate": 6.090225563909775e-05, "loss": 2.5394, "step": 250 }, { "epoch": 0.018893844445699016, "grad_norm": 14.251152038574219, "learning_rate": 6.115288220551379e-05, "loss": 2.5156, "step": 251 }, { "epoch": 0.01896911872635917, "grad_norm": 8.305621147155762, "learning_rate": 6.140350877192983e-05, "loss": 2.414, "step": 252 }, { "epoch": 0.019044393007019326, "grad_norm": 6.562389373779297, "learning_rate": 6.165413533834587e-05, "loss": 2.6894, "step": 253 }, { "epoch": 0.01911966728767948, "grad_norm": 9.983012199401855, "learning_rate": 6.19047619047619e-05, "loss": 2.35, "step": 254 }, { "epoch": 0.019194941568339636, "grad_norm": 18.99936294555664, "learning_rate": 6.215538847117795e-05, "loss": 3.0117, "step": 255 }, { "epoch": 0.019270215848999794, "grad_norm": 10.677833557128906, "learning_rate": 6.240601503759398e-05, "loss": 2.578, "step": 256 }, { "epoch": 0.01934549012965995, "grad_norm": 8.067180633544922, "learning_rate": 6.265664160401002e-05, "loss": 2.4961, "step": 257 }, { "epoch": 0.019420764410320104, "grad_norm": 10.744783401489258, "learning_rate": 6.290726817042606e-05, "loss": 2.6725, "step": 258 }, { "epoch": 0.01949603869098026, "grad_norm": 14.301708221435547, "learning_rate": 6.31578947368421e-05, "loss": 2.7495, "step": 259 }, { "epoch": 0.019571312971640414, "grad_norm": 6.461829662322998, "learning_rate": 6.340852130325816e-05, "loss": 2.4634, "step": 260 }, { "epoch": 0.01964658725230057, "grad_norm": 12.466805458068848, "learning_rate": 6.365914786967418e-05, "loss": 3.057, "step": 261 }, { "epoch": 0.019721861532960724, "grad_norm": 9.61961555480957, "learning_rate": 6.390977443609024e-05, "loss": 2.3606, "step": 262 }, { "epoch": 0.01979713581362088, "grad_norm": 7.8552374839782715, "learning_rate": 6.416040100250626e-05, "loss": 2.3515, "step": 263 }, { "epoch": 0.019872410094281038, "grad_norm": 8.121237754821777, "learning_rate": 6.441102756892231e-05, "loss": 2.6421, "step": 264 }, { "epoch": 0.019947684374941193, "grad_norm": 7.811336040496826, "learning_rate": 6.466165413533834e-05, "loss": 2.4611, "step": 265 }, { "epoch": 0.020022958655601348, "grad_norm": 12.549189567565918, "learning_rate": 6.49122807017544e-05, "loss": 2.8462, "step": 266 }, { "epoch": 0.020098232936261503, "grad_norm": 9.146442413330078, "learning_rate": 6.516290726817042e-05, "loss": 2.9353, "step": 267 }, { "epoch": 0.020173507216921658, "grad_norm": 9.0011625289917, "learning_rate": 6.541353383458647e-05, "loss": 2.4484, "step": 268 }, { "epoch": 0.020248781497581813, "grad_norm": 8.126823425292969, "learning_rate": 6.566416040100251e-05, "loss": 2.5692, "step": 269 }, { "epoch": 0.020324055778241968, "grad_norm": 8.504871368408203, "learning_rate": 6.591478696741855e-05, "loss": 2.6727, "step": 270 }, { "epoch": 0.020399330058902123, "grad_norm": 18.124483108520508, "learning_rate": 6.616541353383459e-05, "loss": 2.672, "step": 271 }, { "epoch": 0.02047460433956228, "grad_norm": 9.791601181030273, "learning_rate": 6.641604010025063e-05, "loss": 2.5762, "step": 272 }, { "epoch": 0.020549878620222436, "grad_norm": 10.024748802185059, "learning_rate": 6.666666666666667e-05, "loss": 2.6214, "step": 273 }, { "epoch": 0.02062515290088259, "grad_norm": 8.394241333007812, "learning_rate": 6.691729323308271e-05, "loss": 2.5758, "step": 274 }, { "epoch": 0.020700427181542747, "grad_norm": 12.312226295471191, "learning_rate": 6.716791979949875e-05, "loss": 3.2458, "step": 275 }, { "epoch": 0.0207757014622029, "grad_norm": 11.922405242919922, "learning_rate": 6.741854636591479e-05, "loss": 2.4109, "step": 276 }, { "epoch": 0.020850975742863057, "grad_norm": 8.170721054077148, "learning_rate": 6.766917293233083e-05, "loss": 2.7055, "step": 277 }, { "epoch": 0.02092625002352321, "grad_norm": 11.160572052001953, "learning_rate": 6.791979949874687e-05, "loss": 2.749, "step": 278 }, { "epoch": 0.021001524304183367, "grad_norm": 8.44627857208252, "learning_rate": 6.817042606516291e-05, "loss": 2.5876, "step": 279 }, { "epoch": 0.021076798584843525, "grad_norm": 8.555981636047363, "learning_rate": 6.842105263157895e-05, "loss": 2.3791, "step": 280 }, { "epoch": 0.02115207286550368, "grad_norm": 6.683128356933594, "learning_rate": 6.867167919799499e-05, "loss": 2.3495, "step": 281 }, { "epoch": 0.021227347146163835, "grad_norm": 7.898855209350586, "learning_rate": 6.892230576441104e-05, "loss": 2.8104, "step": 282 }, { "epoch": 0.02130262142682399, "grad_norm": 10.912748336791992, "learning_rate": 6.917293233082706e-05, "loss": 2.8165, "step": 283 }, { "epoch": 0.021377895707484145, "grad_norm": 11.60110855102539, "learning_rate": 6.942355889724312e-05, "loss": 2.4285, "step": 284 }, { "epoch": 0.0214531699881443, "grad_norm": 7.183400630950928, "learning_rate": 6.967418546365914e-05, "loss": 2.4517, "step": 285 }, { "epoch": 0.021528444268804455, "grad_norm": 8.255367279052734, "learning_rate": 6.99248120300752e-05, "loss": 2.6391, "step": 286 }, { "epoch": 0.02160371854946461, "grad_norm": 9.572988510131836, "learning_rate": 7.017543859649122e-05, "loss": 2.6038, "step": 287 }, { "epoch": 0.02167899283012477, "grad_norm": 5.990320205688477, "learning_rate": 7.042606516290728e-05, "loss": 2.3011, "step": 288 }, { "epoch": 0.021754267110784924, "grad_norm": 12.906543731689453, "learning_rate": 7.06766917293233e-05, "loss": 2.4855, "step": 289 }, { "epoch": 0.02182954139144508, "grad_norm": 8.411698341369629, "learning_rate": 7.092731829573935e-05, "loss": 2.5813, "step": 290 }, { "epoch": 0.021904815672105234, "grad_norm": 8.958061218261719, "learning_rate": 7.11779448621554e-05, "loss": 2.7643, "step": 291 }, { "epoch": 0.02198008995276539, "grad_norm": 5.848999500274658, "learning_rate": 7.142857142857143e-05, "loss": 2.542, "step": 292 }, { "epoch": 0.022055364233425544, "grad_norm": 10.549403190612793, "learning_rate": 7.167919799498747e-05, "loss": 2.6225, "step": 293 }, { "epoch": 0.0221306385140857, "grad_norm": 7.328732490539551, "learning_rate": 7.192982456140351e-05, "loss": 2.3358, "step": 294 }, { "epoch": 0.022205912794745854, "grad_norm": 8.090675354003906, "learning_rate": 7.218045112781955e-05, "loss": 2.4583, "step": 295 }, { "epoch": 0.022281187075406012, "grad_norm": 4.750062942504883, "learning_rate": 7.243107769423559e-05, "loss": 2.5265, "step": 296 }, { "epoch": 0.022356461356066167, "grad_norm": 7.802643775939941, "learning_rate": 7.268170426065163e-05, "loss": 2.9576, "step": 297 }, { "epoch": 0.022431735636726322, "grad_norm": 6.483747482299805, "learning_rate": 7.293233082706767e-05, "loss": 2.6836, "step": 298 }, { "epoch": 0.022507009917386477, "grad_norm": 11.573009490966797, "learning_rate": 7.318295739348371e-05, "loss": 2.3012, "step": 299 }, { "epoch": 0.022582284198046632, "grad_norm": 8.898382186889648, "learning_rate": 7.343358395989975e-05, "loss": 2.3735, "step": 300 }, { "epoch": 0.022657558478706787, "grad_norm": 5.705074787139893, "learning_rate": 7.368421052631579e-05, "loss": 2.4902, "step": 301 }, { "epoch": 0.022732832759366942, "grad_norm": 8.153017044067383, "learning_rate": 7.393483709273183e-05, "loss": 2.5585, "step": 302 }, { "epoch": 0.022808107040027097, "grad_norm": 9.58834457397461, "learning_rate": 7.418546365914787e-05, "loss": 2.6702, "step": 303 }, { "epoch": 0.022883381320687256, "grad_norm": 7.1557230949401855, "learning_rate": 7.443609022556392e-05, "loss": 2.4311, "step": 304 }, { "epoch": 0.02295865560134741, "grad_norm": 7.851171016693115, "learning_rate": 7.468671679197995e-05, "loss": 2.5695, "step": 305 }, { "epoch": 0.023033929882007566, "grad_norm": 10.56496810913086, "learning_rate": 7.4937343358396e-05, "loss": 2.608, "step": 306 }, { "epoch": 0.02310920416266772, "grad_norm": 10.43893051147461, "learning_rate": 7.518796992481203e-05, "loss": 2.4521, "step": 307 }, { "epoch": 0.023184478443327876, "grad_norm": 15.015542030334473, "learning_rate": 7.543859649122808e-05, "loss": 2.6188, "step": 308 }, { "epoch": 0.02325975272398803, "grad_norm": 7.0824151039123535, "learning_rate": 7.56892230576441e-05, "loss": 2.3985, "step": 309 }, { "epoch": 0.023335027004648186, "grad_norm": 11.571524620056152, "learning_rate": 7.593984962406016e-05, "loss": 2.8705, "step": 310 }, { "epoch": 0.02341030128530834, "grad_norm": 13.608711242675781, "learning_rate": 7.619047619047618e-05, "loss": 2.6108, "step": 311 }, { "epoch": 0.0234855755659685, "grad_norm": 12.766798973083496, "learning_rate": 7.644110275689224e-05, "loss": 2.7538, "step": 312 }, { "epoch": 0.023560849846628654, "grad_norm": 10.92900562286377, "learning_rate": 7.669172932330826e-05, "loss": 2.5924, "step": 313 }, { "epoch": 0.02363612412728881, "grad_norm": 7.872305393218994, "learning_rate": 7.694235588972432e-05, "loss": 2.4474, "step": 314 }, { "epoch": 0.023711398407948964, "grad_norm": 8.653833389282227, "learning_rate": 7.719298245614036e-05, "loss": 2.764, "step": 315 }, { "epoch": 0.02378667268860912, "grad_norm": 8.444396018981934, "learning_rate": 7.74436090225564e-05, "loss": 2.5794, "step": 316 }, { "epoch": 0.023861946969269274, "grad_norm": 9.17378044128418, "learning_rate": 7.769423558897244e-05, "loss": 2.4611, "step": 317 }, { "epoch": 0.02393722124992943, "grad_norm": 5.68281364440918, "learning_rate": 7.794486215538847e-05, "loss": 2.7726, "step": 318 }, { "epoch": 0.024012495530589584, "grad_norm": 9.510702133178711, "learning_rate": 7.819548872180451e-05, "loss": 2.4254, "step": 319 }, { "epoch": 0.024087769811249743, "grad_norm": 9.245924949645996, "learning_rate": 7.844611528822055e-05, "loss": 2.4872, "step": 320 }, { "epoch": 0.024163044091909898, "grad_norm": 6.058887958526611, "learning_rate": 7.869674185463659e-05, "loss": 2.5338, "step": 321 }, { "epoch": 0.024238318372570053, "grad_norm": 9.469134330749512, "learning_rate": 7.894736842105263e-05, "loss": 2.9507, "step": 322 }, { "epoch": 0.024313592653230208, "grad_norm": 8.840911865234375, "learning_rate": 7.919799498746867e-05, "loss": 2.9634, "step": 323 }, { "epoch": 0.024388866933890363, "grad_norm": 6.134981155395508, "learning_rate": 7.944862155388471e-05, "loss": 2.5341, "step": 324 }, { "epoch": 0.024464141214550518, "grad_norm": 7.281913757324219, "learning_rate": 7.969924812030075e-05, "loss": 2.6995, "step": 325 }, { "epoch": 0.024539415495210673, "grad_norm": 8.727099418640137, "learning_rate": 7.994987468671679e-05, "loss": 2.355, "step": 326 }, { "epoch": 0.024614689775870828, "grad_norm": 7.811582088470459, "learning_rate": 8.020050125313283e-05, "loss": 2.3913, "step": 327 }, { "epoch": 0.024689964056530986, "grad_norm": 8.502856254577637, "learning_rate": 8.045112781954888e-05, "loss": 2.5857, "step": 328 }, { "epoch": 0.02476523833719114, "grad_norm": 10.060704231262207, "learning_rate": 8.070175438596491e-05, "loss": 2.7334, "step": 329 }, { "epoch": 0.024840512617851297, "grad_norm": 9.777992248535156, "learning_rate": 8.095238095238096e-05, "loss": 2.3778, "step": 330 }, { "epoch": 0.02491578689851145, "grad_norm": 9.373239517211914, "learning_rate": 8.120300751879699e-05, "loss": 2.5264, "step": 331 }, { "epoch": 0.024991061179171607, "grad_norm": 7.8832221031188965, "learning_rate": 8.145363408521304e-05, "loss": 2.702, "step": 332 }, { "epoch": 0.02506633545983176, "grad_norm": 9.64803695678711, "learning_rate": 8.170426065162907e-05, "loss": 2.4452, "step": 333 }, { "epoch": 0.025141609740491917, "grad_norm": 12.649718284606934, "learning_rate": 8.195488721804512e-05, "loss": 2.5841, "step": 334 }, { "epoch": 0.02521688402115207, "grad_norm": 10.74282455444336, "learning_rate": 8.220551378446115e-05, "loss": 2.5936, "step": 335 }, { "epoch": 0.025292158301812227, "grad_norm": 10.858030319213867, "learning_rate": 8.24561403508772e-05, "loss": 2.4946, "step": 336 }, { "epoch": 0.025367432582472385, "grad_norm": 11.46617603302002, "learning_rate": 8.270676691729324e-05, "loss": 2.234, "step": 337 }, { "epoch": 0.02544270686313254, "grad_norm": 9.468840599060059, "learning_rate": 8.295739348370928e-05, "loss": 2.7549, "step": 338 }, { "epoch": 0.025517981143792695, "grad_norm": 13.022171020507812, "learning_rate": 8.320802005012532e-05, "loss": 2.4938, "step": 339 }, { "epoch": 0.02559325542445285, "grad_norm": 7.186497211456299, "learning_rate": 8.345864661654136e-05, "loss": 2.6162, "step": 340 }, { "epoch": 0.025668529705113005, "grad_norm": 9.196660995483398, "learning_rate": 8.37092731829574e-05, "loss": 2.6004, "step": 341 }, { "epoch": 0.02574380398577316, "grad_norm": 8.185606956481934, "learning_rate": 8.395989974937344e-05, "loss": 2.3878, "step": 342 }, { "epoch": 0.025819078266433315, "grad_norm": 11.060405731201172, "learning_rate": 8.421052631578948e-05, "loss": 2.6286, "step": 343 }, { "epoch": 0.02589435254709347, "grad_norm": 9.861959457397461, "learning_rate": 8.446115288220552e-05, "loss": 3.0686, "step": 344 }, { "epoch": 0.02596962682775363, "grad_norm": 5.27044677734375, "learning_rate": 8.471177944862155e-05, "loss": 2.4063, "step": 345 }, { "epoch": 0.026044901108413784, "grad_norm": 6.576343059539795, "learning_rate": 8.49624060150376e-05, "loss": 2.7093, "step": 346 }, { "epoch": 0.02612017538907394, "grad_norm": 7.733982086181641, "learning_rate": 8.521303258145363e-05, "loss": 3.3062, "step": 347 }, { "epoch": 0.026195449669734094, "grad_norm": 8.054815292358398, "learning_rate": 8.546365914786967e-05, "loss": 2.5026, "step": 348 }, { "epoch": 0.02627072395039425, "grad_norm": 6.866470813751221, "learning_rate": 8.571428571428571e-05, "loss": 2.2814, "step": 349 }, { "epoch": 0.026345998231054404, "grad_norm": 9.631629943847656, "learning_rate": 8.596491228070177e-05, "loss": 2.5933, "step": 350 }, { "epoch": 0.02642127251171456, "grad_norm": 7.4486846923828125, "learning_rate": 8.621553884711779e-05, "loss": 3.0249, "step": 351 }, { "epoch": 0.026496546792374714, "grad_norm": 8.702048301696777, "learning_rate": 8.646616541353384e-05, "loss": 2.5208, "step": 352 }, { "epoch": 0.026571821073034872, "grad_norm": 9.063924789428711, "learning_rate": 8.671679197994987e-05, "loss": 2.4645, "step": 353 }, { "epoch": 0.026647095353695027, "grad_norm": 8.293697357177734, "learning_rate": 8.696741854636592e-05, "loss": 2.5806, "step": 354 }, { "epoch": 0.026722369634355182, "grad_norm": 12.996391296386719, "learning_rate": 8.721804511278195e-05, "loss": 2.4446, "step": 355 }, { "epoch": 0.026797643915015337, "grad_norm": 9.56811809539795, "learning_rate": 8.7468671679198e-05, "loss": 2.1486, "step": 356 }, { "epoch": 0.026872918195675492, "grad_norm": 9.908924102783203, "learning_rate": 8.771929824561403e-05, "loss": 2.6213, "step": 357 }, { "epoch": 0.026948192476335647, "grad_norm": 5.640969276428223, "learning_rate": 8.796992481203008e-05, "loss": 2.2379, "step": 358 }, { "epoch": 0.027023466756995802, "grad_norm": 9.026790618896484, "learning_rate": 8.822055137844612e-05, "loss": 2.6115, "step": 359 }, { "epoch": 0.027098741037655957, "grad_norm": 8.025802612304688, "learning_rate": 8.847117794486216e-05, "loss": 2.6743, "step": 360 }, { "epoch": 0.027174015318316116, "grad_norm": 6.087635040283203, "learning_rate": 8.87218045112782e-05, "loss": 2.4731, "step": 361 }, { "epoch": 0.02724928959897627, "grad_norm": 7.93490743637085, "learning_rate": 8.897243107769424e-05, "loss": 2.2696, "step": 362 }, { "epoch": 0.027324563879636426, "grad_norm": 8.029409408569336, "learning_rate": 8.922305764411028e-05, "loss": 2.5261, "step": 363 }, { "epoch": 0.02739983816029658, "grad_norm": 11.829414367675781, "learning_rate": 8.947368421052632e-05, "loss": 2.4563, "step": 364 }, { "epoch": 0.027475112440956736, "grad_norm": 7.845546722412109, "learning_rate": 8.972431077694236e-05, "loss": 2.4929, "step": 365 }, { "epoch": 0.02755038672161689, "grad_norm": 9.62540054321289, "learning_rate": 8.99749373433584e-05, "loss": 2.4691, "step": 366 }, { "epoch": 0.027625661002277046, "grad_norm": 7.262996196746826, "learning_rate": 9.022556390977444e-05, "loss": 2.1627, "step": 367 }, { "epoch": 0.0277009352829372, "grad_norm": 8.31999397277832, "learning_rate": 9.047619047619048e-05, "loss": 2.9411, "step": 368 }, { "epoch": 0.02777620956359736, "grad_norm": 8.52043342590332, "learning_rate": 9.072681704260652e-05, "loss": 2.3104, "step": 369 }, { "epoch": 0.027851483844257514, "grad_norm": 6.144811153411865, "learning_rate": 9.097744360902256e-05, "loss": 2.4447, "step": 370 }, { "epoch": 0.02792675812491767, "grad_norm": 7.1753363609313965, "learning_rate": 9.12280701754386e-05, "loss": 2.3539, "step": 371 }, { "epoch": 0.028002032405577824, "grad_norm": 9.527101516723633, "learning_rate": 9.147869674185465e-05, "loss": 2.9253, "step": 372 }, { "epoch": 0.02807730668623798, "grad_norm": 13.049726486206055, "learning_rate": 9.172932330827067e-05, "loss": 2.6301, "step": 373 }, { "epoch": 0.028152580966898134, "grad_norm": 10.012778282165527, "learning_rate": 9.197994987468673e-05, "loss": 2.4109, "step": 374 }, { "epoch": 0.02822785524755829, "grad_norm": 9.397246360778809, "learning_rate": 9.223057644110275e-05, "loss": 2.5127, "step": 375 }, { "epoch": 0.028303129528218444, "grad_norm": 6.507575988769531, "learning_rate": 9.24812030075188e-05, "loss": 2.3723, "step": 376 }, { "epoch": 0.028378403808878603, "grad_norm": 8.265238761901855, "learning_rate": 9.273182957393483e-05, "loss": 2.4718, "step": 377 }, { "epoch": 0.028453678089538758, "grad_norm": 6.205020904541016, "learning_rate": 9.298245614035089e-05, "loss": 2.3485, "step": 378 }, { "epoch": 0.028528952370198913, "grad_norm": 9.341041564941406, "learning_rate": 9.323308270676691e-05, "loss": 2.6597, "step": 379 }, { "epoch": 0.028604226650859068, "grad_norm": 8.819635391235352, "learning_rate": 9.348370927318296e-05, "loss": 2.3982, "step": 380 }, { "epoch": 0.028679500931519223, "grad_norm": 7.3793721199035645, "learning_rate": 9.373433583959899e-05, "loss": 2.6471, "step": 381 }, { "epoch": 0.028754775212179378, "grad_norm": 8.588555335998535, "learning_rate": 9.398496240601504e-05, "loss": 2.5609, "step": 382 }, { "epoch": 0.028830049492839533, "grad_norm": 10.27634334564209, "learning_rate": 9.423558897243108e-05, "loss": 2.7713, "step": 383 }, { "epoch": 0.028905323773499688, "grad_norm": 8.250353813171387, "learning_rate": 9.448621553884712e-05, "loss": 2.8336, "step": 384 }, { "epoch": 0.028980598054159847, "grad_norm": 10.630231857299805, "learning_rate": 9.473684210526316e-05, "loss": 2.5987, "step": 385 }, { "epoch": 0.02905587233482, "grad_norm": 6.692311763763428, "learning_rate": 9.49874686716792e-05, "loss": 2.9426, "step": 386 }, { "epoch": 0.029131146615480157, "grad_norm": 9.565740585327148, "learning_rate": 9.523809523809524e-05, "loss": 2.5801, "step": 387 }, { "epoch": 0.02920642089614031, "grad_norm": 10.207667350769043, "learning_rate": 9.548872180451128e-05, "loss": 2.5359, "step": 388 }, { "epoch": 0.029281695176800467, "grad_norm": 8.448806762695312, "learning_rate": 9.573934837092732e-05, "loss": 2.7342, "step": 389 }, { "epoch": 0.02935696945746062, "grad_norm": 8.73253059387207, "learning_rate": 9.598997493734336e-05, "loss": 2.7241, "step": 390 }, { "epoch": 0.029432243738120777, "grad_norm": 5.377964973449707, "learning_rate": 9.62406015037594e-05, "loss": 2.406, "step": 391 }, { "epoch": 0.02950751801878093, "grad_norm": 8.039495468139648, "learning_rate": 9.649122807017544e-05, "loss": 3.0968, "step": 392 }, { "epoch": 0.02958279229944109, "grad_norm": 7.784538745880127, "learning_rate": 9.674185463659148e-05, "loss": 2.2749, "step": 393 }, { "epoch": 0.029658066580101245, "grad_norm": 6.550390720367432, "learning_rate": 9.699248120300752e-05, "loss": 2.5131, "step": 394 }, { "epoch": 0.0297333408607614, "grad_norm": 7.988730430603027, "learning_rate": 9.724310776942356e-05, "loss": 2.3518, "step": 395 }, { "epoch": 0.029808615141421555, "grad_norm": 6.855724811553955, "learning_rate": 9.749373433583961e-05, "loss": 2.2968, "step": 396 }, { "epoch": 0.02988388942208171, "grad_norm": 9.838045120239258, "learning_rate": 9.774436090225564e-05, "loss": 2.6223, "step": 397 }, { "epoch": 0.029959163702741865, "grad_norm": 11.083606719970703, "learning_rate": 9.799498746867169e-05, "loss": 2.3675, "step": 398 }, { "epoch": 0.03003443798340202, "grad_norm": 7.736291408538818, "learning_rate": 9.824561403508771e-05, "loss": 2.2007, "step": 399 }, { "epoch": 0.030109712264062175, "grad_norm": 7.533214092254639, "learning_rate": 9.849624060150377e-05, "loss": 2.8185, "step": 400 }, { "epoch": 0.030184986544722334, "grad_norm": 5.824602127075195, "learning_rate": 9.87468671679198e-05, "loss": 2.4325, "step": 401 }, { "epoch": 0.03026026082538249, "grad_norm": 7.297369480133057, "learning_rate": 9.899749373433585e-05, "loss": 2.1774, "step": 402 }, { "epoch": 0.030335535106042644, "grad_norm": 5.556282043457031, "learning_rate": 9.924812030075187e-05, "loss": 2.2991, "step": 403 }, { "epoch": 0.0304108093867028, "grad_norm": 7.1707987785339355, "learning_rate": 9.949874686716793e-05, "loss": 2.7968, "step": 404 }, { "epoch": 0.030486083667362954, "grad_norm": 7.452920913696289, "learning_rate": 9.974937343358397e-05, "loss": 2.6478, "step": 405 }, { "epoch": 0.03056135794802311, "grad_norm": 5.904439926147461, "learning_rate": 0.0001, "loss": 2.4775, "step": 406 }, { "epoch": 0.030636632228683264, "grad_norm": 7.584089279174805, "learning_rate": 9.999999851382173e-05, "loss": 2.7436, "step": 407 }, { "epoch": 0.03071190650934342, "grad_norm": 5.895624160766602, "learning_rate": 9.999999405528702e-05, "loss": 3.1167, "step": 408 }, { "epoch": 0.030787180790003577, "grad_norm": 7.877616882324219, "learning_rate": 9.999998662439613e-05, "loss": 2.5746, "step": 409 }, { "epoch": 0.030862455070663732, "grad_norm": 10.939014434814453, "learning_rate": 9.99999762211495e-05, "loss": 2.3587, "step": 410 }, { "epoch": 0.030937729351323887, "grad_norm": 5.0304694175720215, "learning_rate": 9.999996284554776e-05, "loss": 2.5545, "step": 411 }, { "epoch": 0.031013003631984042, "grad_norm": 10.43912124633789, "learning_rate": 9.999994649759168e-05, "loss": 2.5543, "step": 412 }, { "epoch": 0.031088277912644197, "grad_norm": 11.310480117797852, "learning_rate": 9.999992717728223e-05, "loss": 3.2997, "step": 413 }, { "epoch": 0.031163552193304352, "grad_norm": 5.6342620849609375, "learning_rate": 9.99999048846206e-05, "loss": 2.4806, "step": 414 }, { "epoch": 0.031238826473964507, "grad_norm": 7.379421710968018, "learning_rate": 9.999987961960808e-05, "loss": 2.4209, "step": 415 }, { "epoch": 0.031314100754624666, "grad_norm": 9.048238754272461, "learning_rate": 9.999985138224619e-05, "loss": 2.5663, "step": 416 }, { "epoch": 0.03138937503528482, "grad_norm": 14.7123384475708, "learning_rate": 9.99998201725366e-05, "loss": 2.9186, "step": 417 }, { "epoch": 0.031464649315944976, "grad_norm": 11.715332984924316, "learning_rate": 9.999978599048117e-05, "loss": 2.6159, "step": 418 }, { "epoch": 0.03153992359660513, "grad_norm": 15.586795806884766, "learning_rate": 9.999974883608192e-05, "loss": 2.5011, "step": 419 }, { "epoch": 0.031615197877265286, "grad_norm": 9.806571960449219, "learning_rate": 9.999970870934108e-05, "loss": 2.9515, "step": 420 }, { "epoch": 0.03169047215792544, "grad_norm": 11.321080207824707, "learning_rate": 9.9999665610261e-05, "loss": 2.4308, "step": 421 }, { "epoch": 0.031765746438585596, "grad_norm": 6.57304573059082, "learning_rate": 9.999961953884427e-05, "loss": 2.487, "step": 422 }, { "epoch": 0.03184102071924575, "grad_norm": 5.4669365882873535, "learning_rate": 9.999957049509365e-05, "loss": 2.6643, "step": 423 }, { "epoch": 0.031916294999905906, "grad_norm": 8.392806053161621, "learning_rate": 9.9999518479012e-05, "loss": 3.0091, "step": 424 }, { "epoch": 0.03199156928056606, "grad_norm": 8.882760047912598, "learning_rate": 9.999946349060245e-05, "loss": 2.6367, "step": 425 }, { "epoch": 0.032066843561226216, "grad_norm": 8.551156044006348, "learning_rate": 9.999940552986826e-05, "loss": 2.4453, "step": 426 }, { "epoch": 0.03214211784188637, "grad_norm": 4.792285442352295, "learning_rate": 9.999934459681288e-05, "loss": 2.4126, "step": 427 }, { "epoch": 0.032217392122546526, "grad_norm": 6.7595953941345215, "learning_rate": 9.999928069143993e-05, "loss": 2.6518, "step": 428 }, { "epoch": 0.03229266640320668, "grad_norm": 10.124109268188477, "learning_rate": 9.999921381375319e-05, "loss": 2.7516, "step": 429 }, { "epoch": 0.03236794068386684, "grad_norm": 6.628558158874512, "learning_rate": 9.999914396375668e-05, "loss": 2.462, "step": 430 }, { "epoch": 0.032443214964527, "grad_norm": 8.301045417785645, "learning_rate": 9.99990711414545e-05, "loss": 2.8795, "step": 431 }, { "epoch": 0.03251848924518715, "grad_norm": 5.132826328277588, "learning_rate": 9.999899534685102e-05, "loss": 2.3893, "step": 432 }, { "epoch": 0.03259376352584731, "grad_norm": 9.406960487365723, "learning_rate": 9.99989165799507e-05, "loss": 2.5055, "step": 433 }, { "epoch": 0.03266903780650746, "grad_norm": 10.371724128723145, "learning_rate": 9.999883484075829e-05, "loss": 2.6004, "step": 434 }, { "epoch": 0.03274431208716762, "grad_norm": 7.023213863372803, "learning_rate": 9.99987501292786e-05, "loss": 2.7704, "step": 435 }, { "epoch": 0.03281958636782777, "grad_norm": 7.946764945983887, "learning_rate": 9.999866244551667e-05, "loss": 2.2468, "step": 436 }, { "epoch": 0.03289486064848793, "grad_norm": 6.187266826629639, "learning_rate": 9.999857178947772e-05, "loss": 2.7072, "step": 437 }, { "epoch": 0.03297013492914808, "grad_norm": 5.986959934234619, "learning_rate": 9.999847816116714e-05, "loss": 2.4356, "step": 438 }, { "epoch": 0.03304540920980824, "grad_norm": 7.843387126922607, "learning_rate": 9.999838156059051e-05, "loss": 2.5055, "step": 439 }, { "epoch": 0.03312068349046839, "grad_norm": 7.710484981536865, "learning_rate": 9.999828198775354e-05, "loss": 2.4021, "step": 440 }, { "epoch": 0.03319595777112855, "grad_norm": 12.383356094360352, "learning_rate": 9.999817944266216e-05, "loss": 2.6619, "step": 441 }, { "epoch": 0.0332712320517887, "grad_norm": 6.836391925811768, "learning_rate": 9.99980739253225e-05, "loss": 2.5544, "step": 442 }, { "epoch": 0.03334650633244886, "grad_norm": 11.41148567199707, "learning_rate": 9.999796543574078e-05, "loss": 2.6145, "step": 443 }, { "epoch": 0.03342178061310901, "grad_norm": 5.887909412384033, "learning_rate": 9.999785397392349e-05, "loss": 2.5155, "step": 444 }, { "epoch": 0.03349705489376917, "grad_norm": 9.66697883605957, "learning_rate": 9.999773953987724e-05, "loss": 2.7728, "step": 445 }, { "epoch": 0.03357232917442933, "grad_norm": 6.084951877593994, "learning_rate": 9.999762213360884e-05, "loss": 2.5111, "step": 446 }, { "epoch": 0.033647603455089485, "grad_norm": 7.673906326293945, "learning_rate": 9.999750175512524e-05, "loss": 2.5937, "step": 447 }, { "epoch": 0.03372287773574964, "grad_norm": 6.4027485847473145, "learning_rate": 9.999737840443364e-05, "loss": 2.4019, "step": 448 }, { "epoch": 0.033798152016409795, "grad_norm": 7.2442522048950195, "learning_rate": 9.999725208154135e-05, "loss": 2.6147, "step": 449 }, { "epoch": 0.03387342629706995, "grad_norm": 5.990758895874023, "learning_rate": 9.999712278645587e-05, "loss": 2.4489, "step": 450 }, { "epoch": 0.033948700577730105, "grad_norm": 4.980669021606445, "learning_rate": 9.999699051918491e-05, "loss": 2.5905, "step": 451 }, { "epoch": 0.03402397485839026, "grad_norm": 4.490603446960449, "learning_rate": 9.999685527973633e-05, "loss": 2.7177, "step": 452 }, { "epoch": 0.034099249139050415, "grad_norm": 9.191030502319336, "learning_rate": 9.999671706811814e-05, "loss": 2.5565, "step": 453 }, { "epoch": 0.03417452341971057, "grad_norm": 6.982939720153809, "learning_rate": 9.999657588433858e-05, "loss": 2.479, "step": 454 }, { "epoch": 0.034249797700370725, "grad_norm": 6.823544979095459, "learning_rate": 9.999643172840605e-05, "loss": 2.2048, "step": 455 }, { "epoch": 0.03432507198103088, "grad_norm": 6.251356601715088, "learning_rate": 9.99962846003291e-05, "loss": 2.5377, "step": 456 }, { "epoch": 0.034400346261691035, "grad_norm": 7.133268356323242, "learning_rate": 9.999613450011648e-05, "loss": 2.3033, "step": 457 }, { "epoch": 0.03447562054235119, "grad_norm": 5.284430503845215, "learning_rate": 9.999598142777715e-05, "loss": 2.7628, "step": 458 }, { "epoch": 0.034550894823011345, "grad_norm": 6.167489528656006, "learning_rate": 9.999582538332016e-05, "loss": 2.7558, "step": 459 }, { "epoch": 0.0346261691036715, "grad_norm": 5.280205726623535, "learning_rate": 9.999566636675479e-05, "loss": 2.1525, "step": 460 }, { "epoch": 0.034701443384331655, "grad_norm": 6.544081687927246, "learning_rate": 9.999550437809053e-05, "loss": 2.6361, "step": 461 }, { "epoch": 0.03477671766499182, "grad_norm": 5.885289192199707, "learning_rate": 9.999533941733699e-05, "loss": 2.3613, "step": 462 }, { "epoch": 0.03485199194565197, "grad_norm": 5.703323841094971, "learning_rate": 9.999517148450395e-05, "loss": 2.5808, "step": 463 }, { "epoch": 0.03492726622631213, "grad_norm": 9.864120483398438, "learning_rate": 9.999500057960142e-05, "loss": 2.6428, "step": 464 }, { "epoch": 0.03500254050697228, "grad_norm": 9.22010612487793, "learning_rate": 9.999482670263958e-05, "loss": 2.4964, "step": 465 }, { "epoch": 0.03507781478763244, "grad_norm": 8.119236946105957, "learning_rate": 9.999464985362875e-05, "loss": 2.7858, "step": 466 }, { "epoch": 0.03515308906829259, "grad_norm": 6.575767993927002, "learning_rate": 9.99944700325794e-05, "loss": 2.7414, "step": 467 }, { "epoch": 0.03522836334895275, "grad_norm": 7.580944061279297, "learning_rate": 9.999428723950228e-05, "loss": 2.6357, "step": 468 }, { "epoch": 0.0353036376296129, "grad_norm": 6.015957355499268, "learning_rate": 9.999410147440823e-05, "loss": 2.5034, "step": 469 }, { "epoch": 0.03537891191027306, "grad_norm": 5.732180118560791, "learning_rate": 9.999391273730829e-05, "loss": 2.5332, "step": 470 }, { "epoch": 0.03545418619093321, "grad_norm": 4.958330154418945, "learning_rate": 9.99937210282137e-05, "loss": 2.2508, "step": 471 }, { "epoch": 0.03552946047159337, "grad_norm": 6.343937397003174, "learning_rate": 9.999352634713584e-05, "loss": 2.3269, "step": 472 }, { "epoch": 0.03560473475225352, "grad_norm": 6.58331298828125, "learning_rate": 9.999332869408629e-05, "loss": 2.6838, "step": 473 }, { "epoch": 0.03568000903291368, "grad_norm": 11.521378517150879, "learning_rate": 9.999312806907676e-05, "loss": 2.7741, "step": 474 }, { "epoch": 0.03575528331357383, "grad_norm": 8.342028617858887, "learning_rate": 9.999292447211926e-05, "loss": 2.6406, "step": 475 }, { "epoch": 0.03583055759423399, "grad_norm": 5.67268180847168, "learning_rate": 9.999271790322581e-05, "loss": 2.4836, "step": 476 }, { "epoch": 0.03590583187489414, "grad_norm": 7.215313911437988, "learning_rate": 9.999250836240876e-05, "loss": 2.5318, "step": 477 }, { "epoch": 0.035981106155554304, "grad_norm": 6.308455944061279, "learning_rate": 9.99922958496805e-05, "loss": 2.3813, "step": 478 }, { "epoch": 0.03605638043621446, "grad_norm": 8.246892929077148, "learning_rate": 9.999208036505372e-05, "loss": 2.7915, "step": 479 }, { "epoch": 0.036131654716874614, "grad_norm": 7.0365071296691895, "learning_rate": 9.99918619085412e-05, "loss": 2.3779, "step": 480 }, { "epoch": 0.03620692899753477, "grad_norm": 5.719037055969238, "learning_rate": 9.999164048015593e-05, "loss": 2.7042, "step": 481 }, { "epoch": 0.036282203278194924, "grad_norm": 6.5956315994262695, "learning_rate": 9.999141607991107e-05, "loss": 2.3586, "step": 482 }, { "epoch": 0.03635747755885508, "grad_norm": 5.437718868255615, "learning_rate": 9.999118870781996e-05, "loss": 2.3513, "step": 483 }, { "epoch": 0.036432751839515234, "grad_norm": 8.259861946105957, "learning_rate": 9.999095836389613e-05, "loss": 2.2873, "step": 484 }, { "epoch": 0.03650802612017539, "grad_norm": 7.971058368682861, "learning_rate": 9.999072504815328e-05, "loss": 2.4196, "step": 485 }, { "epoch": 0.036583300400835544, "grad_norm": 9.012106895446777, "learning_rate": 9.999048876060525e-05, "loss": 2.3551, "step": 486 }, { "epoch": 0.0366585746814957, "grad_norm": 6.393646717071533, "learning_rate": 9.999024950126609e-05, "loss": 2.5493, "step": 487 }, { "epoch": 0.036733848962155854, "grad_norm": 8.119710922241211, "learning_rate": 9.999000727015007e-05, "loss": 2.2621, "step": 488 }, { "epoch": 0.03680912324281601, "grad_norm": 10.802091598510742, "learning_rate": 9.998976206727153e-05, "loss": 2.5148, "step": 489 }, { "epoch": 0.036884397523476165, "grad_norm": 6.845826148986816, "learning_rate": 9.998951389264508e-05, "loss": 2.7917, "step": 490 }, { "epoch": 0.03695967180413632, "grad_norm": 7.117809295654297, "learning_rate": 9.998926274628546e-05, "loss": 2.5097, "step": 491 }, { "epoch": 0.037034946084796475, "grad_norm": 6.615505218505859, "learning_rate": 9.99890086282076e-05, "loss": 2.5368, "step": 492 }, { "epoch": 0.03711022036545663, "grad_norm": 6.764005661010742, "learning_rate": 9.998875153842662e-05, "loss": 2.3979, "step": 493 }, { "epoch": 0.03718549464611679, "grad_norm": 6.176251411437988, "learning_rate": 9.99884914769578e-05, "loss": 2.4214, "step": 494 }, { "epoch": 0.037260768926776947, "grad_norm": 7.364236831665039, "learning_rate": 9.998822844381659e-05, "loss": 2.5111, "step": 495 }, { "epoch": 0.0373360432074371, "grad_norm": 7.308414936065674, "learning_rate": 9.998796243901862e-05, "loss": 2.555, "step": 496 }, { "epoch": 0.03741131748809726, "grad_norm": 8.038701057434082, "learning_rate": 9.998769346257972e-05, "loss": 2.6026, "step": 497 }, { "epoch": 0.03748659176875741, "grad_norm": 7.312729835510254, "learning_rate": 9.998742151451588e-05, "loss": 2.8019, "step": 498 }, { "epoch": 0.03756186604941757, "grad_norm": 7.3715128898620605, "learning_rate": 9.998714659484326e-05, "loss": 2.475, "step": 499 }, { "epoch": 0.03763714033007772, "grad_norm": 8.535462379455566, "learning_rate": 9.99868687035782e-05, "loss": 2.2464, "step": 500 }, { "epoch": 0.03771241461073788, "grad_norm": 5.346155166625977, "learning_rate": 9.998658784073723e-05, "loss": 2.3705, "step": 501 }, { "epoch": 0.03778768889139803, "grad_norm": 6.561291217803955, "learning_rate": 9.998630400633705e-05, "loss": 2.5572, "step": 502 }, { "epoch": 0.03786296317205819, "grad_norm": 6.337287902832031, "learning_rate": 9.99860172003945e-05, "loss": 2.2467, "step": 503 }, { "epoch": 0.03793823745271834, "grad_norm": 6.656157493591309, "learning_rate": 9.998572742292666e-05, "loss": 2.1208, "step": 504 }, { "epoch": 0.0380135117333785, "grad_norm": 5.564158916473389, "learning_rate": 9.998543467395076e-05, "loss": 2.4658, "step": 505 }, { "epoch": 0.03808878601403865, "grad_norm": 8.458843231201172, "learning_rate": 9.998513895348418e-05, "loss": 3.1493, "step": 506 }, { "epoch": 0.03816406029469881, "grad_norm": 6.628011703491211, "learning_rate": 9.998484026154453e-05, "loss": 2.367, "step": 507 }, { "epoch": 0.03823933457535896, "grad_norm": 6.011812210083008, "learning_rate": 9.998453859814953e-05, "loss": 2.2363, "step": 508 }, { "epoch": 0.03831460885601912, "grad_norm": 4.7522077560424805, "learning_rate": 9.998423396331715e-05, "loss": 2.5032, "step": 509 }, { "epoch": 0.03838988313667927, "grad_norm": 5.3855133056640625, "learning_rate": 9.998392635706547e-05, "loss": 2.7719, "step": 510 }, { "epoch": 0.038465157417339434, "grad_norm": 6.51707124710083, "learning_rate": 9.998361577941279e-05, "loss": 2.4928, "step": 511 }, { "epoch": 0.03854043169799959, "grad_norm": 5.331462860107422, "learning_rate": 9.998330223037757e-05, "loss": 2.3371, "step": 512 }, { "epoch": 0.038615705978659744, "grad_norm": 7.951931476593018, "learning_rate": 9.998298570997846e-05, "loss": 2.4478, "step": 513 }, { "epoch": 0.0386909802593199, "grad_norm": 7.166647911071777, "learning_rate": 9.998266621823426e-05, "loss": 2.3564, "step": 514 }, { "epoch": 0.038766254539980054, "grad_norm": 9.120553970336914, "learning_rate": 9.998234375516396e-05, "loss": 2.6303, "step": 515 }, { "epoch": 0.03884152882064021, "grad_norm": 6.1128058433532715, "learning_rate": 9.998201832078675e-05, "loss": 2.7501, "step": 516 }, { "epoch": 0.038916803101300364, "grad_norm": 7.445671081542969, "learning_rate": 9.998168991512197e-05, "loss": 2.4277, "step": 517 }, { "epoch": 0.03899207738196052, "grad_norm": 6.254639625549316, "learning_rate": 9.998135853818913e-05, "loss": 2.7413, "step": 518 }, { "epoch": 0.039067351662620674, "grad_norm": 8.722423553466797, "learning_rate": 9.998102419000794e-05, "loss": 2.341, "step": 519 }, { "epoch": 0.03914262594328083, "grad_norm": 7.148705959320068, "learning_rate": 9.998068687059827e-05, "loss": 2.5418, "step": 520 }, { "epoch": 0.039217900223940984, "grad_norm": 4.841706275939941, "learning_rate": 9.998034657998018e-05, "loss": 2.4428, "step": 521 }, { "epoch": 0.03929317450460114, "grad_norm": 4.844259262084961, "learning_rate": 9.99800033181739e-05, "loss": 2.4732, "step": 522 }, { "epoch": 0.039368448785261294, "grad_norm": 7.524613857269287, "learning_rate": 9.997965708519984e-05, "loss": 2.4265, "step": 523 }, { "epoch": 0.03944372306592145, "grad_norm": 7.401834964752197, "learning_rate": 9.997930788107854e-05, "loss": 2.2409, "step": 524 }, { "epoch": 0.039518997346581604, "grad_norm": 5.294851779937744, "learning_rate": 9.997895570583082e-05, "loss": 2.4899, "step": 525 }, { "epoch": 0.03959427162724176, "grad_norm": 12.990023612976074, "learning_rate": 9.997860055947758e-05, "loss": 2.5265, "step": 526 }, { "epoch": 0.03966954590790192, "grad_norm": 7.998562335968018, "learning_rate": 9.997824244203995e-05, "loss": 2.6798, "step": 527 }, { "epoch": 0.039744820188562076, "grad_norm": 6.187966823577881, "learning_rate": 9.997788135353919e-05, "loss": 2.3066, "step": 528 }, { "epoch": 0.03982009446922223, "grad_norm": 6.218991756439209, "learning_rate": 9.99775172939968e-05, "loss": 2.2707, "step": 529 }, { "epoch": 0.039895368749882386, "grad_norm": 5.636419773101807, "learning_rate": 9.997715026343442e-05, "loss": 2.5545, "step": 530 }, { "epoch": 0.03997064303054254, "grad_norm": 5.747351169586182, "learning_rate": 9.997678026187385e-05, "loss": 2.6234, "step": 531 }, { "epoch": 0.040045917311202696, "grad_norm": 9.34416675567627, "learning_rate": 9.997640728933708e-05, "loss": 2.9354, "step": 532 }, { "epoch": 0.04012119159186285, "grad_norm": 10.553180694580078, "learning_rate": 9.99760313458463e-05, "loss": 2.6615, "step": 533 }, { "epoch": 0.040196465872523006, "grad_norm": 6.410879135131836, "learning_rate": 9.997565243142386e-05, "loss": 2.7898, "step": 534 }, { "epoch": 0.04027174015318316, "grad_norm": 8.0240478515625, "learning_rate": 9.997527054609229e-05, "loss": 2.5834, "step": 535 }, { "epoch": 0.040347014433843316, "grad_norm": 6.889857292175293, "learning_rate": 9.997488568987425e-05, "loss": 2.5845, "step": 536 }, { "epoch": 0.04042228871450347, "grad_norm": 6.3225178718566895, "learning_rate": 9.997449786279267e-05, "loss": 2.807, "step": 537 }, { "epoch": 0.040497562995163626, "grad_norm": 5.587867736816406, "learning_rate": 9.997410706487058e-05, "loss": 2.3666, "step": 538 }, { "epoch": 0.04057283727582378, "grad_norm": 10.935953140258789, "learning_rate": 9.997371329613123e-05, "loss": 2.3211, "step": 539 }, { "epoch": 0.040648111556483936, "grad_norm": 6.5122246742248535, "learning_rate": 9.9973316556598e-05, "loss": 2.411, "step": 540 }, { "epoch": 0.04072338583714409, "grad_norm": 9.348227500915527, "learning_rate": 9.99729168462945e-05, "loss": 2.5361, "step": 541 }, { "epoch": 0.040798660117804246, "grad_norm": 8.161140441894531, "learning_rate": 9.997251416524446e-05, "loss": 2.4348, "step": 542 }, { "epoch": 0.04087393439846441, "grad_norm": 8.295262336730957, "learning_rate": 9.997210851347187e-05, "loss": 3.0491, "step": 543 }, { "epoch": 0.04094920867912456, "grad_norm": 5.279495716094971, "learning_rate": 9.997169989100081e-05, "loss": 2.5017, "step": 544 }, { "epoch": 0.04102448295978472, "grad_norm": 7.351101875305176, "learning_rate": 9.997128829785555e-05, "loss": 2.4604, "step": 545 }, { "epoch": 0.04109975724044487, "grad_norm": 7.537226676940918, "learning_rate": 9.997087373406063e-05, "loss": 2.6514, "step": 546 }, { "epoch": 0.04117503152110503, "grad_norm": 6.526947021484375, "learning_rate": 9.997045619964061e-05, "loss": 2.143, "step": 547 }, { "epoch": 0.04125030580176518, "grad_norm": 8.374302864074707, "learning_rate": 9.997003569462036e-05, "loss": 2.5148, "step": 548 }, { "epoch": 0.04132558008242534, "grad_norm": 6.060415744781494, "learning_rate": 9.996961221902487e-05, "loss": 2.3173, "step": 549 }, { "epoch": 0.04140085436308549, "grad_norm": 6.49837589263916, "learning_rate": 9.99691857728793e-05, "loss": 2.5405, "step": 550 }, { "epoch": 0.04147612864374565, "grad_norm": 5.977110862731934, "learning_rate": 9.996875635620903e-05, "loss": 2.3883, "step": 551 }, { "epoch": 0.0415514029244058, "grad_norm": 5.554208278656006, "learning_rate": 9.996832396903957e-05, "loss": 2.4085, "step": 552 }, { "epoch": 0.04162667720506596, "grad_norm": 7.002029895782471, "learning_rate": 9.996788861139664e-05, "loss": 2.039, "step": 553 }, { "epoch": 0.04170195148572611, "grad_norm": 5.461437702178955, "learning_rate": 9.996745028330607e-05, "loss": 2.3954, "step": 554 }, { "epoch": 0.04177722576638627, "grad_norm": 6.573638439178467, "learning_rate": 9.996700898479397e-05, "loss": 2.9218, "step": 555 }, { "epoch": 0.04185250004704642, "grad_norm": 6.566964149475098, "learning_rate": 9.996656471588657e-05, "loss": 2.6249, "step": 556 }, { "epoch": 0.04192777432770658, "grad_norm": 6.387213706970215, "learning_rate": 9.996611747661024e-05, "loss": 2.7764, "step": 557 }, { "epoch": 0.04200304860836673, "grad_norm": 5.735657215118408, "learning_rate": 9.996566726699163e-05, "loss": 2.665, "step": 558 }, { "epoch": 0.042078322889026895, "grad_norm": 7.310241222381592, "learning_rate": 9.996521408705744e-05, "loss": 2.5025, "step": 559 }, { "epoch": 0.04215359716968705, "grad_norm": 5.92233419418335, "learning_rate": 9.996475793683465e-05, "loss": 2.5994, "step": 560 }, { "epoch": 0.042228871450347205, "grad_norm": 5.580022811889648, "learning_rate": 9.996429881635036e-05, "loss": 2.6675, "step": 561 }, { "epoch": 0.04230414573100736, "grad_norm": 6.15976095199585, "learning_rate": 9.996383672563187e-05, "loss": 2.6914, "step": 562 }, { "epoch": 0.042379420011667515, "grad_norm": 4.10883903503418, "learning_rate": 9.996337166470667e-05, "loss": 2.5672, "step": 563 }, { "epoch": 0.04245469429232767, "grad_norm": 5.744843006134033, "learning_rate": 9.996290363360237e-05, "loss": 3.0443, "step": 564 }, { "epoch": 0.042529968572987825, "grad_norm": 7.262097358703613, "learning_rate": 9.996243263234679e-05, "loss": 2.4417, "step": 565 }, { "epoch": 0.04260524285364798, "grad_norm": 6.471399784088135, "learning_rate": 9.996195866096798e-05, "loss": 2.6812, "step": 566 }, { "epoch": 0.042680517134308135, "grad_norm": 5.091197490692139, "learning_rate": 9.996148171949406e-05, "loss": 2.5856, "step": 567 }, { "epoch": 0.04275579141496829, "grad_norm": 5.39486837387085, "learning_rate": 9.996100180795342e-05, "loss": 2.2875, "step": 568 }, { "epoch": 0.042831065695628445, "grad_norm": 4.359315395355225, "learning_rate": 9.996051892637457e-05, "loss": 2.6801, "step": 569 }, { "epoch": 0.0429063399762886, "grad_norm": 5.395928859710693, "learning_rate": 9.996003307478624e-05, "loss": 2.4626, "step": 570 }, { "epoch": 0.042981614256948755, "grad_norm": 5.215150356292725, "learning_rate": 9.995954425321728e-05, "loss": 2.383, "step": 571 }, { "epoch": 0.04305688853760891, "grad_norm": 8.549391746520996, "learning_rate": 9.995905246169675e-05, "loss": 2.4032, "step": 572 }, { "epoch": 0.043132162818269065, "grad_norm": 6.394818305969238, "learning_rate": 9.995855770025393e-05, "loss": 2.3881, "step": 573 }, { "epoch": 0.04320743709892922, "grad_norm": 5.099479675292969, "learning_rate": 9.995805996891819e-05, "loss": 2.598, "step": 574 }, { "epoch": 0.04328271137958938, "grad_norm": 8.931200981140137, "learning_rate": 9.995755926771912e-05, "loss": 2.2237, "step": 575 }, { "epoch": 0.04335798566024954, "grad_norm": 8.167283058166504, "learning_rate": 9.99570555966865e-05, "loss": 2.5803, "step": 576 }, { "epoch": 0.04343325994090969, "grad_norm": 6.921133518218994, "learning_rate": 9.995654895585028e-05, "loss": 2.4995, "step": 577 }, { "epoch": 0.04350853422156985, "grad_norm": 5.398970603942871, "learning_rate": 9.995603934524054e-05, "loss": 2.6149, "step": 578 }, { "epoch": 0.04358380850223, "grad_norm": 4.879952907562256, "learning_rate": 9.995552676488762e-05, "loss": 2.0798, "step": 579 }, { "epoch": 0.04365908278289016, "grad_norm": 4.6440749168396, "learning_rate": 9.995501121482199e-05, "loss": 2.4392, "step": 580 }, { "epoch": 0.04373435706355031, "grad_norm": 5.459920883178711, "learning_rate": 9.995449269507425e-05, "loss": 2.3939, "step": 581 }, { "epoch": 0.04380963134421047, "grad_norm": 7.002266883850098, "learning_rate": 9.995397120567527e-05, "loss": 2.614, "step": 582 }, { "epoch": 0.04388490562487062, "grad_norm": 7.5849127769470215, "learning_rate": 9.995344674665604e-05, "loss": 2.3912, "step": 583 }, { "epoch": 0.04396017990553078, "grad_norm": 6.636555194854736, "learning_rate": 9.995291931804771e-05, "loss": 2.9888, "step": 584 }, { "epoch": 0.04403545418619093, "grad_norm": 6.017383098602295, "learning_rate": 9.995238891988167e-05, "loss": 2.5443, "step": 585 }, { "epoch": 0.04411072846685109, "grad_norm": 5.583050727844238, "learning_rate": 9.995185555218944e-05, "loss": 2.5157, "step": 586 }, { "epoch": 0.04418600274751124, "grad_norm": 5.433871746063232, "learning_rate": 9.995131921500273e-05, "loss": 2.5615, "step": 587 }, { "epoch": 0.0442612770281714, "grad_norm": 5.141054153442383, "learning_rate": 9.995077990835342e-05, "loss": 2.4829, "step": 588 }, { "epoch": 0.04433655130883155, "grad_norm": 7.2861647605896, "learning_rate": 9.995023763227356e-05, "loss": 2.766, "step": 589 }, { "epoch": 0.04441182558949171, "grad_norm": 5.399280071258545, "learning_rate": 9.99496923867954e-05, "loss": 2.3811, "step": 590 }, { "epoch": 0.04448709987015186, "grad_norm": 6.386356830596924, "learning_rate": 9.994914417195134e-05, "loss": 2.4847, "step": 591 }, { "epoch": 0.044562374150812024, "grad_norm": 6.586709976196289, "learning_rate": 9.9948592987774e-05, "loss": 2.5281, "step": 592 }, { "epoch": 0.04463764843147218, "grad_norm": 7.796422004699707, "learning_rate": 9.994803883429612e-05, "loss": 2.7682, "step": 593 }, { "epoch": 0.044712922712132334, "grad_norm": 5.9834113121032715, "learning_rate": 9.994748171155064e-05, "loss": 2.2488, "step": 594 }, { "epoch": 0.04478819699279249, "grad_norm": 7.149685382843018, "learning_rate": 9.99469216195707e-05, "loss": 2.2831, "step": 595 }, { "epoch": 0.044863471273452644, "grad_norm": 8.159541130065918, "learning_rate": 9.994635855838956e-05, "loss": 2.2218, "step": 596 }, { "epoch": 0.0449387455541128, "grad_norm": 7.774938106536865, "learning_rate": 9.994579252804073e-05, "loss": 3.0546, "step": 597 }, { "epoch": 0.045014019834772954, "grad_norm": 6.304027557373047, "learning_rate": 9.994522352855784e-05, "loss": 2.4802, "step": 598 }, { "epoch": 0.04508929411543311, "grad_norm": 4.762304306030273, "learning_rate": 9.994465155997472e-05, "loss": 2.4738, "step": 599 }, { "epoch": 0.045164568396093265, "grad_norm": 8.871561050415039, "learning_rate": 9.994407662232539e-05, "loss": 2.4376, "step": 600 }, { "epoch": 0.04523984267675342, "grad_norm": 6.56936502456665, "learning_rate": 9.994349871564399e-05, "loss": 2.5849, "step": 601 }, { "epoch": 0.045315116957413575, "grad_norm": 5.996016502380371, "learning_rate": 9.99429178399649e-05, "loss": 2.148, "step": 602 }, { "epoch": 0.04539039123807373, "grad_norm": 6.419641971588135, "learning_rate": 9.994233399532265e-05, "loss": 2.2824, "step": 603 }, { "epoch": 0.045465665518733885, "grad_norm": 6.68547248840332, "learning_rate": 9.994174718175196e-05, "loss": 2.5177, "step": 604 }, { "epoch": 0.04554093979939404, "grad_norm": 6.640247821807861, "learning_rate": 9.994115739928767e-05, "loss": 3.0279, "step": 605 }, { "epoch": 0.045616214080054195, "grad_norm": 10.76980972290039, "learning_rate": 9.994056464796488e-05, "loss": 2.5157, "step": 606 }, { "epoch": 0.04569148836071435, "grad_norm": 5.20290994644165, "learning_rate": 9.993996892781883e-05, "loss": 2.2782, "step": 607 }, { "epoch": 0.04576676264137451, "grad_norm": 6.822776794433594, "learning_rate": 9.993937023888491e-05, "loss": 2.3409, "step": 608 }, { "epoch": 0.04584203692203467, "grad_norm": 5.55907678604126, "learning_rate": 9.993876858119872e-05, "loss": 2.843, "step": 609 }, { "epoch": 0.04591731120269482, "grad_norm": 5.106736183166504, "learning_rate": 9.993816395479603e-05, "loss": 2.216, "step": 610 }, { "epoch": 0.04599258548335498, "grad_norm": 8.372129440307617, "learning_rate": 9.993755635971278e-05, "loss": 2.1318, "step": 611 }, { "epoch": 0.04606785976401513, "grad_norm": 3.449436664581299, "learning_rate": 9.99369457959851e-05, "loss": 2.6827, "step": 612 }, { "epoch": 0.04614313404467529, "grad_norm": 5.324493408203125, "learning_rate": 9.993633226364927e-05, "loss": 2.618, "step": 613 }, { "epoch": 0.04621840832533544, "grad_norm": 5.592950344085693, "learning_rate": 9.993571576274177e-05, "loss": 2.3601, "step": 614 }, { "epoch": 0.0462936826059956, "grad_norm": 5.400594711303711, "learning_rate": 9.993509629329925e-05, "loss": 2.4891, "step": 615 }, { "epoch": 0.04636895688665575, "grad_norm": 5.7456583976745605, "learning_rate": 9.993447385535853e-05, "loss": 2.5015, "step": 616 }, { "epoch": 0.04644423116731591, "grad_norm": 5.353075981140137, "learning_rate": 9.993384844895662e-05, "loss": 2.773, "step": 617 }, { "epoch": 0.04651950544797606, "grad_norm": 5.943024158477783, "learning_rate": 9.99332200741307e-05, "loss": 2.1428, "step": 618 }, { "epoch": 0.04659477972863622, "grad_norm": 4.641613483428955, "learning_rate": 9.993258873091813e-05, "loss": 2.5845, "step": 619 }, { "epoch": 0.04667005400929637, "grad_norm": 7.78437614440918, "learning_rate": 9.993195441935642e-05, "loss": 2.4693, "step": 620 }, { "epoch": 0.04674532828995653, "grad_norm": 6.8994669914245605, "learning_rate": 9.99313171394833e-05, "loss": 3.0794, "step": 621 }, { "epoch": 0.04682060257061668, "grad_norm": 4.69108772277832, "learning_rate": 9.993067689133663e-05, "loss": 2.355, "step": 622 }, { "epoch": 0.04689587685127684, "grad_norm": 7.404256343841553, "learning_rate": 9.99300336749545e-05, "loss": 2.4818, "step": 623 }, { "epoch": 0.046971151131937, "grad_norm": 5.823089599609375, "learning_rate": 9.992938749037513e-05, "loss": 2.4112, "step": 624 }, { "epoch": 0.047046425412597154, "grad_norm": 5.970458507537842, "learning_rate": 9.992873833763693e-05, "loss": 2.3352, "step": 625 }, { "epoch": 0.04712169969325731, "grad_norm": 6.043220520019531, "learning_rate": 9.992808621677849e-05, "loss": 2.6431, "step": 626 }, { "epoch": 0.047196973973917464, "grad_norm": 6.244069576263428, "learning_rate": 9.99274311278386e-05, "loss": 2.2774, "step": 627 }, { "epoch": 0.04727224825457762, "grad_norm": 5.437317371368408, "learning_rate": 9.992677307085618e-05, "loss": 2.4144, "step": 628 }, { "epoch": 0.047347522535237774, "grad_norm": 5.5632829666137695, "learning_rate": 9.992611204587036e-05, "loss": 2.3307, "step": 629 }, { "epoch": 0.04742279681589793, "grad_norm": 6.590501308441162, "learning_rate": 9.992544805292044e-05, "loss": 2.6062, "step": 630 }, { "epoch": 0.047498071096558084, "grad_norm": 8.09814739227295, "learning_rate": 9.992478109204589e-05, "loss": 2.6395, "step": 631 }, { "epoch": 0.04757334537721824, "grad_norm": 7.408868312835693, "learning_rate": 9.992411116328634e-05, "loss": 1.9976, "step": 632 }, { "epoch": 0.047648619657878394, "grad_norm": 6.7912702560424805, "learning_rate": 9.992343826668164e-05, "loss": 2.6207, "step": 633 }, { "epoch": 0.04772389393853855, "grad_norm": 6.9812726974487305, "learning_rate": 9.992276240227176e-05, "loss": 2.7207, "step": 634 }, { "epoch": 0.047799168219198704, "grad_norm": 5.685427665710449, "learning_rate": 9.992208357009692e-05, "loss": 2.4244, "step": 635 }, { "epoch": 0.04787444249985886, "grad_norm": 5.536280632019043, "learning_rate": 9.992140177019745e-05, "loss": 2.44, "step": 636 }, { "epoch": 0.047949716780519014, "grad_norm": 9.64120864868164, "learning_rate": 9.992071700261389e-05, "loss": 2.6631, "step": 637 }, { "epoch": 0.04802499106117917, "grad_norm": 8.762601852416992, "learning_rate": 9.992002926738693e-05, "loss": 2.4953, "step": 638 }, { "epoch": 0.048100265341839324, "grad_norm": 5.968412399291992, "learning_rate": 9.991933856455747e-05, "loss": 2.5909, "step": 639 }, { "epoch": 0.048175539622499486, "grad_norm": 6.247552871704102, "learning_rate": 9.991864489416658e-05, "loss": 2.2363, "step": 640 }, { "epoch": 0.04825081390315964, "grad_norm": 4.866089344024658, "learning_rate": 9.991794825625547e-05, "loss": 2.3514, "step": 641 }, { "epoch": 0.048326088183819796, "grad_norm": 6.439634799957275, "learning_rate": 9.991724865086558e-05, "loss": 2.387, "step": 642 }, { "epoch": 0.04840136246447995, "grad_norm": 6.257725238800049, "learning_rate": 9.991654607803847e-05, "loss": 2.5005, "step": 643 }, { "epoch": 0.048476636745140106, "grad_norm": 7.805270195007324, "learning_rate": 9.991584053781593e-05, "loss": 2.2804, "step": 644 }, { "epoch": 0.04855191102580026, "grad_norm": 7.503831386566162, "learning_rate": 9.99151320302399e-05, "loss": 2.5358, "step": 645 }, { "epoch": 0.048627185306460416, "grad_norm": 5.44814920425415, "learning_rate": 9.991442055535248e-05, "loss": 2.2454, "step": 646 }, { "epoch": 0.04870245958712057, "grad_norm": 9.13061809539795, "learning_rate": 9.9913706113196e-05, "loss": 2.3681, "step": 647 }, { "epoch": 0.048777733867780726, "grad_norm": 5.287869930267334, "learning_rate": 9.991298870381289e-05, "loss": 2.446, "step": 648 }, { "epoch": 0.04885300814844088, "grad_norm": 5.838399410247803, "learning_rate": 9.991226832724583e-05, "loss": 2.2934, "step": 649 }, { "epoch": 0.048928282429101036, "grad_norm": 5.242301940917969, "learning_rate": 9.991154498353761e-05, "loss": 2.5359, "step": 650 }, { "epoch": 0.04900355670976119, "grad_norm": 4.780924320220947, "learning_rate": 9.991081867273127e-05, "loss": 2.543, "step": 651 }, { "epoch": 0.049078830990421346, "grad_norm": 5.0087995529174805, "learning_rate": 9.991008939486998e-05, "loss": 2.3301, "step": 652 }, { "epoch": 0.0491541052710815, "grad_norm": 5.926429748535156, "learning_rate": 9.990935714999707e-05, "loss": 2.6174, "step": 653 }, { "epoch": 0.049229379551741656, "grad_norm": 4.909923553466797, "learning_rate": 9.990862193815609e-05, "loss": 2.2662, "step": 654 }, { "epoch": 0.04930465383240181, "grad_norm": 6.0996623039245605, "learning_rate": 9.990788375939073e-05, "loss": 2.4263, "step": 655 }, { "epoch": 0.04937992811306197, "grad_norm": 7.067746639251709, "learning_rate": 9.99071426137449e-05, "loss": 2.7269, "step": 656 }, { "epoch": 0.04945520239372213, "grad_norm": 6.930025100708008, "learning_rate": 9.990639850126262e-05, "loss": 2.2417, "step": 657 }, { "epoch": 0.04953047667438228, "grad_norm": 8.447489738464355, "learning_rate": 9.990565142198816e-05, "loss": 2.51, "step": 658 }, { "epoch": 0.04960575095504244, "grad_norm": 5.6237077713012695, "learning_rate": 9.99049013759659e-05, "loss": 2.523, "step": 659 }, { "epoch": 0.04968102523570259, "grad_norm": 6.538666248321533, "learning_rate": 9.990414836324047e-05, "loss": 2.2978, "step": 660 }, { "epoch": 0.04975629951636275, "grad_norm": 7.247674942016602, "learning_rate": 9.99033923838566e-05, "loss": 2.3974, "step": 661 }, { "epoch": 0.0498315737970229, "grad_norm": 6.012288570404053, "learning_rate": 9.990263343785925e-05, "loss": 2.9195, "step": 662 }, { "epoch": 0.04990684807768306, "grad_norm": 7.676389217376709, "learning_rate": 9.990187152529353e-05, "loss": 2.4498, "step": 663 }, { "epoch": 0.04998212235834321, "grad_norm": 9.682546615600586, "learning_rate": 9.990110664620472e-05, "loss": 2.3964, "step": 664 }, { "epoch": 0.05005739663900337, "grad_norm": 6.515074729919434, "learning_rate": 9.990033880063832e-05, "loss": 2.3845, "step": 665 }, { "epoch": 0.05013267091966352, "grad_norm": 9.541638374328613, "learning_rate": 9.989956798863993e-05, "loss": 3.5656, "step": 666 }, { "epoch": 0.05020794520032368, "grad_norm": 7.824437141418457, "learning_rate": 9.989879421025542e-05, "loss": 2.9887, "step": 667 }, { "epoch": 0.05028321948098383, "grad_norm": 9.796915054321289, "learning_rate": 9.989801746553077e-05, "loss": 2.9921, "step": 668 }, { "epoch": 0.05035849376164399, "grad_norm": 6.329982757568359, "learning_rate": 9.989723775451215e-05, "loss": 2.5484, "step": 669 }, { "epoch": 0.05043376804230414, "grad_norm": 6.734201908111572, "learning_rate": 9.989645507724593e-05, "loss": 2.5668, "step": 670 }, { "epoch": 0.0505090423229643, "grad_norm": 5.232598781585693, "learning_rate": 9.98956694337786e-05, "loss": 2.4472, "step": 671 }, { "epoch": 0.05058431660362445, "grad_norm": 5.023258209228516, "learning_rate": 9.98948808241569e-05, "loss": 2.3763, "step": 672 }, { "epoch": 0.050659590884284615, "grad_norm": 8.550032615661621, "learning_rate": 9.98940892484277e-05, "loss": 2.3337, "step": 673 }, { "epoch": 0.05073486516494477, "grad_norm": 5.128488540649414, "learning_rate": 9.989329470663807e-05, "loss": 2.6499, "step": 674 }, { "epoch": 0.050810139445604925, "grad_norm": 5.089874267578125, "learning_rate": 9.989249719883522e-05, "loss": 2.5086, "step": 675 }, { "epoch": 0.05088541372626508, "grad_norm": 6.769781589508057, "learning_rate": 9.989169672506655e-05, "loss": 2.683, "step": 676 }, { "epoch": 0.050960688006925235, "grad_norm": 4.525973320007324, "learning_rate": 9.989089328537969e-05, "loss": 2.3601, "step": 677 }, { "epoch": 0.05103596228758539, "grad_norm": 5.513026714324951, "learning_rate": 9.989008687982238e-05, "loss": 2.37, "step": 678 }, { "epoch": 0.051111236568245545, "grad_norm": 6.706972599029541, "learning_rate": 9.988927750844253e-05, "loss": 2.0829, "step": 679 }, { "epoch": 0.0511865108489057, "grad_norm": 5.544755935668945, "learning_rate": 9.988846517128829e-05, "loss": 2.9128, "step": 680 }, { "epoch": 0.051261785129565855, "grad_norm": 5.722530841827393, "learning_rate": 9.988764986840795e-05, "loss": 2.5245, "step": 681 }, { "epoch": 0.05133705941022601, "grad_norm": 4.756669998168945, "learning_rate": 9.988683159984998e-05, "loss": 2.3442, "step": 682 }, { "epoch": 0.051412333690886165, "grad_norm": 7.017010688781738, "learning_rate": 9.988601036566298e-05, "loss": 2.2975, "step": 683 }, { "epoch": 0.05148760797154632, "grad_norm": 4.922911643981934, "learning_rate": 9.988518616589583e-05, "loss": 2.5241, "step": 684 }, { "epoch": 0.051562882252206475, "grad_norm": 4.759385108947754, "learning_rate": 9.98843590005975e-05, "loss": 2.1971, "step": 685 }, { "epoch": 0.05163815653286663, "grad_norm": 5.39924430847168, "learning_rate": 9.988352886981714e-05, "loss": 2.6424, "step": 686 }, { "epoch": 0.051713430813526785, "grad_norm": 5.887570381164551, "learning_rate": 9.988269577360414e-05, "loss": 2.3709, "step": 687 }, { "epoch": 0.05178870509418694, "grad_norm": 4.887275218963623, "learning_rate": 9.988185971200799e-05, "loss": 2.395, "step": 688 }, { "epoch": 0.0518639793748471, "grad_norm": 5.4255781173706055, "learning_rate": 9.988102068507842e-05, "loss": 2.4802, "step": 689 }, { "epoch": 0.05193925365550726, "grad_norm": 5.614969253540039, "learning_rate": 9.988017869286529e-05, "loss": 2.1813, "step": 690 }, { "epoch": 0.05201452793616741, "grad_norm": 7.493210792541504, "learning_rate": 9.987933373541866e-05, "loss": 2.3615, "step": 691 }, { "epoch": 0.05208980221682757, "grad_norm": 5.189207077026367, "learning_rate": 9.987848581278876e-05, "loss": 2.2156, "step": 692 }, { "epoch": 0.05216507649748772, "grad_norm": 6.868730068206787, "learning_rate": 9.9877634925026e-05, "loss": 2.5814, "step": 693 }, { "epoch": 0.05224035077814788, "grad_norm": 3.981825590133667, "learning_rate": 9.987678107218095e-05, "loss": 2.4568, "step": 694 }, { "epoch": 0.05231562505880803, "grad_norm": 6.582211971282959, "learning_rate": 9.987592425430439e-05, "loss": 2.4367, "step": 695 }, { "epoch": 0.05239089933946819, "grad_norm": 5.212049961090088, "learning_rate": 9.987506447144723e-05, "loss": 2.5464, "step": 696 }, { "epoch": 0.05246617362012834, "grad_norm": 6.014098644256592, "learning_rate": 9.98742017236606e-05, "loss": 2.1987, "step": 697 }, { "epoch": 0.0525414479007885, "grad_norm": 6.738082408905029, "learning_rate": 9.987333601099577e-05, "loss": 2.4259, "step": 698 }, { "epoch": 0.05261672218144865, "grad_norm": 5.633063316345215, "learning_rate": 9.987246733350423e-05, "loss": 2.5601, "step": 699 }, { "epoch": 0.05269199646210881, "grad_norm": 8.220179557800293, "learning_rate": 9.987159569123761e-05, "loss": 2.6459, "step": 700 }, { "epoch": 0.05276727074276896, "grad_norm": 5.723289966583252, "learning_rate": 9.987072108424772e-05, "loss": 2.4444, "step": 701 }, { "epoch": 0.05284254502342912, "grad_norm": 5.5899481773376465, "learning_rate": 9.986984351258656e-05, "loss": 2.6005, "step": 702 }, { "epoch": 0.05291781930408927, "grad_norm": 6.062432765960693, "learning_rate": 9.98689629763063e-05, "loss": 2.2026, "step": 703 }, { "epoch": 0.05299309358474943, "grad_norm": 7.311080455780029, "learning_rate": 9.986807947545927e-05, "loss": 2.318, "step": 704 }, { "epoch": 0.05306836786540959, "grad_norm": 5.180997848510742, "learning_rate": 9.986719301009799e-05, "loss": 2.4562, "step": 705 }, { "epoch": 0.053143642146069744, "grad_norm": 10.06629467010498, "learning_rate": 9.98663035802752e-05, "loss": 2.4868, "step": 706 }, { "epoch": 0.0532189164267299, "grad_norm": 6.045038223266602, "learning_rate": 9.986541118604373e-05, "loss": 2.4337, "step": 707 }, { "epoch": 0.053294190707390054, "grad_norm": 4.921200752258301, "learning_rate": 9.986451582745664e-05, "loss": 2.4036, "step": 708 }, { "epoch": 0.05336946498805021, "grad_norm": 4.26608419418335, "learning_rate": 9.986361750456716e-05, "loss": 2.2131, "step": 709 }, { "epoch": 0.053444739268710365, "grad_norm": 5.476470470428467, "learning_rate": 9.986271621742871e-05, "loss": 2.7903, "step": 710 }, { "epoch": 0.05352001354937052, "grad_norm": 7.235992908477783, "learning_rate": 9.986181196609485e-05, "loss": 2.2002, "step": 711 }, { "epoch": 0.053595287830030675, "grad_norm": 4.889042854309082, "learning_rate": 9.986090475061934e-05, "loss": 2.3387, "step": 712 }, { "epoch": 0.05367056211069083, "grad_norm": 5.935046672821045, "learning_rate": 9.98599945710561e-05, "loss": 2.3095, "step": 713 }, { "epoch": 0.053745836391350985, "grad_norm": 4.836548805236816, "learning_rate": 9.985908142745924e-05, "loss": 2.3641, "step": 714 }, { "epoch": 0.05382111067201114, "grad_norm": 6.320348262786865, "learning_rate": 9.985816531988306e-05, "loss": 2.4294, "step": 715 }, { "epoch": 0.053896384952671295, "grad_norm": 6.6849446296691895, "learning_rate": 9.985724624838201e-05, "loss": 2.3695, "step": 716 }, { "epoch": 0.05397165923333145, "grad_norm": 6.945517063140869, "learning_rate": 9.985632421301073e-05, "loss": 2.6486, "step": 717 }, { "epoch": 0.054046933513991605, "grad_norm": 10.14905071258545, "learning_rate": 9.985539921382402e-05, "loss": 2.2833, "step": 718 }, { "epoch": 0.05412220779465176, "grad_norm": 8.127327919006348, "learning_rate": 9.98544712508769e-05, "loss": 2.3559, "step": 719 }, { "epoch": 0.054197482075311915, "grad_norm": 6.484206676483154, "learning_rate": 9.98535403242245e-05, "loss": 2.3251, "step": 720 }, { "epoch": 0.05427275635597208, "grad_norm": 7.569386959075928, "learning_rate": 9.985260643392215e-05, "loss": 2.4524, "step": 721 }, { "epoch": 0.05434803063663223, "grad_norm": 4.585761547088623, "learning_rate": 9.985166958002543e-05, "loss": 2.4823, "step": 722 }, { "epoch": 0.05442330491729239, "grad_norm": 4.589804172515869, "learning_rate": 9.985072976258999e-05, "loss": 2.3001, "step": 723 }, { "epoch": 0.05449857919795254, "grad_norm": 6.320410251617432, "learning_rate": 9.984978698167167e-05, "loss": 2.3685, "step": 724 }, { "epoch": 0.0545738534786127, "grad_norm": 6.319190979003906, "learning_rate": 9.984884123732657e-05, "loss": 2.1191, "step": 725 }, { "epoch": 0.05464912775927285, "grad_norm": 7.725295543670654, "learning_rate": 9.984789252961088e-05, "loss": 2.4234, "step": 726 }, { "epoch": 0.05472440203993301, "grad_norm": 5.5601959228515625, "learning_rate": 9.984694085858105e-05, "loss": 2.5882, "step": 727 }, { "epoch": 0.05479967632059316, "grad_norm": 6.580722332000732, "learning_rate": 9.984598622429356e-05, "loss": 2.7962, "step": 728 }, { "epoch": 0.05487495060125332, "grad_norm": 6.733530521392822, "learning_rate": 9.984502862680524e-05, "loss": 2.2311, "step": 729 }, { "epoch": 0.05495022488191347, "grad_norm": 6.550483226776123, "learning_rate": 9.984406806617298e-05, "loss": 2.6386, "step": 730 }, { "epoch": 0.05502549916257363, "grad_norm": 8.7761812210083, "learning_rate": 9.984310454245389e-05, "loss": 3.1246, "step": 731 }, { "epoch": 0.05510077344323378, "grad_norm": 5.58363151550293, "learning_rate": 9.984213805570527e-05, "loss": 2.9234, "step": 732 }, { "epoch": 0.05517604772389394, "grad_norm": 7.577469348907471, "learning_rate": 9.984116860598454e-05, "loss": 2.532, "step": 733 }, { "epoch": 0.05525132200455409, "grad_norm": 6.627183437347412, "learning_rate": 9.984019619334935e-05, "loss": 2.4154, "step": 734 }, { "epoch": 0.05532659628521425, "grad_norm": 6.652490139007568, "learning_rate": 9.98392208178575e-05, "loss": 2.1275, "step": 735 }, { "epoch": 0.0554018705658744, "grad_norm": 6.850701808929443, "learning_rate": 9.983824247956699e-05, "loss": 2.0242, "step": 736 }, { "epoch": 0.055477144846534564, "grad_norm": 8.733121871948242, "learning_rate": 9.983726117853595e-05, "loss": 2.6301, "step": 737 }, { "epoch": 0.05555241912719472, "grad_norm": 5.286831855773926, "learning_rate": 9.983627691482273e-05, "loss": 2.8511, "step": 738 }, { "epoch": 0.055627693407854874, "grad_norm": 7.479588985443115, "learning_rate": 9.983528968848585e-05, "loss": 2.3986, "step": 739 }, { "epoch": 0.05570296768851503, "grad_norm": 4.2901153564453125, "learning_rate": 9.983429949958401e-05, "loss": 2.4922, "step": 740 }, { "epoch": 0.055778241969175184, "grad_norm": 4.9687323570251465, "learning_rate": 9.983330634817604e-05, "loss": 2.6474, "step": 741 }, { "epoch": 0.05585351624983534, "grad_norm": 5.911922454833984, "learning_rate": 9.9832310234321e-05, "loss": 2.2, "step": 742 }, { "epoch": 0.055928790530495494, "grad_norm": 5.424359321594238, "learning_rate": 9.983131115807809e-05, "loss": 2.464, "step": 743 }, { "epoch": 0.05600406481115565, "grad_norm": 6.540555000305176, "learning_rate": 9.983030911950671e-05, "loss": 2.2448, "step": 744 }, { "epoch": 0.056079339091815804, "grad_norm": 5.263029098510742, "learning_rate": 9.982930411866644e-05, "loss": 2.3292, "step": 745 }, { "epoch": 0.05615461337247596, "grad_norm": 4.780468940734863, "learning_rate": 9.982829615561703e-05, "loss": 2.4589, "step": 746 }, { "epoch": 0.056229887653136114, "grad_norm": 7.426937103271484, "learning_rate": 9.982728523041838e-05, "loss": 2.0081, "step": 747 }, { "epoch": 0.05630516193379627, "grad_norm": 5.775896072387695, "learning_rate": 9.982627134313058e-05, "loss": 2.369, "step": 748 }, { "epoch": 0.056380436214456424, "grad_norm": 6.047148704528809, "learning_rate": 9.982525449381392e-05, "loss": 2.1239, "step": 749 }, { "epoch": 0.05645571049511658, "grad_norm": 5.515091419219971, "learning_rate": 9.982423468252886e-05, "loss": 2.5432, "step": 750 }, { "epoch": 0.056530984775776734, "grad_norm": 5.722801685333252, "learning_rate": 9.9823211909336e-05, "loss": 2.8707, "step": 751 }, { "epoch": 0.05660625905643689, "grad_norm": 4.457740783691406, "learning_rate": 9.982218617429615e-05, "loss": 2.4601, "step": 752 }, { "epoch": 0.056681533337097044, "grad_norm": 6.8544182777404785, "learning_rate": 9.982115747747029e-05, "loss": 2.0331, "step": 753 }, { "epoch": 0.056756807617757206, "grad_norm": 7.249621868133545, "learning_rate": 9.982012581891958e-05, "loss": 2.2857, "step": 754 }, { "epoch": 0.05683208189841736, "grad_norm": 6.304576396942139, "learning_rate": 9.981909119870531e-05, "loss": 2.5246, "step": 755 }, { "epoch": 0.056907356179077516, "grad_norm": 5.6655497550964355, "learning_rate": 9.981805361688906e-05, "loss": 2.3135, "step": 756 }, { "epoch": 0.05698263045973767, "grad_norm": 7.412888050079346, "learning_rate": 9.981701307353243e-05, "loss": 2.6853, "step": 757 }, { "epoch": 0.057057904740397826, "grad_norm": 6.149228096008301, "learning_rate": 9.981596956869732e-05, "loss": 2.2792, "step": 758 }, { "epoch": 0.05713317902105798, "grad_norm": 4.461668968200684, "learning_rate": 9.981492310244576e-05, "loss": 2.429, "step": 759 }, { "epoch": 0.057208453301718136, "grad_norm": 8.047645568847656, "learning_rate": 9.981387367483997e-05, "loss": 2.5823, "step": 760 }, { "epoch": 0.05728372758237829, "grad_norm": 5.71897029876709, "learning_rate": 9.981282128594232e-05, "loss": 2.3675, "step": 761 }, { "epoch": 0.057359001863038446, "grad_norm": 5.366356372833252, "learning_rate": 9.981176593581537e-05, "loss": 2.2436, "step": 762 }, { "epoch": 0.0574342761436986, "grad_norm": 6.393449306488037, "learning_rate": 9.981070762452184e-05, "loss": 1.9928, "step": 763 }, { "epoch": 0.057509550424358756, "grad_norm": 5.603578090667725, "learning_rate": 9.980964635212469e-05, "loss": 2.5008, "step": 764 }, { "epoch": 0.05758482470501891, "grad_norm": 5.958922863006592, "learning_rate": 9.980858211868697e-05, "loss": 2.295, "step": 765 }, { "epoch": 0.057660098985679066, "grad_norm": 7.110422134399414, "learning_rate": 9.980751492427197e-05, "loss": 2.0474, "step": 766 }, { "epoch": 0.05773537326633922, "grad_norm": 7.016932964324951, "learning_rate": 9.980644476894311e-05, "loss": 2.3051, "step": 767 }, { "epoch": 0.057810647546999376, "grad_norm": 7.762566089630127, "learning_rate": 9.980537165276402e-05, "loss": 2.3683, "step": 768 }, { "epoch": 0.05788592182765953, "grad_norm": 7.152153491973877, "learning_rate": 9.98042955757985e-05, "loss": 2.5303, "step": 769 }, { "epoch": 0.05796119610831969, "grad_norm": 9.040239334106445, "learning_rate": 9.980321653811051e-05, "loss": 2.4154, "step": 770 }, { "epoch": 0.05803647038897985, "grad_norm": 6.318729877471924, "learning_rate": 9.980213453976421e-05, "loss": 2.3276, "step": 771 }, { "epoch": 0.05811174466964, "grad_norm": 7.08339262008667, "learning_rate": 9.980104958082389e-05, "loss": 2.3597, "step": 772 }, { "epoch": 0.05818701895030016, "grad_norm": 6.9691386222839355, "learning_rate": 9.979996166135408e-05, "loss": 2.5083, "step": 773 }, { "epoch": 0.05826229323096031, "grad_norm": 7.71740198135376, "learning_rate": 9.979887078141945e-05, "loss": 2.1033, "step": 774 }, { "epoch": 0.05833756751162047, "grad_norm": 4.721714019775391, "learning_rate": 9.979777694108483e-05, "loss": 2.2884, "step": 775 }, { "epoch": 0.05841284179228062, "grad_norm": 4.897715091705322, "learning_rate": 9.979668014041524e-05, "loss": 2.4906, "step": 776 }, { "epoch": 0.05848811607294078, "grad_norm": 5.300808429718018, "learning_rate": 9.979558037947591e-05, "loss": 2.2899, "step": 777 }, { "epoch": 0.05856339035360093, "grad_norm": 6.205648899078369, "learning_rate": 9.979447765833221e-05, "loss": 2.0926, "step": 778 }, { "epoch": 0.05863866463426109, "grad_norm": 5.6532464027404785, "learning_rate": 9.979337197704969e-05, "loss": 2.4447, "step": 779 }, { "epoch": 0.05871393891492124, "grad_norm": 8.568648338317871, "learning_rate": 9.97922633356941e-05, "loss": 2.1264, "step": 780 }, { "epoch": 0.0587892131955814, "grad_norm": 5.700965404510498, "learning_rate": 9.979115173433128e-05, "loss": 2.491, "step": 781 }, { "epoch": 0.05886448747624155, "grad_norm": 7.4272894859313965, "learning_rate": 9.97900371730274e-05, "loss": 2.3558, "step": 782 }, { "epoch": 0.05893976175690171, "grad_norm": 5.166018009185791, "learning_rate": 9.978891965184864e-05, "loss": 2.5376, "step": 783 }, { "epoch": 0.05901503603756186, "grad_norm": 8.077032089233398, "learning_rate": 9.97877991708615e-05, "loss": 2.1797, "step": 784 }, { "epoch": 0.05909031031822202, "grad_norm": 7.215895652770996, "learning_rate": 9.978667573013253e-05, "loss": 2.5612, "step": 785 }, { "epoch": 0.05916558459888218, "grad_norm": 8.063216209411621, "learning_rate": 9.978554932972855e-05, "loss": 2.4893, "step": 786 }, { "epoch": 0.059240858879542335, "grad_norm": 9.267261505126953, "learning_rate": 9.97844199697165e-05, "loss": 2.4537, "step": 787 }, { "epoch": 0.05931613316020249, "grad_norm": 9.832077980041504, "learning_rate": 9.978328765016355e-05, "loss": 2.5387, "step": 788 }, { "epoch": 0.059391407440862645, "grad_norm": 5.216586112976074, "learning_rate": 9.978215237113699e-05, "loss": 2.5711, "step": 789 }, { "epoch": 0.0594666817215228, "grad_norm": 6.500823974609375, "learning_rate": 9.978101413270431e-05, "loss": 2.7454, "step": 790 }, { "epoch": 0.059541956002182955, "grad_norm": 6.540772914886475, "learning_rate": 9.977987293493317e-05, "loss": 1.9508, "step": 791 }, { "epoch": 0.05961723028284311, "grad_norm": 6.135433197021484, "learning_rate": 9.977872877789143e-05, "loss": 2.1823, "step": 792 }, { "epoch": 0.059692504563503265, "grad_norm": 6.370469093322754, "learning_rate": 9.977758166164707e-05, "loss": 2.2792, "step": 793 }, { "epoch": 0.05976777884416342, "grad_norm": 5.538809299468994, "learning_rate": 9.977643158626835e-05, "loss": 2.4347, "step": 794 }, { "epoch": 0.059843053124823575, "grad_norm": 7.072920799255371, "learning_rate": 9.977527855182357e-05, "loss": 2.2672, "step": 795 }, { "epoch": 0.05991832740548373, "grad_norm": 7.1014509201049805, "learning_rate": 9.97741225583813e-05, "loss": 3.158, "step": 796 }, { "epoch": 0.059993601686143885, "grad_norm": 6.903295040130615, "learning_rate": 9.977296360601027e-05, "loss": 2.5023, "step": 797 }, { "epoch": 0.06006887596680404, "grad_norm": 5.6039323806762695, "learning_rate": 9.977180169477937e-05, "loss": 2.6403, "step": 798 }, { "epoch": 0.060144150247464195, "grad_norm": 5.642335891723633, "learning_rate": 9.977063682475767e-05, "loss": 2.513, "step": 799 }, { "epoch": 0.06021942452812435, "grad_norm": 7.554859638214111, "learning_rate": 9.976946899601441e-05, "loss": 2.1054, "step": 800 }, { "epoch": 0.060294698808784505, "grad_norm": 7.001617431640625, "learning_rate": 9.976829820861904e-05, "loss": 2.3187, "step": 801 }, { "epoch": 0.06036997308944467, "grad_norm": 6.228675842285156, "learning_rate": 9.976712446264113e-05, "loss": 2.068, "step": 802 }, { "epoch": 0.06044524737010482, "grad_norm": 6.299452304840088, "learning_rate": 9.976594775815047e-05, "loss": 2.3287, "step": 803 }, { "epoch": 0.06052052165076498, "grad_norm": 6.488040924072266, "learning_rate": 9.9764768095217e-05, "loss": 2.3386, "step": 804 }, { "epoch": 0.06059579593142513, "grad_norm": 7.455612659454346, "learning_rate": 9.976358547391088e-05, "loss": 2.4758, "step": 805 }, { "epoch": 0.06067107021208529, "grad_norm": 5.75909423828125, "learning_rate": 9.976239989430239e-05, "loss": 2.2389, "step": 806 }, { "epoch": 0.06074634449274544, "grad_norm": 6.22749662399292, "learning_rate": 9.976121135646199e-05, "loss": 2.7626, "step": 807 }, { "epoch": 0.0608216187734056, "grad_norm": 6.732685565948486, "learning_rate": 9.976001986046036e-05, "loss": 3.0899, "step": 808 }, { "epoch": 0.06089689305406575, "grad_norm": 7.681735992431641, "learning_rate": 9.975882540636834e-05, "loss": 2.4637, "step": 809 }, { "epoch": 0.06097216733472591, "grad_norm": 4.422317981719971, "learning_rate": 9.975762799425693e-05, "loss": 2.2814, "step": 810 }, { "epoch": 0.06104744161538606, "grad_norm": 9.311334609985352, "learning_rate": 9.975642762419728e-05, "loss": 2.5256, "step": 811 }, { "epoch": 0.06112271589604622, "grad_norm": 7.12771463394165, "learning_rate": 9.975522429626079e-05, "loss": 2.5477, "step": 812 }, { "epoch": 0.06119799017670637, "grad_norm": 5.93684720993042, "learning_rate": 9.975401801051897e-05, "loss": 2.3224, "step": 813 }, { "epoch": 0.06127326445736653, "grad_norm": 5.034492015838623, "learning_rate": 9.975280876704353e-05, "loss": 2.382, "step": 814 }, { "epoch": 0.06134853873802668, "grad_norm": 7.287402629852295, "learning_rate": 9.97515965659064e-05, "loss": 2.6205, "step": 815 }, { "epoch": 0.06142381301868684, "grad_norm": 6.875100612640381, "learning_rate": 9.975038140717959e-05, "loss": 2.4561, "step": 816 }, { "epoch": 0.06149908729934699, "grad_norm": 6.032425403594971, "learning_rate": 9.974916329093535e-05, "loss": 2.4004, "step": 817 }, { "epoch": 0.061574361580007154, "grad_norm": 7.3820295333862305, "learning_rate": 9.97479422172461e-05, "loss": 2.1098, "step": 818 }, { "epoch": 0.06164963586066731, "grad_norm": 4.1829142570495605, "learning_rate": 9.974671818618444e-05, "loss": 2.3091, "step": 819 }, { "epoch": 0.061724910141327465, "grad_norm": 5.894301414489746, "learning_rate": 9.974549119782311e-05, "loss": 2.4662, "step": 820 }, { "epoch": 0.06180018442198762, "grad_norm": 5.369170665740967, "learning_rate": 9.974426125223507e-05, "loss": 2.2282, "step": 821 }, { "epoch": 0.061875458702647775, "grad_norm": 5.439139366149902, "learning_rate": 9.974302834949343e-05, "loss": 2.5761, "step": 822 }, { "epoch": 0.06195073298330793, "grad_norm": 5.655567169189453, "learning_rate": 9.974179248967148e-05, "loss": 2.155, "step": 823 }, { "epoch": 0.062026007263968085, "grad_norm": 7.799777030944824, "learning_rate": 9.97405536728427e-05, "loss": 2.4568, "step": 824 }, { "epoch": 0.06210128154462824, "grad_norm": 6.679040431976318, "learning_rate": 9.973931189908073e-05, "loss": 2.6116, "step": 825 }, { "epoch": 0.062176555825288395, "grad_norm": 4.565517902374268, "learning_rate": 9.973806716845937e-05, "loss": 2.2689, "step": 826 }, { "epoch": 0.06225183010594855, "grad_norm": 8.25964069366455, "learning_rate": 9.973681948105263e-05, "loss": 2.2068, "step": 827 }, { "epoch": 0.062327104386608705, "grad_norm": 5.433253765106201, "learning_rate": 9.973556883693469e-05, "loss": 2.4395, "step": 828 }, { "epoch": 0.06240237866726886, "grad_norm": 7.11344575881958, "learning_rate": 9.97343152361799e-05, "loss": 2.2614, "step": 829 }, { "epoch": 0.062477652947929015, "grad_norm": 5.474208354949951, "learning_rate": 9.973305867886276e-05, "loss": 2.152, "step": 830 }, { "epoch": 0.06255292722858917, "grad_norm": 6.635583877563477, "learning_rate": 9.9731799165058e-05, "loss": 2.3749, "step": 831 }, { "epoch": 0.06262820150924933, "grad_norm": 6.661943435668945, "learning_rate": 9.973053669484045e-05, "loss": 2.354, "step": 832 }, { "epoch": 0.06270347578990948, "grad_norm": 5.574874401092529, "learning_rate": 9.97292712682852e-05, "loss": 2.0282, "step": 833 }, { "epoch": 0.06277875007056964, "grad_norm": 5.962992191314697, "learning_rate": 9.972800288546746e-05, "loss": 2.44, "step": 834 }, { "epoch": 0.06285402435122979, "grad_norm": 6.058572292327881, "learning_rate": 9.972673154646264e-05, "loss": 2.4435, "step": 835 }, { "epoch": 0.06292929863188995, "grad_norm": 6.082131385803223, "learning_rate": 9.97254572513463e-05, "loss": 2.4655, "step": 836 }, { "epoch": 0.0630045729125501, "grad_norm": 6.920094966888428, "learning_rate": 9.972418000019422e-05, "loss": 2.2728, "step": 837 }, { "epoch": 0.06307984719321026, "grad_norm": 6.013792514801025, "learning_rate": 9.972289979308229e-05, "loss": 2.2486, "step": 838 }, { "epoch": 0.06315512147387041, "grad_norm": 6.150375843048096, "learning_rate": 9.972161663008666e-05, "loss": 2.2234, "step": 839 }, { "epoch": 0.06323039575453057, "grad_norm": 6.13953971862793, "learning_rate": 9.972033051128357e-05, "loss": 2.2833, "step": 840 }, { "epoch": 0.06330567003519072, "grad_norm": 6.005297660827637, "learning_rate": 9.971904143674951e-05, "loss": 2.6507, "step": 841 }, { "epoch": 0.06338094431585088, "grad_norm": 5.4421772956848145, "learning_rate": 9.971774940656109e-05, "loss": 2.6106, "step": 842 }, { "epoch": 0.06345621859651104, "grad_norm": 6.671179294586182, "learning_rate": 9.971645442079513e-05, "loss": 2.743, "step": 843 }, { "epoch": 0.06353149287717119, "grad_norm": 4.749128818511963, "learning_rate": 9.971515647952861e-05, "loss": 2.8267, "step": 844 }, { "epoch": 0.06360676715783135, "grad_norm": 4.922209739685059, "learning_rate": 9.971385558283868e-05, "loss": 2.6258, "step": 845 }, { "epoch": 0.0636820414384915, "grad_norm": 7.677456378936768, "learning_rate": 9.971255173080267e-05, "loss": 2.4213, "step": 846 }, { "epoch": 0.06375731571915166, "grad_norm": 5.307931423187256, "learning_rate": 9.971124492349812e-05, "loss": 2.7683, "step": 847 }, { "epoch": 0.06383258999981181, "grad_norm": 6.525881767272949, "learning_rate": 9.970993516100269e-05, "loss": 2.2817, "step": 848 }, { "epoch": 0.06390786428047197, "grad_norm": 6.345904350280762, "learning_rate": 9.970862244339425e-05, "loss": 2.1466, "step": 849 }, { "epoch": 0.06398313856113212, "grad_norm": 5.944136142730713, "learning_rate": 9.970730677075082e-05, "loss": 2.3249, "step": 850 }, { "epoch": 0.06405841284179228, "grad_norm": 6.007492542266846, "learning_rate": 9.970598814315065e-05, "loss": 2.3649, "step": 851 }, { "epoch": 0.06413368712245243, "grad_norm": 5.802694320678711, "learning_rate": 9.97046665606721e-05, "loss": 2.7615, "step": 852 }, { "epoch": 0.0642089614031126, "grad_norm": 9.016657829284668, "learning_rate": 9.970334202339373e-05, "loss": 2.37, "step": 853 }, { "epoch": 0.06428423568377274, "grad_norm": 6.420614242553711, "learning_rate": 9.97020145313943e-05, "loss": 2.1526, "step": 854 }, { "epoch": 0.0643595099644329, "grad_norm": 6.139922142028809, "learning_rate": 9.970068408475272e-05, "loss": 2.6131, "step": 855 }, { "epoch": 0.06443478424509305, "grad_norm": 5.153894901275635, "learning_rate": 9.969935068354807e-05, "loss": 2.3414, "step": 856 }, { "epoch": 0.06451005852575321, "grad_norm": 8.590038299560547, "learning_rate": 9.969801432785965e-05, "loss": 2.5912, "step": 857 }, { "epoch": 0.06458533280641336, "grad_norm": 5.471381664276123, "learning_rate": 9.969667501776685e-05, "loss": 2.1089, "step": 858 }, { "epoch": 0.06466060708707352, "grad_norm": 4.682456970214844, "learning_rate": 9.969533275334933e-05, "loss": 2.4857, "step": 859 }, { "epoch": 0.06473588136773369, "grad_norm": 6.165603160858154, "learning_rate": 9.969398753468686e-05, "loss": 2.5575, "step": 860 }, { "epoch": 0.06481115564839383, "grad_norm": 6.946765422821045, "learning_rate": 9.969263936185943e-05, "loss": 2.5268, "step": 861 }, { "epoch": 0.064886429929054, "grad_norm": 5.101597309112549, "learning_rate": 9.969128823494717e-05, "loss": 2.6535, "step": 862 }, { "epoch": 0.06496170420971414, "grad_norm": 6.410821437835693, "learning_rate": 9.96899341540304e-05, "loss": 2.6071, "step": 863 }, { "epoch": 0.0650369784903743, "grad_norm": 5.042365074157715, "learning_rate": 9.968857711918963e-05, "loss": 2.4744, "step": 864 }, { "epoch": 0.06511225277103445, "grad_norm": 5.671431064605713, "learning_rate": 9.968721713050552e-05, "loss": 2.4302, "step": 865 }, { "epoch": 0.06518752705169462, "grad_norm": 5.558021068572998, "learning_rate": 9.968585418805891e-05, "loss": 2.2342, "step": 866 }, { "epoch": 0.06526280133235476, "grad_norm": 5.795393466949463, "learning_rate": 9.968448829193085e-05, "loss": 2.639, "step": 867 }, { "epoch": 0.06533807561301493, "grad_norm": 5.995419025421143, "learning_rate": 9.968311944220251e-05, "loss": 2.3098, "step": 868 }, { "epoch": 0.06541334989367507, "grad_norm": 4.014678001403809, "learning_rate": 9.968174763895529e-05, "loss": 1.9536, "step": 869 }, { "epoch": 0.06548862417433524, "grad_norm": 4.1848859786987305, "learning_rate": 9.96803728822707e-05, "loss": 2.2224, "step": 870 }, { "epoch": 0.06556389845499538, "grad_norm": 5.406251430511475, "learning_rate": 9.967899517223051e-05, "loss": 2.2532, "step": 871 }, { "epoch": 0.06563917273565555, "grad_norm": 7.1709465980529785, "learning_rate": 9.96776145089166e-05, "loss": 2.3149, "step": 872 }, { "epoch": 0.0657144470163157, "grad_norm": 5.1394195556640625, "learning_rate": 9.967623089241105e-05, "loss": 2.332, "step": 873 }, { "epoch": 0.06578972129697586, "grad_norm": 5.810478687286377, "learning_rate": 9.967484432279611e-05, "loss": 2.4215, "step": 874 }, { "epoch": 0.06586499557763602, "grad_norm": 5.735139846801758, "learning_rate": 9.96734548001542e-05, "loss": 2.2825, "step": 875 }, { "epoch": 0.06594026985829617, "grad_norm": 6.34000301361084, "learning_rate": 9.967206232456794e-05, "loss": 2.3447, "step": 876 }, { "epoch": 0.06601554413895633, "grad_norm": 6.4331889152526855, "learning_rate": 9.967066689612009e-05, "loss": 2.6042, "step": 877 }, { "epoch": 0.06609081841961648, "grad_norm": 6.684754848480225, "learning_rate": 9.966926851489363e-05, "loss": 2.1852, "step": 878 }, { "epoch": 0.06616609270027664, "grad_norm": 6.291850566864014, "learning_rate": 9.966786718097165e-05, "loss": 2.0725, "step": 879 }, { "epoch": 0.06624136698093679, "grad_norm": 7.072674751281738, "learning_rate": 9.96664628944375e-05, "loss": 2.4599, "step": 880 }, { "epoch": 0.06631664126159695, "grad_norm": 5.791036605834961, "learning_rate": 9.966505565537463e-05, "loss": 2.5734, "step": 881 }, { "epoch": 0.0663919155422571, "grad_norm": 7.2464919090271, "learning_rate": 9.96636454638667e-05, "loss": 2.8932, "step": 882 }, { "epoch": 0.06646718982291726, "grad_norm": 5.751157760620117, "learning_rate": 9.966223231999757e-05, "loss": 2.1895, "step": 883 }, { "epoch": 0.0665424641035774, "grad_norm": 5.123445510864258, "learning_rate": 9.966081622385122e-05, "loss": 2.0499, "step": 884 }, { "epoch": 0.06661773838423757, "grad_norm": 6.297735691070557, "learning_rate": 9.965939717551183e-05, "loss": 2.4749, "step": 885 }, { "epoch": 0.06669301266489772, "grad_norm": 4.268171310424805, "learning_rate": 9.965797517506377e-05, "loss": 2.5821, "step": 886 }, { "epoch": 0.06676828694555788, "grad_norm": 7.6090474128723145, "learning_rate": 9.965655022259157e-05, "loss": 2.2455, "step": 887 }, { "epoch": 0.06684356122621803, "grad_norm": 5.460054397583008, "learning_rate": 9.965512231817992e-05, "loss": 2.3015, "step": 888 }, { "epoch": 0.06691883550687819, "grad_norm": 5.82401180267334, "learning_rate": 9.965369146191376e-05, "loss": 2.156, "step": 889 }, { "epoch": 0.06699410978753834, "grad_norm": 7.957887172698975, "learning_rate": 9.965225765387809e-05, "loss": 2.4905, "step": 890 }, { "epoch": 0.0670693840681985, "grad_norm": 6.796222686767578, "learning_rate": 9.965082089415819e-05, "loss": 2.3148, "step": 891 }, { "epoch": 0.06714465834885866, "grad_norm": 4.408276557922363, "learning_rate": 9.964938118283944e-05, "loss": 2.1697, "step": 892 }, { "epoch": 0.06721993262951881, "grad_norm": 6.927859306335449, "learning_rate": 9.964793852000744e-05, "loss": 2.0336, "step": 893 }, { "epoch": 0.06729520691017897, "grad_norm": 6.1446146965026855, "learning_rate": 9.964649290574794e-05, "loss": 2.3938, "step": 894 }, { "epoch": 0.06737048119083912, "grad_norm": 6.0834832191467285, "learning_rate": 9.964504434014691e-05, "loss": 2.4393, "step": 895 }, { "epoch": 0.06744575547149928, "grad_norm": 5.697766304016113, "learning_rate": 9.964359282329044e-05, "loss": 2.2426, "step": 896 }, { "epoch": 0.06752102975215943, "grad_norm": 7.894393444061279, "learning_rate": 9.96421383552648e-05, "loss": 2.5878, "step": 897 }, { "epoch": 0.06759630403281959, "grad_norm": 5.909679412841797, "learning_rate": 9.964068093615649e-05, "loss": 2.3413, "step": 898 }, { "epoch": 0.06767157831347974, "grad_norm": 6.938018798828125, "learning_rate": 9.963922056605213e-05, "loss": 2.2678, "step": 899 }, { "epoch": 0.0677468525941399, "grad_norm": 8.607162475585938, "learning_rate": 9.963775724503853e-05, "loss": 2.7139, "step": 900 }, { "epoch": 0.06782212687480005, "grad_norm": 6.983379364013672, "learning_rate": 9.96362909732027e-05, "loss": 2.3579, "step": 901 }, { "epoch": 0.06789740115546021, "grad_norm": 7.071916103363037, "learning_rate": 9.963482175063179e-05, "loss": 2.7323, "step": 902 }, { "epoch": 0.06797267543612036, "grad_norm": 5.51076602935791, "learning_rate": 9.963334957741315e-05, "loss": 2.4123, "step": 903 }, { "epoch": 0.06804794971678052, "grad_norm": 6.5850653648376465, "learning_rate": 9.963187445363428e-05, "loss": 2.4006, "step": 904 }, { "epoch": 0.06812322399744067, "grad_norm": 4.215870380401611, "learning_rate": 9.96303963793829e-05, "loss": 2.3117, "step": 905 }, { "epoch": 0.06819849827810083, "grad_norm": 6.121519565582275, "learning_rate": 9.962891535474685e-05, "loss": 2.3227, "step": 906 }, { "epoch": 0.06827377255876099, "grad_norm": 6.652968883514404, "learning_rate": 9.962743137981417e-05, "loss": 2.0054, "step": 907 }, { "epoch": 0.06834904683942114, "grad_norm": 6.960452079772949, "learning_rate": 9.96259444546731e-05, "loss": 2.5732, "step": 908 }, { "epoch": 0.0684243211200813, "grad_norm": 5.3289713859558105, "learning_rate": 9.962445457941204e-05, "loss": 2.3569, "step": 909 }, { "epoch": 0.06849959540074145, "grad_norm": 6.253427028656006, "learning_rate": 9.962296175411954e-05, "loss": 2.4584, "step": 910 }, { "epoch": 0.06857486968140161, "grad_norm": 6.991831302642822, "learning_rate": 9.962146597888434e-05, "loss": 2.2947, "step": 911 }, { "epoch": 0.06865014396206176, "grad_norm": 5.575552940368652, "learning_rate": 9.961996725379537e-05, "loss": 2.3399, "step": 912 }, { "epoch": 0.06872541824272192, "grad_norm": 6.069404602050781, "learning_rate": 9.961846557894173e-05, "loss": 2.357, "step": 913 }, { "epoch": 0.06880069252338207, "grad_norm": 6.642460346221924, "learning_rate": 9.961696095441266e-05, "loss": 2.5456, "step": 914 }, { "epoch": 0.06887596680404223, "grad_norm": 7.879169940948486, "learning_rate": 9.961545338029765e-05, "loss": 2.3147, "step": 915 }, { "epoch": 0.06895124108470238, "grad_norm": 5.864014625549316, "learning_rate": 9.961394285668629e-05, "loss": 2.2817, "step": 916 }, { "epoch": 0.06902651536536254, "grad_norm": 5.330665111541748, "learning_rate": 9.96124293836684e-05, "loss": 2.3965, "step": 917 }, { "epoch": 0.06910178964602269, "grad_norm": 6.035168170928955, "learning_rate": 9.961091296133391e-05, "loss": 2.4772, "step": 918 }, { "epoch": 0.06917706392668285, "grad_norm": 5.027256488800049, "learning_rate": 9.960939358977301e-05, "loss": 2.3338, "step": 919 }, { "epoch": 0.069252338207343, "grad_norm": 6.097067356109619, "learning_rate": 9.960787126907602e-05, "loss": 2.4752, "step": 920 }, { "epoch": 0.06932761248800316, "grad_norm": 5.557241439819336, "learning_rate": 9.96063459993334e-05, "loss": 2.3751, "step": 921 }, { "epoch": 0.06940288676866331, "grad_norm": 7.162276268005371, "learning_rate": 9.960481778063586e-05, "loss": 2.0992, "step": 922 }, { "epoch": 0.06947816104932347, "grad_norm": 5.234118938446045, "learning_rate": 9.960328661307423e-05, "loss": 2.4994, "step": 923 }, { "epoch": 0.06955343532998363, "grad_norm": 6.395406246185303, "learning_rate": 9.960175249673956e-05, "loss": 2.2585, "step": 924 }, { "epoch": 0.06962870961064378, "grad_norm": 7.6956329345703125, "learning_rate": 9.960021543172299e-05, "loss": 2.4022, "step": 925 }, { "epoch": 0.06970398389130394, "grad_norm": 5.352781772613525, "learning_rate": 9.959867541811596e-05, "loss": 2.1947, "step": 926 }, { "epoch": 0.06977925817196409, "grad_norm": 6.12201452255249, "learning_rate": 9.959713245600996e-05, "loss": 1.9226, "step": 927 }, { "epoch": 0.06985453245262425, "grad_norm": 8.42553424835205, "learning_rate": 9.95955865454968e-05, "loss": 2.1042, "step": 928 }, { "epoch": 0.0699298067332844, "grad_norm": 7.184745788574219, "learning_rate": 9.959403768666828e-05, "loss": 2.092, "step": 929 }, { "epoch": 0.07000508101394456, "grad_norm": 7.369794845581055, "learning_rate": 9.959248587961652e-05, "loss": 2.1832, "step": 930 }, { "epoch": 0.07008035529460471, "grad_norm": 5.493983268737793, "learning_rate": 9.959093112443378e-05, "loss": 2.313, "step": 931 }, { "epoch": 0.07015562957526487, "grad_norm": 7.570969104766846, "learning_rate": 9.958937342121249e-05, "loss": 2.6078, "step": 932 }, { "epoch": 0.07023090385592502, "grad_norm": 7.0704216957092285, "learning_rate": 9.958781277004523e-05, "loss": 2.3047, "step": 933 }, { "epoch": 0.07030617813658518, "grad_norm": 5.484086513519287, "learning_rate": 9.958624917102479e-05, "loss": 2.2524, "step": 934 }, { "epoch": 0.07038145241724533, "grad_norm": 8.462700843811035, "learning_rate": 9.958468262424409e-05, "loss": 2.0558, "step": 935 }, { "epoch": 0.0704567266979055, "grad_norm": 6.400140285491943, "learning_rate": 9.958311312979631e-05, "loss": 3.0189, "step": 936 }, { "epoch": 0.07053200097856564, "grad_norm": 6.533931255340576, "learning_rate": 9.958154068777473e-05, "loss": 2.7761, "step": 937 }, { "epoch": 0.0706072752592258, "grad_norm": 5.321656227111816, "learning_rate": 9.95799652982728e-05, "loss": 2.0451, "step": 938 }, { "epoch": 0.07068254953988595, "grad_norm": 4.879770755767822, "learning_rate": 9.95783869613842e-05, "loss": 2.1618, "step": 939 }, { "epoch": 0.07075782382054611, "grad_norm": 6.507859230041504, "learning_rate": 9.957680567720275e-05, "loss": 2.1516, "step": 940 }, { "epoch": 0.07083309810120628, "grad_norm": 9.765229225158691, "learning_rate": 9.957522144582245e-05, "loss": 3.2496, "step": 941 }, { "epoch": 0.07090837238186642, "grad_norm": 6.177280426025391, "learning_rate": 9.957363426733751e-05, "loss": 2.2814, "step": 942 }, { "epoch": 0.07098364666252659, "grad_norm": 6.372262477874756, "learning_rate": 9.957204414184223e-05, "loss": 2.453, "step": 943 }, { "epoch": 0.07105892094318673, "grad_norm": 5.343485355377197, "learning_rate": 9.957045106943115e-05, "loss": 2.3823, "step": 944 }, { "epoch": 0.0711341952238469, "grad_norm": 5.006737232208252, "learning_rate": 9.956885505019901e-05, "loss": 2.457, "step": 945 }, { "epoch": 0.07120946950450704, "grad_norm": 4.654512405395508, "learning_rate": 9.956725608424066e-05, "loss": 2.2822, "step": 946 }, { "epoch": 0.0712847437851672, "grad_norm": 5.320466041564941, "learning_rate": 9.956565417165117e-05, "loss": 2.1134, "step": 947 }, { "epoch": 0.07136001806582735, "grad_norm": 4.9236249923706055, "learning_rate": 9.956404931252574e-05, "loss": 2.4609, "step": 948 }, { "epoch": 0.07143529234648752, "grad_norm": 5.646245002746582, "learning_rate": 9.956244150695981e-05, "loss": 2.3735, "step": 949 }, { "epoch": 0.07151056662714766, "grad_norm": 5.308044910430908, "learning_rate": 9.956083075504894e-05, "loss": 2.4017, "step": 950 }, { "epoch": 0.07158584090780783, "grad_norm": 6.247456073760986, "learning_rate": 9.955921705688889e-05, "loss": 2.3095, "step": 951 }, { "epoch": 0.07166111518846797, "grad_norm": 5.998600006103516, "learning_rate": 9.955760041257558e-05, "loss": 2.2016, "step": 952 }, { "epoch": 0.07173638946912814, "grad_norm": 5.084904670715332, "learning_rate": 9.955598082220513e-05, "loss": 2.5664, "step": 953 }, { "epoch": 0.07181166374978828, "grad_norm": 6.240522384643555, "learning_rate": 9.95543582858738e-05, "loss": 2.1274, "step": 954 }, { "epoch": 0.07188693803044845, "grad_norm": 5.399087429046631, "learning_rate": 9.955273280367807e-05, "loss": 2.204, "step": 955 }, { "epoch": 0.07196221231110861, "grad_norm": 4.183984756469727, "learning_rate": 9.955110437571456e-05, "loss": 2.179, "step": 956 }, { "epoch": 0.07203748659176876, "grad_norm": 5.170600891113281, "learning_rate": 9.954947300208007e-05, "loss": 2.455, "step": 957 }, { "epoch": 0.07211276087242892, "grad_norm": 6.372200012207031, "learning_rate": 9.954783868287157e-05, "loss": 2.2084, "step": 958 }, { "epoch": 0.07218803515308907, "grad_norm": 4.859469413757324, "learning_rate": 9.954620141818623e-05, "loss": 2.421, "step": 959 }, { "epoch": 0.07226330943374923, "grad_norm": 5.183671474456787, "learning_rate": 9.954456120812139e-05, "loss": 2.4034, "step": 960 }, { "epoch": 0.07233858371440938, "grad_norm": 7.792542457580566, "learning_rate": 9.954291805277455e-05, "loss": 2.2416, "step": 961 }, { "epoch": 0.07241385799506954, "grad_norm": 4.288693904876709, "learning_rate": 9.954127195224337e-05, "loss": 1.9048, "step": 962 }, { "epoch": 0.07248913227572969, "grad_norm": 5.301598072052002, "learning_rate": 9.953962290662573e-05, "loss": 2.4557, "step": 963 }, { "epoch": 0.07256440655638985, "grad_norm": 5.720681190490723, "learning_rate": 9.953797091601967e-05, "loss": 2.4226, "step": 964 }, { "epoch": 0.07263968083705, "grad_norm": 4.384246349334717, "learning_rate": 9.953631598052335e-05, "loss": 2.1714, "step": 965 }, { "epoch": 0.07271495511771016, "grad_norm": 6.421280860900879, "learning_rate": 9.95346581002352e-05, "loss": 2.2805, "step": 966 }, { "epoch": 0.0727902293983703, "grad_norm": 6.632420539855957, "learning_rate": 9.953299727525376e-05, "loss": 2.2177, "step": 967 }, { "epoch": 0.07286550367903047, "grad_norm": 5.725123405456543, "learning_rate": 9.953133350567774e-05, "loss": 2.5709, "step": 968 }, { "epoch": 0.07294077795969062, "grad_norm": 5.322400093078613, "learning_rate": 9.952966679160606e-05, "loss": 2.1169, "step": 969 }, { "epoch": 0.07301605224035078, "grad_norm": 6.844789981842041, "learning_rate": 9.952799713313783e-05, "loss": 2.2264, "step": 970 }, { "epoch": 0.07309132652101093, "grad_norm": 5.416439056396484, "learning_rate": 9.952632453037227e-05, "loss": 2.4751, "step": 971 }, { "epoch": 0.07316660080167109, "grad_norm": 6.6972856521606445, "learning_rate": 9.952464898340883e-05, "loss": 2.471, "step": 972 }, { "epoch": 0.07324187508233125, "grad_norm": 6.15932035446167, "learning_rate": 9.952297049234712e-05, "loss": 2.2028, "step": 973 }, { "epoch": 0.0733171493629914, "grad_norm": 6.374386310577393, "learning_rate": 9.952128905728688e-05, "loss": 2.2344, "step": 974 }, { "epoch": 0.07339242364365156, "grad_norm": 4.6305694580078125, "learning_rate": 9.951960467832813e-05, "loss": 2.2047, "step": 975 }, { "epoch": 0.07346769792431171, "grad_norm": 5.737475395202637, "learning_rate": 9.951791735557094e-05, "loss": 2.0497, "step": 976 }, { "epoch": 0.07354297220497187, "grad_norm": 7.1963019371032715, "learning_rate": 9.951622708911567e-05, "loss": 2.3527, "step": 977 }, { "epoch": 0.07361824648563202, "grad_norm": 6.804144382476807, "learning_rate": 9.951453387906277e-05, "loss": 2.3246, "step": 978 }, { "epoch": 0.07369352076629218, "grad_norm": 7.030019760131836, "learning_rate": 9.951283772551289e-05, "loss": 2.1328, "step": 979 }, { "epoch": 0.07376879504695233, "grad_norm": 9.475345611572266, "learning_rate": 9.95111386285669e-05, "loss": 2.3515, "step": 980 }, { "epoch": 0.07384406932761249, "grad_norm": 4.585552215576172, "learning_rate": 9.950943658832577e-05, "loss": 1.99, "step": 981 }, { "epoch": 0.07391934360827264, "grad_norm": 7.107287883758545, "learning_rate": 9.950773160489069e-05, "loss": 2.7024, "step": 982 }, { "epoch": 0.0739946178889328, "grad_norm": 6.360047817230225, "learning_rate": 9.950602367836303e-05, "loss": 2.6684, "step": 983 }, { "epoch": 0.07406989216959295, "grad_norm": 7.048748970031738, "learning_rate": 9.950431280884429e-05, "loss": 2.547, "step": 984 }, { "epoch": 0.07414516645025311, "grad_norm": 5.756119728088379, "learning_rate": 9.950259899643622e-05, "loss": 2.2265, "step": 985 }, { "epoch": 0.07422044073091326, "grad_norm": 4.780413627624512, "learning_rate": 9.950088224124068e-05, "loss": 2.3658, "step": 986 }, { "epoch": 0.07429571501157342, "grad_norm": 5.234473705291748, "learning_rate": 9.94991625433597e-05, "loss": 2.1723, "step": 987 }, { "epoch": 0.07437098929223358, "grad_norm": 5.499043941497803, "learning_rate": 9.949743990289555e-05, "loss": 2.6353, "step": 988 }, { "epoch": 0.07444626357289373, "grad_norm": 6.970423698425293, "learning_rate": 9.949571431995062e-05, "loss": 2.3523, "step": 989 }, { "epoch": 0.07452153785355389, "grad_norm": 5.077739238739014, "learning_rate": 9.949398579462749e-05, "loss": 2.3519, "step": 990 }, { "epoch": 0.07459681213421404, "grad_norm": 3.6100821495056152, "learning_rate": 9.949225432702893e-05, "loss": 2.2624, "step": 991 }, { "epoch": 0.0746720864148742, "grad_norm": 6.948885917663574, "learning_rate": 9.949051991725786e-05, "loss": 2.4027, "step": 992 }, { "epoch": 0.07474736069553435, "grad_norm": 8.069746017456055, "learning_rate": 9.948878256541738e-05, "loss": 2.3889, "step": 993 }, { "epoch": 0.07482263497619451, "grad_norm": 5.7547197341918945, "learning_rate": 9.948704227161078e-05, "loss": 2.4589, "step": 994 }, { "epoch": 0.07489790925685466, "grad_norm": 4.529244422912598, "learning_rate": 9.94852990359415e-05, "loss": 2.3446, "step": 995 }, { "epoch": 0.07497318353751482, "grad_norm": 5.505538463592529, "learning_rate": 9.948355285851318e-05, "loss": 2.1697, "step": 996 }, { "epoch": 0.07504845781817497, "grad_norm": 4.478546142578125, "learning_rate": 9.948180373942963e-05, "loss": 1.9482, "step": 997 }, { "epoch": 0.07512373209883513, "grad_norm": 7.179813861846924, "learning_rate": 9.948005167879485e-05, "loss": 2.4936, "step": 998 }, { "epoch": 0.07519900637949528, "grad_norm": 6.788231372833252, "learning_rate": 9.947829667671294e-05, "loss": 2.0441, "step": 999 }, { "epoch": 0.07527428066015544, "grad_norm": 6.892848014831543, "learning_rate": 9.947653873328828e-05, "loss": 2.2042, "step": 1000 }, { "epoch": 0.07534955494081559, "grad_norm": 6.119129180908203, "learning_rate": 9.947477784862536e-05, "loss": 2.4574, "step": 1001 }, { "epoch": 0.07542482922147575, "grad_norm": 4.81908655166626, "learning_rate": 9.947301402282884e-05, "loss": 2.275, "step": 1002 }, { "epoch": 0.0755001035021359, "grad_norm": 5.189309597015381, "learning_rate": 9.947124725600359e-05, "loss": 2.3772, "step": 1003 }, { "epoch": 0.07557537778279606, "grad_norm": 7.3246636390686035, "learning_rate": 9.946947754825464e-05, "loss": 2.6769, "step": 1004 }, { "epoch": 0.07565065206345623, "grad_norm": 4.19852876663208, "learning_rate": 9.94677048996872e-05, "loss": 2.3871, "step": 1005 }, { "epoch": 0.07572592634411637, "grad_norm": 7.656721591949463, "learning_rate": 9.946592931040666e-05, "loss": 2.063, "step": 1006 }, { "epoch": 0.07580120062477654, "grad_norm": 6.189587593078613, "learning_rate": 9.946415078051853e-05, "loss": 2.3819, "step": 1007 }, { "epoch": 0.07587647490543668, "grad_norm": 6.411081790924072, "learning_rate": 9.946236931012858e-05, "loss": 2.4864, "step": 1008 }, { "epoch": 0.07595174918609685, "grad_norm": 5.585498332977295, "learning_rate": 9.946058489934269e-05, "loss": 2.2988, "step": 1009 }, { "epoch": 0.076027023466757, "grad_norm": 8.20356559753418, "learning_rate": 9.945879754826695e-05, "loss": 2.4969, "step": 1010 }, { "epoch": 0.07610229774741716, "grad_norm": 4.942919731140137, "learning_rate": 9.94570072570076e-05, "loss": 2.7698, "step": 1011 }, { "epoch": 0.0761775720280773, "grad_norm": 6.5601043701171875, "learning_rate": 9.945521402567109e-05, "loss": 2.3243, "step": 1012 }, { "epoch": 0.07625284630873747, "grad_norm": 5.879470348358154, "learning_rate": 9.9453417854364e-05, "loss": 2.1877, "step": 1013 }, { "epoch": 0.07632812058939761, "grad_norm": 5.421281337738037, "learning_rate": 9.945161874319311e-05, "loss": 2.2029, "step": 1014 }, { "epoch": 0.07640339487005778, "grad_norm": 5.667712211608887, "learning_rate": 9.944981669226539e-05, "loss": 2.1336, "step": 1015 }, { "epoch": 0.07647866915071792, "grad_norm": 8.215184211730957, "learning_rate": 9.944801170168796e-05, "loss": 2.1728, "step": 1016 }, { "epoch": 0.07655394343137809, "grad_norm": 5.038661479949951, "learning_rate": 9.94462037715681e-05, "loss": 2.5447, "step": 1017 }, { "epoch": 0.07662921771203823, "grad_norm": 5.737669944763184, "learning_rate": 9.944439290201331e-05, "loss": 2.335, "step": 1018 }, { "epoch": 0.0767044919926984, "grad_norm": 4.581604957580566, "learning_rate": 9.944257909313124e-05, "loss": 2.2242, "step": 1019 }, { "epoch": 0.07677976627335854, "grad_norm": 7.627722263336182, "learning_rate": 9.94407623450297e-05, "loss": 2.6063, "step": 1020 }, { "epoch": 0.0768550405540187, "grad_norm": 4.925320148468018, "learning_rate": 9.94389426578167e-05, "loss": 2.3832, "step": 1021 }, { "epoch": 0.07693031483467887, "grad_norm": 6.603039741516113, "learning_rate": 9.943712003160043e-05, "loss": 2.2791, "step": 1022 }, { "epoch": 0.07700558911533902, "grad_norm": 4.396380424499512, "learning_rate": 9.943529446648921e-05, "loss": 2.1878, "step": 1023 }, { "epoch": 0.07708086339599918, "grad_norm": 4.721848011016846, "learning_rate": 9.943346596259159e-05, "loss": 2.3043, "step": 1024 }, { "epoch": 0.07715613767665933, "grad_norm": 6.192013740539551, "learning_rate": 9.943163452001625e-05, "loss": 2.3955, "step": 1025 }, { "epoch": 0.07723141195731949, "grad_norm": 6.906729221343994, "learning_rate": 9.942980013887207e-05, "loss": 2.4838, "step": 1026 }, { "epoch": 0.07730668623797964, "grad_norm": 7.221069812774658, "learning_rate": 9.942796281926812e-05, "loss": 2.2178, "step": 1027 }, { "epoch": 0.0773819605186398, "grad_norm": 6.018310070037842, "learning_rate": 9.942612256131359e-05, "loss": 2.6929, "step": 1028 }, { "epoch": 0.07745723479929995, "grad_norm": 5.666262149810791, "learning_rate": 9.94242793651179e-05, "loss": 2.4844, "step": 1029 }, { "epoch": 0.07753250907996011, "grad_norm": 7.3587260246276855, "learning_rate": 9.942243323079061e-05, "loss": 2.0608, "step": 1030 }, { "epoch": 0.07760778336062026, "grad_norm": 5.538628101348877, "learning_rate": 9.942058415844147e-05, "loss": 2.1712, "step": 1031 }, { "epoch": 0.07768305764128042, "grad_norm": 6.028646945953369, "learning_rate": 9.941873214818042e-05, "loss": 2.4612, "step": 1032 }, { "epoch": 0.07775833192194057, "grad_norm": 7.019837379455566, "learning_rate": 9.941687720011753e-05, "loss": 2.2944, "step": 1033 }, { "epoch": 0.07783360620260073, "grad_norm": 7.470941543579102, "learning_rate": 9.941501931436309e-05, "loss": 2.0832, "step": 1034 }, { "epoch": 0.07790888048326088, "grad_norm": 6.686201095581055, "learning_rate": 9.941315849102753e-05, "loss": 2.1468, "step": 1035 }, { "epoch": 0.07798415476392104, "grad_norm": 5.702566146850586, "learning_rate": 9.94112947302215e-05, "loss": 2.7865, "step": 1036 }, { "epoch": 0.0780594290445812, "grad_norm": 5.05864953994751, "learning_rate": 9.940942803205575e-05, "loss": 2.2278, "step": 1037 }, { "epoch": 0.07813470332524135, "grad_norm": 6.330860614776611, "learning_rate": 9.940755839664128e-05, "loss": 2.2412, "step": 1038 }, { "epoch": 0.07820997760590151, "grad_norm": 4.715969085693359, "learning_rate": 9.940568582408924e-05, "loss": 2.3136, "step": 1039 }, { "epoch": 0.07828525188656166, "grad_norm": 6.241091251373291, "learning_rate": 9.940381031451094e-05, "loss": 2.1582, "step": 1040 }, { "epoch": 0.07836052616722182, "grad_norm": 4.697853088378906, "learning_rate": 9.940193186801787e-05, "loss": 2.4335, "step": 1041 }, { "epoch": 0.07843580044788197, "grad_norm": 4.922056674957275, "learning_rate": 9.94000504847217e-05, "loss": 2.1039, "step": 1042 }, { "epoch": 0.07851107472854213, "grad_norm": 5.535480976104736, "learning_rate": 9.939816616473427e-05, "loss": 2.151, "step": 1043 }, { "epoch": 0.07858634900920228, "grad_norm": 4.560698986053467, "learning_rate": 9.93962789081676e-05, "loss": 2.4929, "step": 1044 }, { "epoch": 0.07866162328986244, "grad_norm": 5.94008207321167, "learning_rate": 9.939438871513388e-05, "loss": 2.3912, "step": 1045 }, { "epoch": 0.07873689757052259, "grad_norm": 5.139965057373047, "learning_rate": 9.939249558574548e-05, "loss": 1.9442, "step": 1046 }, { "epoch": 0.07881217185118275, "grad_norm": 5.666169166564941, "learning_rate": 9.939059952011494e-05, "loss": 2.258, "step": 1047 }, { "epoch": 0.0788874461318429, "grad_norm": 4.650025844573975, "learning_rate": 9.938870051835498e-05, "loss": 2.4298, "step": 1048 }, { "epoch": 0.07896272041250306, "grad_norm": 5.10502815246582, "learning_rate": 9.938679858057848e-05, "loss": 2.3377, "step": 1049 }, { "epoch": 0.07903799469316321, "grad_norm": 5.629189491271973, "learning_rate": 9.938489370689853e-05, "loss": 2.4516, "step": 1050 }, { "epoch": 0.07911326897382337, "grad_norm": 5.972681522369385, "learning_rate": 9.938298589742834e-05, "loss": 2.4691, "step": 1051 }, { "epoch": 0.07918854325448352, "grad_norm": 4.494705677032471, "learning_rate": 9.938107515228133e-05, "loss": 2.5649, "step": 1052 }, { "epoch": 0.07926381753514368, "grad_norm": 4.915561199188232, "learning_rate": 9.93791614715711e-05, "loss": 2.1048, "step": 1053 }, { "epoch": 0.07933909181580384, "grad_norm": 4.736732006072998, "learning_rate": 9.93772448554114e-05, "loss": 2.4447, "step": 1054 }, { "epoch": 0.07941436609646399, "grad_norm": 3.9177684783935547, "learning_rate": 9.937532530391617e-05, "loss": 2.2234, "step": 1055 }, { "epoch": 0.07948964037712415, "grad_norm": 5.615268707275391, "learning_rate": 9.937340281719952e-05, "loss": 2.35, "step": 1056 }, { "epoch": 0.0795649146577843, "grad_norm": 4.6389007568359375, "learning_rate": 9.937147739537576e-05, "loss": 2.2979, "step": 1057 }, { "epoch": 0.07964018893844446, "grad_norm": 4.469980239868164, "learning_rate": 9.93695490385593e-05, "loss": 2.4946, "step": 1058 }, { "epoch": 0.07971546321910461, "grad_norm": 4.239686965942383, "learning_rate": 9.936761774686482e-05, "loss": 1.9174, "step": 1059 }, { "epoch": 0.07979073749976477, "grad_norm": 4.816890239715576, "learning_rate": 9.936568352040714e-05, "loss": 2.5983, "step": 1060 }, { "epoch": 0.07986601178042492, "grad_norm": 5.86488151550293, "learning_rate": 9.936374635930119e-05, "loss": 2.3391, "step": 1061 }, { "epoch": 0.07994128606108508, "grad_norm": 5.060385704040527, "learning_rate": 9.936180626366218e-05, "loss": 2.1666, "step": 1062 }, { "epoch": 0.08001656034174523, "grad_norm": 5.208842754364014, "learning_rate": 9.935986323360541e-05, "loss": 2.2212, "step": 1063 }, { "epoch": 0.08009183462240539, "grad_norm": 4.977725982666016, "learning_rate": 9.935791726924642e-05, "loss": 2.3255, "step": 1064 }, { "epoch": 0.08016710890306554, "grad_norm": 4.706028461456299, "learning_rate": 9.935596837070085e-05, "loss": 2.0912, "step": 1065 }, { "epoch": 0.0802423831837257, "grad_norm": 4.670478343963623, "learning_rate": 9.935401653808458e-05, "loss": 2.1627, "step": 1066 }, { "epoch": 0.08031765746438585, "grad_norm": 7.468380928039551, "learning_rate": 9.935206177151364e-05, "loss": 2.2187, "step": 1067 }, { "epoch": 0.08039293174504601, "grad_norm": 4.354898929595947, "learning_rate": 9.935010407110424e-05, "loss": 1.9525, "step": 1068 }, { "epoch": 0.08046820602570617, "grad_norm": 5.351611614227295, "learning_rate": 9.934814343697275e-05, "loss": 2.3182, "step": 1069 }, { "epoch": 0.08054348030636632, "grad_norm": 5.501046657562256, "learning_rate": 9.934617986923574e-05, "loss": 2.111, "step": 1070 }, { "epoch": 0.08061875458702648, "grad_norm": 4.17952299118042, "learning_rate": 9.934421336800992e-05, "loss": 2.5661, "step": 1071 }, { "epoch": 0.08069402886768663, "grad_norm": 5.963723659515381, "learning_rate": 9.934224393341219e-05, "loss": 2.3498, "step": 1072 }, { "epoch": 0.0807693031483468, "grad_norm": 5.3424224853515625, "learning_rate": 9.934027156555965e-05, "loss": 2.3266, "step": 1073 }, { "epoch": 0.08084457742900694, "grad_norm": 5.322235107421875, "learning_rate": 9.933829626456953e-05, "loss": 2.3117, "step": 1074 }, { "epoch": 0.0809198517096671, "grad_norm": 6.136697292327881, "learning_rate": 9.933631803055927e-05, "loss": 2.2734, "step": 1075 }, { "epoch": 0.08099512599032725, "grad_norm": 5.181178092956543, "learning_rate": 9.933433686364646e-05, "loss": 2.1142, "step": 1076 }, { "epoch": 0.08107040027098741, "grad_norm": 4.634887218475342, "learning_rate": 9.933235276394887e-05, "loss": 2.4871, "step": 1077 }, { "epoch": 0.08114567455164756, "grad_norm": 9.338444709777832, "learning_rate": 9.933036573158449e-05, "loss": 2.3129, "step": 1078 }, { "epoch": 0.08122094883230772, "grad_norm": 5.21658182144165, "learning_rate": 9.93283757666714e-05, "loss": 2.3391, "step": 1079 }, { "epoch": 0.08129622311296787, "grad_norm": 4.9668989181518555, "learning_rate": 9.93263828693279e-05, "loss": 2.2192, "step": 1080 }, { "epoch": 0.08137149739362803, "grad_norm": 8.038995742797852, "learning_rate": 9.932438703967246e-05, "loss": 2.3, "step": 1081 }, { "epoch": 0.08144677167428818, "grad_norm": 6.618760108947754, "learning_rate": 9.932238827782376e-05, "loss": 2.393, "step": 1082 }, { "epoch": 0.08152204595494834, "grad_norm": 6.852845668792725, "learning_rate": 9.93203865839006e-05, "loss": 2.0772, "step": 1083 }, { "epoch": 0.08159732023560849, "grad_norm": 8.266192436218262, "learning_rate": 9.931838195802196e-05, "loss": 2.2675, "step": 1084 }, { "epoch": 0.08167259451626865, "grad_norm": 5.701829433441162, "learning_rate": 9.931637440030702e-05, "loss": 2.1978, "step": 1085 }, { "epoch": 0.08174786879692882, "grad_norm": 4.683379173278809, "learning_rate": 9.931436391087514e-05, "loss": 2.2586, "step": 1086 }, { "epoch": 0.08182314307758896, "grad_norm": 7.1637091636657715, "learning_rate": 9.931235048984582e-05, "loss": 2.0183, "step": 1087 }, { "epoch": 0.08189841735824913, "grad_norm": 5.0980658531188965, "learning_rate": 9.931033413733874e-05, "loss": 2.2851, "step": 1088 }, { "epoch": 0.08197369163890927, "grad_norm": 6.3092756271362305, "learning_rate": 9.930831485347381e-05, "loss": 2.6583, "step": 1089 }, { "epoch": 0.08204896591956944, "grad_norm": 5.924673080444336, "learning_rate": 9.930629263837103e-05, "loss": 2.0791, "step": 1090 }, { "epoch": 0.08212424020022958, "grad_norm": 4.839828014373779, "learning_rate": 9.930426749215063e-05, "loss": 2.121, "step": 1091 }, { "epoch": 0.08219951448088975, "grad_norm": 5.012866497039795, "learning_rate": 9.9302239414933e-05, "loss": 2.3123, "step": 1092 }, { "epoch": 0.0822747887615499, "grad_norm": 4.831496715545654, "learning_rate": 9.930020840683869e-05, "loss": 2.2346, "step": 1093 }, { "epoch": 0.08235006304221006, "grad_norm": 4.405677795410156, "learning_rate": 9.929817446798846e-05, "loss": 2.4625, "step": 1094 }, { "epoch": 0.0824253373228702, "grad_norm": 6.026737213134766, "learning_rate": 9.929613759850321e-05, "loss": 2.3199, "step": 1095 }, { "epoch": 0.08250061160353037, "grad_norm": 5.9357171058654785, "learning_rate": 9.929409779850401e-05, "loss": 2.7255, "step": 1096 }, { "epoch": 0.08257588588419051, "grad_norm": 7.4474592208862305, "learning_rate": 9.929205506811215e-05, "loss": 2.1615, "step": 1097 }, { "epoch": 0.08265116016485068, "grad_norm": 6.040008544921875, "learning_rate": 9.929000940744906e-05, "loss": 2.0836, "step": 1098 }, { "epoch": 0.08272643444551082, "grad_norm": 5.82507848739624, "learning_rate": 9.928796081663632e-05, "loss": 1.9006, "step": 1099 }, { "epoch": 0.08280170872617099, "grad_norm": 5.737878322601318, "learning_rate": 9.928590929579575e-05, "loss": 2.336, "step": 1100 }, { "epoch": 0.08287698300683113, "grad_norm": 5.825507640838623, "learning_rate": 9.928385484504927e-05, "loss": 2.5359, "step": 1101 }, { "epoch": 0.0829522572874913, "grad_norm": 6.403767108917236, "learning_rate": 9.928179746451905e-05, "loss": 2.2275, "step": 1102 }, { "epoch": 0.08302753156815146, "grad_norm": 7.845687389373779, "learning_rate": 9.927973715432736e-05, "loss": 2.7856, "step": 1103 }, { "epoch": 0.0831028058488116, "grad_norm": 6.293184280395508, "learning_rate": 9.92776739145967e-05, "loss": 2.4, "step": 1104 }, { "epoch": 0.08317808012947177, "grad_norm": 7.6609344482421875, "learning_rate": 9.927560774544972e-05, "loss": 2.2605, "step": 1105 }, { "epoch": 0.08325335441013192, "grad_norm": 4.736534118652344, "learning_rate": 9.927353864700926e-05, "loss": 2.4199, "step": 1106 }, { "epoch": 0.08332862869079208, "grad_norm": 4.435877323150635, "learning_rate": 9.92714666193983e-05, "loss": 2.1503, "step": 1107 }, { "epoch": 0.08340390297145223, "grad_norm": 6.6472320556640625, "learning_rate": 9.926939166274003e-05, "loss": 2.235, "step": 1108 }, { "epoch": 0.08347917725211239, "grad_norm": 5.518762111663818, "learning_rate": 9.926731377715779e-05, "loss": 2.0932, "step": 1109 }, { "epoch": 0.08355445153277254, "grad_norm": 4.8642449378967285, "learning_rate": 9.926523296277511e-05, "loss": 2.0227, "step": 1110 }, { "epoch": 0.0836297258134327, "grad_norm": 3.95300555229187, "learning_rate": 9.926314921971567e-05, "loss": 2.0899, "step": 1111 }, { "epoch": 0.08370500009409285, "grad_norm": 4.438868522644043, "learning_rate": 9.926106254810339e-05, "loss": 2.5073, "step": 1112 }, { "epoch": 0.08378027437475301, "grad_norm": 4.2434163093566895, "learning_rate": 9.925897294806228e-05, "loss": 2.16, "step": 1113 }, { "epoch": 0.08385554865541316, "grad_norm": 6.885189533233643, "learning_rate": 9.925688041971656e-05, "loss": 2.0897, "step": 1114 }, { "epoch": 0.08393082293607332, "grad_norm": 4.5535454750061035, "learning_rate": 9.925478496319063e-05, "loss": 2.0444, "step": 1115 }, { "epoch": 0.08400609721673347, "grad_norm": 5.977106094360352, "learning_rate": 9.925268657860906e-05, "loss": 2.353, "step": 1116 }, { "epoch": 0.08408137149739363, "grad_norm": 5.5683369636535645, "learning_rate": 9.925058526609661e-05, "loss": 2.3858, "step": 1117 }, { "epoch": 0.08415664577805379, "grad_norm": 7.104760646820068, "learning_rate": 9.924848102577818e-05, "loss": 2.127, "step": 1118 }, { "epoch": 0.08423192005871394, "grad_norm": 5.28377103805542, "learning_rate": 9.924637385777886e-05, "loss": 2.0708, "step": 1119 }, { "epoch": 0.0843071943393741, "grad_norm": 5.864218711853027, "learning_rate": 9.924426376222391e-05, "loss": 2.5374, "step": 1120 }, { "epoch": 0.08438246862003425, "grad_norm": 7.219125270843506, "learning_rate": 9.92421507392388e-05, "loss": 2.5879, "step": 1121 }, { "epoch": 0.08445774290069441, "grad_norm": 6.50026798248291, "learning_rate": 9.924003478894908e-05, "loss": 2.2759, "step": 1122 }, { "epoch": 0.08453301718135456, "grad_norm": 5.509936809539795, "learning_rate": 9.923791591148061e-05, "loss": 2.2141, "step": 1123 }, { "epoch": 0.08460829146201472, "grad_norm": 6.062098979949951, "learning_rate": 9.923579410695932e-05, "loss": 2.3936, "step": 1124 }, { "epoch": 0.08468356574267487, "grad_norm": 6.7604756355285645, "learning_rate": 9.923366937551132e-05, "loss": 2.3012, "step": 1125 }, { "epoch": 0.08475884002333503, "grad_norm": 6.552176475524902, "learning_rate": 9.923154171726295e-05, "loss": 2.4841, "step": 1126 }, { "epoch": 0.08483411430399518, "grad_norm": 5.604454517364502, "learning_rate": 9.92294111323407e-05, "loss": 2.0627, "step": 1127 }, { "epoch": 0.08490938858465534, "grad_norm": 5.521183967590332, "learning_rate": 9.92272776208712e-05, "loss": 2.4608, "step": 1128 }, { "epoch": 0.08498466286531549, "grad_norm": 5.1209611892700195, "learning_rate": 9.92251411829813e-05, "loss": 2.2051, "step": 1129 }, { "epoch": 0.08505993714597565, "grad_norm": 5.43388557434082, "learning_rate": 9.9223001818798e-05, "loss": 2.2708, "step": 1130 }, { "epoch": 0.0851352114266358, "grad_norm": 5.455743312835693, "learning_rate": 9.922085952844846e-05, "loss": 2.1273, "step": 1131 }, { "epoch": 0.08521048570729596, "grad_norm": 4.575812816619873, "learning_rate": 9.921871431206007e-05, "loss": 2.5016, "step": 1132 }, { "epoch": 0.08528575998795611, "grad_norm": 7.7491230964660645, "learning_rate": 9.921656616976033e-05, "loss": 1.8787, "step": 1133 }, { "epoch": 0.08536103426861627, "grad_norm": 5.662252426147461, "learning_rate": 9.921441510167697e-05, "loss": 2.6668, "step": 1134 }, { "epoch": 0.08543630854927643, "grad_norm": 4.987123966217041, "learning_rate": 9.921226110793783e-05, "loss": 2.8943, "step": 1135 }, { "epoch": 0.08551158282993658, "grad_norm": 6.202556610107422, "learning_rate": 9.921010418867098e-05, "loss": 2.2573, "step": 1136 }, { "epoch": 0.08558685711059674, "grad_norm": 4.842884540557861, "learning_rate": 9.920794434400462e-05, "loss": 2.0757, "step": 1137 }, { "epoch": 0.08566213139125689, "grad_norm": 5.519599437713623, "learning_rate": 9.920578157406717e-05, "loss": 2.3174, "step": 1138 }, { "epoch": 0.08573740567191705, "grad_norm": 7.333906650543213, "learning_rate": 9.920361587898719e-05, "loss": 2.1983, "step": 1139 }, { "epoch": 0.0858126799525772, "grad_norm": 6.405233860015869, "learning_rate": 9.920144725889343e-05, "loss": 2.1087, "step": 1140 }, { "epoch": 0.08588795423323736, "grad_norm": 6.9802656173706055, "learning_rate": 9.91992757139148e-05, "loss": 2.2193, "step": 1141 }, { "epoch": 0.08596322851389751, "grad_norm": 7.72580099105835, "learning_rate": 9.919710124418042e-05, "loss": 2.4775, "step": 1142 }, { "epoch": 0.08603850279455767, "grad_norm": 5.433417320251465, "learning_rate": 9.919492384981951e-05, "loss": 2.1343, "step": 1143 }, { "epoch": 0.08611377707521782, "grad_norm": 6.366405487060547, "learning_rate": 9.919274353096154e-05, "loss": 2.1731, "step": 1144 }, { "epoch": 0.08618905135587798, "grad_norm": 5.061887741088867, "learning_rate": 9.91905602877361e-05, "loss": 2.3821, "step": 1145 }, { "epoch": 0.08626432563653813, "grad_norm": 5.18080997467041, "learning_rate": 9.918837412027302e-05, "loss": 2.221, "step": 1146 }, { "epoch": 0.08633959991719829, "grad_norm": 5.952714920043945, "learning_rate": 9.918618502870222e-05, "loss": 2.3506, "step": 1147 }, { "epoch": 0.08641487419785844, "grad_norm": 7.845677852630615, "learning_rate": 9.918399301315386e-05, "loss": 2.0601, "step": 1148 }, { "epoch": 0.0864901484785186, "grad_norm": 6.018703460693359, "learning_rate": 9.918179807375824e-05, "loss": 2.2542, "step": 1149 }, { "epoch": 0.08656542275917876, "grad_norm": 4.599795341491699, "learning_rate": 9.917960021064585e-05, "loss": 1.9858, "step": 1150 }, { "epoch": 0.08664069703983891, "grad_norm": 8.23704719543457, "learning_rate": 9.917739942394733e-05, "loss": 2.432, "step": 1151 }, { "epoch": 0.08671597132049907, "grad_norm": 6.213704586029053, "learning_rate": 9.917519571379352e-05, "loss": 2.4168, "step": 1152 }, { "epoch": 0.08679124560115922, "grad_norm": 5.583656311035156, "learning_rate": 9.917298908031543e-05, "loss": 2.0159, "step": 1153 }, { "epoch": 0.08686651988181938, "grad_norm": 5.6313276290893555, "learning_rate": 9.917077952364422e-05, "loss": 2.0108, "step": 1154 }, { "epoch": 0.08694179416247953, "grad_norm": 6.999698162078857, "learning_rate": 9.916856704391127e-05, "loss": 2.5267, "step": 1155 }, { "epoch": 0.0870170684431397, "grad_norm": 5.772253036499023, "learning_rate": 9.916635164124807e-05, "loss": 2.2743, "step": 1156 }, { "epoch": 0.08709234272379984, "grad_norm": 4.270077705383301, "learning_rate": 9.916413331578635e-05, "loss": 2.1398, "step": 1157 }, { "epoch": 0.08716761700446, "grad_norm": 5.124973773956299, "learning_rate": 9.916191206765797e-05, "loss": 2.3097, "step": 1158 }, { "epoch": 0.08724289128512015, "grad_norm": 5.555698394775391, "learning_rate": 9.915968789699499e-05, "loss": 2.2625, "step": 1159 }, { "epoch": 0.08731816556578031, "grad_norm": 4.2281270027160645, "learning_rate": 9.91574608039296e-05, "loss": 2.3018, "step": 1160 }, { "epoch": 0.08739343984644046, "grad_norm": 5.254685401916504, "learning_rate": 9.915523078859423e-05, "loss": 2.4533, "step": 1161 }, { "epoch": 0.08746871412710062, "grad_norm": 6.163359642028809, "learning_rate": 9.915299785112142e-05, "loss": 2.1292, "step": 1162 }, { "epoch": 0.08754398840776077, "grad_norm": 5.744691371917725, "learning_rate": 9.915076199164392e-05, "loss": 2.2701, "step": 1163 }, { "epoch": 0.08761926268842093, "grad_norm": 5.872090816497803, "learning_rate": 9.914852321029466e-05, "loss": 2.6751, "step": 1164 }, { "epoch": 0.08769453696908108, "grad_norm": 5.114642143249512, "learning_rate": 9.914628150720674e-05, "loss": 2.2849, "step": 1165 }, { "epoch": 0.08776981124974124, "grad_norm": 4.612166404724121, "learning_rate": 9.914403688251337e-05, "loss": 2.5745, "step": 1166 }, { "epoch": 0.0878450855304014, "grad_norm": 8.587389945983887, "learning_rate": 9.914178933634801e-05, "loss": 2.4016, "step": 1167 }, { "epoch": 0.08792035981106155, "grad_norm": 5.873881816864014, "learning_rate": 9.913953886884431e-05, "loss": 2.2368, "step": 1168 }, { "epoch": 0.08799563409172172, "grad_norm": 6.308575630187988, "learning_rate": 9.913728548013599e-05, "loss": 2.4197, "step": 1169 }, { "epoch": 0.08807090837238186, "grad_norm": 4.444754123687744, "learning_rate": 9.913502917035705e-05, "loss": 2.1971, "step": 1170 }, { "epoch": 0.08814618265304203, "grad_norm": 4.840965270996094, "learning_rate": 9.913276993964162e-05, "loss": 2.3044, "step": 1171 }, { "epoch": 0.08822145693370217, "grad_norm": 5.132396697998047, "learning_rate": 9.913050778812399e-05, "loss": 2.1907, "step": 1172 }, { "epoch": 0.08829673121436234, "grad_norm": 4.840834617614746, "learning_rate": 9.912824271593863e-05, "loss": 2.0675, "step": 1173 }, { "epoch": 0.08837200549502248, "grad_norm": 6.998414993286133, "learning_rate": 9.912597472322021e-05, "loss": 2.4654, "step": 1174 }, { "epoch": 0.08844727977568265, "grad_norm": 6.046200275421143, "learning_rate": 9.912370381010356e-05, "loss": 2.3616, "step": 1175 }, { "epoch": 0.0885225540563428, "grad_norm": 4.645432472229004, "learning_rate": 9.912142997672366e-05, "loss": 2.3322, "step": 1176 }, { "epoch": 0.08859782833700296, "grad_norm": 7.393093585968018, "learning_rate": 9.911915322321572e-05, "loss": 2.041, "step": 1177 }, { "epoch": 0.0886731026176631, "grad_norm": 7.361612319946289, "learning_rate": 9.911687354971504e-05, "loss": 2.3988, "step": 1178 }, { "epoch": 0.08874837689832327, "grad_norm": 5.41740083694458, "learning_rate": 9.911459095635716e-05, "loss": 2.1069, "step": 1179 }, { "epoch": 0.08882365117898341, "grad_norm": 5.164453506469727, "learning_rate": 9.911230544327776e-05, "loss": 2.1698, "step": 1180 }, { "epoch": 0.08889892545964358, "grad_norm": 5.7849955558776855, "learning_rate": 9.911001701061273e-05, "loss": 2.002, "step": 1181 }, { "epoch": 0.08897419974030372, "grad_norm": 4.980674743652344, "learning_rate": 9.91077256584981e-05, "loss": 2.3879, "step": 1182 }, { "epoch": 0.08904947402096389, "grad_norm": 8.146739959716797, "learning_rate": 9.910543138707009e-05, "loss": 2.3295, "step": 1183 }, { "epoch": 0.08912474830162405, "grad_norm": 5.663999557495117, "learning_rate": 9.910313419646509e-05, "loss": 2.2329, "step": 1184 }, { "epoch": 0.0892000225822842, "grad_norm": 4.857204437255859, "learning_rate": 9.910083408681966e-05, "loss": 2.3024, "step": 1185 }, { "epoch": 0.08927529686294436, "grad_norm": 5.403531074523926, "learning_rate": 9.909853105827051e-05, "loss": 2.2123, "step": 1186 }, { "epoch": 0.0893505711436045, "grad_norm": 4.682754039764404, "learning_rate": 9.909622511095458e-05, "loss": 2.0178, "step": 1187 }, { "epoch": 0.08942584542426467, "grad_norm": 7.898589611053467, "learning_rate": 9.909391624500892e-05, "loss": 2.0381, "step": 1188 }, { "epoch": 0.08950111970492482, "grad_norm": 4.3345746994018555, "learning_rate": 9.909160446057082e-05, "loss": 1.9817, "step": 1189 }, { "epoch": 0.08957639398558498, "grad_norm": 5.2663774490356445, "learning_rate": 9.90892897577777e-05, "loss": 2.2422, "step": 1190 }, { "epoch": 0.08965166826624513, "grad_norm": 4.381959438323975, "learning_rate": 9.908697213676716e-05, "loss": 2.5133, "step": 1191 }, { "epoch": 0.08972694254690529, "grad_norm": 5.167469024658203, "learning_rate": 9.908465159767695e-05, "loss": 2.0555, "step": 1192 }, { "epoch": 0.08980221682756544, "grad_norm": 5.803744316101074, "learning_rate": 9.908232814064507e-05, "loss": 2.2211, "step": 1193 }, { "epoch": 0.0898774911082256, "grad_norm": 5.2488813400268555, "learning_rate": 9.908000176580959e-05, "loss": 2.2053, "step": 1194 }, { "epoch": 0.08995276538888575, "grad_norm": 4.878261566162109, "learning_rate": 9.907767247330885e-05, "loss": 2.1682, "step": 1195 }, { "epoch": 0.09002803966954591, "grad_norm": 5.3838725090026855, "learning_rate": 9.907534026328129e-05, "loss": 2.2657, "step": 1196 }, { "epoch": 0.09010331395020606, "grad_norm": 7.655898571014404, "learning_rate": 9.907300513586557e-05, "loss": 2.3684, "step": 1197 }, { "epoch": 0.09017858823086622, "grad_norm": 7.286603927612305, "learning_rate": 9.90706670912005e-05, "loss": 2.2987, "step": 1198 }, { "epoch": 0.09025386251152638, "grad_norm": 5.679907321929932, "learning_rate": 9.906832612942507e-05, "loss": 2.5172, "step": 1199 }, { "epoch": 0.09032913679218653, "grad_norm": 7.662585735321045, "learning_rate": 9.906598225067845e-05, "loss": 2.8452, "step": 1200 }, { "epoch": 0.09040441107284669, "grad_norm": 4.499806880950928, "learning_rate": 9.906363545509995e-05, "loss": 1.9919, "step": 1201 }, { "epoch": 0.09047968535350684, "grad_norm": 5.981786251068115, "learning_rate": 9.906128574282913e-05, "loss": 2.2924, "step": 1202 }, { "epoch": 0.090554959634167, "grad_norm": 4.9308977127075195, "learning_rate": 9.905893311400563e-05, "loss": 2.3659, "step": 1203 }, { "epoch": 0.09063023391482715, "grad_norm": 7.844737529754639, "learning_rate": 9.905657756876932e-05, "loss": 2.3044, "step": 1204 }, { "epoch": 0.09070550819548731, "grad_norm": 4.43223762512207, "learning_rate": 9.905421910726025e-05, "loss": 2.5299, "step": 1205 }, { "epoch": 0.09078078247614746, "grad_norm": 6.778882026672363, "learning_rate": 9.905185772961858e-05, "loss": 2.1524, "step": 1206 }, { "epoch": 0.09085605675680762, "grad_norm": 4.246809482574463, "learning_rate": 9.904949343598473e-05, "loss": 2.2371, "step": 1207 }, { "epoch": 0.09093133103746777, "grad_norm": 6.284351348876953, "learning_rate": 9.904712622649923e-05, "loss": 2.246, "step": 1208 }, { "epoch": 0.09100660531812793, "grad_norm": 6.6328020095825195, "learning_rate": 9.904475610130282e-05, "loss": 2.2712, "step": 1209 }, { "epoch": 0.09108187959878808, "grad_norm": 6.602048397064209, "learning_rate": 9.904238306053637e-05, "loss": 2.1394, "step": 1210 }, { "epoch": 0.09115715387944824, "grad_norm": 5.839536190032959, "learning_rate": 9.904000710434097e-05, "loss": 2.1277, "step": 1211 }, { "epoch": 0.09123242816010839, "grad_norm": 4.945462226867676, "learning_rate": 9.903762823285786e-05, "loss": 2.1361, "step": 1212 }, { "epoch": 0.09130770244076855, "grad_norm": 4.412333011627197, "learning_rate": 9.903524644622846e-05, "loss": 2.2587, "step": 1213 }, { "epoch": 0.0913829767214287, "grad_norm": 6.278764724731445, "learning_rate": 9.903286174459434e-05, "loss": 2.3017, "step": 1214 }, { "epoch": 0.09145825100208886, "grad_norm": 4.224057197570801, "learning_rate": 9.903047412809729e-05, "loss": 2.0379, "step": 1215 }, { "epoch": 0.09153352528274902, "grad_norm": 4.2989091873168945, "learning_rate": 9.902808359687926e-05, "loss": 2.0845, "step": 1216 }, { "epoch": 0.09160879956340917, "grad_norm": 4.08694314956665, "learning_rate": 9.90256901510823e-05, "loss": 2.3598, "step": 1217 }, { "epoch": 0.09168407384406933, "grad_norm": 5.269775390625, "learning_rate": 9.902329379084876e-05, "loss": 2.3412, "step": 1218 }, { "epoch": 0.09175934812472948, "grad_norm": 4.4063029289245605, "learning_rate": 9.902089451632105e-05, "loss": 2.3307, "step": 1219 }, { "epoch": 0.09183462240538964, "grad_norm": 5.829647541046143, "learning_rate": 9.901849232764182e-05, "loss": 1.9702, "step": 1220 }, { "epoch": 0.09190989668604979, "grad_norm": 6.281095027923584, "learning_rate": 9.901608722495388e-05, "loss": 2.2451, "step": 1221 }, { "epoch": 0.09198517096670995, "grad_norm": 4.600857734680176, "learning_rate": 9.901367920840018e-05, "loss": 2.4667, "step": 1222 }, { "epoch": 0.0920604452473701, "grad_norm": 4.4568586349487305, "learning_rate": 9.901126827812391e-05, "loss": 2.0653, "step": 1223 }, { "epoch": 0.09213571952803026, "grad_norm": 5.564261436462402, "learning_rate": 9.900885443426834e-05, "loss": 2.5238, "step": 1224 }, { "epoch": 0.09221099380869041, "grad_norm": 5.048755645751953, "learning_rate": 9.900643767697704e-05, "loss": 2.2748, "step": 1225 }, { "epoch": 0.09228626808935057, "grad_norm": 5.369926452636719, "learning_rate": 9.90040180063936e-05, "loss": 2.7051, "step": 1226 }, { "epoch": 0.09236154237001072, "grad_norm": 7.708915710449219, "learning_rate": 9.900159542266192e-05, "loss": 2.458, "step": 1227 }, { "epoch": 0.09243681665067088, "grad_norm": 4.9571919441223145, "learning_rate": 9.899916992592597e-05, "loss": 2.5832, "step": 1228 }, { "epoch": 0.09251209093133103, "grad_norm": 5.185609817504883, "learning_rate": 9.899674151632999e-05, "loss": 2.0539, "step": 1229 }, { "epoch": 0.0925873652119912, "grad_norm": 4.914243221282959, "learning_rate": 9.899431019401828e-05, "loss": 2.371, "step": 1230 }, { "epoch": 0.09266263949265136, "grad_norm": 6.205069541931152, "learning_rate": 9.899187595913544e-05, "loss": 2.1225, "step": 1231 }, { "epoch": 0.0927379137733115, "grad_norm": 4.880532264709473, "learning_rate": 9.898943881182613e-05, "loss": 1.9952, "step": 1232 }, { "epoch": 0.09281318805397167, "grad_norm": 4.993622303009033, "learning_rate": 9.898699875223524e-05, "loss": 2.5224, "step": 1233 }, { "epoch": 0.09288846233463181, "grad_norm": 5.312560558319092, "learning_rate": 9.898455578050787e-05, "loss": 2.2535, "step": 1234 }, { "epoch": 0.09296373661529198, "grad_norm": 5.254156589508057, "learning_rate": 9.898210989678917e-05, "loss": 2.437, "step": 1235 }, { "epoch": 0.09303901089595212, "grad_norm": 6.612512111663818, "learning_rate": 9.89796611012246e-05, "loss": 1.9526, "step": 1236 }, { "epoch": 0.09311428517661229, "grad_norm": 4.762012004852295, "learning_rate": 9.897720939395973e-05, "loss": 2.0193, "step": 1237 }, { "epoch": 0.09318955945727243, "grad_norm": 8.582109451293945, "learning_rate": 9.897475477514027e-05, "loss": 2.2787, "step": 1238 }, { "epoch": 0.0932648337379326, "grad_norm": 4.477728366851807, "learning_rate": 9.897229724491218e-05, "loss": 2.2228, "step": 1239 }, { "epoch": 0.09334010801859274, "grad_norm": 5.659603118896484, "learning_rate": 9.896983680342154e-05, "loss": 2.1808, "step": 1240 }, { "epoch": 0.0934153822992529, "grad_norm": 5.629698753356934, "learning_rate": 9.896737345081459e-05, "loss": 2.1982, "step": 1241 }, { "epoch": 0.09349065657991305, "grad_norm": 4.196998596191406, "learning_rate": 9.896490718723782e-05, "loss": 2.1435, "step": 1242 }, { "epoch": 0.09356593086057322, "grad_norm": 4.0380167961120605, "learning_rate": 9.896243801283777e-05, "loss": 2.0633, "step": 1243 }, { "epoch": 0.09364120514123336, "grad_norm": 5.888142108917236, "learning_rate": 9.89599659277613e-05, "loss": 3.1702, "step": 1244 }, { "epoch": 0.09371647942189353, "grad_norm": 5.672674655914307, "learning_rate": 9.895749093215534e-05, "loss": 1.9467, "step": 1245 }, { "epoch": 0.09379175370255367, "grad_norm": 5.711461544036865, "learning_rate": 9.895501302616701e-05, "loss": 2.7244, "step": 1246 }, { "epoch": 0.09386702798321384, "grad_norm": 5.369699954986572, "learning_rate": 9.895253220994362e-05, "loss": 1.8389, "step": 1247 }, { "epoch": 0.093942302263874, "grad_norm": 5.1924920082092285, "learning_rate": 9.895004848363267e-05, "loss": 2.2727, "step": 1248 }, { "epoch": 0.09401757654453415, "grad_norm": 5.881622791290283, "learning_rate": 9.894756184738177e-05, "loss": 2.2776, "step": 1249 }, { "epoch": 0.09409285082519431, "grad_norm": 5.337461471557617, "learning_rate": 9.894507230133878e-05, "loss": 2.1022, "step": 1250 }, { "epoch": 0.09416812510585446, "grad_norm": 4.679605960845947, "learning_rate": 9.894257984565167e-05, "loss": 2.4345, "step": 1251 }, { "epoch": 0.09424339938651462, "grad_norm": 4.815038204193115, "learning_rate": 9.894008448046863e-05, "loss": 2.2069, "step": 1252 }, { "epoch": 0.09431867366717477, "grad_norm": 8.264766693115234, "learning_rate": 9.893758620593798e-05, "loss": 2.1542, "step": 1253 }, { "epoch": 0.09439394794783493, "grad_norm": 5.655462265014648, "learning_rate": 9.893508502220825e-05, "loss": 2.3207, "step": 1254 }, { "epoch": 0.09446922222849508, "grad_norm": 4.800930023193359, "learning_rate": 9.893258092942813e-05, "loss": 1.9531, "step": 1255 }, { "epoch": 0.09454449650915524, "grad_norm": 6.131793975830078, "learning_rate": 9.893007392774647e-05, "loss": 2.3465, "step": 1256 }, { "epoch": 0.09461977078981539, "grad_norm": 6.297909736633301, "learning_rate": 9.892756401731233e-05, "loss": 1.9844, "step": 1257 }, { "epoch": 0.09469504507047555, "grad_norm": 4.924347400665283, "learning_rate": 9.892505119827487e-05, "loss": 2.3395, "step": 1258 }, { "epoch": 0.0947703193511357, "grad_norm": 6.420069694519043, "learning_rate": 9.892253547078352e-05, "loss": 2.3433, "step": 1259 }, { "epoch": 0.09484559363179586, "grad_norm": 4.504122734069824, "learning_rate": 9.892001683498781e-05, "loss": 2.5711, "step": 1260 }, { "epoch": 0.094920867912456, "grad_norm": 5.376524925231934, "learning_rate": 9.891749529103746e-05, "loss": 2.4909, "step": 1261 }, { "epoch": 0.09499614219311617, "grad_norm": 4.911278247833252, "learning_rate": 9.891497083908237e-05, "loss": 2.3502, "step": 1262 }, { "epoch": 0.09507141647377632, "grad_norm": 7.62767219543457, "learning_rate": 9.891244347927262e-05, "loss": 2.4731, "step": 1263 }, { "epoch": 0.09514669075443648, "grad_norm": 4.864021301269531, "learning_rate": 9.890991321175845e-05, "loss": 2.0387, "step": 1264 }, { "epoch": 0.09522196503509664, "grad_norm": 4.829661846160889, "learning_rate": 9.890738003669029e-05, "loss": 2.1206, "step": 1265 }, { "epoch": 0.09529723931575679, "grad_norm": 7.437132835388184, "learning_rate": 9.890484395421871e-05, "loss": 2.2334, "step": 1266 }, { "epoch": 0.09537251359641695, "grad_norm": 5.918606758117676, "learning_rate": 9.890230496449448e-05, "loss": 2.9684, "step": 1267 }, { "epoch": 0.0954477878770771, "grad_norm": 4.703569412231445, "learning_rate": 9.889976306766853e-05, "loss": 1.8511, "step": 1268 }, { "epoch": 0.09552306215773726, "grad_norm": 4.625489711761475, "learning_rate": 9.889721826389197e-05, "loss": 2.2033, "step": 1269 }, { "epoch": 0.09559833643839741, "grad_norm": 5.362358570098877, "learning_rate": 9.88946705533161e-05, "loss": 2.3028, "step": 1270 }, { "epoch": 0.09567361071905757, "grad_norm": 6.535942077636719, "learning_rate": 9.889211993609235e-05, "loss": 2.0545, "step": 1271 }, { "epoch": 0.09574888499971772, "grad_norm": 5.676138877868652, "learning_rate": 9.888956641237238e-05, "loss": 2.2467, "step": 1272 }, { "epoch": 0.09582415928037788, "grad_norm": 6.557033061981201, "learning_rate": 9.888700998230794e-05, "loss": 1.9677, "step": 1273 }, { "epoch": 0.09589943356103803, "grad_norm": 4.518818378448486, "learning_rate": 9.888445064605106e-05, "loss": 2.2068, "step": 1274 }, { "epoch": 0.09597470784169819, "grad_norm": 5.886898994445801, "learning_rate": 9.888188840375382e-05, "loss": 2.05, "step": 1275 }, { "epoch": 0.09604998212235834, "grad_norm": 5.915098190307617, "learning_rate": 9.88793232555686e-05, "loss": 2.6569, "step": 1276 }, { "epoch": 0.0961252564030185, "grad_norm": 6.290450572967529, "learning_rate": 9.887675520164783e-05, "loss": 2.191, "step": 1277 }, { "epoch": 0.09620053068367865, "grad_norm": 5.214074611663818, "learning_rate": 9.887418424214423e-05, "loss": 2.3441, "step": 1278 }, { "epoch": 0.09627580496433881, "grad_norm": 4.566655158996582, "learning_rate": 9.88716103772106e-05, "loss": 2.1281, "step": 1279 }, { "epoch": 0.09635107924499897, "grad_norm": 4.753139972686768, "learning_rate": 9.886903360699997e-05, "loss": 2.4563, "step": 1280 }, { "epoch": 0.09642635352565912, "grad_norm": 6.502361297607422, "learning_rate": 9.886645393166552e-05, "loss": 2.4556, "step": 1281 }, { "epoch": 0.09650162780631928, "grad_norm": 5.372557640075684, "learning_rate": 9.886387135136058e-05, "loss": 2.1031, "step": 1282 }, { "epoch": 0.09657690208697943, "grad_norm": 5.277848720550537, "learning_rate": 9.886128586623871e-05, "loss": 2.5858, "step": 1283 }, { "epoch": 0.09665217636763959, "grad_norm": 5.626523494720459, "learning_rate": 9.88586974764536e-05, "loss": 2.5502, "step": 1284 }, { "epoch": 0.09672745064829974, "grad_norm": 4.654241561889648, "learning_rate": 9.88561061821591e-05, "loss": 2.5378, "step": 1285 }, { "epoch": 0.0968027249289599, "grad_norm": 3.7818820476531982, "learning_rate": 9.885351198350927e-05, "loss": 2.1636, "step": 1286 }, { "epoch": 0.09687799920962005, "grad_norm": 4.863179683685303, "learning_rate": 9.885091488065832e-05, "loss": 2.4674, "step": 1287 }, { "epoch": 0.09695327349028021, "grad_norm": 5.401482582092285, "learning_rate": 9.884831487376068e-05, "loss": 2.33, "step": 1288 }, { "epoch": 0.09702854777094036, "grad_norm": 5.601634979248047, "learning_rate": 9.884571196297086e-05, "loss": 2.341, "step": 1289 }, { "epoch": 0.09710382205160052, "grad_norm": 6.169212341308594, "learning_rate": 9.884310614844363e-05, "loss": 2.5662, "step": 1290 }, { "epoch": 0.09717909633226067, "grad_norm": 6.100691318511963, "learning_rate": 9.884049743033388e-05, "loss": 2.3231, "step": 1291 }, { "epoch": 0.09725437061292083, "grad_norm": 5.652121543884277, "learning_rate": 9.883788580879669e-05, "loss": 1.8696, "step": 1292 }, { "epoch": 0.09732964489358098, "grad_norm": 5.417090892791748, "learning_rate": 9.883527128398732e-05, "loss": 2.3028, "step": 1293 }, { "epoch": 0.09740491917424114, "grad_norm": 4.444824695587158, "learning_rate": 9.883265385606122e-05, "loss": 2.1503, "step": 1294 }, { "epoch": 0.09748019345490129, "grad_norm": 6.258047580718994, "learning_rate": 9.883003352517394e-05, "loss": 2.302, "step": 1295 }, { "epoch": 0.09755546773556145, "grad_norm": 6.353456020355225, "learning_rate": 9.882741029148129e-05, "loss": 2.4965, "step": 1296 }, { "epoch": 0.09763074201622161, "grad_norm": 6.08019495010376, "learning_rate": 9.882478415513918e-05, "loss": 2.1724, "step": 1297 }, { "epoch": 0.09770601629688176, "grad_norm": 5.589347839355469, "learning_rate": 9.882215511630378e-05, "loss": 2.1164, "step": 1298 }, { "epoch": 0.09778129057754192, "grad_norm": 5.3560357093811035, "learning_rate": 9.881952317513131e-05, "loss": 2.1525, "step": 1299 }, { "epoch": 0.09785656485820207, "grad_norm": 5.481121063232422, "learning_rate": 9.881688833177829e-05, "loss": 2.1835, "step": 1300 }, { "epoch": 0.09793183913886223, "grad_norm": 4.726559638977051, "learning_rate": 9.881425058640131e-05, "loss": 2.0267, "step": 1301 }, { "epoch": 0.09800711341952238, "grad_norm": 5.2286834716796875, "learning_rate": 9.881160993915722e-05, "loss": 2.1971, "step": 1302 }, { "epoch": 0.09808238770018254, "grad_norm": 5.180140018463135, "learning_rate": 9.880896639020296e-05, "loss": 2.2377, "step": 1303 }, { "epoch": 0.09815766198084269, "grad_norm": 4.529655456542969, "learning_rate": 9.88063199396957e-05, "loss": 2.1725, "step": 1304 }, { "epoch": 0.09823293626150285, "grad_norm": 5.000154972076416, "learning_rate": 9.880367058779276e-05, "loss": 2.2472, "step": 1305 }, { "epoch": 0.098308210542163, "grad_norm": 5.002303600311279, "learning_rate": 9.880101833465163e-05, "loss": 2.5147, "step": 1306 }, { "epoch": 0.09838348482282316, "grad_norm": 8.194414138793945, "learning_rate": 9.879836318042998e-05, "loss": 2.345, "step": 1307 }, { "epoch": 0.09845875910348331, "grad_norm": 6.684850215911865, "learning_rate": 9.879570512528568e-05, "loss": 2.2136, "step": 1308 }, { "epoch": 0.09853403338414347, "grad_norm": 5.606630802154541, "learning_rate": 9.879304416937671e-05, "loss": 2.188, "step": 1309 }, { "epoch": 0.09860930766480362, "grad_norm": 4.944584369659424, "learning_rate": 9.879038031286128e-05, "loss": 2.0588, "step": 1310 }, { "epoch": 0.09868458194546378, "grad_norm": 7.088937282562256, "learning_rate": 9.87877135558977e-05, "loss": 2.0422, "step": 1311 }, { "epoch": 0.09875985622612395, "grad_norm": 4.895090579986572, "learning_rate": 9.878504389864458e-05, "loss": 2.1093, "step": 1312 }, { "epoch": 0.0988351305067841, "grad_norm": 4.786828517913818, "learning_rate": 9.878237134126056e-05, "loss": 2.1436, "step": 1313 }, { "epoch": 0.09891040478744426, "grad_norm": 6.79971170425415, "learning_rate": 9.877969588390453e-05, "loss": 2.3396, "step": 1314 }, { "epoch": 0.0989856790681044, "grad_norm": 7.508633613586426, "learning_rate": 9.877701752673556e-05, "loss": 1.9994, "step": 1315 }, { "epoch": 0.09906095334876457, "grad_norm": 7.53606653213501, "learning_rate": 9.877433626991283e-05, "loss": 2.4583, "step": 1316 }, { "epoch": 0.09913622762942471, "grad_norm": 5.509038925170898, "learning_rate": 9.877165211359578e-05, "loss": 2.2688, "step": 1317 }, { "epoch": 0.09921150191008488, "grad_norm": 4.700623989105225, "learning_rate": 9.876896505794395e-05, "loss": 2.2001, "step": 1318 }, { "epoch": 0.09928677619074502, "grad_norm": 5.297270774841309, "learning_rate": 9.87662751031171e-05, "loss": 2.2062, "step": 1319 }, { "epoch": 0.09936205047140519, "grad_norm": 8.08660888671875, "learning_rate": 9.876358224927508e-05, "loss": 2.2233, "step": 1320 }, { "epoch": 0.09943732475206533, "grad_norm": 5.491371154785156, "learning_rate": 9.876088649657805e-05, "loss": 2.0448, "step": 1321 }, { "epoch": 0.0995125990327255, "grad_norm": 5.495194911956787, "learning_rate": 9.87581878451862e-05, "loss": 2.028, "step": 1322 }, { "epoch": 0.09958787331338564, "grad_norm": 5.543384075164795, "learning_rate": 9.875548629525999e-05, "loss": 2.176, "step": 1323 }, { "epoch": 0.0996631475940458, "grad_norm": 4.787388801574707, "learning_rate": 9.875278184696002e-05, "loss": 1.8804, "step": 1324 }, { "epoch": 0.09973842187470595, "grad_norm": 4.401033401489258, "learning_rate": 9.875007450044706e-05, "loss": 2.2221, "step": 1325 }, { "epoch": 0.09981369615536612, "grad_norm": 6.260168552398682, "learning_rate": 9.874736425588206e-05, "loss": 2.2908, "step": 1326 }, { "epoch": 0.09988897043602626, "grad_norm": 4.534674167633057, "learning_rate": 9.874465111342612e-05, "loss": 2.2074, "step": 1327 }, { "epoch": 0.09996424471668643, "grad_norm": 6.710474014282227, "learning_rate": 9.874193507324053e-05, "loss": 2.2408, "step": 1328 }, { "epoch": 0.10003951899734659, "grad_norm": 5.775381565093994, "learning_rate": 9.873921613548675e-05, "loss": 2.2554, "step": 1329 }, { "epoch": 0.10011479327800674, "grad_norm": 5.60185432434082, "learning_rate": 9.873649430032643e-05, "loss": 2.2604, "step": 1330 }, { "epoch": 0.1001900675586669, "grad_norm": 4.707239151000977, "learning_rate": 9.873376956792137e-05, "loss": 2.4742, "step": 1331 }, { "epoch": 0.10026534183932705, "grad_norm": 6.17344856262207, "learning_rate": 9.873104193843352e-05, "loss": 2.2377, "step": 1332 }, { "epoch": 0.10034061611998721, "grad_norm": 5.033746242523193, "learning_rate": 9.872831141202508e-05, "loss": 2.5008, "step": 1333 }, { "epoch": 0.10041589040064736, "grad_norm": 4.9681010246276855, "learning_rate": 9.872557798885833e-05, "loss": 2.1991, "step": 1334 }, { "epoch": 0.10049116468130752, "grad_norm": 5.089704990386963, "learning_rate": 9.872284166909578e-05, "loss": 2.2982, "step": 1335 }, { "epoch": 0.10056643896196767, "grad_norm": 4.939492702484131, "learning_rate": 9.872010245290009e-05, "loss": 2.2759, "step": 1336 }, { "epoch": 0.10064171324262783, "grad_norm": 7.233314037322998, "learning_rate": 9.87173603404341e-05, "loss": 2.1301, "step": 1337 }, { "epoch": 0.10071698752328798, "grad_norm": 4.265902042388916, "learning_rate": 9.871461533186083e-05, "loss": 2.1499, "step": 1338 }, { "epoch": 0.10079226180394814, "grad_norm": 5.709811210632324, "learning_rate": 9.871186742734345e-05, "loss": 2.3327, "step": 1339 }, { "epoch": 0.10086753608460829, "grad_norm": 6.284304141998291, "learning_rate": 9.870911662704532e-05, "loss": 2.1441, "step": 1340 }, { "epoch": 0.10094281036526845, "grad_norm": 4.845076084136963, "learning_rate": 9.870636293112999e-05, "loss": 2.5536, "step": 1341 }, { "epoch": 0.1010180846459286, "grad_norm": 5.140475749969482, "learning_rate": 9.870360633976111e-05, "loss": 2.0125, "step": 1342 }, { "epoch": 0.10109335892658876, "grad_norm": 6.250215530395508, "learning_rate": 9.870084685310259e-05, "loss": 2.3919, "step": 1343 }, { "epoch": 0.1011686332072489, "grad_norm": 5.205529689788818, "learning_rate": 9.869808447131847e-05, "loss": 1.9747, "step": 1344 }, { "epoch": 0.10124390748790907, "grad_norm": 5.132730007171631, "learning_rate": 9.869531919457296e-05, "loss": 2.1715, "step": 1345 }, { "epoch": 0.10131918176856923, "grad_norm": 6.4714508056640625, "learning_rate": 9.869255102303044e-05, "loss": 2.6872, "step": 1346 }, { "epoch": 0.10139445604922938, "grad_norm": 5.070152759552002, "learning_rate": 9.868977995685547e-05, "loss": 1.7995, "step": 1347 }, { "epoch": 0.10146973032988954, "grad_norm": 4.949240684509277, "learning_rate": 9.868700599621279e-05, "loss": 2.0117, "step": 1348 }, { "epoch": 0.10154500461054969, "grad_norm": 4.624660968780518, "learning_rate": 9.868422914126731e-05, "loss": 2.2539, "step": 1349 }, { "epoch": 0.10162027889120985, "grad_norm": 4.8336052894592285, "learning_rate": 9.86814493921841e-05, "loss": 2.418, "step": 1350 }, { "epoch": 0.10169555317187, "grad_norm": 6.691450595855713, "learning_rate": 9.86786667491284e-05, "loss": 2.0609, "step": 1351 }, { "epoch": 0.10177082745253016, "grad_norm": 5.7258806228637695, "learning_rate": 9.867588121226563e-05, "loss": 2.1486, "step": 1352 }, { "epoch": 0.10184610173319031, "grad_norm": 7.780943870544434, "learning_rate": 9.86730927817614e-05, "loss": 2.1588, "step": 1353 }, { "epoch": 0.10192137601385047, "grad_norm": 5.153010368347168, "learning_rate": 9.867030145778147e-05, "loss": 2.3842, "step": 1354 }, { "epoch": 0.10199665029451062, "grad_norm": 8.258830070495605, "learning_rate": 9.866750724049175e-05, "loss": 2.2651, "step": 1355 }, { "epoch": 0.10207192457517078, "grad_norm": 5.167582988739014, "learning_rate": 9.866471013005838e-05, "loss": 2.0271, "step": 1356 }, { "epoch": 0.10214719885583093, "grad_norm": 8.057304382324219, "learning_rate": 9.866191012664763e-05, "loss": 2.4244, "step": 1357 }, { "epoch": 0.10222247313649109, "grad_norm": 6.899253845214844, "learning_rate": 9.865910723042593e-05, "loss": 2.2179, "step": 1358 }, { "epoch": 0.10229774741715124, "grad_norm": 4.171437740325928, "learning_rate": 9.865630144155994e-05, "loss": 2.4336, "step": 1359 }, { "epoch": 0.1023730216978114, "grad_norm": 4.7057414054870605, "learning_rate": 9.865349276021644e-05, "loss": 2.007, "step": 1360 }, { "epoch": 0.10244829597847156, "grad_norm": 6.168985843658447, "learning_rate": 9.86506811865624e-05, "loss": 2.0854, "step": 1361 }, { "epoch": 0.10252357025913171, "grad_norm": 5.492602348327637, "learning_rate": 9.864786672076495e-05, "loss": 1.8508, "step": 1362 }, { "epoch": 0.10259884453979187, "grad_norm": 4.3562164306640625, "learning_rate": 9.864504936299141e-05, "loss": 2.13, "step": 1363 }, { "epoch": 0.10267411882045202, "grad_norm": 4.802323818206787, "learning_rate": 9.864222911340926e-05, "loss": 2.0242, "step": 1364 }, { "epoch": 0.10274939310111218, "grad_norm": 4.20440149307251, "learning_rate": 9.863940597218617e-05, "loss": 2.3471, "step": 1365 }, { "epoch": 0.10282466738177233, "grad_norm": 4.615967273712158, "learning_rate": 9.863657993948995e-05, "loss": 1.9809, "step": 1366 }, { "epoch": 0.10289994166243249, "grad_norm": 6.9775919914245605, "learning_rate": 9.863375101548862e-05, "loss": 1.9167, "step": 1367 }, { "epoch": 0.10297521594309264, "grad_norm": 4.219887733459473, "learning_rate": 9.863091920035033e-05, "loss": 2.0872, "step": 1368 }, { "epoch": 0.1030504902237528, "grad_norm": 4.816910266876221, "learning_rate": 9.862808449424341e-05, "loss": 2.1472, "step": 1369 }, { "epoch": 0.10312576450441295, "grad_norm": 5.884079933166504, "learning_rate": 9.862524689733644e-05, "loss": 2.058, "step": 1370 }, { "epoch": 0.10320103878507311, "grad_norm": 7.90699577331543, "learning_rate": 9.862240640979803e-05, "loss": 2.2629, "step": 1371 }, { "epoch": 0.10327631306573326, "grad_norm": 6.91544771194458, "learning_rate": 9.861956303179707e-05, "loss": 2.0087, "step": 1372 }, { "epoch": 0.10335158734639342, "grad_norm": 7.192230701446533, "learning_rate": 9.86167167635026e-05, "loss": 2.0257, "step": 1373 }, { "epoch": 0.10342686162705357, "grad_norm": 5.880402565002441, "learning_rate": 9.861386760508382e-05, "loss": 2.5124, "step": 1374 }, { "epoch": 0.10350213590771373, "grad_norm": 5.4257283210754395, "learning_rate": 9.86110155567101e-05, "loss": 2.2905, "step": 1375 }, { "epoch": 0.10357741018837388, "grad_norm": 5.242028713226318, "learning_rate": 9.860816061855099e-05, "loss": 2.1021, "step": 1376 }, { "epoch": 0.10365268446903404, "grad_norm": 6.060970306396484, "learning_rate": 9.86053027907762e-05, "loss": 2.3967, "step": 1377 }, { "epoch": 0.1037279587496942, "grad_norm": 5.679944038391113, "learning_rate": 9.860244207355562e-05, "loss": 2.2894, "step": 1378 }, { "epoch": 0.10380323303035435, "grad_norm": 6.1499104499816895, "learning_rate": 9.859957846705931e-05, "loss": 2.0166, "step": 1379 }, { "epoch": 0.10387850731101451, "grad_norm": 5.7871198654174805, "learning_rate": 9.859671197145753e-05, "loss": 2.6949, "step": 1380 }, { "epoch": 0.10395378159167466, "grad_norm": 4.203625202178955, "learning_rate": 9.859384258692064e-05, "loss": 2.3076, "step": 1381 }, { "epoch": 0.10402905587233482, "grad_norm": 7.567563533782959, "learning_rate": 9.859097031361924e-05, "loss": 2.1866, "step": 1382 }, { "epoch": 0.10410433015299497, "grad_norm": 3.7766213417053223, "learning_rate": 9.858809515172409e-05, "loss": 2.1515, "step": 1383 }, { "epoch": 0.10417960443365513, "grad_norm": 6.01621675491333, "learning_rate": 9.858521710140609e-05, "loss": 1.9643, "step": 1384 }, { "epoch": 0.10425487871431528, "grad_norm": 5.394628047943115, "learning_rate": 9.858233616283634e-05, "loss": 1.8145, "step": 1385 }, { "epoch": 0.10433015299497544, "grad_norm": 5.330570697784424, "learning_rate": 9.85794523361861e-05, "loss": 2.1916, "step": 1386 }, { "epoch": 0.10440542727563559, "grad_norm": 4.793075084686279, "learning_rate": 9.857656562162683e-05, "loss": 2.3454, "step": 1387 }, { "epoch": 0.10448070155629575, "grad_norm": 7.11953592300415, "learning_rate": 9.85736760193301e-05, "loss": 2.3438, "step": 1388 }, { "epoch": 0.1045559758369559, "grad_norm": 5.012604713439941, "learning_rate": 9.85707835294677e-05, "loss": 2.3994, "step": 1389 }, { "epoch": 0.10463125011761606, "grad_norm": 9.111664772033691, "learning_rate": 9.856788815221159e-05, "loss": 2.4086, "step": 1390 }, { "epoch": 0.10470652439827621, "grad_norm": 5.258410930633545, "learning_rate": 9.856498988773389e-05, "loss": 2.0269, "step": 1391 }, { "epoch": 0.10478179867893637, "grad_norm": 8.604662895202637, "learning_rate": 9.856208873620688e-05, "loss": 2.3107, "step": 1392 }, { "epoch": 0.10485707295959654, "grad_norm": 5.09596061706543, "learning_rate": 9.855918469780303e-05, "loss": 2.0576, "step": 1393 }, { "epoch": 0.10493234724025668, "grad_norm": 5.101263999938965, "learning_rate": 9.8556277772695e-05, "loss": 2.2593, "step": 1394 }, { "epoch": 0.10500762152091685, "grad_norm": 4.718286514282227, "learning_rate": 9.855336796105556e-05, "loss": 2.382, "step": 1395 }, { "epoch": 0.105082895801577, "grad_norm": 5.197150230407715, "learning_rate": 9.855045526305773e-05, "loss": 2.3176, "step": 1396 }, { "epoch": 0.10515817008223716, "grad_norm": 5.26522970199585, "learning_rate": 9.854753967887465e-05, "loss": 2.1254, "step": 1397 }, { "epoch": 0.1052334443628973, "grad_norm": 6.0825419425964355, "learning_rate": 9.85446212086796e-05, "loss": 1.9642, "step": 1398 }, { "epoch": 0.10530871864355747, "grad_norm": 4.064992904663086, "learning_rate": 9.854169985264614e-05, "loss": 2.183, "step": 1399 }, { "epoch": 0.10538399292421761, "grad_norm": 5.792464256286621, "learning_rate": 9.853877561094789e-05, "loss": 2.3714, "step": 1400 }, { "epoch": 0.10545926720487778, "grad_norm": 5.2079758644104, "learning_rate": 9.85358484837587e-05, "loss": 2.5722, "step": 1401 }, { "epoch": 0.10553454148553792, "grad_norm": 4.400840759277344, "learning_rate": 9.853291847125258e-05, "loss": 2.7732, "step": 1402 }, { "epoch": 0.10560981576619809, "grad_norm": 5.1285834312438965, "learning_rate": 9.852998557360373e-05, "loss": 2.3474, "step": 1403 }, { "epoch": 0.10568509004685823, "grad_norm": 4.393636703491211, "learning_rate": 9.852704979098648e-05, "loss": 2.2061, "step": 1404 }, { "epoch": 0.1057603643275184, "grad_norm": 6.350622653961182, "learning_rate": 9.852411112357536e-05, "loss": 2.113, "step": 1405 }, { "epoch": 0.10583563860817854, "grad_norm": 6.430047035217285, "learning_rate": 9.852116957154505e-05, "loss": 2.0917, "step": 1406 }, { "epoch": 0.1059109128888387, "grad_norm": 5.039078712463379, "learning_rate": 9.851822513507045e-05, "loss": 2.4053, "step": 1407 }, { "epoch": 0.10598618716949885, "grad_norm": 4.880955696105957, "learning_rate": 9.851527781432657e-05, "loss": 2.3186, "step": 1408 }, { "epoch": 0.10606146145015902, "grad_norm": 5.812760829925537, "learning_rate": 9.851232760948864e-05, "loss": 1.9694, "step": 1409 }, { "epoch": 0.10613673573081918, "grad_norm": 4.599240303039551, "learning_rate": 9.850937452073203e-05, "loss": 2.4078, "step": 1410 }, { "epoch": 0.10621201001147933, "grad_norm": 4.970304489135742, "learning_rate": 9.850641854823228e-05, "loss": 2.14, "step": 1411 }, { "epoch": 0.10628728429213949, "grad_norm": 5.6756086349487305, "learning_rate": 9.850345969216514e-05, "loss": 2.157, "step": 1412 }, { "epoch": 0.10636255857279964, "grad_norm": 4.906312465667725, "learning_rate": 9.850049795270649e-05, "loss": 2.2399, "step": 1413 }, { "epoch": 0.1064378328534598, "grad_norm": 4.625144958496094, "learning_rate": 9.84975333300324e-05, "loss": 2.2531, "step": 1414 }, { "epoch": 0.10651310713411995, "grad_norm": 4.526831150054932, "learning_rate": 9.84945658243191e-05, "loss": 2.4922, "step": 1415 }, { "epoch": 0.10658838141478011, "grad_norm": 5.445694923400879, "learning_rate": 9.849159543574302e-05, "loss": 2.0667, "step": 1416 }, { "epoch": 0.10666365569544026, "grad_norm": 3.5116755962371826, "learning_rate": 9.848862216448074e-05, "loss": 2.1409, "step": 1417 }, { "epoch": 0.10673892997610042, "grad_norm": 7.167372703552246, "learning_rate": 9.848564601070897e-05, "loss": 2.0867, "step": 1418 }, { "epoch": 0.10681420425676057, "grad_norm": 5.482428073883057, "learning_rate": 9.848266697460469e-05, "loss": 2.4357, "step": 1419 }, { "epoch": 0.10688947853742073, "grad_norm": 5.8753180503845215, "learning_rate": 9.847968505634497e-05, "loss": 2.3368, "step": 1420 }, { "epoch": 0.10696475281808088, "grad_norm": 4.832120895385742, "learning_rate": 9.847670025610707e-05, "loss": 1.996, "step": 1421 }, { "epoch": 0.10704002709874104, "grad_norm": 6.677201747894287, "learning_rate": 9.847371257406843e-05, "loss": 2.5662, "step": 1422 }, { "epoch": 0.10711530137940119, "grad_norm": 4.344008445739746, "learning_rate": 9.847072201040667e-05, "loss": 2.2108, "step": 1423 }, { "epoch": 0.10719057566006135, "grad_norm": 6.102686405181885, "learning_rate": 9.846772856529958e-05, "loss": 2.3959, "step": 1424 }, { "epoch": 0.1072658499407215, "grad_norm": 7.902398586273193, "learning_rate": 9.846473223892508e-05, "loss": 2.1278, "step": 1425 }, { "epoch": 0.10734112422138166, "grad_norm": 5.588354110717773, "learning_rate": 9.846173303146131e-05, "loss": 2.3202, "step": 1426 }, { "epoch": 0.10741639850204182, "grad_norm": 6.013638973236084, "learning_rate": 9.845873094308656e-05, "loss": 2.1313, "step": 1427 }, { "epoch": 0.10749167278270197, "grad_norm": 4.5509443283081055, "learning_rate": 9.84557259739793e-05, "loss": 2.388, "step": 1428 }, { "epoch": 0.10756694706336213, "grad_norm": 4.707016468048096, "learning_rate": 9.845271812431817e-05, "loss": 2.4723, "step": 1429 }, { "epoch": 0.10764222134402228, "grad_norm": 6.561839580535889, "learning_rate": 9.844970739428199e-05, "loss": 2.0649, "step": 1430 }, { "epoch": 0.10771749562468244, "grad_norm": 5.726894378662109, "learning_rate": 9.844669378404969e-05, "loss": 2.1675, "step": 1431 }, { "epoch": 0.10779276990534259, "grad_norm": 7.741239547729492, "learning_rate": 9.844367729380049e-05, "loss": 2.2878, "step": 1432 }, { "epoch": 0.10786804418600275, "grad_norm": 10.629728317260742, "learning_rate": 9.844065792371364e-05, "loss": 2.5216, "step": 1433 }, { "epoch": 0.1079433184666629, "grad_norm": 4.772575378417969, "learning_rate": 9.843763567396868e-05, "loss": 2.0712, "step": 1434 }, { "epoch": 0.10801859274732306, "grad_norm": 4.807673931121826, "learning_rate": 9.843461054474528e-05, "loss": 2.0015, "step": 1435 }, { "epoch": 0.10809386702798321, "grad_norm": 5.960400104522705, "learning_rate": 9.843158253622325e-05, "loss": 2.1179, "step": 1436 }, { "epoch": 0.10816914130864337, "grad_norm": 4.638011932373047, "learning_rate": 9.84285516485826e-05, "loss": 2.0644, "step": 1437 }, { "epoch": 0.10824441558930352, "grad_norm": 5.544718265533447, "learning_rate": 9.84255178820035e-05, "loss": 2.3577, "step": 1438 }, { "epoch": 0.10831968986996368, "grad_norm": 5.5586371421813965, "learning_rate": 9.842248123666632e-05, "loss": 2.5347, "step": 1439 }, { "epoch": 0.10839496415062383, "grad_norm": 4.557663440704346, "learning_rate": 9.841944171275157e-05, "loss": 2.1664, "step": 1440 }, { "epoch": 0.10847023843128399, "grad_norm": 4.60097599029541, "learning_rate": 9.841639931043994e-05, "loss": 2.2154, "step": 1441 }, { "epoch": 0.10854551271194415, "grad_norm": 4.8958282470703125, "learning_rate": 9.84133540299123e-05, "loss": 2.2027, "step": 1442 }, { "epoch": 0.1086207869926043, "grad_norm": 6.68956995010376, "learning_rate": 9.841030587134967e-05, "loss": 2.2456, "step": 1443 }, { "epoch": 0.10869606127326446, "grad_norm": 4.5504889488220215, "learning_rate": 9.840725483493325e-05, "loss": 2.1168, "step": 1444 }, { "epoch": 0.10877133555392461, "grad_norm": 4.70060920715332, "learning_rate": 9.840420092084443e-05, "loss": 2.0425, "step": 1445 }, { "epoch": 0.10884660983458477, "grad_norm": 4.551839828491211, "learning_rate": 9.840114412926478e-05, "loss": 2.1641, "step": 1446 }, { "epoch": 0.10892188411524492, "grad_norm": 5.133751392364502, "learning_rate": 9.839808446037598e-05, "loss": 2.4, "step": 1447 }, { "epoch": 0.10899715839590508, "grad_norm": 4.662445068359375, "learning_rate": 9.83950219143599e-05, "loss": 2.261, "step": 1448 }, { "epoch": 0.10907243267656523, "grad_norm": 7.807523250579834, "learning_rate": 9.839195649139864e-05, "loss": 2.3798, "step": 1449 }, { "epoch": 0.1091477069572254, "grad_norm": 5.682362079620361, "learning_rate": 9.838888819167443e-05, "loss": 2.3878, "step": 1450 }, { "epoch": 0.10922298123788554, "grad_norm": 5.008208274841309, "learning_rate": 9.838581701536967e-05, "loss": 2.1507, "step": 1451 }, { "epoch": 0.1092982555185457, "grad_norm": 5.632689952850342, "learning_rate": 9.83827429626669e-05, "loss": 1.7746, "step": 1452 }, { "epoch": 0.10937352979920585, "grad_norm": 3.6657118797302246, "learning_rate": 9.837966603374889e-05, "loss": 2.3033, "step": 1453 }, { "epoch": 0.10944880407986601, "grad_norm": 5.824965000152588, "learning_rate": 9.837658622879856e-05, "loss": 2.4717, "step": 1454 }, { "epoch": 0.10952407836052616, "grad_norm": 4.90222692489624, "learning_rate": 9.8373503547999e-05, "loss": 2.1955, "step": 1455 }, { "epoch": 0.10959935264118632, "grad_norm": 3.9360909461975098, "learning_rate": 9.837041799153342e-05, "loss": 2.1443, "step": 1456 }, { "epoch": 0.10967462692184647, "grad_norm": 5.997777938842773, "learning_rate": 9.83673295595853e-05, "loss": 2.4089, "step": 1457 }, { "epoch": 0.10974990120250663, "grad_norm": 7.144850730895996, "learning_rate": 9.836423825233821e-05, "loss": 2.2587, "step": 1458 }, { "epoch": 0.1098251754831668, "grad_norm": 6.629261493682861, "learning_rate": 9.836114406997594e-05, "loss": 2.4092, "step": 1459 }, { "epoch": 0.10990044976382694, "grad_norm": 5.653792858123779, "learning_rate": 9.835804701268241e-05, "loss": 2.1819, "step": 1460 }, { "epoch": 0.1099757240444871, "grad_norm": 5.176329612731934, "learning_rate": 9.835494708064175e-05, "loss": 2.0457, "step": 1461 }, { "epoch": 0.11005099832514725, "grad_norm": 4.583652973175049, "learning_rate": 9.835184427403822e-05, "loss": 2.1199, "step": 1462 }, { "epoch": 0.11012627260580742, "grad_norm": 7.92254638671875, "learning_rate": 9.834873859305629e-05, "loss": 2.831, "step": 1463 }, { "epoch": 0.11020154688646756, "grad_norm": 4.404693126678467, "learning_rate": 9.834563003788057e-05, "loss": 2.0809, "step": 1464 }, { "epoch": 0.11027682116712773, "grad_norm": 3.798464775085449, "learning_rate": 9.834251860869588e-05, "loss": 2.3996, "step": 1465 }, { "epoch": 0.11035209544778787, "grad_norm": 6.599026203155518, "learning_rate": 9.833940430568715e-05, "loss": 2.1482, "step": 1466 }, { "epoch": 0.11042736972844804, "grad_norm": 5.410624980926514, "learning_rate": 9.833628712903957e-05, "loss": 2.271, "step": 1467 }, { "epoch": 0.11050264400910818, "grad_norm": 4.607469081878662, "learning_rate": 9.833316707893838e-05, "loss": 2.2432, "step": 1468 }, { "epoch": 0.11057791828976835, "grad_norm": 5.609137535095215, "learning_rate": 9.83300441555691e-05, "loss": 2.2847, "step": 1469 }, { "epoch": 0.1106531925704285, "grad_norm": 4.124760627746582, "learning_rate": 9.832691835911736e-05, "loss": 2.2725, "step": 1470 }, { "epoch": 0.11072846685108866, "grad_norm": 6.28630256652832, "learning_rate": 9.832378968976901e-05, "loss": 2.1676, "step": 1471 }, { "epoch": 0.1108037411317488, "grad_norm": 5.398787975311279, "learning_rate": 9.832065814771e-05, "loss": 2.1924, "step": 1472 }, { "epoch": 0.11087901541240897, "grad_norm": 4.385289192199707, "learning_rate": 9.831752373312653e-05, "loss": 2.2452, "step": 1473 }, { "epoch": 0.11095428969306913, "grad_norm": 4.326237201690674, "learning_rate": 9.831438644620489e-05, "loss": 2.2244, "step": 1474 }, { "epoch": 0.11102956397372928, "grad_norm": 5.806609153747559, "learning_rate": 9.831124628713161e-05, "loss": 1.7286, "step": 1475 }, { "epoch": 0.11110483825438944, "grad_norm": 5.2352399826049805, "learning_rate": 9.830810325609337e-05, "loss": 2.1993, "step": 1476 }, { "epoch": 0.11118011253504959, "grad_norm": 5.491091728210449, "learning_rate": 9.8304957353277e-05, "loss": 2.1426, "step": 1477 }, { "epoch": 0.11125538681570975, "grad_norm": 4.986297130584717, "learning_rate": 9.830180857886952e-05, "loss": 2.0211, "step": 1478 }, { "epoch": 0.1113306610963699, "grad_norm": 4.85268497467041, "learning_rate": 9.829865693305811e-05, "loss": 2.2682, "step": 1479 }, { "epoch": 0.11140593537703006, "grad_norm": 5.306323051452637, "learning_rate": 9.829550241603012e-05, "loss": 2.2754, "step": 1480 }, { "epoch": 0.1114812096576902, "grad_norm": 4.7514777183532715, "learning_rate": 9.82923450279731e-05, "loss": 2.2996, "step": 1481 }, { "epoch": 0.11155648393835037, "grad_norm": 7.218596935272217, "learning_rate": 9.828918476907472e-05, "loss": 2.0388, "step": 1482 }, { "epoch": 0.11163175821901052, "grad_norm": 5.108249664306641, "learning_rate": 9.828602163952287e-05, "loss": 2.0017, "step": 1483 }, { "epoch": 0.11170703249967068, "grad_norm": 6.2356109619140625, "learning_rate": 9.828285563950558e-05, "loss": 2.1725, "step": 1484 }, { "epoch": 0.11178230678033083, "grad_norm": 5.727863311767578, "learning_rate": 9.827968676921108e-05, "loss": 2.8907, "step": 1485 }, { "epoch": 0.11185758106099099, "grad_norm": 5.71699333190918, "learning_rate": 9.827651502882771e-05, "loss": 2.2345, "step": 1486 }, { "epoch": 0.11193285534165114, "grad_norm": 6.761249542236328, "learning_rate": 9.827334041854406e-05, "loss": 2.2687, "step": 1487 }, { "epoch": 0.1120081296223113, "grad_norm": 6.661614418029785, "learning_rate": 9.827016293854882e-05, "loss": 2.1393, "step": 1488 }, { "epoch": 0.11208340390297145, "grad_norm": 6.173034191131592, "learning_rate": 9.82669825890309e-05, "loss": 2.2199, "step": 1489 }, { "epoch": 0.11215867818363161, "grad_norm": 4.6556620597839355, "learning_rate": 9.826379937017936e-05, "loss": 3.0586, "step": 1490 }, { "epoch": 0.11223395246429177, "grad_norm": 4.349430084228516, "learning_rate": 9.826061328218342e-05, "loss": 2.5344, "step": 1491 }, { "epoch": 0.11230922674495192, "grad_norm": 5.721402645111084, "learning_rate": 9.825742432523251e-05, "loss": 2.1835, "step": 1492 }, { "epoch": 0.11238450102561208, "grad_norm": 5.224651336669922, "learning_rate": 9.825423249951618e-05, "loss": 2.1935, "step": 1493 }, { "epoch": 0.11245977530627223, "grad_norm": 5.001589775085449, "learning_rate": 9.82510378052242e-05, "loss": 2.1997, "step": 1494 }, { "epoch": 0.11253504958693239, "grad_norm": 5.430053234100342, "learning_rate": 9.824784024254645e-05, "loss": 2.1947, "step": 1495 }, { "epoch": 0.11261032386759254, "grad_norm": 6.980978965759277, "learning_rate": 9.824463981167305e-05, "loss": 2.3193, "step": 1496 }, { "epoch": 0.1126855981482527, "grad_norm": 6.289052486419678, "learning_rate": 9.824143651279424e-05, "loss": 1.9883, "step": 1497 }, { "epoch": 0.11276087242891285, "grad_norm": 4.463650703430176, "learning_rate": 9.823823034610045e-05, "loss": 1.9558, "step": 1498 }, { "epoch": 0.11283614670957301, "grad_norm": 5.06191349029541, "learning_rate": 9.823502131178226e-05, "loss": 2.2293, "step": 1499 }, { "epoch": 0.11291142099023316, "grad_norm": 4.8617167472839355, "learning_rate": 9.823180941003048e-05, "loss": 2.194, "step": 1500 }, { "epoch": 0.11298669527089332, "grad_norm": 4.87706184387207, "learning_rate": 9.822859464103602e-05, "loss": 2.1378, "step": 1501 }, { "epoch": 0.11306196955155347, "grad_norm": 6.628759860992432, "learning_rate": 9.822537700498999e-05, "loss": 2.1634, "step": 1502 }, { "epoch": 0.11313724383221363, "grad_norm": 4.992720603942871, "learning_rate": 9.822215650208367e-05, "loss": 1.8693, "step": 1503 }, { "epoch": 0.11321251811287378, "grad_norm": 4.385207653045654, "learning_rate": 9.821893313250849e-05, "loss": 2.4103, "step": 1504 }, { "epoch": 0.11328779239353394, "grad_norm": 4.952878475189209, "learning_rate": 9.821570689645612e-05, "loss": 2.3154, "step": 1505 }, { "epoch": 0.11336306667419409, "grad_norm": 7.591663837432861, "learning_rate": 9.82124777941183e-05, "loss": 2.9471, "step": 1506 }, { "epoch": 0.11343834095485425, "grad_norm": 5.995678901672363, "learning_rate": 9.820924582568703e-05, "loss": 2.2399, "step": 1507 }, { "epoch": 0.11351361523551441, "grad_norm": 5.2290496826171875, "learning_rate": 9.820601099135441e-05, "loss": 2.1638, "step": 1508 }, { "epoch": 0.11358888951617456, "grad_norm": 6.682706832885742, "learning_rate": 9.820277329131276e-05, "loss": 2.3476, "step": 1509 }, { "epoch": 0.11366416379683472, "grad_norm": 5.347593784332275, "learning_rate": 9.819953272575455e-05, "loss": 1.9392, "step": 1510 }, { "epoch": 0.11373943807749487, "grad_norm": 6.090190410614014, "learning_rate": 9.81962892948724e-05, "loss": 2.4825, "step": 1511 }, { "epoch": 0.11381471235815503, "grad_norm": 5.386474609375, "learning_rate": 9.819304299885918e-05, "loss": 2.4159, "step": 1512 }, { "epoch": 0.11388998663881518, "grad_norm": 7.41466760635376, "learning_rate": 9.818979383790781e-05, "loss": 2.2016, "step": 1513 }, { "epoch": 0.11396526091947534, "grad_norm": 6.078017711639404, "learning_rate": 9.818654181221147e-05, "loss": 1.9737, "step": 1514 }, { "epoch": 0.11404053520013549, "grad_norm": 5.459829807281494, "learning_rate": 9.818328692196347e-05, "loss": 2.411, "step": 1515 }, { "epoch": 0.11411580948079565, "grad_norm": 4.553897857666016, "learning_rate": 9.818002916735734e-05, "loss": 2.336, "step": 1516 }, { "epoch": 0.1141910837614558, "grad_norm": 6.528814792633057, "learning_rate": 9.81767685485867e-05, "loss": 2.4098, "step": 1517 }, { "epoch": 0.11426635804211596, "grad_norm": 12.733213424682617, "learning_rate": 9.817350506584543e-05, "loss": 2.3403, "step": 1518 }, { "epoch": 0.11434163232277611, "grad_norm": 6.481761455535889, "learning_rate": 9.817023871932749e-05, "loss": 2.187, "step": 1519 }, { "epoch": 0.11441690660343627, "grad_norm": 5.830627918243408, "learning_rate": 9.816696950922709e-05, "loss": 2.1685, "step": 1520 }, { "epoch": 0.11449218088409642, "grad_norm": 4.158799171447754, "learning_rate": 9.816369743573853e-05, "loss": 2.2313, "step": 1521 }, { "epoch": 0.11456745516475658, "grad_norm": 5.076837062835693, "learning_rate": 9.816042249905637e-05, "loss": 2.0637, "step": 1522 }, { "epoch": 0.11464272944541674, "grad_norm": 4.538496017456055, "learning_rate": 9.815714469937529e-05, "loss": 2.045, "step": 1523 }, { "epoch": 0.11471800372607689, "grad_norm": 5.56032657623291, "learning_rate": 9.815386403689013e-05, "loss": 1.7054, "step": 1524 }, { "epoch": 0.11479327800673705, "grad_norm": 4.467593193054199, "learning_rate": 9.815058051179591e-05, "loss": 2.1045, "step": 1525 }, { "epoch": 0.1148685522873972, "grad_norm": 4.002077579498291, "learning_rate": 9.814729412428785e-05, "loss": 2.0444, "step": 1526 }, { "epoch": 0.11494382656805736, "grad_norm": 4.604103088378906, "learning_rate": 9.81440048745613e-05, "loss": 2.5774, "step": 1527 }, { "epoch": 0.11501910084871751, "grad_norm": 4.999310493469238, "learning_rate": 9.81407127628118e-05, "loss": 2.2775, "step": 1528 }, { "epoch": 0.11509437512937767, "grad_norm": 4.501382827758789, "learning_rate": 9.813741778923506e-05, "loss": 2.1369, "step": 1529 }, { "epoch": 0.11516964941003782, "grad_norm": 6.101622581481934, "learning_rate": 9.813411995402697e-05, "loss": 2.5277, "step": 1530 }, { "epoch": 0.11524492369069798, "grad_norm": 4.6172566413879395, "learning_rate": 9.813081925738354e-05, "loss": 2.0067, "step": 1531 }, { "epoch": 0.11532019797135813, "grad_norm": 5.320746421813965, "learning_rate": 9.812751569950101e-05, "loss": 2.1142, "step": 1532 }, { "epoch": 0.1153954722520183, "grad_norm": 7.294499397277832, "learning_rate": 9.812420928057578e-05, "loss": 2.0137, "step": 1533 }, { "epoch": 0.11547074653267844, "grad_norm": 6.202102184295654, "learning_rate": 9.812090000080437e-05, "loss": 3.0938, "step": 1534 }, { "epoch": 0.1155460208133386, "grad_norm": 4.939585208892822, "learning_rate": 9.811758786038354e-05, "loss": 2.2617, "step": 1535 }, { "epoch": 0.11562129509399875, "grad_norm": 6.0335187911987305, "learning_rate": 9.811427285951018e-05, "loss": 2.3668, "step": 1536 }, { "epoch": 0.11569656937465891, "grad_norm": 5.094693183898926, "learning_rate": 9.811095499838134e-05, "loss": 2.2623, "step": 1537 }, { "epoch": 0.11577184365531906, "grad_norm": 5.980704307556152, "learning_rate": 9.81076342771943e-05, "loss": 2.188, "step": 1538 }, { "epoch": 0.11584711793597922, "grad_norm": 5.815098762512207, "learning_rate": 9.81043106961464e-05, "loss": 2.2217, "step": 1539 }, { "epoch": 0.11592239221663939, "grad_norm": 4.305836200714111, "learning_rate": 9.810098425543528e-05, "loss": 2.3161, "step": 1540 }, { "epoch": 0.11599766649729953, "grad_norm": 7.828603744506836, "learning_rate": 9.809765495525867e-05, "loss": 2.0805, "step": 1541 }, { "epoch": 0.1160729407779597, "grad_norm": 6.011185169219971, "learning_rate": 9.809432279581446e-05, "loss": 2.4162, "step": 1542 }, { "epoch": 0.11614821505861984, "grad_norm": 5.189256191253662, "learning_rate": 9.809098777730077e-05, "loss": 2.3483, "step": 1543 }, { "epoch": 0.11622348933928, "grad_norm": 5.920383453369141, "learning_rate": 9.808764989991585e-05, "loss": 2.3059, "step": 1544 }, { "epoch": 0.11629876361994015, "grad_norm": 8.073580741882324, "learning_rate": 9.808430916385811e-05, "loss": 2.5592, "step": 1545 }, { "epoch": 0.11637403790060032, "grad_norm": 5.3328680992126465, "learning_rate": 9.808096556932616e-05, "loss": 2.3284, "step": 1546 }, { "epoch": 0.11644931218126046, "grad_norm": 5.027093410491943, "learning_rate": 9.807761911651878e-05, "loss": 2.0594, "step": 1547 }, { "epoch": 0.11652458646192063, "grad_norm": 4.8395209312438965, "learning_rate": 9.807426980563488e-05, "loss": 2.3464, "step": 1548 }, { "epoch": 0.11659986074258077, "grad_norm": 5.231376647949219, "learning_rate": 9.807091763687359e-05, "loss": 2.1779, "step": 1549 }, { "epoch": 0.11667513502324094, "grad_norm": 4.496547698974609, "learning_rate": 9.806756261043417e-05, "loss": 2.2298, "step": 1550 }, { "epoch": 0.11675040930390108, "grad_norm": 5.417749404907227, "learning_rate": 9.806420472651607e-05, "loss": 2.4479, "step": 1551 }, { "epoch": 0.11682568358456125, "grad_norm": 6.970733642578125, "learning_rate": 9.806084398531892e-05, "loss": 2.2433, "step": 1552 }, { "epoch": 0.1169009578652214, "grad_norm": 4.662734508514404, "learning_rate": 9.80574803870425e-05, "loss": 2.1779, "step": 1553 }, { "epoch": 0.11697623214588156, "grad_norm": 3.979790210723877, "learning_rate": 9.805411393188676e-05, "loss": 2.1901, "step": 1554 }, { "epoch": 0.11705150642654172, "grad_norm": 5.445489883422852, "learning_rate": 9.805074462005183e-05, "loss": 2.3631, "step": 1555 }, { "epoch": 0.11712678070720187, "grad_norm": 5.91757345199585, "learning_rate": 9.804737245173801e-05, "loss": 2.2131, "step": 1556 }, { "epoch": 0.11720205498786203, "grad_norm": 5.672887325286865, "learning_rate": 9.804399742714574e-05, "loss": 2.2574, "step": 1557 }, { "epoch": 0.11727732926852218, "grad_norm": 6.305829048156738, "learning_rate": 9.80406195464757e-05, "loss": 2.2388, "step": 1558 }, { "epoch": 0.11735260354918234, "grad_norm": 6.497957706451416, "learning_rate": 9.803723880992866e-05, "loss": 2.2928, "step": 1559 }, { "epoch": 0.11742787782984249, "grad_norm": 4.699962139129639, "learning_rate": 9.80338552177056e-05, "loss": 1.9581, "step": 1560 }, { "epoch": 0.11750315211050265, "grad_norm": 5.287069320678711, "learning_rate": 9.803046877000768e-05, "loss": 2.7058, "step": 1561 }, { "epoch": 0.1175784263911628, "grad_norm": 5.612594127655029, "learning_rate": 9.802707946703622e-05, "loss": 2.0045, "step": 1562 }, { "epoch": 0.11765370067182296, "grad_norm": 8.759663581848145, "learning_rate": 9.802368730899267e-05, "loss": 1.9407, "step": 1563 }, { "epoch": 0.1177289749524831, "grad_norm": 3.793367624282837, "learning_rate": 9.802029229607873e-05, "loss": 2.0367, "step": 1564 }, { "epoch": 0.11780424923314327, "grad_norm": 4.237112045288086, "learning_rate": 9.801689442849617e-05, "loss": 2.8146, "step": 1565 }, { "epoch": 0.11787952351380342, "grad_norm": 6.132881164550781, "learning_rate": 9.801349370644702e-05, "loss": 2.2959, "step": 1566 }, { "epoch": 0.11795479779446358, "grad_norm": 4.572925090789795, "learning_rate": 9.801009013013345e-05, "loss": 2.534, "step": 1567 }, { "epoch": 0.11803007207512373, "grad_norm": 8.123010635375977, "learning_rate": 9.800668369975776e-05, "loss": 2.5581, "step": 1568 }, { "epoch": 0.11810534635578389, "grad_norm": 4.87898588180542, "learning_rate": 9.800327441552248e-05, "loss": 2.8425, "step": 1569 }, { "epoch": 0.11818062063644404, "grad_norm": 5.832376003265381, "learning_rate": 9.799986227763027e-05, "loss": 1.9416, "step": 1570 }, { "epoch": 0.1182558949171042, "grad_norm": 5.254724025726318, "learning_rate": 9.799644728628398e-05, "loss": 2.1531, "step": 1571 }, { "epoch": 0.11833116919776436, "grad_norm": 8.972786903381348, "learning_rate": 9.799302944168662e-05, "loss": 2.2018, "step": 1572 }, { "epoch": 0.11840644347842451, "grad_norm": 5.709095478057861, "learning_rate": 9.798960874404135e-05, "loss": 2.0954, "step": 1573 }, { "epoch": 0.11848171775908467, "grad_norm": 5.663673400878906, "learning_rate": 9.798618519355155e-05, "loss": 2.52, "step": 1574 }, { "epoch": 0.11855699203974482, "grad_norm": 6.007000923156738, "learning_rate": 9.798275879042073e-05, "loss": 2.2824, "step": 1575 }, { "epoch": 0.11863226632040498, "grad_norm": 3.155287742614746, "learning_rate": 9.797932953485258e-05, "loss": 1.9951, "step": 1576 }, { "epoch": 0.11870754060106513, "grad_norm": 5.0395050048828125, "learning_rate": 9.797589742705094e-05, "loss": 2.5841, "step": 1577 }, { "epoch": 0.11878281488172529, "grad_norm": 3.9909863471984863, "learning_rate": 9.797246246721988e-05, "loss": 2.1333, "step": 1578 }, { "epoch": 0.11885808916238544, "grad_norm": 5.659982204437256, "learning_rate": 9.796902465556354e-05, "loss": 1.9128, "step": 1579 }, { "epoch": 0.1189333634430456, "grad_norm": 5.3229780197143555, "learning_rate": 9.796558399228635e-05, "loss": 2.0977, "step": 1580 }, { "epoch": 0.11900863772370575, "grad_norm": 4.665555000305176, "learning_rate": 9.796214047759282e-05, "loss": 2.0713, "step": 1581 }, { "epoch": 0.11908391200436591, "grad_norm": 5.370392322540283, "learning_rate": 9.795869411168766e-05, "loss": 2.1872, "step": 1582 }, { "epoch": 0.11915918628502606, "grad_norm": 5.251527786254883, "learning_rate": 9.795524489477572e-05, "loss": 2.2613, "step": 1583 }, { "epoch": 0.11923446056568622, "grad_norm": 6.221897125244141, "learning_rate": 9.79517928270621e-05, "loss": 2.9688, "step": 1584 }, { "epoch": 0.11930973484634637, "grad_norm": 6.268673419952393, "learning_rate": 9.794833790875197e-05, "loss": 2.2185, "step": 1585 }, { "epoch": 0.11938500912700653, "grad_norm": 4.565062999725342, "learning_rate": 9.794488014005072e-05, "loss": 2.1612, "step": 1586 }, { "epoch": 0.11946028340766668, "grad_norm": 5.256009578704834, "learning_rate": 9.794141952116393e-05, "loss": 2.1432, "step": 1587 }, { "epoch": 0.11953555768832684, "grad_norm": 6.327003002166748, "learning_rate": 9.793795605229731e-05, "loss": 2.2175, "step": 1588 }, { "epoch": 0.119610831968987, "grad_norm": 4.936118125915527, "learning_rate": 9.793448973365676e-05, "loss": 2.7965, "step": 1589 }, { "epoch": 0.11968610624964715, "grad_norm": 4.425882339477539, "learning_rate": 9.793102056544832e-05, "loss": 2.0861, "step": 1590 }, { "epoch": 0.11976138053030731, "grad_norm": 3.899171829223633, "learning_rate": 9.792754854787824e-05, "loss": 2.0353, "step": 1591 }, { "epoch": 0.11983665481096746, "grad_norm": 4.647331237792969, "learning_rate": 9.79240736811529e-05, "loss": 2.1953, "step": 1592 }, { "epoch": 0.11991192909162762, "grad_norm": 3.512587308883667, "learning_rate": 9.792059596547891e-05, "loss": 2.0129, "step": 1593 }, { "epoch": 0.11998720337228777, "grad_norm": 5.037900447845459, "learning_rate": 9.791711540106299e-05, "loss": 2.5432, "step": 1594 }, { "epoch": 0.12006247765294793, "grad_norm": 4.464351654052734, "learning_rate": 9.791363198811205e-05, "loss": 2.0256, "step": 1595 }, { "epoch": 0.12013775193360808, "grad_norm": 5.208583831787109, "learning_rate": 9.791014572683316e-05, "loss": 2.499, "step": 1596 }, { "epoch": 0.12021302621426824, "grad_norm": 5.325770854949951, "learning_rate": 9.79066566174336e-05, "loss": 2.0296, "step": 1597 }, { "epoch": 0.12028830049492839, "grad_norm": 4.36183500289917, "learning_rate": 9.790316466012073e-05, "loss": 2.17, "step": 1598 }, { "epoch": 0.12036357477558855, "grad_norm": 5.266951084136963, "learning_rate": 9.789966985510218e-05, "loss": 2.0156, "step": 1599 }, { "epoch": 0.1204388490562487, "grad_norm": 6.464771747589111, "learning_rate": 9.789617220258571e-05, "loss": 2.1139, "step": 1600 }, { "epoch": 0.12051412333690886, "grad_norm": 6.859620094299316, "learning_rate": 9.789267170277922e-05, "loss": 2.3087, "step": 1601 }, { "epoch": 0.12058939761756901, "grad_norm": 5.349453926086426, "learning_rate": 9.788916835589082e-05, "loss": 1.9592, "step": 1602 }, { "epoch": 0.12066467189822917, "grad_norm": 4.6302876472473145, "learning_rate": 9.788566216212876e-05, "loss": 2.2156, "step": 1603 }, { "epoch": 0.12073994617888933, "grad_norm": 5.097748756408691, "learning_rate": 9.78821531217015e-05, "loss": 2.0748, "step": 1604 }, { "epoch": 0.12081522045954948, "grad_norm": 7.0460333824157715, "learning_rate": 9.787864123481763e-05, "loss": 2.5018, "step": 1605 }, { "epoch": 0.12089049474020964, "grad_norm": 7.261343955993652, "learning_rate": 9.78751265016859e-05, "loss": 2.2595, "step": 1606 }, { "epoch": 0.12096576902086979, "grad_norm": 5.592549800872803, "learning_rate": 9.787160892251527e-05, "loss": 2.4875, "step": 1607 }, { "epoch": 0.12104104330152995, "grad_norm": 6.423880577087402, "learning_rate": 9.786808849751486e-05, "loss": 2.3915, "step": 1608 }, { "epoch": 0.1211163175821901, "grad_norm": 8.358610153198242, "learning_rate": 9.786456522689393e-05, "loss": 2.2105, "step": 1609 }, { "epoch": 0.12119159186285026, "grad_norm": 5.989588737487793, "learning_rate": 9.786103911086195e-05, "loss": 2.0286, "step": 1610 }, { "epoch": 0.12126686614351041, "grad_norm": 7.00147008895874, "learning_rate": 9.785751014962852e-05, "loss": 2.4252, "step": 1611 }, { "epoch": 0.12134214042417057, "grad_norm": 4.66259241104126, "learning_rate": 9.785397834340341e-05, "loss": 2.148, "step": 1612 }, { "epoch": 0.12141741470483072, "grad_norm": 4.299710273742676, "learning_rate": 9.785044369239662e-05, "loss": 2.4166, "step": 1613 }, { "epoch": 0.12149268898549088, "grad_norm": 5.795466899871826, "learning_rate": 9.784690619681824e-05, "loss": 2.4665, "step": 1614 }, { "epoch": 0.12156796326615103, "grad_norm": 4.6185150146484375, "learning_rate": 9.784336585687857e-05, "loss": 2.0793, "step": 1615 }, { "epoch": 0.1216432375468112, "grad_norm": 4.8778815269470215, "learning_rate": 9.783982267278808e-05, "loss": 1.7552, "step": 1616 }, { "epoch": 0.12171851182747134, "grad_norm": 6.437533855438232, "learning_rate": 9.783627664475741e-05, "loss": 2.0403, "step": 1617 }, { "epoch": 0.1217937861081315, "grad_norm": 3.989184856414795, "learning_rate": 9.783272777299734e-05, "loss": 2.3687, "step": 1618 }, { "epoch": 0.12186906038879165, "grad_norm": 8.013960838317871, "learning_rate": 9.782917605771885e-05, "loss": 2.0486, "step": 1619 }, { "epoch": 0.12194433466945181, "grad_norm": 4.957374095916748, "learning_rate": 9.782562149913307e-05, "loss": 2.2626, "step": 1620 }, { "epoch": 0.12201960895011198, "grad_norm": 4.5848822593688965, "learning_rate": 9.782206409745135e-05, "loss": 1.8456, "step": 1621 }, { "epoch": 0.12209488323077212, "grad_norm": 4.23613166809082, "learning_rate": 9.781850385288512e-05, "loss": 2.4278, "step": 1622 }, { "epoch": 0.12217015751143229, "grad_norm": 4.207071304321289, "learning_rate": 9.781494076564604e-05, "loss": 2.0765, "step": 1623 }, { "epoch": 0.12224543179209243, "grad_norm": 4.560945510864258, "learning_rate": 9.781137483594594e-05, "loss": 2.2401, "step": 1624 }, { "epoch": 0.1223207060727526, "grad_norm": 4.446397304534912, "learning_rate": 9.780780606399678e-05, "loss": 1.9131, "step": 1625 }, { "epoch": 0.12239598035341274, "grad_norm": 6.349095344543457, "learning_rate": 9.780423445001073e-05, "loss": 2.4463, "step": 1626 }, { "epoch": 0.1224712546340729, "grad_norm": 5.460348129272461, "learning_rate": 9.780065999420011e-05, "loss": 2.0926, "step": 1627 }, { "epoch": 0.12254652891473305, "grad_norm": 6.743232727050781, "learning_rate": 9.77970826967774e-05, "loss": 2.1142, "step": 1628 }, { "epoch": 0.12262180319539322, "grad_norm": 6.462011814117432, "learning_rate": 9.779350255795527e-05, "loss": 2.0189, "step": 1629 }, { "epoch": 0.12269707747605337, "grad_norm": 7.219926834106445, "learning_rate": 9.778991957794657e-05, "loss": 2.2696, "step": 1630 }, { "epoch": 0.12277235175671353, "grad_norm": 4.853704452514648, "learning_rate": 9.778633375696425e-05, "loss": 2.1161, "step": 1631 }, { "epoch": 0.12284762603737368, "grad_norm": 4.4934797286987305, "learning_rate": 9.778274509522152e-05, "loss": 2.0316, "step": 1632 }, { "epoch": 0.12292290031803384, "grad_norm": 5.992727756500244, "learning_rate": 9.777915359293168e-05, "loss": 2.0836, "step": 1633 }, { "epoch": 0.12299817459869399, "grad_norm": 6.169079780578613, "learning_rate": 9.777555925030826e-05, "loss": 2.3188, "step": 1634 }, { "epoch": 0.12307344887935415, "grad_norm": 4.415220737457275, "learning_rate": 9.777196206756493e-05, "loss": 2.3539, "step": 1635 }, { "epoch": 0.12314872316001431, "grad_norm": 6.755588531494141, "learning_rate": 9.776836204491553e-05, "loss": 2.129, "step": 1636 }, { "epoch": 0.12322399744067446, "grad_norm": 4.62690544128418, "learning_rate": 9.776475918257406e-05, "loss": 1.9415, "step": 1637 }, { "epoch": 0.12329927172133462, "grad_norm": 7.091202259063721, "learning_rate": 9.776115348075472e-05, "loss": 2.2476, "step": 1638 }, { "epoch": 0.12337454600199477, "grad_norm": 5.287752151489258, "learning_rate": 9.775754493967184e-05, "loss": 2.0417, "step": 1639 }, { "epoch": 0.12344982028265493, "grad_norm": 3.6862854957580566, "learning_rate": 9.775393355953995e-05, "loss": 2.3695, "step": 1640 }, { "epoch": 0.12352509456331508, "grad_norm": 4.684847354888916, "learning_rate": 9.775031934057372e-05, "loss": 1.9462, "step": 1641 }, { "epoch": 0.12360036884397524, "grad_norm": 4.057544708251953, "learning_rate": 9.774670228298803e-05, "loss": 2.294, "step": 1642 }, { "epoch": 0.12367564312463539, "grad_norm": 6.539106369018555, "learning_rate": 9.774308238699787e-05, "loss": 1.9845, "step": 1643 }, { "epoch": 0.12375091740529555, "grad_norm": 5.352054119110107, "learning_rate": 9.773945965281847e-05, "loss": 2.0626, "step": 1644 }, { "epoch": 0.1238261916859557, "grad_norm": 5.649844646453857, "learning_rate": 9.773583408066518e-05, "loss": 2.2001, "step": 1645 }, { "epoch": 0.12390146596661586, "grad_norm": 5.47548770904541, "learning_rate": 9.773220567075352e-05, "loss": 2.0255, "step": 1646 }, { "epoch": 0.12397674024727601, "grad_norm": 4.692183017730713, "learning_rate": 9.772857442329917e-05, "loss": 2.1694, "step": 1647 }, { "epoch": 0.12405201452793617, "grad_norm": 5.586865425109863, "learning_rate": 9.772494033851805e-05, "loss": 1.8244, "step": 1648 }, { "epoch": 0.12412728880859632, "grad_norm": 7.032715797424316, "learning_rate": 9.772130341662614e-05, "loss": 2.4628, "step": 1649 }, { "epoch": 0.12420256308925648, "grad_norm": 4.874334335327148, "learning_rate": 9.771766365783967e-05, "loss": 2.4653, "step": 1650 }, { "epoch": 0.12427783736991663, "grad_norm": 4.872899532318115, "learning_rate": 9.771402106237504e-05, "loss": 2.181, "step": 1651 }, { "epoch": 0.12435311165057679, "grad_norm": 5.087370872497559, "learning_rate": 9.771037563044874e-05, "loss": 2.1197, "step": 1652 }, { "epoch": 0.12442838593123695, "grad_norm": 7.149764537811279, "learning_rate": 9.770672736227751e-05, "loss": 2.3339, "step": 1653 }, { "epoch": 0.1245036602118971, "grad_norm": 5.5925092697143555, "learning_rate": 9.770307625807823e-05, "loss": 2.1305, "step": 1654 }, { "epoch": 0.12457893449255726, "grad_norm": 5.180748462677002, "learning_rate": 9.769942231806794e-05, "loss": 1.9459, "step": 1655 }, { "epoch": 0.12465420877321741, "grad_norm": 8.0724458694458, "learning_rate": 9.769576554246384e-05, "loss": 2.1494, "step": 1656 }, { "epoch": 0.12472948305387757, "grad_norm": 5.70414924621582, "learning_rate": 9.769210593148337e-05, "loss": 2.3089, "step": 1657 }, { "epoch": 0.12480475733453772, "grad_norm": 6.4600605964660645, "learning_rate": 9.768844348534403e-05, "loss": 2.1489, "step": 1658 }, { "epoch": 0.12488003161519788, "grad_norm": 4.639852523803711, "learning_rate": 9.768477820426354e-05, "loss": 2.2941, "step": 1659 }, { "epoch": 0.12495530589585803, "grad_norm": 5.862159729003906, "learning_rate": 9.768111008845982e-05, "loss": 1.9134, "step": 1660 }, { "epoch": 0.1250305801765182, "grad_norm": 4.392872333526611, "learning_rate": 9.767743913815093e-05, "loss": 2.0711, "step": 1661 }, { "epoch": 0.12510585445717834, "grad_norm": 5.746190547943115, "learning_rate": 9.767376535355508e-05, "loss": 2.1864, "step": 1662 }, { "epoch": 0.1251811287378385, "grad_norm": 3.662151575088501, "learning_rate": 9.767008873489067e-05, "loss": 1.904, "step": 1663 }, { "epoch": 0.12525640301849866, "grad_norm": 5.1756911277771, "learning_rate": 9.766640928237624e-05, "loss": 2.312, "step": 1664 }, { "epoch": 0.1253316772991588, "grad_norm": 4.287230968475342, "learning_rate": 9.766272699623058e-05, "loss": 2.0945, "step": 1665 }, { "epoch": 0.12540695157981896, "grad_norm": 4.961116790771484, "learning_rate": 9.765904187667255e-05, "loss": 2.1049, "step": 1666 }, { "epoch": 0.1254822258604791, "grad_norm": 6.955589771270752, "learning_rate": 9.765535392392123e-05, "loss": 2.6384, "step": 1667 }, { "epoch": 0.12555750014113928, "grad_norm": 5.326606273651123, "learning_rate": 9.765166313819588e-05, "loss": 2.1886, "step": 1668 }, { "epoch": 0.12563277442179943, "grad_norm": 4.608530044555664, "learning_rate": 9.764796951971585e-05, "loss": 2.1499, "step": 1669 }, { "epoch": 0.12570804870245958, "grad_norm": 5.765841484069824, "learning_rate": 9.764427306870076e-05, "loss": 2.3587, "step": 1670 }, { "epoch": 0.12578332298311976, "grad_norm": 5.100683212280273, "learning_rate": 9.764057378537035e-05, "loss": 2.3403, "step": 1671 }, { "epoch": 0.1258585972637799, "grad_norm": 5.592106342315674, "learning_rate": 9.76368716699445e-05, "loss": 2.2112, "step": 1672 }, { "epoch": 0.12593387154444005, "grad_norm": 5.025146484375, "learning_rate": 9.763316672264336e-05, "loss": 2.1422, "step": 1673 }, { "epoch": 0.1260091458251002, "grad_norm": 4.339875221252441, "learning_rate": 9.762945894368709e-05, "loss": 2.6697, "step": 1674 }, { "epoch": 0.12608442010576038, "grad_norm": 3.809053421020508, "learning_rate": 9.762574833329617e-05, "loss": 1.9938, "step": 1675 }, { "epoch": 0.12615969438642052, "grad_norm": 7.578778266906738, "learning_rate": 9.762203489169116e-05, "loss": 2.1337, "step": 1676 }, { "epoch": 0.12623496866708067, "grad_norm": 4.791518211364746, "learning_rate": 9.761831861909283e-05, "loss": 2.1314, "step": 1677 }, { "epoch": 0.12631024294774082, "grad_norm": 4.78886079788208, "learning_rate": 9.761459951572208e-05, "loss": 2.1539, "step": 1678 }, { "epoch": 0.126385517228401, "grad_norm": 4.933764934539795, "learning_rate": 9.76108775818e-05, "loss": 2.3209, "step": 1679 }, { "epoch": 0.12646079150906114, "grad_norm": 4.993045330047607, "learning_rate": 9.760715281754787e-05, "loss": 2.1642, "step": 1680 }, { "epoch": 0.1265360657897213, "grad_norm": 4.818905830383301, "learning_rate": 9.76034252231871e-05, "loss": 2.6079, "step": 1681 }, { "epoch": 0.12661134007038144, "grad_norm": 6.519049167633057, "learning_rate": 9.759969479893931e-05, "loss": 2.4723, "step": 1682 }, { "epoch": 0.12668661435104162, "grad_norm": 7.517443656921387, "learning_rate": 9.759596154502622e-05, "loss": 2.9614, "step": 1683 }, { "epoch": 0.12676188863170176, "grad_norm": 4.484193325042725, "learning_rate": 9.75922254616698e-05, "loss": 2.2104, "step": 1684 }, { "epoch": 0.1268371629123619, "grad_norm": 5.735039234161377, "learning_rate": 9.758848654909213e-05, "loss": 2.0253, "step": 1685 }, { "epoch": 0.1269124371930221, "grad_norm": 5.4627838134765625, "learning_rate": 9.758474480751547e-05, "loss": 2.4451, "step": 1686 }, { "epoch": 0.12698771147368224, "grad_norm": 4.789186954498291, "learning_rate": 9.758100023716227e-05, "loss": 2.6446, "step": 1687 }, { "epoch": 0.12706298575434238, "grad_norm": 6.586690425872803, "learning_rate": 9.757725283825514e-05, "loss": 2.4926, "step": 1688 }, { "epoch": 0.12713826003500253, "grad_norm": 6.562723159790039, "learning_rate": 9.757350261101685e-05, "loss": 1.8865, "step": 1689 }, { "epoch": 0.1272135343156627, "grad_norm": 5.989397048950195, "learning_rate": 9.756974955567033e-05, "loss": 2.3004, "step": 1690 }, { "epoch": 0.12728880859632286, "grad_norm": 6.762864112854004, "learning_rate": 9.756599367243869e-05, "loss": 2.0279, "step": 1691 }, { "epoch": 0.127364082876983, "grad_norm": 4.570464611053467, "learning_rate": 9.75622349615452e-05, "loss": 2.0751, "step": 1692 }, { "epoch": 0.12743935715764315, "grad_norm": 7.553378105163574, "learning_rate": 9.75584734232133e-05, "loss": 2.3705, "step": 1693 }, { "epoch": 0.12751463143830333, "grad_norm": 5.850673198699951, "learning_rate": 9.755470905766663e-05, "loss": 2.1451, "step": 1694 }, { "epoch": 0.12758990571896348, "grad_norm": 3.9360392093658447, "learning_rate": 9.755094186512897e-05, "loss": 2.1379, "step": 1695 }, { "epoch": 0.12766517999962362, "grad_norm": 4.635295391082764, "learning_rate": 9.754717184582424e-05, "loss": 2.5343, "step": 1696 }, { "epoch": 0.12774045428028377, "grad_norm": 6.2629852294921875, "learning_rate": 9.754339899997657e-05, "loss": 2.0976, "step": 1697 }, { "epoch": 0.12781572856094395, "grad_norm": 4.538017272949219, "learning_rate": 9.753962332781025e-05, "loss": 2.1238, "step": 1698 }, { "epoch": 0.1278910028416041, "grad_norm": 5.178086757659912, "learning_rate": 9.753584482954974e-05, "loss": 2.112, "step": 1699 }, { "epoch": 0.12796627712226424, "grad_norm": 6.888523578643799, "learning_rate": 9.753206350541963e-05, "loss": 2.4774, "step": 1700 }, { "epoch": 0.12804155140292442, "grad_norm": 5.4148430824279785, "learning_rate": 9.752827935564474e-05, "loss": 2.0926, "step": 1701 }, { "epoch": 0.12811682568358457, "grad_norm": 4.234797477722168, "learning_rate": 9.752449238045002e-05, "loss": 2.2639, "step": 1702 }, { "epoch": 0.12819209996424472, "grad_norm": 5.121828079223633, "learning_rate": 9.752070258006059e-05, "loss": 2.0641, "step": 1703 }, { "epoch": 0.12826737424490486, "grad_norm": 4.237938404083252, "learning_rate": 9.751690995470175e-05, "loss": 1.9471, "step": 1704 }, { "epoch": 0.12834264852556504, "grad_norm": 5.727453231811523, "learning_rate": 9.751311450459894e-05, "loss": 2.1931, "step": 1705 }, { "epoch": 0.1284179228062252, "grad_norm": 5.09385347366333, "learning_rate": 9.750931622997781e-05, "loss": 2.1248, "step": 1706 }, { "epoch": 0.12849319708688534, "grad_norm": 4.489762306213379, "learning_rate": 9.750551513106416e-05, "loss": 2.2304, "step": 1707 }, { "epoch": 0.12856847136754548, "grad_norm": 5.428772926330566, "learning_rate": 9.750171120808394e-05, "loss": 2.5218, "step": 1708 }, { "epoch": 0.12864374564820566, "grad_norm": 6.32047700881958, "learning_rate": 9.749790446126327e-05, "loss": 2.313, "step": 1709 }, { "epoch": 0.1287190199288658, "grad_norm": 4.1708879470825195, "learning_rate": 9.749409489082848e-05, "loss": 2.337, "step": 1710 }, { "epoch": 0.12879429420952596, "grad_norm": 6.77269983291626, "learning_rate": 9.749028249700602e-05, "loss": 2.9133, "step": 1711 }, { "epoch": 0.1288695684901861, "grad_norm": 4.5942702293396, "learning_rate": 9.748646728002253e-05, "loss": 2.5118, "step": 1712 }, { "epoch": 0.12894484277084628, "grad_norm": 4.483979225158691, "learning_rate": 9.748264924010482e-05, "loss": 2.17, "step": 1713 }, { "epoch": 0.12902011705150643, "grad_norm": 6.06091833114624, "learning_rate": 9.747882837747984e-05, "loss": 2.4806, "step": 1714 }, { "epoch": 0.12909539133216658, "grad_norm": 4.3725433349609375, "learning_rate": 9.747500469237476e-05, "loss": 2.8324, "step": 1715 }, { "epoch": 0.12917066561282672, "grad_norm": 4.649985313415527, "learning_rate": 9.747117818501687e-05, "loss": 2.0885, "step": 1716 }, { "epoch": 0.1292459398934869, "grad_norm": 5.056002140045166, "learning_rate": 9.746734885563365e-05, "loss": 2.2877, "step": 1717 }, { "epoch": 0.12932121417414705, "grad_norm": 4.823305130004883, "learning_rate": 9.746351670445273e-05, "loss": 2.351, "step": 1718 }, { "epoch": 0.1293964884548072, "grad_norm": 5.800633430480957, "learning_rate": 9.745968173170194e-05, "loss": 2.358, "step": 1719 }, { "epoch": 0.12947176273546737, "grad_norm": 5.244085788726807, "learning_rate": 9.745584393760923e-05, "loss": 2.243, "step": 1720 }, { "epoch": 0.12954703701612752, "grad_norm": 7.268054485321045, "learning_rate": 9.745200332240278e-05, "loss": 2.1576, "step": 1721 }, { "epoch": 0.12962231129678767, "grad_norm": 5.814276218414307, "learning_rate": 9.744815988631089e-05, "loss": 2.1023, "step": 1722 }, { "epoch": 0.12969758557744782, "grad_norm": 4.464752674102783, "learning_rate": 9.744431362956203e-05, "loss": 2.0206, "step": 1723 }, { "epoch": 0.129772859858108, "grad_norm": 4.056717872619629, "learning_rate": 9.744046455238487e-05, "loss": 1.9266, "step": 1724 }, { "epoch": 0.12984813413876814, "grad_norm": 5.691699981689453, "learning_rate": 9.743661265500821e-05, "loss": 2.5917, "step": 1725 }, { "epoch": 0.1299234084194283, "grad_norm": 6.967303276062012, "learning_rate": 9.743275793766102e-05, "loss": 2.2194, "step": 1726 }, { "epoch": 0.12999868270008844, "grad_norm": 7.970575332641602, "learning_rate": 9.742890040057249e-05, "loss": 3.0811, "step": 1727 }, { "epoch": 0.1300739569807486, "grad_norm": 4.193321228027344, "learning_rate": 9.742504004397192e-05, "loss": 2.3907, "step": 1728 }, { "epoch": 0.13014923126140876, "grad_norm": 5.682349681854248, "learning_rate": 9.742117686808878e-05, "loss": 2.174, "step": 1729 }, { "epoch": 0.1302245055420689, "grad_norm": 5.465896129608154, "learning_rate": 9.741731087315276e-05, "loss": 2.28, "step": 1730 }, { "epoch": 0.13029977982272906, "grad_norm": 4.960921287536621, "learning_rate": 9.741344205939366e-05, "loss": 2.2761, "step": 1731 }, { "epoch": 0.13037505410338923, "grad_norm": 7.062646389007568, "learning_rate": 9.740957042704146e-05, "loss": 2.2874, "step": 1732 }, { "epoch": 0.13045032838404938, "grad_norm": 5.2693257331848145, "learning_rate": 9.740569597632635e-05, "loss": 2.1913, "step": 1733 }, { "epoch": 0.13052560266470953, "grad_norm": 4.980762004852295, "learning_rate": 9.740181870747863e-05, "loss": 2.2419, "step": 1734 }, { "epoch": 0.1306008769453697, "grad_norm": 6.031246662139893, "learning_rate": 9.73979386207288e-05, "loss": 2.2241, "step": 1735 }, { "epoch": 0.13067615122602985, "grad_norm": 7.926592826843262, "learning_rate": 9.739405571630751e-05, "loss": 2.2495, "step": 1736 }, { "epoch": 0.13075142550669, "grad_norm": 8.379772186279297, "learning_rate": 9.739016999444562e-05, "loss": 2.3224, "step": 1737 }, { "epoch": 0.13082669978735015, "grad_norm": 4.8269243240356445, "learning_rate": 9.738628145537407e-05, "loss": 2.0694, "step": 1738 }, { "epoch": 0.13090197406801032, "grad_norm": 4.838050842285156, "learning_rate": 9.738239009932408e-05, "loss": 2.4132, "step": 1739 }, { "epoch": 0.13097724834867047, "grad_norm": 7.829267978668213, "learning_rate": 9.737849592652695e-05, "loss": 2.7272, "step": 1740 }, { "epoch": 0.13105252262933062, "grad_norm": 4.662322044372559, "learning_rate": 9.737459893721416e-05, "loss": 2.0681, "step": 1741 }, { "epoch": 0.13112779690999077, "grad_norm": 4.9359564781188965, "learning_rate": 9.737069913161741e-05, "loss": 2.1911, "step": 1742 }, { "epoch": 0.13120307119065094, "grad_norm": 5.634368896484375, "learning_rate": 9.736679650996851e-05, "loss": 2.5004, "step": 1743 }, { "epoch": 0.1312783454713111, "grad_norm": 6.821021556854248, "learning_rate": 9.73628910724995e-05, "loss": 2.5504, "step": 1744 }, { "epoch": 0.13135361975197124, "grad_norm": 4.493718147277832, "learning_rate": 9.735898281944249e-05, "loss": 2.1869, "step": 1745 }, { "epoch": 0.1314288940326314, "grad_norm": 5.231554985046387, "learning_rate": 9.735507175102983e-05, "loss": 2.0704, "step": 1746 }, { "epoch": 0.13150416831329156, "grad_norm": 4.544377326965332, "learning_rate": 9.735115786749404e-05, "loss": 2.5327, "step": 1747 }, { "epoch": 0.1315794425939517, "grad_norm": 5.713136196136475, "learning_rate": 9.734724116906779e-05, "loss": 1.8814, "step": 1748 }, { "epoch": 0.13165471687461186, "grad_norm": 7.447630405426025, "learning_rate": 9.73433216559839e-05, "loss": 2.411, "step": 1749 }, { "epoch": 0.13172999115527204, "grad_norm": 4.947197914123535, "learning_rate": 9.733939932847538e-05, "loss": 2.3038, "step": 1750 }, { "epoch": 0.13180526543593218, "grad_norm": 5.95836067199707, "learning_rate": 9.73354741867754e-05, "loss": 2.5787, "step": 1751 }, { "epoch": 0.13188053971659233, "grad_norm": 5.138620853424072, "learning_rate": 9.733154623111731e-05, "loss": 2.1285, "step": 1752 }, { "epoch": 0.13195581399725248, "grad_norm": 4.807033538818359, "learning_rate": 9.73276154617346e-05, "loss": 1.9861, "step": 1753 }, { "epoch": 0.13203108827791266, "grad_norm": 5.524970531463623, "learning_rate": 9.732368187886095e-05, "loss": 2.215, "step": 1754 }, { "epoch": 0.1321063625585728, "grad_norm": 6.054365634918213, "learning_rate": 9.73197454827302e-05, "loss": 2.3715, "step": 1755 }, { "epoch": 0.13218163683923295, "grad_norm": 4.977542400360107, "learning_rate": 9.731580627357635e-05, "loss": 2.4739, "step": 1756 }, { "epoch": 0.1322569111198931, "grad_norm": 5.680325984954834, "learning_rate": 9.731186425163359e-05, "loss": 1.9051, "step": 1757 }, { "epoch": 0.13233218540055328, "grad_norm": 6.17794942855835, "learning_rate": 9.730791941713624e-05, "loss": 2.3579, "step": 1758 }, { "epoch": 0.13240745968121342, "grad_norm": 5.49123477935791, "learning_rate": 9.730397177031884e-05, "loss": 2.1278, "step": 1759 }, { "epoch": 0.13248273396187357, "grad_norm": 5.485405445098877, "learning_rate": 9.730002131141603e-05, "loss": 2.0819, "step": 1760 }, { "epoch": 0.13255800824253372, "grad_norm": 9.210233688354492, "learning_rate": 9.729606804066269e-05, "loss": 2.5269, "step": 1761 }, { "epoch": 0.1326332825231939, "grad_norm": 5.67719841003418, "learning_rate": 9.72921119582938e-05, "loss": 2.5882, "step": 1762 }, { "epoch": 0.13270855680385404, "grad_norm": 8.652382850646973, "learning_rate": 9.728815306454455e-05, "loss": 2.1637, "step": 1763 }, { "epoch": 0.1327838310845142, "grad_norm": 5.790106296539307, "learning_rate": 9.72841913596503e-05, "loss": 2.3963, "step": 1764 }, { "epoch": 0.13285910536517434, "grad_norm": 5.1820831298828125, "learning_rate": 9.728022684384654e-05, "loss": 2.2085, "step": 1765 }, { "epoch": 0.13293437964583452, "grad_norm": 4.50135612487793, "learning_rate": 9.727625951736893e-05, "loss": 2.2223, "step": 1766 }, { "epoch": 0.13300965392649466, "grad_norm": 5.442310810089111, "learning_rate": 9.727228938045338e-05, "loss": 2.5634, "step": 1767 }, { "epoch": 0.1330849282071548, "grad_norm": 5.8599138259887695, "learning_rate": 9.726831643333585e-05, "loss": 2.1358, "step": 1768 }, { "epoch": 0.133160202487815, "grad_norm": 4.942758083343506, "learning_rate": 9.726434067625254e-05, "loss": 2.7683, "step": 1769 }, { "epoch": 0.13323547676847514, "grad_norm": 4.242802619934082, "learning_rate": 9.726036210943978e-05, "loss": 2.1248, "step": 1770 }, { "epoch": 0.13331075104913528, "grad_norm": 5.054646015167236, "learning_rate": 9.725638073313413e-05, "loss": 2.5606, "step": 1771 }, { "epoch": 0.13338602532979543, "grad_norm": 4.6928486824035645, "learning_rate": 9.725239654757221e-05, "loss": 2.5103, "step": 1772 }, { "epoch": 0.1334612996104556, "grad_norm": 5.823727607727051, "learning_rate": 9.72484095529909e-05, "loss": 1.9912, "step": 1773 }, { "epoch": 0.13353657389111576, "grad_norm": 8.489828109741211, "learning_rate": 9.724441974962722e-05, "loss": 2.4317, "step": 1774 }, { "epoch": 0.1336118481717759, "grad_norm": 8.048446655273438, "learning_rate": 9.724042713771835e-05, "loss": 1.9961, "step": 1775 }, { "epoch": 0.13368712245243605, "grad_norm": 4.8349409103393555, "learning_rate": 9.723643171750163e-05, "loss": 2.1962, "step": 1776 }, { "epoch": 0.13376239673309623, "grad_norm": 4.24368143081665, "learning_rate": 9.723243348921458e-05, "loss": 2.1277, "step": 1777 }, { "epoch": 0.13383767101375638, "grad_norm": 5.371838569641113, "learning_rate": 9.722843245309487e-05, "loss": 2.3456, "step": 1778 }, { "epoch": 0.13391294529441652, "grad_norm": 4.18806266784668, "learning_rate": 9.722442860938039e-05, "loss": 2.2133, "step": 1779 }, { "epoch": 0.13398821957507667, "grad_norm": 5.439259052276611, "learning_rate": 9.722042195830912e-05, "loss": 2.2445, "step": 1780 }, { "epoch": 0.13406349385573685, "grad_norm": 4.135159492492676, "learning_rate": 9.721641250011925e-05, "loss": 2.0865, "step": 1781 }, { "epoch": 0.134138768136397, "grad_norm": 3.7987565994262695, "learning_rate": 9.721240023504914e-05, "loss": 2.1844, "step": 1782 }, { "epoch": 0.13421404241705714, "grad_norm": 4.4955735206604, "learning_rate": 9.72083851633373e-05, "loss": 2.0196, "step": 1783 }, { "epoch": 0.13428931669771732, "grad_norm": 4.269046306610107, "learning_rate": 9.720436728522242e-05, "loss": 2.2484, "step": 1784 }, { "epoch": 0.13436459097837747, "grad_norm": 5.672484874725342, "learning_rate": 9.720034660094336e-05, "loss": 2.1519, "step": 1785 }, { "epoch": 0.13443986525903762, "grad_norm": 4.3847880363464355, "learning_rate": 9.719632311073911e-05, "loss": 2.0127, "step": 1786 }, { "epoch": 0.13451513953969776, "grad_norm": 6.430187225341797, "learning_rate": 9.719229681484889e-05, "loss": 2.43, "step": 1787 }, { "epoch": 0.13459041382035794, "grad_norm": 5.318563938140869, "learning_rate": 9.718826771351202e-05, "loss": 2.236, "step": 1788 }, { "epoch": 0.1346656881010181, "grad_norm": 6.8275580406188965, "learning_rate": 9.718423580696805e-05, "loss": 2.3024, "step": 1789 }, { "epoch": 0.13474096238167824, "grad_norm": 4.084136486053467, "learning_rate": 9.718020109545663e-05, "loss": 2.1722, "step": 1790 }, { "epoch": 0.13481623666233838, "grad_norm": 4.49135160446167, "learning_rate": 9.717616357921764e-05, "loss": 2.3923, "step": 1791 }, { "epoch": 0.13489151094299856, "grad_norm": 5.338467121124268, "learning_rate": 9.717212325849107e-05, "loss": 2.0977, "step": 1792 }, { "epoch": 0.1349667852236587, "grad_norm": 5.188724517822266, "learning_rate": 9.716808013351715e-05, "loss": 2.0666, "step": 1793 }, { "epoch": 0.13504205950431886, "grad_norm": 6.0009074211120605, "learning_rate": 9.71640342045362e-05, "loss": 2.0435, "step": 1794 }, { "epoch": 0.135117333784979, "grad_norm": 3.454115152359009, "learning_rate": 9.715998547178874e-05, "loss": 2.2185, "step": 1795 }, { "epoch": 0.13519260806563918, "grad_norm": 4.948105812072754, "learning_rate": 9.715593393551546e-05, "loss": 2.1565, "step": 1796 }, { "epoch": 0.13526788234629933, "grad_norm": 4.150262355804443, "learning_rate": 9.715187959595722e-05, "loss": 2.0609, "step": 1797 }, { "epoch": 0.13534315662695948, "grad_norm": 4.311717987060547, "learning_rate": 9.714782245335502e-05, "loss": 1.9061, "step": 1798 }, { "epoch": 0.13541843090761965, "grad_norm": 4.890169143676758, "learning_rate": 9.714376250795007e-05, "loss": 2.4948, "step": 1799 }, { "epoch": 0.1354937051882798, "grad_norm": 4.548721790313721, "learning_rate": 9.713969975998369e-05, "loss": 1.8287, "step": 1800 }, { "epoch": 0.13556897946893995, "grad_norm": 5.311888694763184, "learning_rate": 9.713563420969745e-05, "loss": 2.6011, "step": 1801 }, { "epoch": 0.1356442537496001, "grad_norm": 3.4806299209594727, "learning_rate": 9.713156585733298e-05, "loss": 2.3599, "step": 1802 }, { "epoch": 0.13571952803026027, "grad_norm": 4.0148138999938965, "learning_rate": 9.712749470313216e-05, "loss": 2.0326, "step": 1803 }, { "epoch": 0.13579480231092042, "grad_norm": 7.7831902503967285, "learning_rate": 9.712342074733701e-05, "loss": 2.1464, "step": 1804 }, { "epoch": 0.13587007659158057, "grad_norm": 5.510692119598389, "learning_rate": 9.711934399018969e-05, "loss": 1.9611, "step": 1805 }, { "epoch": 0.13594535087224072, "grad_norm": 4.311880588531494, "learning_rate": 9.711526443193259e-05, "loss": 2.1199, "step": 1806 }, { "epoch": 0.1360206251529009, "grad_norm": 5.091075897216797, "learning_rate": 9.71111820728082e-05, "loss": 2.1322, "step": 1807 }, { "epoch": 0.13609589943356104, "grad_norm": 5.07310152053833, "learning_rate": 9.710709691305922e-05, "loss": 2.1444, "step": 1808 }, { "epoch": 0.1361711737142212, "grad_norm": 4.195400714874268, "learning_rate": 9.71030089529285e-05, "loss": 2.3226, "step": 1809 }, { "epoch": 0.13624644799488134, "grad_norm": 4.664185523986816, "learning_rate": 9.709891819265904e-05, "loss": 2.19, "step": 1810 }, { "epoch": 0.1363217222755415, "grad_norm": 4.323750019073486, "learning_rate": 9.709482463249403e-05, "loss": 2.0303, "step": 1811 }, { "epoch": 0.13639699655620166, "grad_norm": 8.341972351074219, "learning_rate": 9.709072827267683e-05, "loss": 2.2671, "step": 1812 }, { "epoch": 0.1364722708368618, "grad_norm": 3.623511791229248, "learning_rate": 9.708662911345096e-05, "loss": 2.5169, "step": 1813 }, { "epoch": 0.13654754511752198, "grad_norm": 4.627481937408447, "learning_rate": 9.708252715506009e-05, "loss": 1.9635, "step": 1814 }, { "epoch": 0.13662281939818213, "grad_norm": 5.3794379234313965, "learning_rate": 9.707842239774807e-05, "loss": 2.5748, "step": 1815 }, { "epoch": 0.13669809367884228, "grad_norm": 6.208539009094238, "learning_rate": 9.707431484175893e-05, "loss": 1.8715, "step": 1816 }, { "epoch": 0.13677336795950243, "grad_norm": 4.358168601989746, "learning_rate": 9.707020448733684e-05, "loss": 1.9982, "step": 1817 }, { "epoch": 0.1368486422401626, "grad_norm": 3.709925889968872, "learning_rate": 9.706609133472617e-05, "loss": 1.9796, "step": 1818 }, { "epoch": 0.13692391652082275, "grad_norm": 5.560945510864258, "learning_rate": 9.706197538417139e-05, "loss": 2.5432, "step": 1819 }, { "epoch": 0.1369991908014829, "grad_norm": 4.2365851402282715, "learning_rate": 9.705785663591722e-05, "loss": 1.9914, "step": 1820 }, { "epoch": 0.13707446508214305, "grad_norm": 5.010767936706543, "learning_rate": 9.705373509020849e-05, "loss": 2.2454, "step": 1821 }, { "epoch": 0.13714973936280322, "grad_norm": 3.9499197006225586, "learning_rate": 9.704961074729024e-05, "loss": 2.2639, "step": 1822 }, { "epoch": 0.13722501364346337, "grad_norm": 4.281479835510254, "learning_rate": 9.704548360740762e-05, "loss": 2.2585, "step": 1823 }, { "epoch": 0.13730028792412352, "grad_norm": 5.20364236831665, "learning_rate": 9.7041353670806e-05, "loss": 2.7264, "step": 1824 }, { "epoch": 0.13737556220478367, "grad_norm": 5.149622440338135, "learning_rate": 9.703722093773086e-05, "loss": 1.9872, "step": 1825 }, { "epoch": 0.13745083648544384, "grad_norm": 5.009570121765137, "learning_rate": 9.703308540842792e-05, "loss": 2.4357, "step": 1826 }, { "epoch": 0.137526110766104, "grad_norm": 7.407719612121582, "learning_rate": 9.7028947083143e-05, "loss": 2.0713, "step": 1827 }, { "epoch": 0.13760138504676414, "grad_norm": 3.7702975273132324, "learning_rate": 9.702480596212211e-05, "loss": 1.9476, "step": 1828 }, { "epoch": 0.1376766593274243, "grad_norm": 5.987146377563477, "learning_rate": 9.702066204561144e-05, "loss": 2.3998, "step": 1829 }, { "epoch": 0.13775193360808446, "grad_norm": 6.7265400886535645, "learning_rate": 9.701651533385731e-05, "loss": 2.3342, "step": 1830 }, { "epoch": 0.1378272078887446, "grad_norm": 6.512685775756836, "learning_rate": 9.701236582710629e-05, "loss": 2.1574, "step": 1831 }, { "epoch": 0.13790248216940476, "grad_norm": 8.839797019958496, "learning_rate": 9.700821352560498e-05, "loss": 2.3089, "step": 1832 }, { "epoch": 0.13797775645006494, "grad_norm": 6.385513782501221, "learning_rate": 9.700405842960028e-05, "loss": 2.4704, "step": 1833 }, { "epoch": 0.13805303073072508, "grad_norm": 8.119880676269531, "learning_rate": 9.699990053933916e-05, "loss": 2.4203, "step": 1834 }, { "epoch": 0.13812830501138523, "grad_norm": 4.073723316192627, "learning_rate": 9.699573985506881e-05, "loss": 2.3897, "step": 1835 }, { "epoch": 0.13820357929204538, "grad_norm": 6.010770797729492, "learning_rate": 9.699157637703657e-05, "loss": 2.2526, "step": 1836 }, { "epoch": 0.13827885357270556, "grad_norm": 5.169813632965088, "learning_rate": 9.698741010548997e-05, "loss": 2.4182, "step": 1837 }, { "epoch": 0.1383541278533657, "grad_norm": 4.972817897796631, "learning_rate": 9.698324104067664e-05, "loss": 2.2069, "step": 1838 }, { "epoch": 0.13842940213402585, "grad_norm": 7.212032794952393, "learning_rate": 9.697906918284445e-05, "loss": 2.1537, "step": 1839 }, { "epoch": 0.138504676414686, "grad_norm": 5.314699649810791, "learning_rate": 9.69748945322414e-05, "loss": 2.2994, "step": 1840 }, { "epoch": 0.13857995069534618, "grad_norm": 5.680018424987793, "learning_rate": 9.697071708911564e-05, "loss": 2.2162, "step": 1841 }, { "epoch": 0.13865522497600632, "grad_norm": 6.512355327606201, "learning_rate": 9.696653685371553e-05, "loss": 2.2615, "step": 1842 }, { "epoch": 0.13873049925666647, "grad_norm": 4.901081085205078, "learning_rate": 9.696235382628958e-05, "loss": 2.1403, "step": 1843 }, { "epoch": 0.13880577353732662, "grad_norm": 4.775032997131348, "learning_rate": 9.695816800708642e-05, "loss": 1.9616, "step": 1844 }, { "epoch": 0.1388810478179868, "grad_norm": 4.621478080749512, "learning_rate": 9.695397939635493e-05, "loss": 2.4736, "step": 1845 }, { "epoch": 0.13895632209864694, "grad_norm": 4.7510085105896, "learning_rate": 9.694978799434409e-05, "loss": 2.1455, "step": 1846 }, { "epoch": 0.1390315963793071, "grad_norm": 3.76348876953125, "learning_rate": 9.694559380130306e-05, "loss": 1.9571, "step": 1847 }, { "epoch": 0.13910687065996727, "grad_norm": 5.012405872344971, "learning_rate": 9.694139681748118e-05, "loss": 2.2396, "step": 1848 }, { "epoch": 0.13918214494062742, "grad_norm": 4.353190898895264, "learning_rate": 9.693719704312795e-05, "loss": 2.2327, "step": 1849 }, { "epoch": 0.13925741922128757, "grad_norm": 5.981847763061523, "learning_rate": 9.693299447849303e-05, "loss": 2.169, "step": 1850 }, { "epoch": 0.1393326935019477, "grad_norm": 4.708183765411377, "learning_rate": 9.692878912382625e-05, "loss": 2.2332, "step": 1851 }, { "epoch": 0.1394079677826079, "grad_norm": 4.729650497436523, "learning_rate": 9.692458097937763e-05, "loss": 1.9346, "step": 1852 }, { "epoch": 0.13948324206326804, "grad_norm": 4.960033416748047, "learning_rate": 9.692037004539732e-05, "loss": 2.4869, "step": 1853 }, { "epoch": 0.13955851634392819, "grad_norm": 3.768878698348999, "learning_rate": 9.691615632213561e-05, "loss": 1.9531, "step": 1854 }, { "epoch": 0.13963379062458833, "grad_norm": 5.850656509399414, "learning_rate": 9.691193980984303e-05, "loss": 1.8112, "step": 1855 }, { "epoch": 0.1397090649052485, "grad_norm": 6.420674800872803, "learning_rate": 9.690772050877024e-05, "loss": 2.2319, "step": 1856 }, { "epoch": 0.13978433918590866, "grad_norm": 6.607874870300293, "learning_rate": 9.690349841916806e-05, "loss": 2.1272, "step": 1857 }, { "epoch": 0.1398596134665688, "grad_norm": 4.868158340454102, "learning_rate": 9.689927354128749e-05, "loss": 2.2246, "step": 1858 }, { "epoch": 0.13993488774722895, "grad_norm": 5.244241714477539, "learning_rate": 9.689504587537967e-05, "loss": 2.3166, "step": 1859 }, { "epoch": 0.14001016202788913, "grad_norm": 6.573191165924072, "learning_rate": 9.689081542169591e-05, "loss": 1.9857, "step": 1860 }, { "epoch": 0.14008543630854928, "grad_norm": 4.720640182495117, "learning_rate": 9.688658218048774e-05, "loss": 2.1274, "step": 1861 }, { "epoch": 0.14016071058920943, "grad_norm": 4.303944110870361, "learning_rate": 9.68823461520068e-05, "loss": 2.2675, "step": 1862 }, { "epoch": 0.1402359848698696, "grad_norm": 6.73034143447876, "learning_rate": 9.687810733650488e-05, "loss": 2.5371, "step": 1863 }, { "epoch": 0.14031125915052975, "grad_norm": 6.235661029815674, "learning_rate": 9.687386573423401e-05, "loss": 2.2235, "step": 1864 }, { "epoch": 0.1403865334311899, "grad_norm": 5.523322105407715, "learning_rate": 9.68696213454463e-05, "loss": 1.9954, "step": 1865 }, { "epoch": 0.14046180771185005, "grad_norm": 5.003766059875488, "learning_rate": 9.686537417039408e-05, "loss": 2.1717, "step": 1866 }, { "epoch": 0.14053708199251022, "grad_norm": 5.235233783721924, "learning_rate": 9.686112420932984e-05, "loss": 2.3419, "step": 1867 }, { "epoch": 0.14061235627317037, "grad_norm": 5.51898193359375, "learning_rate": 9.685687146250624e-05, "loss": 2.2538, "step": 1868 }, { "epoch": 0.14068763055383052, "grad_norm": 5.234782695770264, "learning_rate": 9.685261593017607e-05, "loss": 2.0887, "step": 1869 }, { "epoch": 0.14076290483449067, "grad_norm": 4.609278678894043, "learning_rate": 9.684835761259232e-05, "loss": 2.5293, "step": 1870 }, { "epoch": 0.14083817911515084, "grad_norm": 4.051346778869629, "learning_rate": 9.684409651000812e-05, "loss": 2.324, "step": 1871 }, { "epoch": 0.140913453395811, "grad_norm": 4.640045642852783, "learning_rate": 9.683983262267682e-05, "loss": 1.8394, "step": 1872 }, { "epoch": 0.14098872767647114, "grad_norm": 6.540625095367432, "learning_rate": 9.683556595085186e-05, "loss": 1.9364, "step": 1873 }, { "epoch": 0.14106400195713129, "grad_norm": 4.141967296600342, "learning_rate": 9.683129649478689e-05, "loss": 2.0072, "step": 1874 }, { "epoch": 0.14113927623779146, "grad_norm": 4.262126445770264, "learning_rate": 9.68270242547357e-05, "loss": 2.2954, "step": 1875 }, { "epoch": 0.1412145505184516, "grad_norm": 4.7414703369140625, "learning_rate": 9.682274923095229e-05, "loss": 2.5297, "step": 1876 }, { "epoch": 0.14128982479911176, "grad_norm": 3.738071918487549, "learning_rate": 9.68184714236908e-05, "loss": 2.2613, "step": 1877 }, { "epoch": 0.1413650990797719, "grad_norm": 5.414370059967041, "learning_rate": 9.68141908332055e-05, "loss": 2.0167, "step": 1878 }, { "epoch": 0.14144037336043208, "grad_norm": 4.328585624694824, "learning_rate": 9.68099074597509e-05, "loss": 2.0192, "step": 1879 }, { "epoch": 0.14151564764109223, "grad_norm": 3.526667833328247, "learning_rate": 9.68056213035816e-05, "loss": 2.7083, "step": 1880 }, { "epoch": 0.14159092192175238, "grad_norm": 4.046936988830566, "learning_rate": 9.680133236495241e-05, "loss": 2.1185, "step": 1881 }, { "epoch": 0.14166619620241255, "grad_norm": 4.649966716766357, "learning_rate": 9.679704064411832e-05, "loss": 2.2994, "step": 1882 }, { "epoch": 0.1417414704830727, "grad_norm": 5.140581130981445, "learning_rate": 9.679274614133442e-05, "loss": 2.1596, "step": 1883 }, { "epoch": 0.14181674476373285, "grad_norm": 4.729349136352539, "learning_rate": 9.678844885685603e-05, "loss": 2.5428, "step": 1884 }, { "epoch": 0.141892019044393, "grad_norm": 6.481583118438721, "learning_rate": 9.678414879093862e-05, "loss": 2.284, "step": 1885 }, { "epoch": 0.14196729332505317, "grad_norm": 4.541432857513428, "learning_rate": 9.677984594383778e-05, "loss": 2.1312, "step": 1886 }, { "epoch": 0.14204256760571332, "grad_norm": 3.671532154083252, "learning_rate": 9.677554031580935e-05, "loss": 2.1774, "step": 1887 }, { "epoch": 0.14211784188637347, "grad_norm": 4.682297229766846, "learning_rate": 9.677123190710925e-05, "loss": 2.0723, "step": 1888 }, { "epoch": 0.14219311616703362, "grad_norm": 4.896402359008789, "learning_rate": 9.676692071799362e-05, "loss": 2.2624, "step": 1889 }, { "epoch": 0.1422683904476938, "grad_norm": 5.723679542541504, "learning_rate": 9.676260674871874e-05, "loss": 1.9701, "step": 1890 }, { "epoch": 0.14234366472835394, "grad_norm": 3.2864460945129395, "learning_rate": 9.675828999954105e-05, "loss": 2.2517, "step": 1891 }, { "epoch": 0.1424189390090141, "grad_norm": 3.565432548522949, "learning_rate": 9.67539704707172e-05, "loss": 2.2873, "step": 1892 }, { "epoch": 0.14249421328967424, "grad_norm": 5.5910515785217285, "learning_rate": 9.674964816250395e-05, "loss": 2.0594, "step": 1893 }, { "epoch": 0.1425694875703344, "grad_norm": 3.683396100997925, "learning_rate": 9.674532307515827e-05, "loss": 2.0167, "step": 1894 }, { "epoch": 0.14264476185099456, "grad_norm": 4.884602069854736, "learning_rate": 9.674099520893724e-05, "loss": 2.5024, "step": 1895 }, { "epoch": 0.1427200361316547, "grad_norm": 5.192716598510742, "learning_rate": 9.673666456409817e-05, "loss": 2.2091, "step": 1896 }, { "epoch": 0.14279531041231489, "grad_norm": 7.458247184753418, "learning_rate": 9.67323311408985e-05, "loss": 2.0761, "step": 1897 }, { "epoch": 0.14287058469297503, "grad_norm": 4.641127109527588, "learning_rate": 9.672799493959584e-05, "loss": 2.1959, "step": 1898 }, { "epoch": 0.14294585897363518, "grad_norm": 6.1288862228393555, "learning_rate": 9.672365596044795e-05, "loss": 2.1263, "step": 1899 }, { "epoch": 0.14302113325429533, "grad_norm": 4.811298370361328, "learning_rate": 9.671931420371278e-05, "loss": 2.5735, "step": 1900 }, { "epoch": 0.1430964075349555, "grad_norm": 3.9263174533843994, "learning_rate": 9.671496966964842e-05, "loss": 1.8406, "step": 1901 }, { "epoch": 0.14317168181561565, "grad_norm": 7.9855804443359375, "learning_rate": 9.671062235851317e-05, "loss": 2.0001, "step": 1902 }, { "epoch": 0.1432469560962758, "grad_norm": 5.881470203399658, "learning_rate": 9.670627227056543e-05, "loss": 2.0452, "step": 1903 }, { "epoch": 0.14332223037693595, "grad_norm": 6.645844459533691, "learning_rate": 9.670191940606384e-05, "loss": 2.7314, "step": 1904 }, { "epoch": 0.14339750465759613, "grad_norm": 6.873920917510986, "learning_rate": 9.669756376526714e-05, "loss": 2.6188, "step": 1905 }, { "epoch": 0.14347277893825627, "grad_norm": 5.633041858673096, "learning_rate": 9.669320534843425e-05, "loss": 2.445, "step": 1906 }, { "epoch": 0.14354805321891642, "grad_norm": 5.7597126960754395, "learning_rate": 9.66888441558243e-05, "loss": 2.3496, "step": 1907 }, { "epoch": 0.14362332749957657, "grad_norm": 5.59921932220459, "learning_rate": 9.668448018769652e-05, "loss": 2.1463, "step": 1908 }, { "epoch": 0.14369860178023675, "grad_norm": 4.217930793762207, "learning_rate": 9.668011344431034e-05, "loss": 2.4306, "step": 1909 }, { "epoch": 0.1437738760608969, "grad_norm": 5.275418281555176, "learning_rate": 9.667574392592537e-05, "loss": 2.5023, "step": 1910 }, { "epoch": 0.14384915034155704, "grad_norm": 5.853795051574707, "learning_rate": 9.667137163280134e-05, "loss": 2.203, "step": 1911 }, { "epoch": 0.14392442462221722, "grad_norm": 4.18360710144043, "learning_rate": 9.666699656519817e-05, "loss": 2.3513, "step": 1912 }, { "epoch": 0.14399969890287737, "grad_norm": 5.436791896820068, "learning_rate": 9.666261872337599e-05, "loss": 2.0364, "step": 1913 }, { "epoch": 0.1440749731835375, "grad_norm": 4.552731513977051, "learning_rate": 9.665823810759498e-05, "loss": 2.1606, "step": 1914 }, { "epoch": 0.14415024746419766, "grad_norm": 4.0237717628479, "learning_rate": 9.665385471811562e-05, "loss": 2.2284, "step": 1915 }, { "epoch": 0.14422552174485784, "grad_norm": 4.607576370239258, "learning_rate": 9.664946855519845e-05, "loss": 2.2797, "step": 1916 }, { "epoch": 0.14430079602551799, "grad_norm": 6.482448577880859, "learning_rate": 9.664507961910423e-05, "loss": 2.1925, "step": 1917 }, { "epoch": 0.14437607030617813, "grad_norm": 5.397335529327393, "learning_rate": 9.664068791009386e-05, "loss": 2.0366, "step": 1918 }, { "epoch": 0.14445134458683828, "grad_norm": 6.092636585235596, "learning_rate": 9.663629342842843e-05, "loss": 1.9911, "step": 1919 }, { "epoch": 0.14452661886749846, "grad_norm": 11.476844787597656, "learning_rate": 9.663189617436916e-05, "loss": 2.1739, "step": 1920 }, { "epoch": 0.1446018931481586, "grad_norm": 4.44120979309082, "learning_rate": 9.662749614817748e-05, "loss": 2.1389, "step": 1921 }, { "epoch": 0.14467716742881875, "grad_norm": 5.036876201629639, "learning_rate": 9.662309335011494e-05, "loss": 2.2473, "step": 1922 }, { "epoch": 0.1447524417094789, "grad_norm": 5.119384288787842, "learning_rate": 9.661868778044328e-05, "loss": 2.2134, "step": 1923 }, { "epoch": 0.14482771599013908, "grad_norm": 4.753607273101807, "learning_rate": 9.661427943942437e-05, "loss": 2.1122, "step": 1924 }, { "epoch": 0.14490299027079923, "grad_norm": 5.219104290008545, "learning_rate": 9.660986832732032e-05, "loss": 1.8996, "step": 1925 }, { "epoch": 0.14497826455145937, "grad_norm": 4.637187957763672, "learning_rate": 9.660545444439333e-05, "loss": 2.1506, "step": 1926 }, { "epoch": 0.14505353883211952, "grad_norm": 5.25298547744751, "learning_rate": 9.66010377909058e-05, "loss": 2.2684, "step": 1927 }, { "epoch": 0.1451288131127797, "grad_norm": 5.068880081176758, "learning_rate": 9.659661836712028e-05, "loss": 2.174, "step": 1928 }, { "epoch": 0.14520408739343985, "grad_norm": 3.5758395195007324, "learning_rate": 9.659219617329951e-05, "loss": 2.2494, "step": 1929 }, { "epoch": 0.1452793616741, "grad_norm": 5.59762716293335, "learning_rate": 9.658777120970637e-05, "loss": 2.2994, "step": 1930 }, { "epoch": 0.14535463595476017, "grad_norm": 4.5902299880981445, "learning_rate": 9.65833434766039e-05, "loss": 2.1852, "step": 1931 }, { "epoch": 0.14542991023542032, "grad_norm": 4.434948921203613, "learning_rate": 9.657891297425531e-05, "loss": 2.5772, "step": 1932 }, { "epoch": 0.14550518451608047, "grad_norm": 4.257136821746826, "learning_rate": 9.657447970292402e-05, "loss": 2.2725, "step": 1933 }, { "epoch": 0.1455804587967406, "grad_norm": 4.4683098793029785, "learning_rate": 9.657004366287352e-05, "loss": 2.2595, "step": 1934 }, { "epoch": 0.1456557330774008, "grad_norm": 5.881546497344971, "learning_rate": 9.656560485436757e-05, "loss": 1.9656, "step": 1935 }, { "epoch": 0.14573100735806094, "grad_norm": 5.927746772766113, "learning_rate": 9.656116327767002e-05, "loss": 2.3611, "step": 1936 }, { "epoch": 0.14580628163872109, "grad_norm": 6.368885517120361, "learning_rate": 9.65567189330449e-05, "loss": 2.0469, "step": 1937 }, { "epoch": 0.14588155591938123, "grad_norm": 9.653582572937012, "learning_rate": 9.655227182075643e-05, "loss": 2.3902, "step": 1938 }, { "epoch": 0.1459568302000414, "grad_norm": 5.578766822814941, "learning_rate": 9.6547821941069e-05, "loss": 2.4388, "step": 1939 }, { "epoch": 0.14603210448070156, "grad_norm": 6.3838043212890625, "learning_rate": 9.654336929424708e-05, "loss": 2.1256, "step": 1940 }, { "epoch": 0.1461073787613617, "grad_norm": 6.19985818862915, "learning_rate": 9.653891388055541e-05, "loss": 2.2711, "step": 1941 }, { "epoch": 0.14618265304202185, "grad_norm": 5.388345241546631, "learning_rate": 9.653445570025887e-05, "loss": 1.9546, "step": 1942 }, { "epoch": 0.14625792732268203, "grad_norm": 5.75406551361084, "learning_rate": 9.652999475362243e-05, "loss": 1.9034, "step": 1943 }, { "epoch": 0.14633320160334218, "grad_norm": 5.268181800842285, "learning_rate": 9.652553104091133e-05, "loss": 2.354, "step": 1944 }, { "epoch": 0.14640847588400233, "grad_norm": 4.4514336585998535, "learning_rate": 9.65210645623909e-05, "loss": 2.2878, "step": 1945 }, { "epoch": 0.1464837501646625, "grad_norm": 5.7580156326293945, "learning_rate": 9.651659531832667e-05, "loss": 2.0802, "step": 1946 }, { "epoch": 0.14655902444532265, "grad_norm": 4.718121528625488, "learning_rate": 9.651212330898432e-05, "loss": 1.9684, "step": 1947 }, { "epoch": 0.1466342987259828, "grad_norm": 3.99812912940979, "learning_rate": 9.650764853462967e-05, "loss": 2.5342, "step": 1948 }, { "epoch": 0.14670957300664295, "grad_norm": 4.740177154541016, "learning_rate": 9.650317099552879e-05, "loss": 2.2826, "step": 1949 }, { "epoch": 0.14678484728730312, "grad_norm": 5.82108211517334, "learning_rate": 9.649869069194783e-05, "loss": 2.0493, "step": 1950 }, { "epoch": 0.14686012156796327, "grad_norm": 4.381932258605957, "learning_rate": 9.649420762415311e-05, "loss": 2.236, "step": 1951 }, { "epoch": 0.14693539584862342, "grad_norm": 4.588272571563721, "learning_rate": 9.648972179241116e-05, "loss": 2.0181, "step": 1952 }, { "epoch": 0.14701067012928357, "grad_norm": 5.516083240509033, "learning_rate": 9.648523319698865e-05, "loss": 2.3241, "step": 1953 }, { "epoch": 0.14708594440994374, "grad_norm": 9.62438678741455, "learning_rate": 9.648074183815241e-05, "loss": 2.4048, "step": 1954 }, { "epoch": 0.1471612186906039, "grad_norm": 5.418132305145264, "learning_rate": 9.647624771616944e-05, "loss": 2.3005, "step": 1955 }, { "epoch": 0.14723649297126404, "grad_norm": 7.159862518310547, "learning_rate": 9.647175083130687e-05, "loss": 2.0388, "step": 1956 }, { "epoch": 0.14731176725192419, "grad_norm": 5.345907211303711, "learning_rate": 9.646725118383207e-05, "loss": 2.2907, "step": 1957 }, { "epoch": 0.14738704153258436, "grad_norm": 4.737625598907471, "learning_rate": 9.646274877401252e-05, "loss": 1.9337, "step": 1958 }, { "epoch": 0.1474623158132445, "grad_norm": 5.6605119705200195, "learning_rate": 9.645824360211589e-05, "loss": 2.3023, "step": 1959 }, { "epoch": 0.14753759009390466, "grad_norm": 4.911399841308594, "learning_rate": 9.645373566840994e-05, "loss": 1.931, "step": 1960 }, { "epoch": 0.14761286437456483, "grad_norm": 5.157008647918701, "learning_rate": 9.644922497316274e-05, "loss": 2.6263, "step": 1961 }, { "epoch": 0.14768813865522498, "grad_norm": 7.209107875823975, "learning_rate": 9.644471151664236e-05, "loss": 2.3765, "step": 1962 }, { "epoch": 0.14776341293588513, "grad_norm": 5.182798862457275, "learning_rate": 9.644019529911716e-05, "loss": 2.3124, "step": 1963 }, { "epoch": 0.14783868721654528, "grad_norm": 4.9227471351623535, "learning_rate": 9.64356763208556e-05, "loss": 2.3799, "step": 1964 }, { "epoch": 0.14791396149720545, "grad_norm": 5.272462844848633, "learning_rate": 9.643115458212631e-05, "loss": 2.0561, "step": 1965 }, { "epoch": 0.1479892357778656, "grad_norm": 6.156679630279541, "learning_rate": 9.642663008319812e-05, "loss": 2.2961, "step": 1966 }, { "epoch": 0.14806451005852575, "grad_norm": 5.657508373260498, "learning_rate": 9.642210282433999e-05, "loss": 2.4496, "step": 1967 }, { "epoch": 0.1481397843391859, "grad_norm": 5.954901218414307, "learning_rate": 9.641757280582103e-05, "loss": 2.4012, "step": 1968 }, { "epoch": 0.14821505861984607, "grad_norm": 5.273434638977051, "learning_rate": 9.641304002791057e-05, "loss": 2.2418, "step": 1969 }, { "epoch": 0.14829033290050622, "grad_norm": 4.450619697570801, "learning_rate": 9.640850449087804e-05, "loss": 2.1015, "step": 1970 }, { "epoch": 0.14836560718116637, "grad_norm": 4.208042144775391, "learning_rate": 9.640396619499308e-05, "loss": 2.3728, "step": 1971 }, { "epoch": 0.14844088146182652, "grad_norm": 5.491754531860352, "learning_rate": 9.639942514052548e-05, "loss": 2.0012, "step": 1972 }, { "epoch": 0.1485161557424867, "grad_norm": 4.738384246826172, "learning_rate": 9.639488132774518e-05, "loss": 2.137, "step": 1973 }, { "epoch": 0.14859143002314684, "grad_norm": 4.606748104095459, "learning_rate": 9.639033475692233e-05, "loss": 2.244, "step": 1974 }, { "epoch": 0.148666704303807, "grad_norm": 5.480950355529785, "learning_rate": 9.638578542832717e-05, "loss": 2.397, "step": 1975 }, { "epoch": 0.14874197858446717, "grad_norm": 4.1901350021362305, "learning_rate": 9.638123334223017e-05, "loss": 1.8605, "step": 1976 }, { "epoch": 0.14881725286512731, "grad_norm": 4.445580005645752, "learning_rate": 9.637667849890193e-05, "loss": 2.2525, "step": 1977 }, { "epoch": 0.14889252714578746, "grad_norm": 3.7742133140563965, "learning_rate": 9.637212089861323e-05, "loss": 2.1167, "step": 1978 }, { "epoch": 0.1489678014264476, "grad_norm": 4.743167877197266, "learning_rate": 9.636756054163498e-05, "loss": 2.1636, "step": 1979 }, { "epoch": 0.14904307570710779, "grad_norm": 4.316456317901611, "learning_rate": 9.636299742823831e-05, "loss": 2.2178, "step": 1980 }, { "epoch": 0.14911834998776793, "grad_norm": 3.9753384590148926, "learning_rate": 9.635843155869449e-05, "loss": 2.2348, "step": 1981 }, { "epoch": 0.14919362426842808, "grad_norm": 4.181934356689453, "learning_rate": 9.63538629332749e-05, "loss": 2.1192, "step": 1982 }, { "epoch": 0.14926889854908823, "grad_norm": 4.687638282775879, "learning_rate": 9.634929155225118e-05, "loss": 2.0832, "step": 1983 }, { "epoch": 0.1493441728297484, "grad_norm": 3.862661361694336, "learning_rate": 9.634471741589505e-05, "loss": 2.4405, "step": 1984 }, { "epoch": 0.14941944711040855, "grad_norm": 3.4980392456054688, "learning_rate": 9.634014052447849e-05, "loss": 2.0628, "step": 1985 }, { "epoch": 0.1494947213910687, "grad_norm": 4.194029331207275, "learning_rate": 9.63355608782735e-05, "loss": 2.2522, "step": 1986 }, { "epoch": 0.14956999567172885, "grad_norm": 5.431275367736816, "learning_rate": 9.633097847755238e-05, "loss": 2.3805, "step": 1987 }, { "epoch": 0.14964526995238903, "grad_norm": 5.273715019226074, "learning_rate": 9.632639332258752e-05, "loss": 2.6288, "step": 1988 }, { "epoch": 0.14972054423304917, "grad_norm": 4.149738311767578, "learning_rate": 9.632180541365152e-05, "loss": 2.0956, "step": 1989 }, { "epoch": 0.14979581851370932, "grad_norm": 5.5944647789001465, "learning_rate": 9.631721475101709e-05, "loss": 2.1588, "step": 1990 }, { "epoch": 0.14987109279436947, "grad_norm": 5.478260040283203, "learning_rate": 9.631262133495714e-05, "loss": 1.9695, "step": 1991 }, { "epoch": 0.14994636707502965, "grad_norm": 5.873048305511475, "learning_rate": 9.630802516574476e-05, "loss": 2.498, "step": 1992 }, { "epoch": 0.1500216413556898, "grad_norm": 5.332206726074219, "learning_rate": 9.630342624365311e-05, "loss": 1.9052, "step": 1993 }, { "epoch": 0.15009691563634994, "grad_norm": 5.516310691833496, "learning_rate": 9.629882456895567e-05, "loss": 1.893, "step": 1994 }, { "epoch": 0.15017218991701012, "grad_norm": 5.302465438842773, "learning_rate": 9.629422014192595e-05, "loss": 1.9819, "step": 1995 }, { "epoch": 0.15024746419767027, "grad_norm": 5.737462997436523, "learning_rate": 9.628961296283766e-05, "loss": 2.0859, "step": 1996 }, { "epoch": 0.15032273847833041, "grad_norm": 5.143144607543945, "learning_rate": 9.62850030319647e-05, "loss": 1.9948, "step": 1997 }, { "epoch": 0.15039801275899056, "grad_norm": 4.926069259643555, "learning_rate": 9.628039034958113e-05, "loss": 2.1829, "step": 1998 }, { "epoch": 0.15047328703965074, "grad_norm": 4.048475742340088, "learning_rate": 9.627577491596113e-05, "loss": 2.4922, "step": 1999 }, { "epoch": 0.1505485613203109, "grad_norm": 4.139350891113281, "learning_rate": 9.627115673137912e-05, "loss": 2.2655, "step": 2000 }, { "epoch": 0.15062383560097103, "grad_norm": 5.4989142417907715, "learning_rate": 9.626653579610958e-05, "loss": 2.0027, "step": 2001 }, { "epoch": 0.15069910988163118, "grad_norm": 4.3663530349731445, "learning_rate": 9.626191211042726e-05, "loss": 1.8043, "step": 2002 }, { "epoch": 0.15077438416229136, "grad_norm": 6.856672763824463, "learning_rate": 9.625728567460699e-05, "loss": 2.0533, "step": 2003 }, { "epoch": 0.1508496584429515, "grad_norm": 5.059989929199219, "learning_rate": 9.625265648892384e-05, "loss": 2.2027, "step": 2004 }, { "epoch": 0.15092493272361165, "grad_norm": 4.941856861114502, "learning_rate": 9.624802455365294e-05, "loss": 2.081, "step": 2005 }, { "epoch": 0.1510002070042718, "grad_norm": 3.969909906387329, "learning_rate": 9.624338986906969e-05, "loss": 2.5115, "step": 2006 }, { "epoch": 0.15107548128493198, "grad_norm": 4.537139892578125, "learning_rate": 9.623875243544962e-05, "loss": 2.1718, "step": 2007 }, { "epoch": 0.15115075556559213, "grad_norm": 3.777068614959717, "learning_rate": 9.623411225306837e-05, "loss": 2.2531, "step": 2008 }, { "epoch": 0.15122602984625227, "grad_norm": 3.402850389480591, "learning_rate": 9.622946932220182e-05, "loss": 2.3155, "step": 2009 }, { "epoch": 0.15130130412691245, "grad_norm": 4.730953693389893, "learning_rate": 9.622482364312594e-05, "loss": 2.1493, "step": 2010 }, { "epoch": 0.1513765784075726, "grad_norm": 5.613648891448975, "learning_rate": 9.622017521611698e-05, "loss": 2.0707, "step": 2011 }, { "epoch": 0.15145185268823275, "grad_norm": 3.657475233078003, "learning_rate": 9.621552404145118e-05, "loss": 2.3353, "step": 2012 }, { "epoch": 0.1515271269688929, "grad_norm": 3.5609402656555176, "learning_rate": 9.621087011940509e-05, "loss": 1.8909, "step": 2013 }, { "epoch": 0.15160240124955307, "grad_norm": 4.627619743347168, "learning_rate": 9.620621345025538e-05, "loss": 2.186, "step": 2014 }, { "epoch": 0.15167767553021322, "grad_norm": 4.744054317474365, "learning_rate": 9.620155403427885e-05, "loss": 2.2233, "step": 2015 }, { "epoch": 0.15175294981087337, "grad_norm": 4.01196813583374, "learning_rate": 9.619689187175249e-05, "loss": 1.9883, "step": 2016 }, { "epoch": 0.15182822409153351, "grad_norm": 5.643701076507568, "learning_rate": 9.619222696295348e-05, "loss": 2.3755, "step": 2017 }, { "epoch": 0.1519034983721937, "grad_norm": 4.8457536697387695, "learning_rate": 9.618755930815912e-05, "loss": 2.0702, "step": 2018 }, { "epoch": 0.15197877265285384, "grad_norm": 4.948049068450928, "learning_rate": 9.618288890764688e-05, "loss": 2.0424, "step": 2019 }, { "epoch": 0.152054046933514, "grad_norm": 5.378200054168701, "learning_rate": 9.617821576169439e-05, "loss": 2.2498, "step": 2020 }, { "epoch": 0.15212932121417413, "grad_norm": 5.266604423522949, "learning_rate": 9.61735398705795e-05, "loss": 1.7246, "step": 2021 }, { "epoch": 0.1522045954948343, "grad_norm": 5.899149417877197, "learning_rate": 9.616886123458013e-05, "loss": 2.2373, "step": 2022 }, { "epoch": 0.15227986977549446, "grad_norm": 5.359463691711426, "learning_rate": 9.616417985397446e-05, "loss": 2.2125, "step": 2023 }, { "epoch": 0.1523551440561546, "grad_norm": 5.404905796051025, "learning_rate": 9.615949572904073e-05, "loss": 2.3286, "step": 2024 }, { "epoch": 0.15243041833681478, "grad_norm": 5.987763404846191, "learning_rate": 9.615480886005744e-05, "loss": 2.466, "step": 2025 }, { "epoch": 0.15250569261747493, "grad_norm": 6.86370849609375, "learning_rate": 9.61501192473032e-05, "loss": 2.1007, "step": 2026 }, { "epoch": 0.15258096689813508, "grad_norm": 7.341048240661621, "learning_rate": 9.614542689105676e-05, "loss": 2.1356, "step": 2027 }, { "epoch": 0.15265624117879523, "grad_norm": 4.952999591827393, "learning_rate": 9.614073179159713e-05, "loss": 1.9574, "step": 2028 }, { "epoch": 0.1527315154594554, "grad_norm": 6.23982572555542, "learning_rate": 9.613603394920337e-05, "loss": 2.1264, "step": 2029 }, { "epoch": 0.15280678974011555, "grad_norm": 5.975821495056152, "learning_rate": 9.613133336415478e-05, "loss": 2.0535, "step": 2030 }, { "epoch": 0.1528820640207757, "grad_norm": 5.267685413360596, "learning_rate": 9.612663003673079e-05, "loss": 2.0035, "step": 2031 }, { "epoch": 0.15295733830143585, "grad_norm": 5.806224822998047, "learning_rate": 9.612192396721099e-05, "loss": 2.2577, "step": 2032 }, { "epoch": 0.15303261258209602, "grad_norm": 5.5962982177734375, "learning_rate": 9.611721515587515e-05, "loss": 2.34, "step": 2033 }, { "epoch": 0.15310788686275617, "grad_norm": 4.431849002838135, "learning_rate": 9.611250360300319e-05, "loss": 1.9954, "step": 2034 }, { "epoch": 0.15318316114341632, "grad_norm": 4.238766193389893, "learning_rate": 9.610778930887521e-05, "loss": 2.2883, "step": 2035 }, { "epoch": 0.15325843542407647, "grad_norm": 4.898646831512451, "learning_rate": 9.610307227377145e-05, "loss": 1.9973, "step": 2036 }, { "epoch": 0.15333370970473664, "grad_norm": 5.965308666229248, "learning_rate": 9.609835249797233e-05, "loss": 2.2187, "step": 2037 }, { "epoch": 0.1534089839853968, "grad_norm": 5.081641674041748, "learning_rate": 9.609362998175843e-05, "loss": 1.9027, "step": 2038 }, { "epoch": 0.15348425826605694, "grad_norm": 4.4746856689453125, "learning_rate": 9.608890472541048e-05, "loss": 2.1957, "step": 2039 }, { "epoch": 0.1535595325467171, "grad_norm": 3.936408042907715, "learning_rate": 9.608417672920939e-05, "loss": 1.9187, "step": 2040 }, { "epoch": 0.15363480682737726, "grad_norm": 5.039267539978027, "learning_rate": 9.60794459934362e-05, "loss": 1.919, "step": 2041 }, { "epoch": 0.1537100811080374, "grad_norm": 4.060781478881836, "learning_rate": 9.607471251837219e-05, "loss": 2.1018, "step": 2042 }, { "epoch": 0.15378535538869756, "grad_norm": 4.91633415222168, "learning_rate": 9.606997630429872e-05, "loss": 1.9919, "step": 2043 }, { "epoch": 0.15386062966935773, "grad_norm": 4.289859771728516, "learning_rate": 9.606523735149735e-05, "loss": 2.5446, "step": 2044 }, { "epoch": 0.15393590395001788, "grad_norm": 3.936551094055176, "learning_rate": 9.60604956602498e-05, "loss": 2.3704, "step": 2045 }, { "epoch": 0.15401117823067803, "grad_norm": 10.058585166931152, "learning_rate": 9.605575123083793e-05, "loss": 2.2148, "step": 2046 }, { "epoch": 0.15408645251133818, "grad_norm": 5.448790073394775, "learning_rate": 9.605100406354379e-05, "loss": 2.2353, "step": 2047 }, { "epoch": 0.15416172679199835, "grad_norm": 3.798121929168701, "learning_rate": 9.604625415864962e-05, "loss": 2.2022, "step": 2048 }, { "epoch": 0.1542370010726585, "grad_norm": 3.8325226306915283, "learning_rate": 9.604150151643775e-05, "loss": 1.9418, "step": 2049 }, { "epoch": 0.15431227535331865, "grad_norm": 6.15515661239624, "learning_rate": 9.603674613719072e-05, "loss": 2.3556, "step": 2050 }, { "epoch": 0.1543875496339788, "grad_norm": 4.198747158050537, "learning_rate": 9.603198802119122e-05, "loss": 2.1127, "step": 2051 }, { "epoch": 0.15446282391463897, "grad_norm": 4.799349784851074, "learning_rate": 9.602722716872213e-05, "loss": 1.9679, "step": 2052 }, { "epoch": 0.15453809819529912, "grad_norm": 6.379265785217285, "learning_rate": 9.602246358006643e-05, "loss": 2.4261, "step": 2053 }, { "epoch": 0.15461337247595927, "grad_norm": 7.0591607093811035, "learning_rate": 9.601769725550736e-05, "loss": 2.2622, "step": 2054 }, { "epoch": 0.15468864675661942, "grad_norm": 4.3434929847717285, "learning_rate": 9.601292819532819e-05, "loss": 2.3136, "step": 2055 }, { "epoch": 0.1547639210372796, "grad_norm": 7.49066162109375, "learning_rate": 9.600815639981249e-05, "loss": 2.1691, "step": 2056 }, { "epoch": 0.15483919531793974, "grad_norm": 4.92397403717041, "learning_rate": 9.600338186924389e-05, "loss": 2.5493, "step": 2057 }, { "epoch": 0.1549144695985999, "grad_norm": 4.865357875823975, "learning_rate": 9.599860460390624e-05, "loss": 1.9386, "step": 2058 }, { "epoch": 0.15498974387926007, "grad_norm": 4.770902633666992, "learning_rate": 9.599382460408353e-05, "loss": 2.0517, "step": 2059 }, { "epoch": 0.15506501815992021, "grad_norm": 5.135201930999756, "learning_rate": 9.598904187005991e-05, "loss": 2.2801, "step": 2060 }, { "epoch": 0.15514029244058036, "grad_norm": 6.234017848968506, "learning_rate": 9.598425640211972e-05, "loss": 2.3472, "step": 2061 }, { "epoch": 0.1552155667212405, "grad_norm": 5.580723762512207, "learning_rate": 9.597946820054743e-05, "loss": 2.0529, "step": 2062 }, { "epoch": 0.1552908410019007, "grad_norm": 5.65152645111084, "learning_rate": 9.597467726562768e-05, "loss": 2.7563, "step": 2063 }, { "epoch": 0.15536611528256083, "grad_norm": 6.707641124725342, "learning_rate": 9.596988359764529e-05, "loss": 2.2833, "step": 2064 }, { "epoch": 0.15544138956322098, "grad_norm": 6.38559627532959, "learning_rate": 9.596508719688522e-05, "loss": 2.1667, "step": 2065 }, { "epoch": 0.15551666384388113, "grad_norm": 5.363674640655518, "learning_rate": 9.59602880636326e-05, "loss": 2.6474, "step": 2066 }, { "epoch": 0.1555919381245413, "grad_norm": 4.697733402252197, "learning_rate": 9.595548619817274e-05, "loss": 2.236, "step": 2067 }, { "epoch": 0.15566721240520145, "grad_norm": 4.307929992675781, "learning_rate": 9.595068160079107e-05, "loss": 2.0107, "step": 2068 }, { "epoch": 0.1557424866858616, "grad_norm": 5.606794834136963, "learning_rate": 9.594587427177324e-05, "loss": 2.0769, "step": 2069 }, { "epoch": 0.15581776096652175, "grad_norm": 3.8274621963500977, "learning_rate": 9.594106421140501e-05, "loss": 1.966, "step": 2070 }, { "epoch": 0.15589303524718193, "grad_norm": 4.292691707611084, "learning_rate": 9.593625141997234e-05, "loss": 2.1974, "step": 2071 }, { "epoch": 0.15596830952784208, "grad_norm": 4.167516231536865, "learning_rate": 9.593143589776133e-05, "loss": 2.3495, "step": 2072 }, { "epoch": 0.15604358380850222, "grad_norm": 4.153419017791748, "learning_rate": 9.592661764505824e-05, "loss": 2.172, "step": 2073 }, { "epoch": 0.1561188580891624, "grad_norm": 3.9185049533843994, "learning_rate": 9.59217966621495e-05, "loss": 1.9352, "step": 2074 }, { "epoch": 0.15619413236982255, "grad_norm": 4.827195644378662, "learning_rate": 9.591697294932173e-05, "loss": 2.2603, "step": 2075 }, { "epoch": 0.1562694066504827, "grad_norm": 4.187567710876465, "learning_rate": 9.591214650686167e-05, "loss": 2.0342, "step": 2076 }, { "epoch": 0.15634468093114284, "grad_norm": 4.472141742706299, "learning_rate": 9.590731733505622e-05, "loss": 2.0691, "step": 2077 }, { "epoch": 0.15641995521180302, "grad_norm": 4.0354695320129395, "learning_rate": 9.59024854341925e-05, "loss": 2.0649, "step": 2078 }, { "epoch": 0.15649522949246317, "grad_norm": 3.723557233810425, "learning_rate": 9.58976508045577e-05, "loss": 2.0413, "step": 2079 }, { "epoch": 0.15657050377312332, "grad_norm": 4.510951042175293, "learning_rate": 9.589281344643927e-05, "loss": 2.0874, "step": 2080 }, { "epoch": 0.15664577805378346, "grad_norm": 3.7920453548431396, "learning_rate": 9.588797336012477e-05, "loss": 2.5182, "step": 2081 }, { "epoch": 0.15672105233444364, "grad_norm": 4.730495452880859, "learning_rate": 9.588313054590192e-05, "loss": 2.1439, "step": 2082 }, { "epoch": 0.1567963266151038, "grad_norm": 6.249584197998047, "learning_rate": 9.58782850040586e-05, "loss": 2.3361, "step": 2083 }, { "epoch": 0.15687160089576394, "grad_norm": 3.6124255657196045, "learning_rate": 9.58734367348829e-05, "loss": 2.5142, "step": 2084 }, { "epoch": 0.15694687517642408, "grad_norm": 5.7303290367126465, "learning_rate": 9.586858573866299e-05, "loss": 2.1577, "step": 2085 }, { "epoch": 0.15702214945708426, "grad_norm": 5.092860221862793, "learning_rate": 9.586373201568728e-05, "loss": 2.1525, "step": 2086 }, { "epoch": 0.1570974237377444, "grad_norm": 3.9583547115325928, "learning_rate": 9.585887556624429e-05, "loss": 2.346, "step": 2087 }, { "epoch": 0.15717269801840456, "grad_norm": 4.521017551422119, "learning_rate": 9.585401639062273e-05, "loss": 1.8967, "step": 2088 }, { "epoch": 0.1572479722990647, "grad_norm": 5.252394676208496, "learning_rate": 9.584915448911147e-05, "loss": 2.201, "step": 2089 }, { "epoch": 0.15732324657972488, "grad_norm": 5.868988037109375, "learning_rate": 9.584428986199954e-05, "loss": 1.8643, "step": 2090 }, { "epoch": 0.15739852086038503, "grad_norm": 4.457388877868652, "learning_rate": 9.583942250957611e-05, "loss": 2.035, "step": 2091 }, { "epoch": 0.15747379514104518, "grad_norm": 6.086800575256348, "learning_rate": 9.583455243213054e-05, "loss": 2.6587, "step": 2092 }, { "epoch": 0.15754906942170535, "grad_norm": 4.44578218460083, "learning_rate": 9.582967962995235e-05, "loss": 2.2085, "step": 2093 }, { "epoch": 0.1576243437023655, "grad_norm": 5.932287216186523, "learning_rate": 9.58248041033312e-05, "loss": 2.0856, "step": 2094 }, { "epoch": 0.15769961798302565, "grad_norm": 4.791714191436768, "learning_rate": 9.581992585255692e-05, "loss": 2.0318, "step": 2095 }, { "epoch": 0.1577748922636858, "grad_norm": 6.2426981925964355, "learning_rate": 9.581504487791954e-05, "loss": 2.2789, "step": 2096 }, { "epoch": 0.15785016654434597, "grad_norm": 5.699377536773682, "learning_rate": 9.58101611797092e-05, "loss": 2.3412, "step": 2097 }, { "epoch": 0.15792544082500612, "grad_norm": 7.385111331939697, "learning_rate": 9.580527475821621e-05, "loss": 2.247, "step": 2098 }, { "epoch": 0.15800071510566627, "grad_norm": 5.266499996185303, "learning_rate": 9.580038561373108e-05, "loss": 2.0686, "step": 2099 }, { "epoch": 0.15807598938632642, "grad_norm": 4.207569599151611, "learning_rate": 9.579549374654442e-05, "loss": 2.3634, "step": 2100 }, { "epoch": 0.1581512636669866, "grad_norm": 7.277801513671875, "learning_rate": 9.579059915694708e-05, "loss": 2.9515, "step": 2101 }, { "epoch": 0.15822653794764674, "grad_norm": 6.7272047996521, "learning_rate": 9.578570184523e-05, "loss": 1.9149, "step": 2102 }, { "epoch": 0.1583018122283069, "grad_norm": 4.62692403793335, "learning_rate": 9.578080181168432e-05, "loss": 2.1776, "step": 2103 }, { "epoch": 0.15837708650896704, "grad_norm": 4.971329689025879, "learning_rate": 9.577589905660133e-05, "loss": 2.2639, "step": 2104 }, { "epoch": 0.1584523607896272, "grad_norm": 5.457459926605225, "learning_rate": 9.577099358027249e-05, "loss": 2.1835, "step": 2105 }, { "epoch": 0.15852763507028736, "grad_norm": 4.928334712982178, "learning_rate": 9.57660853829894e-05, "loss": 2.1405, "step": 2106 }, { "epoch": 0.1586029093509475, "grad_norm": 4.502702713012695, "learning_rate": 9.576117446504387e-05, "loss": 2.0504, "step": 2107 }, { "epoch": 0.15867818363160768, "grad_norm": 7.410353183746338, "learning_rate": 9.575626082672782e-05, "loss": 1.8989, "step": 2108 }, { "epoch": 0.15875345791226783, "grad_norm": 5.7210493087768555, "learning_rate": 9.575134446833333e-05, "loss": 2.695, "step": 2109 }, { "epoch": 0.15882873219292798, "grad_norm": 4.770233154296875, "learning_rate": 9.574642539015271e-05, "loss": 2.205, "step": 2110 }, { "epoch": 0.15890400647358813, "grad_norm": 6.2118072509765625, "learning_rate": 9.574150359247835e-05, "loss": 2.2313, "step": 2111 }, { "epoch": 0.1589792807542483, "grad_norm": 5.2125563621521, "learning_rate": 9.573657907560286e-05, "loss": 1.9515, "step": 2112 }, { "epoch": 0.15905455503490845, "grad_norm": 3.98368501663208, "learning_rate": 9.573165183981897e-05, "loss": 2.9803, "step": 2113 }, { "epoch": 0.1591298293155686, "grad_norm": 4.522790431976318, "learning_rate": 9.57267218854196e-05, "loss": 2.0788, "step": 2114 }, { "epoch": 0.15920510359622875, "grad_norm": 4.263027667999268, "learning_rate": 9.572178921269781e-05, "loss": 2.0058, "step": 2115 }, { "epoch": 0.15928037787688892, "grad_norm": 6.23157262802124, "learning_rate": 9.571685382194686e-05, "loss": 2.6592, "step": 2116 }, { "epoch": 0.15935565215754907, "grad_norm": 3.7246224880218506, "learning_rate": 9.571191571346012e-05, "loss": 2.1737, "step": 2117 }, { "epoch": 0.15943092643820922, "grad_norm": 4.581549644470215, "learning_rate": 9.570697488753113e-05, "loss": 2.1702, "step": 2118 }, { "epoch": 0.15950620071886937, "grad_norm": 4.184098720550537, "learning_rate": 9.570203134445366e-05, "loss": 1.9539, "step": 2119 }, { "epoch": 0.15958147499952954, "grad_norm": 5.316369533538818, "learning_rate": 9.569708508452156e-05, "loss": 2.5667, "step": 2120 }, { "epoch": 0.1596567492801897, "grad_norm": 6.95085334777832, "learning_rate": 9.569213610802885e-05, "loss": 2.3969, "step": 2121 }, { "epoch": 0.15973202356084984, "grad_norm": 4.367122173309326, "learning_rate": 9.568718441526976e-05, "loss": 2.4738, "step": 2122 }, { "epoch": 0.15980729784151002, "grad_norm": 4.955066680908203, "learning_rate": 9.568223000653866e-05, "loss": 2.2542, "step": 2123 }, { "epoch": 0.15988257212217016, "grad_norm": 4.541550636291504, "learning_rate": 9.567727288213005e-05, "loss": 1.8783, "step": 2124 }, { "epoch": 0.1599578464028303, "grad_norm": 3.552600383758545, "learning_rate": 9.567231304233863e-05, "loss": 1.9578, "step": 2125 }, { "epoch": 0.16003312068349046, "grad_norm": 4.2720627784729, "learning_rate": 9.566735048745926e-05, "loss": 2.7564, "step": 2126 }, { "epoch": 0.16010839496415064, "grad_norm": 4.8603644371032715, "learning_rate": 9.566238521778693e-05, "loss": 2.3159, "step": 2127 }, { "epoch": 0.16018366924481078, "grad_norm": 5.021570205688477, "learning_rate": 9.565741723361682e-05, "loss": 2.0601, "step": 2128 }, { "epoch": 0.16025894352547093, "grad_norm": 4.532212734222412, "learning_rate": 9.565244653524426e-05, "loss": 1.8818, "step": 2129 }, { "epoch": 0.16033421780613108, "grad_norm": 6.954552173614502, "learning_rate": 9.564747312296474e-05, "loss": 2.9218, "step": 2130 }, { "epoch": 0.16040949208679126, "grad_norm": 6.202856540679932, "learning_rate": 9.564249699707394e-05, "loss": 2.3562, "step": 2131 }, { "epoch": 0.1604847663674514, "grad_norm": 6.85939359664917, "learning_rate": 9.563751815786764e-05, "loss": 2.2643, "step": 2132 }, { "epoch": 0.16056004064811155, "grad_norm": 5.891876697540283, "learning_rate": 9.563253660564185e-05, "loss": 2.1593, "step": 2133 }, { "epoch": 0.1606353149287717, "grad_norm": 8.343067169189453, "learning_rate": 9.562755234069267e-05, "loss": 1.9639, "step": 2134 }, { "epoch": 0.16071058920943188, "grad_norm": 4.527833938598633, "learning_rate": 9.562256536331644e-05, "loss": 2.5304, "step": 2135 }, { "epoch": 0.16078586349009202, "grad_norm": 3.9452993869781494, "learning_rate": 9.56175756738096e-05, "loss": 2.1859, "step": 2136 }, { "epoch": 0.16086113777075217, "grad_norm": 6.359346389770508, "learning_rate": 9.561258327246877e-05, "loss": 2.0809, "step": 2137 }, { "epoch": 0.16093641205141235, "grad_norm": 4.81974458694458, "learning_rate": 9.560758815959074e-05, "loss": 2.2716, "step": 2138 }, { "epoch": 0.1610116863320725, "grad_norm": 4.2938313484191895, "learning_rate": 9.560259033547248e-05, "loss": 2.1051, "step": 2139 }, { "epoch": 0.16108696061273264, "grad_norm": 3.4313347339630127, "learning_rate": 9.559758980041105e-05, "loss": 2.2185, "step": 2140 }, { "epoch": 0.1611622348933928, "grad_norm": 4.162536144256592, "learning_rate": 9.559258655470375e-05, "loss": 2.53, "step": 2141 }, { "epoch": 0.16123750917405297, "grad_norm": 5.780637741088867, "learning_rate": 9.5587580598648e-05, "loss": 2.126, "step": 2142 }, { "epoch": 0.16131278345471312, "grad_norm": 4.964845180511475, "learning_rate": 9.558257193254139e-05, "loss": 2.7349, "step": 2143 }, { "epoch": 0.16138805773537326, "grad_norm": 4.01690149307251, "learning_rate": 9.557756055668165e-05, "loss": 2.0901, "step": 2144 }, { "epoch": 0.1614633320160334, "grad_norm": 5.37811803817749, "learning_rate": 9.557254647136675e-05, "loss": 2.3779, "step": 2145 }, { "epoch": 0.1615386062966936, "grad_norm": 6.000019550323486, "learning_rate": 9.556752967689469e-05, "loss": 2.0882, "step": 2146 }, { "epoch": 0.16161388057735374, "grad_norm": 4.8202643394470215, "learning_rate": 9.556251017356375e-05, "loss": 2.279, "step": 2147 }, { "epoch": 0.16168915485801388, "grad_norm": 4.633787155151367, "learning_rate": 9.555748796167232e-05, "loss": 2.2039, "step": 2148 }, { "epoch": 0.16176442913867403, "grad_norm": 5.664914608001709, "learning_rate": 9.555246304151894e-05, "loss": 2.306, "step": 2149 }, { "epoch": 0.1618397034193342, "grad_norm": 4.398691177368164, "learning_rate": 9.554743541340234e-05, "loss": 2.1348, "step": 2150 }, { "epoch": 0.16191497769999436, "grad_norm": 4.392038345336914, "learning_rate": 9.55424050776214e-05, "loss": 2.182, "step": 2151 }, { "epoch": 0.1619902519806545, "grad_norm": 5.23166036605835, "learning_rate": 9.553737203447514e-05, "loss": 2.3464, "step": 2152 }, { "epoch": 0.16206552626131465, "grad_norm": 4.836452960968018, "learning_rate": 9.553233628426279e-05, "loss": 2.0934, "step": 2153 }, { "epoch": 0.16214080054197483, "grad_norm": 5.13286828994751, "learning_rate": 9.552729782728369e-05, "loss": 2.1849, "step": 2154 }, { "epoch": 0.16221607482263498, "grad_norm": 4.531123161315918, "learning_rate": 9.552225666383737e-05, "loss": 1.9598, "step": 2155 }, { "epoch": 0.16229134910329512, "grad_norm": 5.338104248046875, "learning_rate": 9.551721279422351e-05, "loss": 2.4238, "step": 2156 }, { "epoch": 0.1623666233839553, "grad_norm": 4.85247802734375, "learning_rate": 9.551216621874195e-05, "loss": 1.9163, "step": 2157 }, { "epoch": 0.16244189766461545, "grad_norm": 3.82311749458313, "learning_rate": 9.55071169376927e-05, "loss": 1.9752, "step": 2158 }, { "epoch": 0.1625171719452756, "grad_norm": 5.538928031921387, "learning_rate": 9.55020649513759e-05, "loss": 2.2083, "step": 2159 }, { "epoch": 0.16259244622593574, "grad_norm": 5.192752838134766, "learning_rate": 9.549701026009193e-05, "loss": 2.237, "step": 2160 }, { "epoch": 0.16266772050659592, "grad_norm": 4.371562957763672, "learning_rate": 9.549195286414123e-05, "loss": 2.1165, "step": 2161 }, { "epoch": 0.16274299478725607, "grad_norm": 6.093799591064453, "learning_rate": 9.548689276382447e-05, "loss": 2.1418, "step": 2162 }, { "epoch": 0.16281826906791622, "grad_norm": 4.156737327575684, "learning_rate": 9.548182995944244e-05, "loss": 1.9759, "step": 2163 }, { "epoch": 0.16289354334857636, "grad_norm": 4.536795139312744, "learning_rate": 9.547676445129613e-05, "loss": 1.8443, "step": 2164 }, { "epoch": 0.16296881762923654, "grad_norm": 5.529335975646973, "learning_rate": 9.547169623968665e-05, "loss": 1.8961, "step": 2165 }, { "epoch": 0.1630440919098967, "grad_norm": 7.771794319152832, "learning_rate": 9.546662532491533e-05, "loss": 2.0879, "step": 2166 }, { "epoch": 0.16311936619055684, "grad_norm": 5.329034328460693, "learning_rate": 9.546155170728356e-05, "loss": 2.2964, "step": 2167 }, { "epoch": 0.16319464047121698, "grad_norm": 3.9763481616973877, "learning_rate": 9.5456475387093e-05, "loss": 2.6385, "step": 2168 }, { "epoch": 0.16326991475187716, "grad_norm": 4.062328338623047, "learning_rate": 9.54513963646454e-05, "loss": 1.9477, "step": 2169 }, { "epoch": 0.1633451890325373, "grad_norm": 4.845366954803467, "learning_rate": 9.544631464024271e-05, "loss": 2.2485, "step": 2170 }, { "epoch": 0.16342046331319746, "grad_norm": 6.792723178863525, "learning_rate": 9.544123021418701e-05, "loss": 1.8903, "step": 2171 }, { "epoch": 0.16349573759385763, "grad_norm": 4.109110355377197, "learning_rate": 9.543614308678057e-05, "loss": 2.359, "step": 2172 }, { "epoch": 0.16357101187451778, "grad_norm": 5.6095356941223145, "learning_rate": 9.543105325832577e-05, "loss": 2.4139, "step": 2173 }, { "epoch": 0.16364628615517793, "grad_norm": 4.7631754875183105, "learning_rate": 9.542596072912524e-05, "loss": 2.0527, "step": 2174 }, { "epoch": 0.16372156043583808, "grad_norm": 3.9870476722717285, "learning_rate": 9.542086549948167e-05, "loss": 2.5087, "step": 2175 }, { "epoch": 0.16379683471649825, "grad_norm": 4.602846622467041, "learning_rate": 9.541576756969797e-05, "loss": 2.2427, "step": 2176 }, { "epoch": 0.1638721089971584, "grad_norm": 4.71099328994751, "learning_rate": 9.54106669400772e-05, "loss": 2.1087, "step": 2177 }, { "epoch": 0.16394738327781855, "grad_norm": 5.087553024291992, "learning_rate": 9.54055636109226e-05, "loss": 1.7692, "step": 2178 }, { "epoch": 0.1640226575584787, "grad_norm": 8.089323997497559, "learning_rate": 9.540045758253751e-05, "loss": 2.2295, "step": 2179 }, { "epoch": 0.16409793183913887, "grad_norm": 5.659903526306152, "learning_rate": 9.539534885522547e-05, "loss": 2.0684, "step": 2180 }, { "epoch": 0.16417320611979902, "grad_norm": 5.205198287963867, "learning_rate": 9.539023742929022e-05, "loss": 2.5589, "step": 2181 }, { "epoch": 0.16424848040045917, "grad_norm": 6.978159427642822, "learning_rate": 9.538512330503557e-05, "loss": 2.1062, "step": 2182 }, { "epoch": 0.16432375468111932, "grad_norm": 4.487802028656006, "learning_rate": 9.538000648276559e-05, "loss": 2.0254, "step": 2183 }, { "epoch": 0.1643990289617795, "grad_norm": 3.3947670459747314, "learning_rate": 9.537488696278443e-05, "loss": 2.7997, "step": 2184 }, { "epoch": 0.16447430324243964, "grad_norm": 4.046313762664795, "learning_rate": 9.536976474539642e-05, "loss": 1.9636, "step": 2185 }, { "epoch": 0.1645495775230998, "grad_norm": 5.125520706176758, "learning_rate": 9.536463983090608e-05, "loss": 2.0246, "step": 2186 }, { "epoch": 0.16462485180375996, "grad_norm": 4.133129596710205, "learning_rate": 9.535951221961807e-05, "loss": 2.3225, "step": 2187 }, { "epoch": 0.1647001260844201, "grad_norm": 4.609368801116943, "learning_rate": 9.53543819118372e-05, "loss": 2.1459, "step": 2188 }, { "epoch": 0.16477540036508026, "grad_norm": 5.235116004943848, "learning_rate": 9.534924890786849e-05, "loss": 2.0638, "step": 2189 }, { "epoch": 0.1648506746457404, "grad_norm": 3.448789596557617, "learning_rate": 9.534411320801704e-05, "loss": 2.3114, "step": 2190 }, { "epoch": 0.16492594892640058, "grad_norm": 5.083763599395752, "learning_rate": 9.533897481258815e-05, "loss": 1.968, "step": 2191 }, { "epoch": 0.16500122320706073, "grad_norm": 3.7599713802337646, "learning_rate": 9.533383372188731e-05, "loss": 2.072, "step": 2192 }, { "epoch": 0.16507649748772088, "grad_norm": 5.607828617095947, "learning_rate": 9.532868993622015e-05, "loss": 1.9586, "step": 2193 }, { "epoch": 0.16515177176838103, "grad_norm": 6.540659427642822, "learning_rate": 9.532354345589241e-05, "loss": 2.1076, "step": 2194 }, { "epoch": 0.1652270460490412, "grad_norm": 4.928903579711914, "learning_rate": 9.531839428121008e-05, "loss": 2.429, "step": 2195 }, { "epoch": 0.16530232032970135, "grad_norm": 4.7645368576049805, "learning_rate": 9.531324241247924e-05, "loss": 2.0913, "step": 2196 }, { "epoch": 0.1653775946103615, "grad_norm": 5.299357891082764, "learning_rate": 9.530808785000614e-05, "loss": 2.4863, "step": 2197 }, { "epoch": 0.16545286889102165, "grad_norm": 5.263016223907471, "learning_rate": 9.530293059409723e-05, "loss": 2.1709, "step": 2198 }, { "epoch": 0.16552814317168182, "grad_norm": 5.059325218200684, "learning_rate": 9.52977706450591e-05, "loss": 1.9798, "step": 2199 }, { "epoch": 0.16560341745234197, "grad_norm": 5.113597393035889, "learning_rate": 9.529260800319846e-05, "loss": 2.1529, "step": 2200 }, { "epoch": 0.16567869173300212, "grad_norm": 5.493249893188477, "learning_rate": 9.528744266882224e-05, "loss": 1.9849, "step": 2201 }, { "epoch": 0.16575396601366227, "grad_norm": 3.821795701980591, "learning_rate": 9.528227464223749e-05, "loss": 1.9154, "step": 2202 }, { "epoch": 0.16582924029432244, "grad_norm": 3.966338634490967, "learning_rate": 9.527710392375144e-05, "loss": 2.1736, "step": 2203 }, { "epoch": 0.1659045145749826, "grad_norm": 4.234589576721191, "learning_rate": 9.527193051367149e-05, "loss": 1.9443, "step": 2204 }, { "epoch": 0.16597978885564274, "grad_norm": 4.575554370880127, "learning_rate": 9.526675441230517e-05, "loss": 1.9432, "step": 2205 }, { "epoch": 0.16605506313630292, "grad_norm": 4.4436540603637695, "learning_rate": 9.526157561996018e-05, "loss": 2.1861, "step": 2206 }, { "epoch": 0.16613033741696306, "grad_norm": 3.480534553527832, "learning_rate": 9.525639413694438e-05, "loss": 2.171, "step": 2207 }, { "epoch": 0.1662056116976232, "grad_norm": 4.550610065460205, "learning_rate": 9.525120996356582e-05, "loss": 1.8931, "step": 2208 }, { "epoch": 0.16628088597828336, "grad_norm": 5.82284688949585, "learning_rate": 9.524602310013266e-05, "loss": 2.2064, "step": 2209 }, { "epoch": 0.16635616025894354, "grad_norm": 5.232897758483887, "learning_rate": 9.524083354695325e-05, "loss": 2.0327, "step": 2210 }, { "epoch": 0.16643143453960368, "grad_norm": 6.37138032913208, "learning_rate": 9.52356413043361e-05, "loss": 1.8247, "step": 2211 }, { "epoch": 0.16650670882026383, "grad_norm": 5.429400444030762, "learning_rate": 9.523044637258986e-05, "loss": 2.1906, "step": 2212 }, { "epoch": 0.16658198310092398, "grad_norm": 4.930382251739502, "learning_rate": 9.522524875202338e-05, "loss": 2.2628, "step": 2213 }, { "epoch": 0.16665725738158416, "grad_norm": 5.107860565185547, "learning_rate": 9.522004844294563e-05, "loss": 2.3101, "step": 2214 }, { "epoch": 0.1667325316622443, "grad_norm": 4.301032543182373, "learning_rate": 9.521484544566573e-05, "loss": 2.1433, "step": 2215 }, { "epoch": 0.16680780594290445, "grad_norm": 6.7353739738464355, "learning_rate": 9.520963976049302e-05, "loss": 3.0137, "step": 2216 }, { "epoch": 0.1668830802235646, "grad_norm": 4.559290885925293, "learning_rate": 9.520443138773694e-05, "loss": 2.8433, "step": 2217 }, { "epoch": 0.16695835450422478, "grad_norm": 4.9988298416137695, "learning_rate": 9.519922032770712e-05, "loss": 2.1131, "step": 2218 }, { "epoch": 0.16703362878488492, "grad_norm": 4.082030296325684, "learning_rate": 9.519400658071336e-05, "loss": 2.1254, "step": 2219 }, { "epoch": 0.16710890306554507, "grad_norm": 4.1938323974609375, "learning_rate": 9.518879014706556e-05, "loss": 2.1296, "step": 2220 }, { "epoch": 0.16718417734620525, "grad_norm": 4.549106121063232, "learning_rate": 9.518357102707387e-05, "loss": 2.1653, "step": 2221 }, { "epoch": 0.1672594516268654, "grad_norm": 4.568601131439209, "learning_rate": 9.517834922104851e-05, "loss": 2.2272, "step": 2222 }, { "epoch": 0.16733472590752554, "grad_norm": 4.738588809967041, "learning_rate": 9.517312472929992e-05, "loss": 2.3205, "step": 2223 }, { "epoch": 0.1674100001881857, "grad_norm": 3.195651054382324, "learning_rate": 9.516789755213868e-05, "loss": 2.2214, "step": 2224 }, { "epoch": 0.16748527446884587, "grad_norm": 4.565551280975342, "learning_rate": 9.516266768987555e-05, "loss": 1.8288, "step": 2225 }, { "epoch": 0.16756054874950602, "grad_norm": 7.472075939178467, "learning_rate": 9.51574351428214e-05, "loss": 2.6066, "step": 2226 }, { "epoch": 0.16763582303016616, "grad_norm": 5.391735553741455, "learning_rate": 9.51521999112873e-05, "loss": 2.3681, "step": 2227 }, { "epoch": 0.1677110973108263, "grad_norm": 6.187355041503906, "learning_rate": 9.514696199558448e-05, "loss": 1.8019, "step": 2228 }, { "epoch": 0.1677863715914865, "grad_norm": 4.559253692626953, "learning_rate": 9.514172139602431e-05, "loss": 1.9727, "step": 2229 }, { "epoch": 0.16786164587214664, "grad_norm": 3.7746024131774902, "learning_rate": 9.513647811291832e-05, "loss": 2.0619, "step": 2230 }, { "epoch": 0.16793692015280678, "grad_norm": 4.132672309875488, "learning_rate": 9.513123214657821e-05, "loss": 2.5438, "step": 2231 }, { "epoch": 0.16801219443346693, "grad_norm": 3.707167148590088, "learning_rate": 9.512598349731588e-05, "loss": 2.1179, "step": 2232 }, { "epoch": 0.1680874687141271, "grad_norm": 5.947897911071777, "learning_rate": 9.512073216544329e-05, "loss": 2.3137, "step": 2233 }, { "epoch": 0.16816274299478726, "grad_norm": 4.157155513763428, "learning_rate": 9.511547815127262e-05, "loss": 1.8269, "step": 2234 }, { "epoch": 0.1682380172754474, "grad_norm": 5.308967590332031, "learning_rate": 9.511022145511624e-05, "loss": 1.9455, "step": 2235 }, { "epoch": 0.16831329155610758, "grad_norm": 5.097304344177246, "learning_rate": 9.510496207728663e-05, "loss": 2.0311, "step": 2236 }, { "epoch": 0.16838856583676773, "grad_norm": 4.092731475830078, "learning_rate": 9.509970001809645e-05, "loss": 2.1319, "step": 2237 }, { "epoch": 0.16846384011742788, "grad_norm": 4.651304721832275, "learning_rate": 9.509443527785851e-05, "loss": 2.1493, "step": 2238 }, { "epoch": 0.16853911439808802, "grad_norm": 4.787350654602051, "learning_rate": 9.508916785688578e-05, "loss": 2.5233, "step": 2239 }, { "epoch": 0.1686143886787482, "grad_norm": 4.076807022094727, "learning_rate": 9.50838977554914e-05, "loss": 2.1228, "step": 2240 }, { "epoch": 0.16868966295940835, "grad_norm": 5.558131694793701, "learning_rate": 9.507862497398864e-05, "loss": 2.2416, "step": 2241 }, { "epoch": 0.1687649372400685, "grad_norm": 6.108593463897705, "learning_rate": 9.5073349512691e-05, "loss": 1.9382, "step": 2242 }, { "epoch": 0.16884021152072864, "grad_norm": 5.862201690673828, "learning_rate": 9.506807137191205e-05, "loss": 2.1214, "step": 2243 }, { "epoch": 0.16891548580138882, "grad_norm": 4.9014763832092285, "learning_rate": 9.506279055196556e-05, "loss": 2.1792, "step": 2244 }, { "epoch": 0.16899076008204897, "grad_norm": 4.522253036499023, "learning_rate": 9.505750705316548e-05, "loss": 2.0104, "step": 2245 }, { "epoch": 0.16906603436270912, "grad_norm": 4.980876922607422, "learning_rate": 9.505222087582589e-05, "loss": 2.2206, "step": 2246 }, { "epoch": 0.16914130864336926, "grad_norm": 4.8022050857543945, "learning_rate": 9.504693202026104e-05, "loss": 2.1094, "step": 2247 }, { "epoch": 0.16921658292402944, "grad_norm": 6.1085896492004395, "learning_rate": 9.504164048678533e-05, "loss": 2.3155, "step": 2248 }, { "epoch": 0.1692918572046896, "grad_norm": 4.865021705627441, "learning_rate": 9.503634627571333e-05, "loss": 2.1186, "step": 2249 }, { "epoch": 0.16936713148534974, "grad_norm": 5.2590651512146, "learning_rate": 9.503104938735978e-05, "loss": 2.0465, "step": 2250 }, { "epoch": 0.16944240576600988, "grad_norm": 4.784515857696533, "learning_rate": 9.502574982203956e-05, "loss": 2.0709, "step": 2251 }, { "epoch": 0.16951768004667006, "grad_norm": 5.401655197143555, "learning_rate": 9.502044758006769e-05, "loss": 2.0731, "step": 2252 }, { "epoch": 0.1695929543273302, "grad_norm": 3.9704253673553467, "learning_rate": 9.501514266175938e-05, "loss": 1.7908, "step": 2253 }, { "epoch": 0.16966822860799036, "grad_norm": 4.363543510437012, "learning_rate": 9.500983506743002e-05, "loss": 2.0285, "step": 2254 }, { "epoch": 0.16974350288865053, "grad_norm": 5.469841003417969, "learning_rate": 9.50045247973951e-05, "loss": 2.0921, "step": 2255 }, { "epoch": 0.16981877716931068, "grad_norm": 5.638004302978516, "learning_rate": 9.499921185197032e-05, "loss": 2.2864, "step": 2256 }, { "epoch": 0.16989405144997083, "grad_norm": 5.755684852600098, "learning_rate": 9.499389623147151e-05, "loss": 2.2282, "step": 2257 }, { "epoch": 0.16996932573063098, "grad_norm": 4.243750095367432, "learning_rate": 9.498857793621468e-05, "loss": 1.8188, "step": 2258 }, { "epoch": 0.17004460001129115, "grad_norm": 4.503023624420166, "learning_rate": 9.498325696651597e-05, "loss": 2.126, "step": 2259 }, { "epoch": 0.1701198742919513, "grad_norm": 4.63602876663208, "learning_rate": 9.497793332269171e-05, "loss": 1.9809, "step": 2260 }, { "epoch": 0.17019514857261145, "grad_norm": 5.731786251068115, "learning_rate": 9.497260700505837e-05, "loss": 2.1542, "step": 2261 }, { "epoch": 0.1702704228532716, "grad_norm": 4.860467433929443, "learning_rate": 9.496727801393257e-05, "loss": 2.0546, "step": 2262 }, { "epoch": 0.17034569713393177, "grad_norm": 6.777553081512451, "learning_rate": 9.496194634963113e-05, "loss": 2.1452, "step": 2263 }, { "epoch": 0.17042097141459192, "grad_norm": 7.032948017120361, "learning_rate": 9.495661201247099e-05, "loss": 2.0653, "step": 2264 }, { "epoch": 0.17049624569525207, "grad_norm": 5.93035888671875, "learning_rate": 9.495127500276926e-05, "loss": 2.2643, "step": 2265 }, { "epoch": 0.17057151997591222, "grad_norm": 5.123088359832764, "learning_rate": 9.494593532084322e-05, "loss": 2.2344, "step": 2266 }, { "epoch": 0.1706467942565724, "grad_norm": 3.9339468479156494, "learning_rate": 9.494059296701027e-05, "loss": 2.0217, "step": 2267 }, { "epoch": 0.17072206853723254, "grad_norm": 4.163431167602539, "learning_rate": 9.493524794158804e-05, "loss": 2.0717, "step": 2268 }, { "epoch": 0.1707973428178927, "grad_norm": 4.021878719329834, "learning_rate": 9.492990024489423e-05, "loss": 2.0992, "step": 2269 }, { "epoch": 0.17087261709855286, "grad_norm": 4.295801162719727, "learning_rate": 9.492454987724678e-05, "loss": 2.1383, "step": 2270 }, { "epoch": 0.170947891379213, "grad_norm": 7.184655666351318, "learning_rate": 9.491919683896374e-05, "loss": 2.3497, "step": 2271 }, { "epoch": 0.17102316565987316, "grad_norm": 5.332753658294678, "learning_rate": 9.491384113036335e-05, "loss": 2.2226, "step": 2272 }, { "epoch": 0.1710984399405333, "grad_norm": 6.286576747894287, "learning_rate": 9.490848275176396e-05, "loss": 2.6511, "step": 2273 }, { "epoch": 0.17117371422119348, "grad_norm": 4.57448148727417, "learning_rate": 9.490312170348412e-05, "loss": 2.454, "step": 2274 }, { "epoch": 0.17124898850185363, "grad_norm": 5.415542125701904, "learning_rate": 9.489775798584255e-05, "loss": 2.0048, "step": 2275 }, { "epoch": 0.17132426278251378, "grad_norm": 5.4753594398498535, "learning_rate": 9.48923915991581e-05, "loss": 1.9631, "step": 2276 }, { "epoch": 0.17139953706317393, "grad_norm": 3.6613900661468506, "learning_rate": 9.488702254374977e-05, "loss": 2.0597, "step": 2277 }, { "epoch": 0.1714748113438341, "grad_norm": 3.686659574508667, "learning_rate": 9.488165081993674e-05, "loss": 1.9906, "step": 2278 }, { "epoch": 0.17155008562449425, "grad_norm": 6.281643390655518, "learning_rate": 9.487627642803835e-05, "loss": 2.2516, "step": 2279 }, { "epoch": 0.1716253599051544, "grad_norm": 5.62980318069458, "learning_rate": 9.48708993683741e-05, "loss": 2.0339, "step": 2280 }, { "epoch": 0.17170063418581455, "grad_norm": 6.123843193054199, "learning_rate": 9.486551964126363e-05, "loss": 2.0877, "step": 2281 }, { "epoch": 0.17177590846647472, "grad_norm": 5.146125316619873, "learning_rate": 9.486013724702676e-05, "loss": 2.2186, "step": 2282 }, { "epoch": 0.17185118274713487, "grad_norm": 3.640549659729004, "learning_rate": 9.485475218598343e-05, "loss": 2.0595, "step": 2283 }, { "epoch": 0.17192645702779502, "grad_norm": 4.005462169647217, "learning_rate": 9.48493644584538e-05, "loss": 2.0645, "step": 2284 }, { "epoch": 0.1720017313084552, "grad_norm": 4.827098369598389, "learning_rate": 9.484397406475813e-05, "loss": 2.2185, "step": 2285 }, { "epoch": 0.17207700558911534, "grad_norm": 5.123477935791016, "learning_rate": 9.483858100521688e-05, "loss": 1.9404, "step": 2286 }, { "epoch": 0.1721522798697755, "grad_norm": 5.968159198760986, "learning_rate": 9.483318528015064e-05, "loss": 2.3006, "step": 2287 }, { "epoch": 0.17222755415043564, "grad_norm": 4.824171543121338, "learning_rate": 9.482778688988018e-05, "loss": 1.9657, "step": 2288 }, { "epoch": 0.17230282843109582, "grad_norm": 8.66939926147461, "learning_rate": 9.48223858347264e-05, "loss": 2.1769, "step": 2289 }, { "epoch": 0.17237810271175596, "grad_norm": 10.327978134155273, "learning_rate": 9.481698211501042e-05, "loss": 2.022, "step": 2290 }, { "epoch": 0.1724533769924161, "grad_norm": 5.406477928161621, "learning_rate": 9.481157573105343e-05, "loss": 1.9567, "step": 2291 }, { "epoch": 0.17252865127307626, "grad_norm": 4.529332160949707, "learning_rate": 9.480616668317685e-05, "loss": 2.5469, "step": 2292 }, { "epoch": 0.17260392555373644, "grad_norm": 5.656266212463379, "learning_rate": 9.480075497170221e-05, "loss": 2.2001, "step": 2293 }, { "epoch": 0.17267919983439659, "grad_norm": 4.836466312408447, "learning_rate": 9.479534059695125e-05, "loss": 2.7274, "step": 2294 }, { "epoch": 0.17275447411505673, "grad_norm": 3.5666630268096924, "learning_rate": 9.478992355924581e-05, "loss": 1.9948, "step": 2295 }, { "epoch": 0.17282974839571688, "grad_norm": 4.9752516746521, "learning_rate": 9.478450385890793e-05, "loss": 2.088, "step": 2296 }, { "epoch": 0.17290502267637706, "grad_norm": 3.6577799320220947, "learning_rate": 9.477908149625981e-05, "loss": 2.086, "step": 2297 }, { "epoch": 0.1729802969570372, "grad_norm": 3.805631160736084, "learning_rate": 9.477365647162377e-05, "loss": 1.9465, "step": 2298 }, { "epoch": 0.17305557123769735, "grad_norm": 4.894316673278809, "learning_rate": 9.476822878532235e-05, "loss": 2.0717, "step": 2299 }, { "epoch": 0.17313084551835753, "grad_norm": 3.622702121734619, "learning_rate": 9.476279843767814e-05, "loss": 1.9883, "step": 2300 }, { "epoch": 0.17320611979901768, "grad_norm": 3.7990994453430176, "learning_rate": 9.475736542901404e-05, "loss": 2.0218, "step": 2301 }, { "epoch": 0.17328139407967783, "grad_norm": 3.273437976837158, "learning_rate": 9.475192975965296e-05, "loss": 1.9075, "step": 2302 }, { "epoch": 0.17335666836033797, "grad_norm": 4.26989221572876, "learning_rate": 9.474649142991808e-05, "loss": 2.4987, "step": 2303 }, { "epoch": 0.17343194264099815, "grad_norm": 4.140346527099609, "learning_rate": 9.474105044013267e-05, "loss": 2.2862, "step": 2304 }, { "epoch": 0.1735072169216583, "grad_norm": 4.889487266540527, "learning_rate": 9.473560679062018e-05, "loss": 1.8517, "step": 2305 }, { "epoch": 0.17358249120231845, "grad_norm": 3.732370615005493, "learning_rate": 9.473016048170424e-05, "loss": 2.1845, "step": 2306 }, { "epoch": 0.1736577654829786, "grad_norm": 4.43004846572876, "learning_rate": 9.472471151370861e-05, "loss": 2.192, "step": 2307 }, { "epoch": 0.17373303976363877, "grad_norm": 5.592849254608154, "learning_rate": 9.47192598869572e-05, "loss": 2.6197, "step": 2308 }, { "epoch": 0.17380831404429892, "grad_norm": 6.032291412353516, "learning_rate": 9.47138056017741e-05, "loss": 2.0181, "step": 2309 }, { "epoch": 0.17388358832495907, "grad_norm": 5.228835582733154, "learning_rate": 9.470834865848357e-05, "loss": 2.2855, "step": 2310 }, { "epoch": 0.1739588626056192, "grad_norm": 5.930988311767578, "learning_rate": 9.470288905740998e-05, "loss": 2.1199, "step": 2311 }, { "epoch": 0.1740341368862794, "grad_norm": 5.700383186340332, "learning_rate": 9.469742679887791e-05, "loss": 2.2224, "step": 2312 }, { "epoch": 0.17410941116693954, "grad_norm": 5.302191257476807, "learning_rate": 9.469196188321207e-05, "loss": 2.0117, "step": 2313 }, { "epoch": 0.17418468544759969, "grad_norm": 5.366429805755615, "learning_rate": 9.468649431073733e-05, "loss": 2.1309, "step": 2314 }, { "epoch": 0.17425995972825983, "grad_norm": 4.630857944488525, "learning_rate": 9.468102408177871e-05, "loss": 2.1334, "step": 2315 }, { "epoch": 0.17433523400892, "grad_norm": 4.050517559051514, "learning_rate": 9.467555119666143e-05, "loss": 2.3049, "step": 2316 }, { "epoch": 0.17441050828958016, "grad_norm": 4.166477203369141, "learning_rate": 9.467007565571081e-05, "loss": 2.1528, "step": 2317 }, { "epoch": 0.1744857825702403, "grad_norm": 3.5567290782928467, "learning_rate": 9.466459745925237e-05, "loss": 2.0358, "step": 2318 }, { "epoch": 0.17456105685090048, "grad_norm": 3.938577175140381, "learning_rate": 9.465911660761177e-05, "loss": 1.953, "step": 2319 }, { "epoch": 0.17463633113156063, "grad_norm": 4.731121063232422, "learning_rate": 9.465363310111483e-05, "loss": 2.2991, "step": 2320 }, { "epoch": 0.17471160541222078, "grad_norm": 4.032708168029785, "learning_rate": 9.464814694008752e-05, "loss": 1.9237, "step": 2321 }, { "epoch": 0.17478687969288093, "grad_norm": 4.967084884643555, "learning_rate": 9.464265812485598e-05, "loss": 2.3047, "step": 2322 }, { "epoch": 0.1748621539735411, "grad_norm": 4.150388240814209, "learning_rate": 9.463716665574652e-05, "loss": 2.2523, "step": 2323 }, { "epoch": 0.17493742825420125, "grad_norm": 5.334815979003906, "learning_rate": 9.463167253308559e-05, "loss": 2.1501, "step": 2324 }, { "epoch": 0.1750127025348614, "grad_norm": 6.422043800354004, "learning_rate": 9.462617575719977e-05, "loss": 1.9845, "step": 2325 }, { "epoch": 0.17508797681552155, "grad_norm": 4.570641040802002, "learning_rate": 9.462067632841585e-05, "loss": 2.1799, "step": 2326 }, { "epoch": 0.17516325109618172, "grad_norm": 5.045809268951416, "learning_rate": 9.461517424706075e-05, "loss": 2.415, "step": 2327 }, { "epoch": 0.17523852537684187, "grad_norm": 4.2256388664245605, "learning_rate": 9.460966951346157e-05, "loss": 1.8744, "step": 2328 }, { "epoch": 0.17531379965750202, "grad_norm": 4.779211044311523, "learning_rate": 9.460416212794554e-05, "loss": 2.1445, "step": 2329 }, { "epoch": 0.17538907393816217, "grad_norm": 5.90493106842041, "learning_rate": 9.459865209084004e-05, "loss": 2.6916, "step": 2330 }, { "epoch": 0.17546434821882234, "grad_norm": 5.32578706741333, "learning_rate": 9.459313940247266e-05, "loss": 2.2656, "step": 2331 }, { "epoch": 0.1755396224994825, "grad_norm": 6.139587879180908, "learning_rate": 9.458762406317107e-05, "loss": 2.2693, "step": 2332 }, { "epoch": 0.17561489678014264, "grad_norm": 5.8783674240112305, "learning_rate": 9.45821060732632e-05, "loss": 2.7173, "step": 2333 }, { "epoch": 0.1756901710608028, "grad_norm": 5.068629741668701, "learning_rate": 9.457658543307701e-05, "loss": 1.6876, "step": 2334 }, { "epoch": 0.17576544534146296, "grad_norm": 4.986094951629639, "learning_rate": 9.457106214294074e-05, "loss": 2.3307, "step": 2335 }, { "epoch": 0.1758407196221231, "grad_norm": 5.165501594543457, "learning_rate": 9.456553620318269e-05, "loss": 2.2215, "step": 2336 }, { "epoch": 0.17591599390278326, "grad_norm": 5.441031455993652, "learning_rate": 9.456000761413141e-05, "loss": 2.0878, "step": 2337 }, { "epoch": 0.17599126818344343, "grad_norm": 5.335861682891846, "learning_rate": 9.455447637611552e-05, "loss": 2.1241, "step": 2338 }, { "epoch": 0.17606654246410358, "grad_norm": 6.4383544921875, "learning_rate": 9.454894248946386e-05, "loss": 2.2103, "step": 2339 }, { "epoch": 0.17614181674476373, "grad_norm": 8.162375450134277, "learning_rate": 9.45434059545054e-05, "loss": 2.0437, "step": 2340 }, { "epoch": 0.17621709102542388, "grad_norm": 5.421572208404541, "learning_rate": 9.453786677156927e-05, "loss": 2.5892, "step": 2341 }, { "epoch": 0.17629236530608405, "grad_norm": 6.447846412658691, "learning_rate": 9.453232494098473e-05, "loss": 2.038, "step": 2342 }, { "epoch": 0.1763676395867442, "grad_norm": 6.637684345245361, "learning_rate": 9.452678046308126e-05, "loss": 2.1189, "step": 2343 }, { "epoch": 0.17644291386740435, "grad_norm": 4.693938732147217, "learning_rate": 9.452123333818845e-05, "loss": 2.3551, "step": 2344 }, { "epoch": 0.1765181881480645, "grad_norm": 4.605899333953857, "learning_rate": 9.451568356663607e-05, "loss": 2.2943, "step": 2345 }, { "epoch": 0.17659346242872467, "grad_norm": 5.670785903930664, "learning_rate": 9.451013114875404e-05, "loss": 2.3798, "step": 2346 }, { "epoch": 0.17666873670938482, "grad_norm": 4.882007122039795, "learning_rate": 9.450457608487242e-05, "loss": 1.9727, "step": 2347 }, { "epoch": 0.17674401099004497, "grad_norm": 4.417944431304932, "learning_rate": 9.449901837532145e-05, "loss": 2.0738, "step": 2348 }, { "epoch": 0.17681928527070515, "grad_norm": 4.64418363571167, "learning_rate": 9.44934580204315e-05, "loss": 2.189, "step": 2349 }, { "epoch": 0.1768945595513653, "grad_norm": 3.882991075515747, "learning_rate": 9.448789502053316e-05, "loss": 1.847, "step": 2350 }, { "epoch": 0.17696983383202544, "grad_norm": 5.01652717590332, "learning_rate": 9.44823293759571e-05, "loss": 2.2999, "step": 2351 }, { "epoch": 0.1770451081126856, "grad_norm": 4.760298728942871, "learning_rate": 9.447676108703421e-05, "loss": 2.1113, "step": 2352 }, { "epoch": 0.17712038239334577, "grad_norm": 4.580170631408691, "learning_rate": 9.447119015409547e-05, "loss": 2.2249, "step": 2353 }, { "epoch": 0.1771956566740059, "grad_norm": 6.8296895027160645, "learning_rate": 9.446561657747209e-05, "loss": 2.3861, "step": 2354 }, { "epoch": 0.17727093095466606, "grad_norm": 5.590836048126221, "learning_rate": 9.446004035749538e-05, "loss": 2.2823, "step": 2355 }, { "epoch": 0.1773462052353262, "grad_norm": 7.718230724334717, "learning_rate": 9.445446149449686e-05, "loss": 2.2516, "step": 2356 }, { "epoch": 0.17742147951598639, "grad_norm": 3.219867706298828, "learning_rate": 9.444887998880814e-05, "loss": 2.2606, "step": 2357 }, { "epoch": 0.17749675379664653, "grad_norm": 4.374912261962891, "learning_rate": 9.444329584076104e-05, "loss": 1.9019, "step": 2358 }, { "epoch": 0.17757202807730668, "grad_norm": 4.683414459228516, "learning_rate": 9.443770905068754e-05, "loss": 2.2066, "step": 2359 }, { "epoch": 0.17764730235796683, "grad_norm": 4.491998672485352, "learning_rate": 9.443211961891973e-05, "loss": 2.3096, "step": 2360 }, { "epoch": 0.177722576638627, "grad_norm": 4.711297988891602, "learning_rate": 9.442652754578991e-05, "loss": 1.9365, "step": 2361 }, { "epoch": 0.17779785091928715, "grad_norm": 7.7094292640686035, "learning_rate": 9.44209328316305e-05, "loss": 2.1713, "step": 2362 }, { "epoch": 0.1778731251999473, "grad_norm": 5.9597063064575195, "learning_rate": 9.441533547677408e-05, "loss": 2.408, "step": 2363 }, { "epoch": 0.17794839948060745, "grad_norm": 4.834662437438965, "learning_rate": 9.440973548155342e-05, "loss": 2.2714, "step": 2364 }, { "epoch": 0.17802367376126763, "grad_norm": 5.837653636932373, "learning_rate": 9.440413284630141e-05, "loss": 2.1192, "step": 2365 }, { "epoch": 0.17809894804192777, "grad_norm": 3.679480791091919, "learning_rate": 9.439852757135111e-05, "loss": 1.9432, "step": 2366 }, { "epoch": 0.17817422232258792, "grad_norm": 5.310233116149902, "learning_rate": 9.439291965703573e-05, "loss": 2.2702, "step": 2367 }, { "epoch": 0.1782494966032481, "grad_norm": 8.406325340270996, "learning_rate": 9.438730910368867e-05, "loss": 2.366, "step": 2368 }, { "epoch": 0.17832477088390825, "grad_norm": 4.908152103424072, "learning_rate": 9.438169591164343e-05, "loss": 2.0483, "step": 2369 }, { "epoch": 0.1784000451645684, "grad_norm": 4.152422904968262, "learning_rate": 9.437608008123374e-05, "loss": 1.8626, "step": 2370 }, { "epoch": 0.17847531944522854, "grad_norm": 4.660198211669922, "learning_rate": 9.437046161279339e-05, "loss": 2.3298, "step": 2371 }, { "epoch": 0.17855059372588872, "grad_norm": 5.6455979347229, "learning_rate": 9.436484050665641e-05, "loss": 2.0798, "step": 2372 }, { "epoch": 0.17862586800654887, "grad_norm": 4.440837383270264, "learning_rate": 9.435921676315698e-05, "loss": 1.9895, "step": 2373 }, { "epoch": 0.178701142287209, "grad_norm": 4.986636638641357, "learning_rate": 9.435359038262939e-05, "loss": 2.1823, "step": 2374 }, { "epoch": 0.17877641656786916, "grad_norm": 5.736190319061279, "learning_rate": 9.43479613654081e-05, "loss": 1.9994, "step": 2375 }, { "epoch": 0.17885169084852934, "grad_norm": 6.821258068084717, "learning_rate": 9.434232971182777e-05, "loss": 1.9361, "step": 2376 }, { "epoch": 0.17892696512918949, "grad_norm": 4.035789966583252, "learning_rate": 9.433669542222317e-05, "loss": 2.0181, "step": 2377 }, { "epoch": 0.17900223940984963, "grad_norm": 4.947262763977051, "learning_rate": 9.433105849692923e-05, "loss": 2.2581, "step": 2378 }, { "epoch": 0.17907751369050978, "grad_norm": 5.483085632324219, "learning_rate": 9.432541893628107e-05, "loss": 1.9159, "step": 2379 }, { "epoch": 0.17915278797116996, "grad_norm": 6.753114700317383, "learning_rate": 9.431977674061394e-05, "loss": 2.3465, "step": 2380 }, { "epoch": 0.1792280622518301, "grad_norm": 5.069437026977539, "learning_rate": 9.431413191026325e-05, "loss": 2.2337, "step": 2381 }, { "epoch": 0.17930333653249025, "grad_norm": 4.258182048797607, "learning_rate": 9.430848444556457e-05, "loss": 2.0306, "step": 2382 }, { "epoch": 0.17937861081315043, "grad_norm": 5.638947010040283, "learning_rate": 9.430283434685363e-05, "loss": 1.8044, "step": 2383 }, { "epoch": 0.17945388509381058, "grad_norm": 4.735197067260742, "learning_rate": 9.42971816144663e-05, "loss": 2.0935, "step": 2384 }, { "epoch": 0.17952915937447073, "grad_norm": 5.030169486999512, "learning_rate": 9.429152624873863e-05, "loss": 2.1498, "step": 2385 }, { "epoch": 0.17960443365513087, "grad_norm": 4.605495929718018, "learning_rate": 9.42858682500068e-05, "loss": 2.2142, "step": 2386 }, { "epoch": 0.17967970793579105, "grad_norm": 4.7468485832214355, "learning_rate": 9.428020761860719e-05, "loss": 2.1551, "step": 2387 }, { "epoch": 0.1797549822164512, "grad_norm": 4.07126522064209, "learning_rate": 9.427454435487628e-05, "loss": 2.2395, "step": 2388 }, { "epoch": 0.17983025649711135, "grad_norm": 3.910022258758545, "learning_rate": 9.426887845915074e-05, "loss": 2.0808, "step": 2389 }, { "epoch": 0.1799055307777715, "grad_norm": 4.715250492095947, "learning_rate": 9.426320993176742e-05, "loss": 1.5938, "step": 2390 }, { "epoch": 0.17998080505843167, "grad_norm": 4.89539098739624, "learning_rate": 9.425753877306326e-05, "loss": 2.1834, "step": 2391 }, { "epoch": 0.18005607933909182, "grad_norm": 4.629598140716553, "learning_rate": 9.42518649833754e-05, "loss": 2.038, "step": 2392 }, { "epoch": 0.18013135361975197, "grad_norm": 6.25762939453125, "learning_rate": 9.424618856304115e-05, "loss": 2.1019, "step": 2393 }, { "epoch": 0.18020662790041211, "grad_norm": 4.406968593597412, "learning_rate": 9.424050951239796e-05, "loss": 2.128, "step": 2394 }, { "epoch": 0.1802819021810723, "grad_norm": 3.9285688400268555, "learning_rate": 9.42348278317834e-05, "loss": 1.7804, "step": 2395 }, { "epoch": 0.18035717646173244, "grad_norm": 4.2842631340026855, "learning_rate": 9.422914352153524e-05, "loss": 2.0389, "step": 2396 }, { "epoch": 0.18043245074239259, "grad_norm": 6.087997913360596, "learning_rate": 9.422345658199144e-05, "loss": 2.2996, "step": 2397 }, { "epoch": 0.18050772502305276, "grad_norm": 4.773106098175049, "learning_rate": 9.421776701349002e-05, "loss": 2.0469, "step": 2398 }, { "epoch": 0.1805829993037129, "grad_norm": 5.146775722503662, "learning_rate": 9.421207481636922e-05, "loss": 1.7992, "step": 2399 }, { "epoch": 0.18065827358437306, "grad_norm": 5.000457286834717, "learning_rate": 9.420637999096744e-05, "loss": 2.3456, "step": 2400 }, { "epoch": 0.1807335478650332, "grad_norm": 6.762832164764404, "learning_rate": 9.42006825376232e-05, "loss": 1.8799, "step": 2401 }, { "epoch": 0.18080882214569338, "grad_norm": 5.668283462524414, "learning_rate": 9.419498245667522e-05, "loss": 2.0409, "step": 2402 }, { "epoch": 0.18088409642635353, "grad_norm": 5.641218185424805, "learning_rate": 9.418927974846234e-05, "loss": 1.8974, "step": 2403 }, { "epoch": 0.18095937070701368, "grad_norm": 6.720699787139893, "learning_rate": 9.418357441332358e-05, "loss": 2.246, "step": 2404 }, { "epoch": 0.18103464498767383, "grad_norm": 7.127935409545898, "learning_rate": 9.417786645159807e-05, "loss": 2.9731, "step": 2405 }, { "epoch": 0.181109919268334, "grad_norm": 4.489436149597168, "learning_rate": 9.417215586362518e-05, "loss": 2.1494, "step": 2406 }, { "epoch": 0.18118519354899415, "grad_norm": 4.5200910568237305, "learning_rate": 9.416644264974437e-05, "loss": 2.3272, "step": 2407 }, { "epoch": 0.1812604678296543, "grad_norm": 5.34370756149292, "learning_rate": 9.416072681029526e-05, "loss": 1.9206, "step": 2408 }, { "epoch": 0.18133574211031445, "grad_norm": 4.176974296569824, "learning_rate": 9.415500834561768e-05, "loss": 1.9707, "step": 2409 }, { "epoch": 0.18141101639097462, "grad_norm": 4.307436943054199, "learning_rate": 9.414928725605152e-05, "loss": 2.0193, "step": 2410 }, { "epoch": 0.18148629067163477, "grad_norm": 4.275622844696045, "learning_rate": 9.414356354193692e-05, "loss": 2.0408, "step": 2411 }, { "epoch": 0.18156156495229492, "grad_norm": 4.892441749572754, "learning_rate": 9.413783720361413e-05, "loss": 2.0104, "step": 2412 }, { "epoch": 0.18163683923295507, "grad_norm": 4.65931510925293, "learning_rate": 9.413210824142358e-05, "loss": 2.3706, "step": 2413 }, { "epoch": 0.18171211351361524, "grad_norm": 3.8563642501831055, "learning_rate": 9.412637665570581e-05, "loss": 2.2299, "step": 2414 }, { "epoch": 0.1817873877942754, "grad_norm": 5.556540489196777, "learning_rate": 9.412064244680154e-05, "loss": 2.1063, "step": 2415 }, { "epoch": 0.18186266207493554, "grad_norm": 6.540408611297607, "learning_rate": 9.411490561505171e-05, "loss": 2.0972, "step": 2416 }, { "epoch": 0.18193793635559571, "grad_norm": 4.668656349182129, "learning_rate": 9.41091661607973e-05, "loss": 2.2057, "step": 2417 }, { "epoch": 0.18201321063625586, "grad_norm": 4.944366931915283, "learning_rate": 9.410342408437953e-05, "loss": 2.0218, "step": 2418 }, { "epoch": 0.182088484916916, "grad_norm": 6.270229339599609, "learning_rate": 9.409767938613973e-05, "loss": 2.0377, "step": 2419 }, { "epoch": 0.18216375919757616, "grad_norm": 5.974524974822998, "learning_rate": 9.409193206641944e-05, "loss": 1.8561, "step": 2420 }, { "epoch": 0.18223903347823633, "grad_norm": 5.791412353515625, "learning_rate": 9.408618212556031e-05, "loss": 2.6372, "step": 2421 }, { "epoch": 0.18231430775889648, "grad_norm": 4.222814083099365, "learning_rate": 9.408042956390412e-05, "loss": 1.8803, "step": 2422 }, { "epoch": 0.18238958203955663, "grad_norm": 4.217424392700195, "learning_rate": 9.40746743817929e-05, "loss": 2.1173, "step": 2423 }, { "epoch": 0.18246485632021678, "grad_norm": 4.1311798095703125, "learning_rate": 9.406891657956875e-05, "loss": 1.9785, "step": 2424 }, { "epoch": 0.18254013060087695, "grad_norm": 4.623928546905518, "learning_rate": 9.406315615757396e-05, "loss": 2.0211, "step": 2425 }, { "epoch": 0.1826154048815371, "grad_norm": 4.721377849578857, "learning_rate": 9.405739311615094e-05, "loss": 2.1413, "step": 2426 }, { "epoch": 0.18269067916219725, "grad_norm": 3.957151174545288, "learning_rate": 9.405162745564233e-05, "loss": 1.9126, "step": 2427 }, { "epoch": 0.1827659534428574, "grad_norm": 4.023303985595703, "learning_rate": 9.404585917639088e-05, "loss": 2.2212, "step": 2428 }, { "epoch": 0.18284122772351757, "grad_norm": 3.649458885192871, "learning_rate": 9.404008827873946e-05, "loss": 1.9352, "step": 2429 }, { "epoch": 0.18291650200417772, "grad_norm": 4.543708801269531, "learning_rate": 9.403431476303118e-05, "loss": 2.0626, "step": 2430 }, { "epoch": 0.18299177628483787, "grad_norm": 4.4156107902526855, "learning_rate": 9.402853862960923e-05, "loss": 2.1769, "step": 2431 }, { "epoch": 0.18306705056549805, "grad_norm": 4.563872337341309, "learning_rate": 9.402275987881698e-05, "loss": 2.5336, "step": 2432 }, { "epoch": 0.1831423248461582, "grad_norm": 6.249185085296631, "learning_rate": 9.401697851099798e-05, "loss": 2.2064, "step": 2433 }, { "epoch": 0.18321759912681834, "grad_norm": 3.949712038040161, "learning_rate": 9.40111945264959e-05, "loss": 2.1051, "step": 2434 }, { "epoch": 0.1832928734074785, "grad_norm": 4.53810453414917, "learning_rate": 9.400540792565459e-05, "loss": 2.0903, "step": 2435 }, { "epoch": 0.18336814768813867, "grad_norm": 5.742558002471924, "learning_rate": 9.399961870881804e-05, "loss": 2.2207, "step": 2436 }, { "epoch": 0.18344342196879881, "grad_norm": 4.680918216705322, "learning_rate": 9.399382687633042e-05, "loss": 1.8058, "step": 2437 }, { "epoch": 0.18351869624945896, "grad_norm": 5.94005012512207, "learning_rate": 9.398803242853601e-05, "loss": 2.2041, "step": 2438 }, { "epoch": 0.1835939705301191, "grad_norm": 7.358811855316162, "learning_rate": 9.39822353657793e-05, "loss": 1.9529, "step": 2439 }, { "epoch": 0.1836692448107793, "grad_norm": 5.252130031585693, "learning_rate": 9.397643568840488e-05, "loss": 2.3559, "step": 2440 }, { "epoch": 0.18374451909143943, "grad_norm": 4.878549098968506, "learning_rate": 9.397063339675755e-05, "loss": 2.1199, "step": 2441 }, { "epoch": 0.18381979337209958, "grad_norm": 3.991593599319458, "learning_rate": 9.396482849118224e-05, "loss": 2.0138, "step": 2442 }, { "epoch": 0.18389506765275973, "grad_norm": 5.3004865646362305, "learning_rate": 9.395902097202403e-05, "loss": 2.137, "step": 2443 }, { "epoch": 0.1839703419334199, "grad_norm": 6.8371782302856445, "learning_rate": 9.395321083962813e-05, "loss": 1.7962, "step": 2444 }, { "epoch": 0.18404561621408005, "grad_norm": 6.035629749298096, "learning_rate": 9.394739809433998e-05, "loss": 2.2543, "step": 2445 }, { "epoch": 0.1841208904947402, "grad_norm": 4.844130516052246, "learning_rate": 9.39415827365051e-05, "loss": 2.0448, "step": 2446 }, { "epoch": 0.18419616477540038, "grad_norm": 6.140071392059326, "learning_rate": 9.393576476646923e-05, "loss": 2.2053, "step": 2447 }, { "epoch": 0.18427143905606053, "grad_norm": 5.780943393707275, "learning_rate": 9.39299441845782e-05, "loss": 2.0894, "step": 2448 }, { "epoch": 0.18434671333672067, "grad_norm": 4.227553367614746, "learning_rate": 9.392412099117802e-05, "loss": 1.7292, "step": 2449 }, { "epoch": 0.18442198761738082, "grad_norm": 4.283457279205322, "learning_rate": 9.39182951866149e-05, "loss": 2.121, "step": 2450 }, { "epoch": 0.184497261898041, "grad_norm": 4.520118236541748, "learning_rate": 9.391246677123514e-05, "loss": 2.16, "step": 2451 }, { "epoch": 0.18457253617870115, "grad_norm": 3.396616220474243, "learning_rate": 9.390663574538523e-05, "loss": 2.1485, "step": 2452 }, { "epoch": 0.1846478104593613, "grad_norm": 4.4132866859436035, "learning_rate": 9.390080210941181e-05, "loss": 1.9867, "step": 2453 }, { "epoch": 0.18472308474002144, "grad_norm": 3.8098809719085693, "learning_rate": 9.389496586366167e-05, "loss": 2.4707, "step": 2454 }, { "epoch": 0.18479835902068162, "grad_norm": 4.23969030380249, "learning_rate": 9.388912700848176e-05, "loss": 2.2455, "step": 2455 }, { "epoch": 0.18487363330134177, "grad_norm": 5.872971057891846, "learning_rate": 9.388328554421917e-05, "loss": 2.1556, "step": 2456 }, { "epoch": 0.18494890758200191, "grad_norm": 5.10668420791626, "learning_rate": 9.387744147122117e-05, "loss": 2.2551, "step": 2457 }, { "epoch": 0.18502418186266206, "grad_norm": 5.751786231994629, "learning_rate": 9.387159478983518e-05, "loss": 2.0982, "step": 2458 }, { "epoch": 0.18509945614332224, "grad_norm": 5.4257612228393555, "learning_rate": 9.386574550040876e-05, "loss": 1.9487, "step": 2459 }, { "epoch": 0.1851747304239824, "grad_norm": 5.04498291015625, "learning_rate": 9.385989360328964e-05, "loss": 2.1612, "step": 2460 }, { "epoch": 0.18525000470464253, "grad_norm": 4.236330986022949, "learning_rate": 9.385403909882568e-05, "loss": 2.2415, "step": 2461 }, { "epoch": 0.1853252789853027, "grad_norm": 5.090738773345947, "learning_rate": 9.384818198736496e-05, "loss": 2.2152, "step": 2462 }, { "epoch": 0.18540055326596286, "grad_norm": 4.343742847442627, "learning_rate": 9.384232226925561e-05, "loss": 2.2333, "step": 2463 }, { "epoch": 0.185475827546623, "grad_norm": 4.728951930999756, "learning_rate": 9.3836459944846e-05, "loss": 2.2967, "step": 2464 }, { "epoch": 0.18555110182728315, "grad_norm": 3.680865526199341, "learning_rate": 9.383059501448462e-05, "loss": 1.9406, "step": 2465 }, { "epoch": 0.18562637610794333, "grad_norm": 5.822722911834717, "learning_rate": 9.382472747852013e-05, "loss": 1.9613, "step": 2466 }, { "epoch": 0.18570165038860348, "grad_norm": 5.067307949066162, "learning_rate": 9.381885733730136e-05, "loss": 2.5257, "step": 2467 }, { "epoch": 0.18577692466926363, "grad_norm": 4.1995530128479, "learning_rate": 9.381298459117723e-05, "loss": 2.0076, "step": 2468 }, { "epoch": 0.18585219894992377, "grad_norm": 5.420987129211426, "learning_rate": 9.380710924049689e-05, "loss": 1.9664, "step": 2469 }, { "epoch": 0.18592747323058395, "grad_norm": 4.720597267150879, "learning_rate": 9.380123128560961e-05, "loss": 1.7115, "step": 2470 }, { "epoch": 0.1860027475112441, "grad_norm": 5.392365455627441, "learning_rate": 9.379535072686479e-05, "loss": 1.8641, "step": 2471 }, { "epoch": 0.18607802179190425, "grad_norm": 5.885098934173584, "learning_rate": 9.378946756461205e-05, "loss": 2.2107, "step": 2472 }, { "epoch": 0.1861532960725644, "grad_norm": 5.320254325866699, "learning_rate": 9.378358179920111e-05, "loss": 1.9729, "step": 2473 }, { "epoch": 0.18622857035322457, "grad_norm": 4.235252857208252, "learning_rate": 9.377769343098185e-05, "loss": 2.2764, "step": 2474 }, { "epoch": 0.18630384463388472, "grad_norm": 6.214223384857178, "learning_rate": 9.377180246030432e-05, "loss": 2.4978, "step": 2475 }, { "epoch": 0.18637911891454487, "grad_norm": 7.149608135223389, "learning_rate": 9.376590888751875e-05, "loss": 2.4284, "step": 2476 }, { "epoch": 0.18645439319520501, "grad_norm": 7.12389612197876, "learning_rate": 9.376001271297546e-05, "loss": 1.8931, "step": 2477 }, { "epoch": 0.1865296674758652, "grad_norm": 5.371740341186523, "learning_rate": 9.375411393702497e-05, "loss": 2.0803, "step": 2478 }, { "epoch": 0.18660494175652534, "grad_norm": 4.696757793426514, "learning_rate": 9.374821256001798e-05, "loss": 2.2489, "step": 2479 }, { "epoch": 0.1866802160371855, "grad_norm": 4.32313871383667, "learning_rate": 9.374230858230525e-05, "loss": 2.1105, "step": 2480 }, { "epoch": 0.18675549031784566, "grad_norm": 4.14240026473999, "learning_rate": 9.373640200423781e-05, "loss": 2.3715, "step": 2481 }, { "epoch": 0.1868307645985058, "grad_norm": 4.030320167541504, "learning_rate": 9.373049282616674e-05, "loss": 1.932, "step": 2482 }, { "epoch": 0.18690603887916596, "grad_norm": 4.575912952423096, "learning_rate": 9.372458104844337e-05, "loss": 2.2811, "step": 2483 }, { "epoch": 0.1869813131598261, "grad_norm": 4.238185882568359, "learning_rate": 9.37186666714191e-05, "loss": 1.9182, "step": 2484 }, { "epoch": 0.18705658744048628, "grad_norm": 5.361331462860107, "learning_rate": 9.371274969544556e-05, "loss": 2.0284, "step": 2485 }, { "epoch": 0.18713186172114643, "grad_norm": 4.422515869140625, "learning_rate": 9.370683012087447e-05, "loss": 1.9103, "step": 2486 }, { "epoch": 0.18720713600180658, "grad_norm": 4.045429229736328, "learning_rate": 9.370090794805772e-05, "loss": 2.238, "step": 2487 }, { "epoch": 0.18728241028246673, "grad_norm": 4.757269859313965, "learning_rate": 9.369498317734741e-05, "loss": 2.5913, "step": 2488 }, { "epoch": 0.1873576845631269, "grad_norm": 4.8929643630981445, "learning_rate": 9.368905580909571e-05, "loss": 2.2632, "step": 2489 }, { "epoch": 0.18743295884378705, "grad_norm": 3.796651840209961, "learning_rate": 9.3683125843655e-05, "loss": 2.1742, "step": 2490 }, { "epoch": 0.1875082331244472, "grad_norm": 4.369163513183594, "learning_rate": 9.36771932813778e-05, "loss": 1.9691, "step": 2491 }, { "epoch": 0.18758350740510735, "grad_norm": 5.370289325714111, "learning_rate": 9.367125812261679e-05, "loss": 2.1115, "step": 2492 }, { "epoch": 0.18765878168576752, "grad_norm": 3.3248627185821533, "learning_rate": 9.366532036772479e-05, "loss": 2.1965, "step": 2493 }, { "epoch": 0.18773405596642767, "grad_norm": 4.258205413818359, "learning_rate": 9.365938001705476e-05, "loss": 2.0163, "step": 2494 }, { "epoch": 0.18780933024708782, "grad_norm": 4.4000468254089355, "learning_rate": 9.365343707095989e-05, "loss": 2.2695, "step": 2495 }, { "epoch": 0.187884604527748, "grad_norm": 5.0567827224731445, "learning_rate": 9.364749152979344e-05, "loss": 2.2224, "step": 2496 }, { "epoch": 0.18795987880840814, "grad_norm": 4.769439220428467, "learning_rate": 9.364154339390884e-05, "loss": 2.0068, "step": 2497 }, { "epoch": 0.1880351530890683, "grad_norm": 3.545689105987549, "learning_rate": 9.363559266365973e-05, "loss": 2.0759, "step": 2498 }, { "epoch": 0.18811042736972844, "grad_norm": 3.941669225692749, "learning_rate": 9.362963933939982e-05, "loss": 2.2694, "step": 2499 }, { "epoch": 0.18818570165038861, "grad_norm": 7.426388740539551, "learning_rate": 9.362368342148304e-05, "loss": 2.248, "step": 2500 }, { "epoch": 0.18826097593104876, "grad_norm": 3.741877317428589, "learning_rate": 9.361772491026347e-05, "loss": 2.0463, "step": 2501 }, { "epoch": 0.1883362502117089, "grad_norm": 5.458040237426758, "learning_rate": 9.361176380609529e-05, "loss": 2.3928, "step": 2502 }, { "epoch": 0.18841152449236906, "grad_norm": 4.791382312774658, "learning_rate": 9.36058001093329e-05, "loss": 2.2111, "step": 2503 }, { "epoch": 0.18848679877302923, "grad_norm": 4.490954399108887, "learning_rate": 9.359983382033081e-05, "loss": 2.4891, "step": 2504 }, { "epoch": 0.18856207305368938, "grad_norm": 5.043197154998779, "learning_rate": 9.359386493944369e-05, "loss": 2.5872, "step": 2505 }, { "epoch": 0.18863734733434953, "grad_norm": 5.550272464752197, "learning_rate": 9.35878934670264e-05, "loss": 2.136, "step": 2506 }, { "epoch": 0.18871262161500968, "grad_norm": 5.618335247039795, "learning_rate": 9.358191940343389e-05, "loss": 1.9605, "step": 2507 }, { "epoch": 0.18878789589566985, "grad_norm": 4.352292537689209, "learning_rate": 9.357594274902133e-05, "loss": 2.4042, "step": 2508 }, { "epoch": 0.18886317017633, "grad_norm": 4.100133419036865, "learning_rate": 9.356996350414402e-05, "loss": 2.3039, "step": 2509 }, { "epoch": 0.18893844445699015, "grad_norm": 4.744510650634766, "learning_rate": 9.356398166915738e-05, "loss": 1.9309, "step": 2510 }, { "epoch": 0.18901371873765033, "grad_norm": 5.219865322113037, "learning_rate": 9.355799724441703e-05, "loss": 2.0551, "step": 2511 }, { "epoch": 0.18908899301831048, "grad_norm": 4.375669479370117, "learning_rate": 9.355201023027876e-05, "loss": 2.0971, "step": 2512 }, { "epoch": 0.18916426729897062, "grad_norm": 3.9628045558929443, "learning_rate": 9.35460206270984e-05, "loss": 2.137, "step": 2513 }, { "epoch": 0.18923954157963077, "grad_norm": 3.951970338821411, "learning_rate": 9.354002843523209e-05, "loss": 2.1666, "step": 2514 }, { "epoch": 0.18931481586029095, "grad_norm": 5.941013813018799, "learning_rate": 9.3534033655036e-05, "loss": 1.9261, "step": 2515 }, { "epoch": 0.1893900901409511, "grad_norm": 5.416547775268555, "learning_rate": 9.352803628686653e-05, "loss": 2.0508, "step": 2516 }, { "epoch": 0.18946536442161124, "grad_norm": 4.743608474731445, "learning_rate": 9.352203633108021e-05, "loss": 2.484, "step": 2517 }, { "epoch": 0.1895406387022714, "grad_norm": 4.451670169830322, "learning_rate": 9.35160337880337e-05, "loss": 2.0116, "step": 2518 }, { "epoch": 0.18961591298293157, "grad_norm": 4.249894618988037, "learning_rate": 9.351002865808383e-05, "loss": 2.2439, "step": 2519 }, { "epoch": 0.18969118726359172, "grad_norm": 5.0988688468933105, "learning_rate": 9.350402094158761e-05, "loss": 2.179, "step": 2520 }, { "epoch": 0.18976646154425186, "grad_norm": 5.777320861816406, "learning_rate": 9.349801063890217e-05, "loss": 1.8623, "step": 2521 }, { "epoch": 0.189841735824912, "grad_norm": 5.133855819702148, "learning_rate": 9.349199775038482e-05, "loss": 1.999, "step": 2522 }, { "epoch": 0.1899170101055722, "grad_norm": 6.937568664550781, "learning_rate": 9.348598227639299e-05, "loss": 1.9257, "step": 2523 }, { "epoch": 0.18999228438623234, "grad_norm": 4.837934970855713, "learning_rate": 9.34799642172843e-05, "loss": 2.1202, "step": 2524 }, { "epoch": 0.19006755866689248, "grad_norm": 8.245290756225586, "learning_rate": 9.347394357341648e-05, "loss": 1.9762, "step": 2525 }, { "epoch": 0.19014283294755263, "grad_norm": 5.214762210845947, "learning_rate": 9.346792034514746e-05, "loss": 1.7786, "step": 2526 }, { "epoch": 0.1902181072282128, "grad_norm": 4.201835632324219, "learning_rate": 9.346189453283529e-05, "loss": 2.0194, "step": 2527 }, { "epoch": 0.19029338150887296, "grad_norm": 4.834220886230469, "learning_rate": 9.34558661368382e-05, "loss": 2.0185, "step": 2528 }, { "epoch": 0.1903686557895331, "grad_norm": 4.203024864196777, "learning_rate": 9.344983515751456e-05, "loss": 2.3082, "step": 2529 }, { "epoch": 0.19044393007019328, "grad_norm": 4.904376029968262, "learning_rate": 9.344380159522289e-05, "loss": 2.2924, "step": 2530 }, { "epoch": 0.19051920435085343, "grad_norm": 4.9724273681640625, "learning_rate": 9.343776545032188e-05, "loss": 2.0996, "step": 2531 }, { "epoch": 0.19059447863151358, "grad_norm": 4.520656585693359, "learning_rate": 9.343172672317034e-05, "loss": 2.0701, "step": 2532 }, { "epoch": 0.19066975291217372, "grad_norm": 4.859874248504639, "learning_rate": 9.342568541412726e-05, "loss": 2.1396, "step": 2533 }, { "epoch": 0.1907450271928339, "grad_norm": 4.253837585449219, "learning_rate": 9.34196415235518e-05, "loss": 1.9302, "step": 2534 }, { "epoch": 0.19082030147349405, "grad_norm": 5.5314459800720215, "learning_rate": 9.341359505180322e-05, "loss": 1.978, "step": 2535 }, { "epoch": 0.1908955757541542, "grad_norm": 4.943543910980225, "learning_rate": 9.3407545999241e-05, "loss": 2.0852, "step": 2536 }, { "epoch": 0.19097085003481434, "grad_norm": 5.548428058624268, "learning_rate": 9.340149436622471e-05, "loss": 1.8628, "step": 2537 }, { "epoch": 0.19104612431547452, "grad_norm": 5.43626594543457, "learning_rate": 9.339544015311411e-05, "loss": 2.2098, "step": 2538 }, { "epoch": 0.19112139859613467, "grad_norm": 6.1091437339782715, "learning_rate": 9.338938336026912e-05, "loss": 2.1499, "step": 2539 }, { "epoch": 0.19119667287679482, "grad_norm": 4.8484697341918945, "learning_rate": 9.338332398804976e-05, "loss": 2.0968, "step": 2540 }, { "epoch": 0.19127194715745496, "grad_norm": 5.473712921142578, "learning_rate": 9.337726203681629e-05, "loss": 2.2765, "step": 2541 }, { "epoch": 0.19134722143811514, "grad_norm": 3.590080738067627, "learning_rate": 9.337119750692906e-05, "loss": 2.1073, "step": 2542 }, { "epoch": 0.1914224957187753, "grad_norm": 4.980856895446777, "learning_rate": 9.336513039874859e-05, "loss": 2.2553, "step": 2543 }, { "epoch": 0.19149776999943544, "grad_norm": 4.057771682739258, "learning_rate": 9.335906071263553e-05, "loss": 2.1213, "step": 2544 }, { "epoch": 0.1915730442800956, "grad_norm": 5.293370723724365, "learning_rate": 9.335298844895072e-05, "loss": 2.1464, "step": 2545 }, { "epoch": 0.19164831856075576, "grad_norm": 5.37392520904541, "learning_rate": 9.334691360805516e-05, "loss": 2.0742, "step": 2546 }, { "epoch": 0.1917235928414159, "grad_norm": 5.438200950622559, "learning_rate": 9.334083619030993e-05, "loss": 2.5388, "step": 2547 }, { "epoch": 0.19179886712207606, "grad_norm": 7.548996925354004, "learning_rate": 9.333475619607636e-05, "loss": 1.9418, "step": 2548 }, { "epoch": 0.19187414140273623, "grad_norm": 5.1043291091918945, "learning_rate": 9.332867362571589e-05, "loss": 2.1118, "step": 2549 }, { "epoch": 0.19194941568339638, "grad_norm": 4.160706043243408, "learning_rate": 9.33225884795901e-05, "loss": 2.1057, "step": 2550 }, { "epoch": 0.19202468996405653, "grad_norm": 4.977852821350098, "learning_rate": 9.331650075806071e-05, "loss": 2.131, "step": 2551 }, { "epoch": 0.19209996424471668, "grad_norm": 4.698365211486816, "learning_rate": 9.331041046148965e-05, "loss": 2.3714, "step": 2552 }, { "epoch": 0.19217523852537685, "grad_norm": 4.620189189910889, "learning_rate": 9.330431759023897e-05, "loss": 2.1871, "step": 2553 }, { "epoch": 0.192250512806037, "grad_norm": 4.794996738433838, "learning_rate": 9.329822214467085e-05, "loss": 2.3205, "step": 2554 }, { "epoch": 0.19232578708669715, "grad_norm": 4.8759050369262695, "learning_rate": 9.329212412514765e-05, "loss": 2.1524, "step": 2555 }, { "epoch": 0.1924010613673573, "grad_norm": 3.981215476989746, "learning_rate": 9.32860235320319e-05, "loss": 1.8509, "step": 2556 }, { "epoch": 0.19247633564801747, "grad_norm": 4.774701118469238, "learning_rate": 9.327992036568626e-05, "loss": 2.0014, "step": 2557 }, { "epoch": 0.19255160992867762, "grad_norm": 5.7614240646362305, "learning_rate": 9.327381462647354e-05, "loss": 2.3369, "step": 2558 }, { "epoch": 0.19262688420933777, "grad_norm": 5.10483455657959, "learning_rate": 9.32677063147567e-05, "loss": 1.8325, "step": 2559 }, { "epoch": 0.19270215848999794, "grad_norm": 4.313691139221191, "learning_rate": 9.326159543089887e-05, "loss": 1.8342, "step": 2560 }, { "epoch": 0.1927774327706581, "grad_norm": 4.264848709106445, "learning_rate": 9.32554819752633e-05, "loss": 2.0532, "step": 2561 }, { "epoch": 0.19285270705131824, "grad_norm": 4.446906566619873, "learning_rate": 9.324936594821348e-05, "loss": 2.6164, "step": 2562 }, { "epoch": 0.1929279813319784, "grad_norm": 6.16718864440918, "learning_rate": 9.324324735011291e-05, "loss": 1.956, "step": 2563 }, { "epoch": 0.19300325561263856, "grad_norm": 5.927128314971924, "learning_rate": 9.323712618132541e-05, "loss": 2.1769, "step": 2564 }, { "epoch": 0.1930785298932987, "grad_norm": 6.929825782775879, "learning_rate": 9.323100244221478e-05, "loss": 2.1547, "step": 2565 }, { "epoch": 0.19315380417395886, "grad_norm": 5.624950408935547, "learning_rate": 9.322487613314512e-05, "loss": 2.0811, "step": 2566 }, { "epoch": 0.193229078454619, "grad_norm": 4.538450717926025, "learning_rate": 9.321874725448058e-05, "loss": 2.1482, "step": 2567 }, { "epoch": 0.19330435273527918, "grad_norm": 4.675336837768555, "learning_rate": 9.321261580658556e-05, "loss": 2.0677, "step": 2568 }, { "epoch": 0.19337962701593933, "grad_norm": 5.141780853271484, "learning_rate": 9.32064817898245e-05, "loss": 1.8231, "step": 2569 }, { "epoch": 0.19345490129659948, "grad_norm": 3.757761001586914, "learning_rate": 9.320034520456207e-05, "loss": 1.8837, "step": 2570 }, { "epoch": 0.19353017557725963, "grad_norm": 4.213200092315674, "learning_rate": 9.319420605116307e-05, "loss": 2.166, "step": 2571 }, { "epoch": 0.1936054498579198, "grad_norm": 5.930081844329834, "learning_rate": 9.318806432999245e-05, "loss": 1.8438, "step": 2572 }, { "epoch": 0.19368072413857995, "grad_norm": 6.261200428009033, "learning_rate": 9.318192004141536e-05, "loss": 2.0288, "step": 2573 }, { "epoch": 0.1937559984192401, "grad_norm": 5.915249824523926, "learning_rate": 9.3175773185797e-05, "loss": 2.0577, "step": 2574 }, { "epoch": 0.19383127269990025, "grad_norm": 6.85219144821167, "learning_rate": 9.31696237635028e-05, "loss": 2.0642, "step": 2575 }, { "epoch": 0.19390654698056042, "grad_norm": 4.204634666442871, "learning_rate": 9.316347177489835e-05, "loss": 2.2449, "step": 2576 }, { "epoch": 0.19398182126122057, "grad_norm": 4.536286354064941, "learning_rate": 9.315731722034934e-05, "loss": 1.9782, "step": 2577 }, { "epoch": 0.19405709554188072, "grad_norm": 5.71628999710083, "learning_rate": 9.315116010022168e-05, "loss": 1.98, "step": 2578 }, { "epoch": 0.1941323698225409, "grad_norm": 5.147578239440918, "learning_rate": 9.314500041488135e-05, "loss": 2.0296, "step": 2579 }, { "epoch": 0.19420764410320104, "grad_norm": 4.791040897369385, "learning_rate": 9.313883816469455e-05, "loss": 2.0343, "step": 2580 }, { "epoch": 0.1942829183838612, "grad_norm": 5.327045440673828, "learning_rate": 9.31326733500276e-05, "loss": 2.1418, "step": 2581 }, { "epoch": 0.19435819266452134, "grad_norm": 4.423271179199219, "learning_rate": 9.312650597124697e-05, "loss": 2.1174, "step": 2582 }, { "epoch": 0.19443346694518152, "grad_norm": 3.8413028717041016, "learning_rate": 9.31203360287193e-05, "loss": 2.1181, "step": 2583 }, { "epoch": 0.19450874122584166, "grad_norm": 4.535932540893555, "learning_rate": 9.31141635228114e-05, "loss": 2.3185, "step": 2584 }, { "epoch": 0.1945840155065018, "grad_norm": 4.861112117767334, "learning_rate": 9.310798845389018e-05, "loss": 1.9685, "step": 2585 }, { "epoch": 0.19465928978716196, "grad_norm": 5.055726051330566, "learning_rate": 9.310181082232272e-05, "loss": 2.2517, "step": 2586 }, { "epoch": 0.19473456406782214, "grad_norm": 5.35298490524292, "learning_rate": 9.309563062847628e-05, "loss": 2.6957, "step": 2587 }, { "epoch": 0.19480983834848228, "grad_norm": 6.587187767028809, "learning_rate": 9.308944787271827e-05, "loss": 2.0718, "step": 2588 }, { "epoch": 0.19488511262914243, "grad_norm": 4.75013542175293, "learning_rate": 9.308326255541621e-05, "loss": 2.0937, "step": 2589 }, { "epoch": 0.19496038690980258, "grad_norm": 4.977231025695801, "learning_rate": 9.307707467693783e-05, "loss": 2.3111, "step": 2590 }, { "epoch": 0.19503566119046276, "grad_norm": 4.176503658294678, "learning_rate": 9.307088423765096e-05, "loss": 2.8169, "step": 2591 }, { "epoch": 0.1951109354711229, "grad_norm": 5.028183937072754, "learning_rate": 9.306469123792358e-05, "loss": 2.2027, "step": 2592 }, { "epoch": 0.19518620975178305, "grad_norm": 4.943946361541748, "learning_rate": 9.30584956781239e-05, "loss": 1.9997, "step": 2593 }, { "epoch": 0.19526148403244323, "grad_norm": 3.7517528533935547, "learning_rate": 9.305229755862019e-05, "loss": 2.2685, "step": 2594 }, { "epoch": 0.19533675831310338, "grad_norm": 5.388548374176025, "learning_rate": 9.304609687978092e-05, "loss": 2.201, "step": 2595 }, { "epoch": 0.19541203259376352, "grad_norm": 5.044585227966309, "learning_rate": 9.30398936419747e-05, "loss": 1.9716, "step": 2596 }, { "epoch": 0.19548730687442367, "grad_norm": 4.254343032836914, "learning_rate": 9.30336878455703e-05, "loss": 2.3274, "step": 2597 }, { "epoch": 0.19556258115508385, "grad_norm": 5.093156814575195, "learning_rate": 9.302747949093664e-05, "loss": 2.1795, "step": 2598 }, { "epoch": 0.195637855435744, "grad_norm": 3.9735777378082275, "learning_rate": 9.302126857844279e-05, "loss": 2.0776, "step": 2599 }, { "epoch": 0.19571312971640414, "grad_norm": 4.32735538482666, "learning_rate": 9.301505510845795e-05, "loss": 2.1883, "step": 2600 }, { "epoch": 0.1957884039970643, "grad_norm": 4.864895343780518, "learning_rate": 9.300883908135152e-05, "loss": 1.837, "step": 2601 }, { "epoch": 0.19586367827772447, "grad_norm": 4.4535298347473145, "learning_rate": 9.3002620497493e-05, "loss": 2.1299, "step": 2602 }, { "epoch": 0.19593895255838462, "grad_norm": 4.141602993011475, "learning_rate": 9.299639935725209e-05, "loss": 1.9555, "step": 2603 }, { "epoch": 0.19601422683904476, "grad_norm": 3.575546979904175, "learning_rate": 9.299017566099861e-05, "loss": 1.7796, "step": 2604 }, { "epoch": 0.1960895011197049, "grad_norm": 4.551851749420166, "learning_rate": 9.298394940910254e-05, "loss": 2.3008, "step": 2605 }, { "epoch": 0.1961647754003651, "grad_norm": 4.713071346282959, "learning_rate": 9.297772060193399e-05, "loss": 2.7555, "step": 2606 }, { "epoch": 0.19624004968102524, "grad_norm": 5.464794158935547, "learning_rate": 9.297148923986329e-05, "loss": 2.045, "step": 2607 }, { "epoch": 0.19631532396168538, "grad_norm": 4.088005065917969, "learning_rate": 9.296525532326085e-05, "loss": 1.8355, "step": 2608 }, { "epoch": 0.19639059824234556, "grad_norm": 4.126147747039795, "learning_rate": 9.295901885249727e-05, "loss": 2.1773, "step": 2609 }, { "epoch": 0.1964658725230057, "grad_norm": 3.7973711490631104, "learning_rate": 9.295277982794327e-05, "loss": 1.8913, "step": 2610 }, { "epoch": 0.19654114680366586, "grad_norm": 5.4946208000183105, "learning_rate": 9.294653824996976e-05, "loss": 2.1021, "step": 2611 }, { "epoch": 0.196616421084326, "grad_norm": 6.627760410308838, "learning_rate": 9.294029411894779e-05, "loss": 2.0899, "step": 2612 }, { "epoch": 0.19669169536498618, "grad_norm": 4.387209892272949, "learning_rate": 9.293404743524852e-05, "loss": 2.2512, "step": 2613 }, { "epoch": 0.19676696964564633, "grad_norm": 4.849071025848389, "learning_rate": 9.292779819924333e-05, "loss": 1.9134, "step": 2614 }, { "epoch": 0.19684224392630648, "grad_norm": 5.269917011260986, "learning_rate": 9.292154641130371e-05, "loss": 1.9464, "step": 2615 }, { "epoch": 0.19691751820696662, "grad_norm": 6.315644264221191, "learning_rate": 9.291529207180132e-05, "loss": 2.2925, "step": 2616 }, { "epoch": 0.1969927924876268, "grad_norm": 5.099985122680664, "learning_rate": 9.290903518110793e-05, "loss": 2.2075, "step": 2617 }, { "epoch": 0.19706806676828695, "grad_norm": 5.856556415557861, "learning_rate": 9.290277573959555e-05, "loss": 2.341, "step": 2618 }, { "epoch": 0.1971433410489471, "grad_norm": 6.284870147705078, "learning_rate": 9.289651374763624e-05, "loss": 2.2362, "step": 2619 }, { "epoch": 0.19721861532960724, "grad_norm": 6.153741359710693, "learning_rate": 9.289024920560228e-05, "loss": 2.1497, "step": 2620 }, { "epoch": 0.19729388961026742, "grad_norm": 5.033062934875488, "learning_rate": 9.288398211386606e-05, "loss": 2.0753, "step": 2621 }, { "epoch": 0.19736916389092757, "grad_norm": 4.078958034515381, "learning_rate": 9.287771247280014e-05, "loss": 2.136, "step": 2622 }, { "epoch": 0.19744443817158772, "grad_norm": 7.0571980476379395, "learning_rate": 9.287144028277726e-05, "loss": 2.0564, "step": 2623 }, { "epoch": 0.1975197124522479, "grad_norm": 4.978921890258789, "learning_rate": 9.286516554417028e-05, "loss": 2.348, "step": 2624 }, { "epoch": 0.19759498673290804, "grad_norm": 3.79028582572937, "learning_rate": 9.285888825735218e-05, "loss": 2.2694, "step": 2625 }, { "epoch": 0.1976702610135682, "grad_norm": 4.624883651733398, "learning_rate": 9.285260842269616e-05, "loss": 1.9855, "step": 2626 }, { "epoch": 0.19774553529422834, "grad_norm": 6.384089469909668, "learning_rate": 9.284632604057553e-05, "loss": 2.0368, "step": 2627 }, { "epoch": 0.1978208095748885, "grad_norm": 6.101902484893799, "learning_rate": 9.284004111136374e-05, "loss": 2.0705, "step": 2628 }, { "epoch": 0.19789608385554866, "grad_norm": 5.448572158813477, "learning_rate": 9.283375363543444e-05, "loss": 2.2335, "step": 2629 }, { "epoch": 0.1979713581362088, "grad_norm": 5.265905380249023, "learning_rate": 9.28274636131614e-05, "loss": 2.3178, "step": 2630 }, { "epoch": 0.19804663241686896, "grad_norm": 7.9123148918151855, "learning_rate": 9.282117104491852e-05, "loss": 2.1305, "step": 2631 }, { "epoch": 0.19812190669752913, "grad_norm": 7.143202781677246, "learning_rate": 9.281487593107989e-05, "loss": 2.4103, "step": 2632 }, { "epoch": 0.19819718097818928, "grad_norm": 4.973522663116455, "learning_rate": 9.280857827201973e-05, "loss": 1.8422, "step": 2633 }, { "epoch": 0.19827245525884943, "grad_norm": 4.00935173034668, "learning_rate": 9.280227806811244e-05, "loss": 2.0586, "step": 2634 }, { "epoch": 0.19834772953950958, "grad_norm": 3.9648549556732178, "learning_rate": 9.279597531973252e-05, "loss": 2.3299, "step": 2635 }, { "epoch": 0.19842300382016975, "grad_norm": 3.629316568374634, "learning_rate": 9.278967002725465e-05, "loss": 2.3687, "step": 2636 }, { "epoch": 0.1984982781008299, "grad_norm": 3.7593750953674316, "learning_rate": 9.27833621910537e-05, "loss": 1.9443, "step": 2637 }, { "epoch": 0.19857355238149005, "grad_norm": 4.642870903015137, "learning_rate": 9.277705181150463e-05, "loss": 2.5142, "step": 2638 }, { "epoch": 0.1986488266621502, "grad_norm": 4.226221561431885, "learning_rate": 9.277073888898255e-05, "loss": 2.0096, "step": 2639 }, { "epoch": 0.19872410094281037, "grad_norm": 4.961756229400635, "learning_rate": 9.276442342386277e-05, "loss": 2.0554, "step": 2640 }, { "epoch": 0.19879937522347052, "grad_norm": 5.621178150177002, "learning_rate": 9.275810541652072e-05, "loss": 2.1673, "step": 2641 }, { "epoch": 0.19887464950413067, "grad_norm": 5.010156631469727, "learning_rate": 9.275178486733201e-05, "loss": 2.4134, "step": 2642 }, { "epoch": 0.19894992378479084, "grad_norm": 3.99113392829895, "learning_rate": 9.274546177667234e-05, "loss": 2.2111, "step": 2643 }, { "epoch": 0.199025198065451, "grad_norm": 4.587311267852783, "learning_rate": 9.273913614491761e-05, "loss": 2.0292, "step": 2644 }, { "epoch": 0.19910047234611114, "grad_norm": 4.786252975463867, "learning_rate": 9.273280797244388e-05, "loss": 2.186, "step": 2645 }, { "epoch": 0.1991757466267713, "grad_norm": 5.825490474700928, "learning_rate": 9.272647725962733e-05, "loss": 2.4339, "step": 2646 }, { "epoch": 0.19925102090743146, "grad_norm": 3.9713294506073, "learning_rate": 9.27201440068443e-05, "loss": 2.105, "step": 2647 }, { "epoch": 0.1993262951880916, "grad_norm": 4.747962474822998, "learning_rate": 9.271380821447129e-05, "loss": 1.911, "step": 2648 }, { "epoch": 0.19940156946875176, "grad_norm": 3.773463726043701, "learning_rate": 9.270746988288494e-05, "loss": 2.0269, "step": 2649 }, { "epoch": 0.1994768437494119, "grad_norm": 4.771273612976074, "learning_rate": 9.270112901246207e-05, "loss": 2.0478, "step": 2650 }, { "epoch": 0.19955211803007208, "grad_norm": 6.746914386749268, "learning_rate": 9.269478560357958e-05, "loss": 2.4765, "step": 2651 }, { "epoch": 0.19962739231073223, "grad_norm": 6.104433059692383, "learning_rate": 9.268843965661457e-05, "loss": 2.2402, "step": 2652 }, { "epoch": 0.19970266659139238, "grad_norm": 4.703186511993408, "learning_rate": 9.268209117194435e-05, "loss": 2.1815, "step": 2653 }, { "epoch": 0.19977794087205253, "grad_norm": 7.270728588104248, "learning_rate": 9.267574014994626e-05, "loss": 2.0614, "step": 2654 }, { "epoch": 0.1998532151527127, "grad_norm": 5.134958267211914, "learning_rate": 9.266938659099786e-05, "loss": 1.6658, "step": 2655 }, { "epoch": 0.19992848943337285, "grad_norm": 6.875425338745117, "learning_rate": 9.266303049547687e-05, "loss": 2.5154, "step": 2656 }, { "epoch": 0.200003763714033, "grad_norm": 4.7156572341918945, "learning_rate": 9.265667186376113e-05, "loss": 2.3171, "step": 2657 }, { "epoch": 0.20007903799469318, "grad_norm": 7.363480567932129, "learning_rate": 9.265031069622865e-05, "loss": 2.6819, "step": 2658 }, { "epoch": 0.20015431227535332, "grad_norm": 7.363480567932129, "learning_rate": 9.265031069622865e-05, "loss": 1.7411, "step": 2659 }, { "epoch": 0.20022958655601347, "grad_norm": 5.607661724090576, "learning_rate": 9.264394699325757e-05, "loss": 2.4929, "step": 2660 }, { "epoch": 0.20030486083667362, "grad_norm": 7.142682075500488, "learning_rate": 9.263758075522619e-05, "loss": 2.4227, "step": 2661 }, { "epoch": 0.2003801351173338, "grad_norm": 6.157449722290039, "learning_rate": 9.263121198251299e-05, "loss": 2.2881, "step": 2662 }, { "epoch": 0.20045540939799394, "grad_norm": 4.961678981781006, "learning_rate": 9.262484067549656e-05, "loss": 2.3077, "step": 2663 }, { "epoch": 0.2005306836786541, "grad_norm": 4.743991851806641, "learning_rate": 9.261846683455565e-05, "loss": 2.2727, "step": 2664 }, { "epoch": 0.20060595795931424, "grad_norm": 6.180283069610596, "learning_rate": 9.261209046006918e-05, "loss": 2.1534, "step": 2665 }, { "epoch": 0.20068123223997442, "grad_norm": 7.317596435546875, "learning_rate": 9.260571155241619e-05, "loss": 2.449, "step": 2666 }, { "epoch": 0.20075650652063456, "grad_norm": 3.833103895187378, "learning_rate": 9.259933011197591e-05, "loss": 2.2059, "step": 2667 }, { "epoch": 0.2008317808012947, "grad_norm": 4.415266513824463, "learning_rate": 9.259294613912767e-05, "loss": 2.0813, "step": 2668 }, { "epoch": 0.20090705508195486, "grad_norm": 4.395266532897949, "learning_rate": 9.2586559634251e-05, "loss": 2.1295, "step": 2669 }, { "epoch": 0.20098232936261504, "grad_norm": 7.380612373352051, "learning_rate": 9.258017059772556e-05, "loss": 2.2435, "step": 2670 }, { "epoch": 0.20105760364327518, "grad_norm": 4.570094108581543, "learning_rate": 9.257377902993114e-05, "loss": 2.2224, "step": 2671 }, { "epoch": 0.20113287792393533, "grad_norm": 9.569217681884766, "learning_rate": 9.256738493124773e-05, "loss": 2.3579, "step": 2672 }, { "epoch": 0.2012081522045955, "grad_norm": 5.0647149085998535, "learning_rate": 9.256098830205542e-05, "loss": 2.1132, "step": 2673 }, { "epoch": 0.20128342648525566, "grad_norm": 4.9264020919799805, "learning_rate": 9.255458914273448e-05, "loss": 2.1071, "step": 2674 }, { "epoch": 0.2013587007659158, "grad_norm": 5.490030288696289, "learning_rate": 9.254818745366531e-05, "loss": 2.5883, "step": 2675 }, { "epoch": 0.20143397504657595, "grad_norm": 5.552890777587891, "learning_rate": 9.25417832352285e-05, "loss": 2.2194, "step": 2676 }, { "epoch": 0.20150924932723613, "grad_norm": 6.219507694244385, "learning_rate": 9.253537648780473e-05, "loss": 2.2628, "step": 2677 }, { "epoch": 0.20158452360789628, "grad_norm": 6.691783428192139, "learning_rate": 9.252896721177487e-05, "loss": 2.5432, "step": 2678 }, { "epoch": 0.20165979788855642, "grad_norm": 4.836513996124268, "learning_rate": 9.252255540751994e-05, "loss": 2.2211, "step": 2679 }, { "epoch": 0.20173507216921657, "grad_norm": 4.348185062408447, "learning_rate": 9.25161410754211e-05, "loss": 1.993, "step": 2680 }, { "epoch": 0.20181034644987675, "grad_norm": 5.402199745178223, "learning_rate": 9.250972421585968e-05, "loss": 2.1034, "step": 2681 }, { "epoch": 0.2018856207305369, "grad_norm": 3.2253646850585938, "learning_rate": 9.250330482921712e-05, "loss": 2.0738, "step": 2682 }, { "epoch": 0.20196089501119704, "grad_norm": 4.809782028198242, "learning_rate": 9.249688291587504e-05, "loss": 1.9647, "step": 2683 }, { "epoch": 0.2020361692918572, "grad_norm": 4.990761756896973, "learning_rate": 9.249045847621521e-05, "loss": 2.5667, "step": 2684 }, { "epoch": 0.20211144357251737, "grad_norm": 5.1938347816467285, "learning_rate": 9.248403151061956e-05, "loss": 2.138, "step": 2685 }, { "epoch": 0.20218671785317752, "grad_norm": 5.125086307525635, "learning_rate": 9.247760201947011e-05, "loss": 1.888, "step": 2686 }, { "epoch": 0.20226199213383766, "grad_norm": 4.409528732299805, "learning_rate": 9.247117000314911e-05, "loss": 1.848, "step": 2687 }, { "epoch": 0.2023372664144978, "grad_norm": 5.727984428405762, "learning_rate": 9.246473546203893e-05, "loss": 2.3168, "step": 2688 }, { "epoch": 0.202412540695158, "grad_norm": 3.619250535964966, "learning_rate": 9.245829839652205e-05, "loss": 1.9843, "step": 2689 }, { "epoch": 0.20248781497581814, "grad_norm": 6.49954080581665, "learning_rate": 9.245185880698118e-05, "loss": 1.9022, "step": 2690 }, { "epoch": 0.20256308925647828, "grad_norm": 4.470929145812988, "learning_rate": 9.24454166937991e-05, "loss": 2.5562, "step": 2691 }, { "epoch": 0.20263836353713846, "grad_norm": 5.310934066772461, "learning_rate": 9.243897205735878e-05, "loss": 2.31, "step": 2692 }, { "epoch": 0.2027136378177986, "grad_norm": 5.04948616027832, "learning_rate": 9.243252489804335e-05, "loss": 1.917, "step": 2693 }, { "epoch": 0.20278891209845876, "grad_norm": 4.324390411376953, "learning_rate": 9.242607521623606e-05, "loss": 2.0204, "step": 2694 }, { "epoch": 0.2028641863791189, "grad_norm": 4.0668230056762695, "learning_rate": 9.241962301232035e-05, "loss": 2.2603, "step": 2695 }, { "epoch": 0.20293946065977908, "grad_norm": 5.163023471832275, "learning_rate": 9.241316828667976e-05, "loss": 2.2552, "step": 2696 }, { "epoch": 0.20301473494043923, "grad_norm": 3.842123031616211, "learning_rate": 9.2406711039698e-05, "loss": 2.0686, "step": 2697 }, { "epoch": 0.20309000922109938, "grad_norm": 3.7394609451293945, "learning_rate": 9.240025127175896e-05, "loss": 2.2229, "step": 2698 }, { "epoch": 0.20316528350175952, "grad_norm": 5.072762966156006, "learning_rate": 9.239378898324664e-05, "loss": 1.7493, "step": 2699 }, { "epoch": 0.2032405577824197, "grad_norm": 3.9831929206848145, "learning_rate": 9.238732417454521e-05, "loss": 2.0991, "step": 2700 }, { "epoch": 0.20331583206307985, "grad_norm": 5.643798828125, "learning_rate": 9.238085684603897e-05, "loss": 2.1445, "step": 2701 }, { "epoch": 0.20339110634374, "grad_norm": 4.979661464691162, "learning_rate": 9.23743869981124e-05, "loss": 1.9424, "step": 2702 }, { "epoch": 0.20346638062440014, "grad_norm": 4.533657550811768, "learning_rate": 9.23679146311501e-05, "loss": 2.1678, "step": 2703 }, { "epoch": 0.20354165490506032, "grad_norm": 4.680571556091309, "learning_rate": 9.236143974553686e-05, "loss": 2.5833, "step": 2704 }, { "epoch": 0.20361692918572047, "grad_norm": 5.625328063964844, "learning_rate": 9.235496234165756e-05, "loss": 2.228, "step": 2705 }, { "epoch": 0.20369220346638062, "grad_norm": 4.053057670593262, "learning_rate": 9.234848241989729e-05, "loss": 2.4346, "step": 2706 }, { "epoch": 0.2037674777470408, "grad_norm": 4.140114784240723, "learning_rate": 9.234199998064125e-05, "loss": 2.103, "step": 2707 }, { "epoch": 0.20384275202770094, "grad_norm": 5.398926258087158, "learning_rate": 9.23355150242748e-05, "loss": 2.1543, "step": 2708 }, { "epoch": 0.2039180263083611, "grad_norm": 5.658435344696045, "learning_rate": 9.232902755118345e-05, "loss": 1.9006, "step": 2709 }, { "epoch": 0.20399330058902124, "grad_norm": 4.941132545471191, "learning_rate": 9.232253756175288e-05, "loss": 1.902, "step": 2710 }, { "epoch": 0.2040685748696814, "grad_norm": 4.231126308441162, "learning_rate": 9.231604505636889e-05, "loss": 1.9513, "step": 2711 }, { "epoch": 0.20414384915034156, "grad_norm": 6.0593037605285645, "learning_rate": 9.230955003541743e-05, "loss": 2.6645, "step": 2712 }, { "epoch": 0.2042191234310017, "grad_norm": 5.5126471519470215, "learning_rate": 9.230305249928461e-05, "loss": 2.1858, "step": 2713 }, { "epoch": 0.20429439771166186, "grad_norm": 5.879733085632324, "learning_rate": 9.229655244835673e-05, "loss": 2.278, "step": 2714 }, { "epoch": 0.20436967199232203, "grad_norm": 4.923147678375244, "learning_rate": 9.229004988302014e-05, "loss": 2.5746, "step": 2715 }, { "epoch": 0.20444494627298218, "grad_norm": 7.514196872711182, "learning_rate": 9.228354480366146e-05, "loss": 1.9377, "step": 2716 }, { "epoch": 0.20452022055364233, "grad_norm": 7.064095497131348, "learning_rate": 9.227703721066734e-05, "loss": 2.0156, "step": 2717 }, { "epoch": 0.20459549483430248, "grad_norm": 4.517171382904053, "learning_rate": 9.227052710442468e-05, "loss": 2.0787, "step": 2718 }, { "epoch": 0.20467076911496265, "grad_norm": 7.329111099243164, "learning_rate": 9.226401448532045e-05, "loss": 2.3843, "step": 2719 }, { "epoch": 0.2047460433956228, "grad_norm": 5.9707183837890625, "learning_rate": 9.225749935374184e-05, "loss": 2.3209, "step": 2720 }, { "epoch": 0.20482131767628295, "grad_norm": 5.696382999420166, "learning_rate": 9.225098171007615e-05, "loss": 2.2724, "step": 2721 }, { "epoch": 0.20489659195694312, "grad_norm": 5.391494274139404, "learning_rate": 9.224446155471083e-05, "loss": 2.6625, "step": 2722 }, { "epoch": 0.20497186623760327, "grad_norm": 5.725043296813965, "learning_rate": 9.223793888803347e-05, "loss": 2.3145, "step": 2723 }, { "epoch": 0.20504714051826342, "grad_norm": 4.924393653869629, "learning_rate": 9.223141371043184e-05, "loss": 2.1112, "step": 2724 }, { "epoch": 0.20512241479892357, "grad_norm": 5.879674911499023, "learning_rate": 9.222488602229385e-05, "loss": 2.3642, "step": 2725 }, { "epoch": 0.20519768907958374, "grad_norm": 4.345478057861328, "learning_rate": 9.221835582400754e-05, "loss": 2.0124, "step": 2726 }, { "epoch": 0.2052729633602439, "grad_norm": 4.756162643432617, "learning_rate": 9.221182311596112e-05, "loss": 2.0019, "step": 2727 }, { "epoch": 0.20534823764090404, "grad_norm": 5.437966346740723, "learning_rate": 9.220528789854293e-05, "loss": 1.9805, "step": 2728 }, { "epoch": 0.2054235119215642, "grad_norm": 5.083278656005859, "learning_rate": 9.219875017214146e-05, "loss": 2.108, "step": 2729 }, { "epoch": 0.20549878620222436, "grad_norm": 4.584100246429443, "learning_rate": 9.219220993714539e-05, "loss": 2.2582, "step": 2730 }, { "epoch": 0.2055740604828845, "grad_norm": 7.760411739349365, "learning_rate": 9.21856671939435e-05, "loss": 2.346, "step": 2731 }, { "epoch": 0.20564933476354466, "grad_norm": 5.776886940002441, "learning_rate": 9.217912194292474e-05, "loss": 2.4974, "step": 2732 }, { "epoch": 0.2057246090442048, "grad_norm": 5.723376750946045, "learning_rate": 9.21725741844782e-05, "loss": 2.1521, "step": 2733 }, { "epoch": 0.20579988332486499, "grad_norm": 6.4057135581970215, "learning_rate": 9.216602391899314e-05, "loss": 2.1174, "step": 2734 }, { "epoch": 0.20587515760552513, "grad_norm": 4.838561534881592, "learning_rate": 9.215947114685895e-05, "loss": 2.1417, "step": 2735 }, { "epoch": 0.20595043188618528, "grad_norm": 6.190004825592041, "learning_rate": 9.215291586846516e-05, "loss": 1.854, "step": 2736 }, { "epoch": 0.20602570616684543, "grad_norm": 10.128652572631836, "learning_rate": 9.214635808420147e-05, "loss": 2.1287, "step": 2737 }, { "epoch": 0.2061009804475056, "grad_norm": 7.153855800628662, "learning_rate": 9.213979779445771e-05, "loss": 2.3407, "step": 2738 }, { "epoch": 0.20617625472816575, "grad_norm": 5.416784763336182, "learning_rate": 9.213323499962392e-05, "loss": 2.1244, "step": 2739 }, { "epoch": 0.2062515290088259, "grad_norm": 4.468263149261475, "learning_rate": 9.212666970009017e-05, "loss": 2.0533, "step": 2740 }, { "epoch": 0.20632680328948608, "grad_norm": 4.061378479003906, "learning_rate": 9.21201018962468e-05, "loss": 1.997, "step": 2741 }, { "epoch": 0.20640207757014623, "grad_norm": 4.568488121032715, "learning_rate": 9.211353158848423e-05, "loss": 2.4459, "step": 2742 }, { "epoch": 0.20647735185080637, "grad_norm": 6.1667256355285645, "learning_rate": 9.210695877719303e-05, "loss": 2.138, "step": 2743 }, { "epoch": 0.20655262613146652, "grad_norm": 4.548626899719238, "learning_rate": 9.210038346276395e-05, "loss": 2.3282, "step": 2744 }, { "epoch": 0.2066279004121267, "grad_norm": 4.237787246704102, "learning_rate": 9.20938056455879e-05, "loss": 2.1, "step": 2745 }, { "epoch": 0.20670317469278685, "grad_norm": 4.625013828277588, "learning_rate": 9.208722532605584e-05, "loss": 1.9612, "step": 2746 }, { "epoch": 0.206778448973447, "grad_norm": 5.302488327026367, "learning_rate": 9.208064250455904e-05, "loss": 2.2188, "step": 2747 }, { "epoch": 0.20685372325410714, "grad_norm": 6.519745349884033, "learning_rate": 9.207405718148876e-05, "loss": 1.943, "step": 2748 }, { "epoch": 0.20692899753476732, "grad_norm": 4.911252975463867, "learning_rate": 9.206746935723652e-05, "loss": 2.1185, "step": 2749 }, { "epoch": 0.20700427181542747, "grad_norm": 6.025754928588867, "learning_rate": 9.206087903219391e-05, "loss": 2.4712, "step": 2750 }, { "epoch": 0.2070795460960876, "grad_norm": 4.421285629272461, "learning_rate": 9.205428620675274e-05, "loss": 2.3608, "step": 2751 }, { "epoch": 0.20715482037674776, "grad_norm": 6.1203179359436035, "learning_rate": 9.204769088130493e-05, "loss": 2.3081, "step": 2752 }, { "epoch": 0.20723009465740794, "grad_norm": 4.159307479858398, "learning_rate": 9.204109305624253e-05, "loss": 2.0839, "step": 2753 }, { "epoch": 0.20730536893806809, "grad_norm": 4.70167350769043, "learning_rate": 9.20344927319578e-05, "loss": 2.0895, "step": 2754 }, { "epoch": 0.20738064321872823, "grad_norm": 4.928372383117676, "learning_rate": 9.202788990884306e-05, "loss": 2.1404, "step": 2755 }, { "epoch": 0.2074559174993884, "grad_norm": 4.6017327308654785, "learning_rate": 9.202128458729087e-05, "loss": 2.1956, "step": 2756 }, { "epoch": 0.20753119178004856, "grad_norm": 6.752647876739502, "learning_rate": 9.201467676769389e-05, "loss": 2.1492, "step": 2757 }, { "epoch": 0.2076064660607087, "grad_norm": 4.317784309387207, "learning_rate": 9.200806645044492e-05, "loss": 1.8446, "step": 2758 }, { "epoch": 0.20768174034136885, "grad_norm": 4.698585510253906, "learning_rate": 9.200145363593694e-05, "loss": 2.2889, "step": 2759 }, { "epoch": 0.20775701462202903, "grad_norm": 5.974823474884033, "learning_rate": 9.199483832456305e-05, "loss": 2.2835, "step": 2760 }, { "epoch": 0.20783228890268918, "grad_norm": 4.162746429443359, "learning_rate": 9.198822051671651e-05, "loss": 1.8286, "step": 2761 }, { "epoch": 0.20790756318334933, "grad_norm": 6.423340797424316, "learning_rate": 9.198160021279076e-05, "loss": 2.1872, "step": 2762 }, { "epoch": 0.20798283746400947, "grad_norm": 5.672008991241455, "learning_rate": 9.197497741317933e-05, "loss": 2.2306, "step": 2763 }, { "epoch": 0.20805811174466965, "grad_norm": 7.741572856903076, "learning_rate": 9.196835211827593e-05, "loss": 2.1358, "step": 2764 }, { "epoch": 0.2081333860253298, "grad_norm": 5.3536577224731445, "learning_rate": 9.19617243284744e-05, "loss": 1.8629, "step": 2765 }, { "epoch": 0.20820866030598995, "grad_norm": 4.840548038482666, "learning_rate": 9.195509404416878e-05, "loss": 1.9415, "step": 2766 }, { "epoch": 0.2082839345866501, "grad_norm": 5.354896545410156, "learning_rate": 9.194846126575321e-05, "loss": 2.087, "step": 2767 }, { "epoch": 0.20835920886731027, "grad_norm": 3.991562604904175, "learning_rate": 9.194182599362198e-05, "loss": 2.1069, "step": 2768 }, { "epoch": 0.20843448314797042, "grad_norm": 6.801267623901367, "learning_rate": 9.193518822816952e-05, "loss": 2.2929, "step": 2769 }, { "epoch": 0.20850975742863057, "grad_norm": 4.3830790519714355, "learning_rate": 9.192854796979045e-05, "loss": 2.0509, "step": 2770 }, { "epoch": 0.20858503170929074, "grad_norm": 4.92519998550415, "learning_rate": 9.192190521887951e-05, "loss": 2.3104, "step": 2771 }, { "epoch": 0.2086603059899509, "grad_norm": 5.735259532928467, "learning_rate": 9.191525997583161e-05, "loss": 2.0312, "step": 2772 }, { "epoch": 0.20873558027061104, "grad_norm": 5.806834697723389, "learning_rate": 9.190861224104175e-05, "loss": 1.8885, "step": 2773 }, { "epoch": 0.20881085455127119, "grad_norm": 3.834347724914551, "learning_rate": 9.190196201490516e-05, "loss": 2.0853, "step": 2774 }, { "epoch": 0.20888612883193136, "grad_norm": 3.846677780151367, "learning_rate": 9.189530929781715e-05, "loss": 2.4739, "step": 2775 }, { "epoch": 0.2089614031125915, "grad_norm": 5.2583489418029785, "learning_rate": 9.18886540901732e-05, "loss": 2.1837, "step": 2776 }, { "epoch": 0.20903667739325166, "grad_norm": 3.9179093837738037, "learning_rate": 9.188199639236897e-05, "loss": 2.0571, "step": 2777 }, { "epoch": 0.2091119516739118, "grad_norm": 4.368052005767822, "learning_rate": 9.187533620480023e-05, "loss": 2.0461, "step": 2778 }, { "epoch": 0.20918722595457198, "grad_norm": 5.238763809204102, "learning_rate": 9.18686735278629e-05, "loss": 2.065, "step": 2779 }, { "epoch": 0.20926250023523213, "grad_norm": 3.870419502258301, "learning_rate": 9.186200836195307e-05, "loss": 1.9187, "step": 2780 }, { "epoch": 0.20933777451589228, "grad_norm": 5.346678733825684, "learning_rate": 9.185534070746695e-05, "loss": 2.0468, "step": 2781 }, { "epoch": 0.20941304879655243, "grad_norm": 4.438832759857178, "learning_rate": 9.184867056480093e-05, "loss": 2.1511, "step": 2782 }, { "epoch": 0.2094883230772126, "grad_norm": 7.27894401550293, "learning_rate": 9.184199793435152e-05, "loss": 1.8806, "step": 2783 }, { "epoch": 0.20956359735787275, "grad_norm": 4.2289838790893555, "learning_rate": 9.183532281651539e-05, "loss": 1.9441, "step": 2784 }, { "epoch": 0.2096388716385329, "grad_norm": 4.106243133544922, "learning_rate": 9.182864521168936e-05, "loss": 2.0805, "step": 2785 }, { "epoch": 0.20971414591919307, "grad_norm": 6.5418219566345215, "learning_rate": 9.182196512027039e-05, "loss": 1.7826, "step": 2786 }, { "epoch": 0.20978942019985322, "grad_norm": 5.005733966827393, "learning_rate": 9.181528254265559e-05, "loss": 2.0603, "step": 2787 }, { "epoch": 0.20986469448051337, "grad_norm": 5.310815811157227, "learning_rate": 9.180859747924223e-05, "loss": 2.2185, "step": 2788 }, { "epoch": 0.20993996876117352, "grad_norm": 5.525910377502441, "learning_rate": 9.180190993042772e-05, "loss": 1.9102, "step": 2789 }, { "epoch": 0.2100152430418337, "grad_norm": 4.9078497886657715, "learning_rate": 9.179521989660957e-05, "loss": 2.064, "step": 2790 }, { "epoch": 0.21009051732249384, "grad_norm": 4.449695587158203, "learning_rate": 9.178852737818557e-05, "loss": 1.9704, "step": 2791 }, { "epoch": 0.210165791603154, "grad_norm": 4.521839618682861, "learning_rate": 9.178183237555349e-05, "loss": 2.1179, "step": 2792 }, { "epoch": 0.21024106588381414, "grad_norm": 5.896556377410889, "learning_rate": 9.177513488911138e-05, "loss": 1.7694, "step": 2793 }, { "epoch": 0.2103163401644743, "grad_norm": 3.947960615158081, "learning_rate": 9.176843491925735e-05, "loss": 2.2439, "step": 2794 }, { "epoch": 0.21039161444513446, "grad_norm": 4.381667137145996, "learning_rate": 9.176173246638972e-05, "loss": 1.7337, "step": 2795 }, { "epoch": 0.2104668887257946, "grad_norm": 4.278743267059326, "learning_rate": 9.175502753090692e-05, "loss": 2.1427, "step": 2796 }, { "epoch": 0.21054216300645476, "grad_norm": 5.845564365386963, "learning_rate": 9.174832011320755e-05, "loss": 2.5555, "step": 2797 }, { "epoch": 0.21061743728711493, "grad_norm": 5.500210285186768, "learning_rate": 9.174161021369033e-05, "loss": 2.0677, "step": 2798 }, { "epoch": 0.21069271156777508, "grad_norm": 4.367582321166992, "learning_rate": 9.173489783275415e-05, "loss": 2.443, "step": 2799 }, { "epoch": 0.21076798584843523, "grad_norm": 4.06942081451416, "learning_rate": 9.172818297079806e-05, "loss": 1.7442, "step": 2800 }, { "epoch": 0.21084326012909538, "grad_norm": 4.764585494995117, "learning_rate": 9.172146562822121e-05, "loss": 1.9274, "step": 2801 }, { "epoch": 0.21091853440975555, "grad_norm": 5.261363506317139, "learning_rate": 9.171474580542295e-05, "loss": 1.8798, "step": 2802 }, { "epoch": 0.2109938086904157, "grad_norm": 4.888810634613037, "learning_rate": 9.170802350280274e-05, "loss": 2.0509, "step": 2803 }, { "epoch": 0.21106908297107585, "grad_norm": 4.8682942390441895, "learning_rate": 9.170129872076021e-05, "loss": 2.5575, "step": 2804 }, { "epoch": 0.21114435725173603, "grad_norm": 5.021416187286377, "learning_rate": 9.169457145969512e-05, "loss": 2.0261, "step": 2805 }, { "epoch": 0.21121963153239617, "grad_norm": 5.399176597595215, "learning_rate": 9.168784172000739e-05, "loss": 2.4254, "step": 2806 }, { "epoch": 0.21129490581305632, "grad_norm": 5.843966484069824, "learning_rate": 9.168110950209709e-05, "loss": 1.9917, "step": 2807 }, { "epoch": 0.21137018009371647, "grad_norm": 4.437451362609863, "learning_rate": 9.167437480636443e-05, "loss": 2.2837, "step": 2808 }, { "epoch": 0.21144545437437665, "grad_norm": 5.064668655395508, "learning_rate": 9.166763763320976e-05, "loss": 2.5844, "step": 2809 }, { "epoch": 0.2115207286550368, "grad_norm": 5.718889236450195, "learning_rate": 9.16608979830336e-05, "loss": 2.149, "step": 2810 }, { "epoch": 0.21159600293569694, "grad_norm": 4.556615352630615, "learning_rate": 9.165415585623658e-05, "loss": 2.1195, "step": 2811 }, { "epoch": 0.2116712772163571, "grad_norm": 6.086655616760254, "learning_rate": 9.164741125321952e-05, "loss": 2.2139, "step": 2812 }, { "epoch": 0.21174655149701727, "grad_norm": 4.455840110778809, "learning_rate": 9.164066417438335e-05, "loss": 1.8764, "step": 2813 }, { "epoch": 0.2118218257776774, "grad_norm": 4.952279567718506, "learning_rate": 9.16339146201292e-05, "loss": 2.007, "step": 2814 }, { "epoch": 0.21189710005833756, "grad_norm": 5.059940338134766, "learning_rate": 9.162716259085827e-05, "loss": 2.0489, "step": 2815 }, { "epoch": 0.2119723743389977, "grad_norm": 5.385522365570068, "learning_rate": 9.162040808697196e-05, "loss": 2.1337, "step": 2816 }, { "epoch": 0.21204764861965789, "grad_norm": 8.4082670211792, "learning_rate": 9.161365110887181e-05, "loss": 2.4877, "step": 2817 }, { "epoch": 0.21212292290031803, "grad_norm": 5.099520206451416, "learning_rate": 9.160689165695952e-05, "loss": 2.0511, "step": 2818 }, { "epoch": 0.21219819718097818, "grad_norm": 4.262336730957031, "learning_rate": 9.16001297316369e-05, "loss": 1.7989, "step": 2819 }, { "epoch": 0.21227347146163836, "grad_norm": 4.891003131866455, "learning_rate": 9.159336533330594e-05, "loss": 2.367, "step": 2820 }, { "epoch": 0.2123487457422985, "grad_norm": 4.341526031494141, "learning_rate": 9.158659846236875e-05, "loss": 1.8421, "step": 2821 }, { "epoch": 0.21242402002295865, "grad_norm": 3.5332159996032715, "learning_rate": 9.157982911922759e-05, "loss": 2.0078, "step": 2822 }, { "epoch": 0.2124992943036188, "grad_norm": 5.055265426635742, "learning_rate": 9.15730573042849e-05, "loss": 2.344, "step": 2823 }, { "epoch": 0.21257456858427898, "grad_norm": 5.544818878173828, "learning_rate": 9.156628301794325e-05, "loss": 1.9671, "step": 2824 }, { "epoch": 0.21264984286493913, "grad_norm": 5.053716659545898, "learning_rate": 9.155950626060535e-05, "loss": 2.0973, "step": 2825 }, { "epoch": 0.21272511714559927, "grad_norm": 5.481313228607178, "learning_rate": 9.155272703267403e-05, "loss": 2.9367, "step": 2826 }, { "epoch": 0.21280039142625942, "grad_norm": 7.212437152862549, "learning_rate": 9.154594533455232e-05, "loss": 2.1106, "step": 2827 }, { "epoch": 0.2128756657069196, "grad_norm": 4.365868091583252, "learning_rate": 9.153916116664338e-05, "loss": 2.0575, "step": 2828 }, { "epoch": 0.21295093998757975, "grad_norm": 5.271259784698486, "learning_rate": 9.153237452935049e-05, "loss": 2.0631, "step": 2829 }, { "epoch": 0.2130262142682399, "grad_norm": 3.9800784587860107, "learning_rate": 9.15255854230771e-05, "loss": 1.9935, "step": 2830 }, { "epoch": 0.21310148854890004, "grad_norm": 4.016927719116211, "learning_rate": 9.151879384822682e-05, "loss": 1.9925, "step": 2831 }, { "epoch": 0.21317676282956022, "grad_norm": 5.193508148193359, "learning_rate": 9.151199980520336e-05, "loss": 2.2573, "step": 2832 }, { "epoch": 0.21325203711022037, "grad_norm": 5.365655899047852, "learning_rate": 9.150520329441063e-05, "loss": 2.2443, "step": 2833 }, { "epoch": 0.21332731139088051, "grad_norm": 5.702099800109863, "learning_rate": 9.149840431625266e-05, "loss": 2.0334, "step": 2834 }, { "epoch": 0.2134025856715407, "grad_norm": 4.0529890060424805, "learning_rate": 9.149160287113363e-05, "loss": 1.9494, "step": 2835 }, { "epoch": 0.21347785995220084, "grad_norm": 4.112748622894287, "learning_rate": 9.148479895945784e-05, "loss": 2.094, "step": 2836 }, { "epoch": 0.21355313423286099, "grad_norm": 4.626742839813232, "learning_rate": 9.147799258162981e-05, "loss": 2.2898, "step": 2837 }, { "epoch": 0.21362840851352113, "grad_norm": 7.863895893096924, "learning_rate": 9.147118373805413e-05, "loss": 2.5628, "step": 2838 }, { "epoch": 0.2137036827941813, "grad_norm": 4.257025718688965, "learning_rate": 9.146437242913556e-05, "loss": 1.898, "step": 2839 }, { "epoch": 0.21377895707484146, "grad_norm": 5.227463245391846, "learning_rate": 9.145755865527904e-05, "loss": 1.8511, "step": 2840 }, { "epoch": 0.2138542313555016, "grad_norm": 4.386064052581787, "learning_rate": 9.145074241688959e-05, "loss": 1.9155, "step": 2841 }, { "epoch": 0.21392950563616175, "grad_norm": 4.630362510681152, "learning_rate": 9.144392371437245e-05, "loss": 2.5548, "step": 2842 }, { "epoch": 0.21400477991682193, "grad_norm": 4.202327728271484, "learning_rate": 9.143710254813296e-05, "loss": 1.8846, "step": 2843 }, { "epoch": 0.21408005419748208, "grad_norm": 4.9407477378845215, "learning_rate": 9.143027891857663e-05, "loss": 2.4482, "step": 2844 }, { "epoch": 0.21415532847814223, "grad_norm": 4.4251017570495605, "learning_rate": 9.142345282610908e-05, "loss": 2.0174, "step": 2845 }, { "epoch": 0.21423060275880237, "grad_norm": 4.769384384155273, "learning_rate": 9.141662427113613e-05, "loss": 2.1288, "step": 2846 }, { "epoch": 0.21430587703946255, "grad_norm": 5.329495906829834, "learning_rate": 9.140979325406369e-05, "loss": 1.8539, "step": 2847 }, { "epoch": 0.2143811513201227, "grad_norm": 4.017544746398926, "learning_rate": 9.140295977529788e-05, "loss": 2.0459, "step": 2848 }, { "epoch": 0.21445642560078285, "grad_norm": 4.645512580871582, "learning_rate": 9.139612383524489e-05, "loss": 2.3162, "step": 2849 }, { "epoch": 0.214531699881443, "grad_norm": 4.684594631195068, "learning_rate": 9.138928543431113e-05, "loss": 1.9761, "step": 2850 }, { "epoch": 0.21460697416210317, "grad_norm": 4.723893165588379, "learning_rate": 9.138244457290311e-05, "loss": 2.077, "step": 2851 }, { "epoch": 0.21468224844276332, "grad_norm": 4.869959831237793, "learning_rate": 9.13756012514275e-05, "loss": 2.4885, "step": 2852 }, { "epoch": 0.21475752272342347, "grad_norm": 4.7527689933776855, "learning_rate": 9.13687554702911e-05, "loss": 1.9081, "step": 2853 }, { "epoch": 0.21483279700408364, "grad_norm": 5.106403827667236, "learning_rate": 9.13619072299009e-05, "loss": 2.1309, "step": 2854 }, { "epoch": 0.2149080712847438, "grad_norm": 4.389427661895752, "learning_rate": 9.135505653066399e-05, "loss": 1.6671, "step": 2855 }, { "epoch": 0.21498334556540394, "grad_norm": 3.9092814922332764, "learning_rate": 9.134820337298763e-05, "loss": 2.1034, "step": 2856 }, { "epoch": 0.2150586198460641, "grad_norm": 4.435222148895264, "learning_rate": 9.134134775727922e-05, "loss": 2.2736, "step": 2857 }, { "epoch": 0.21513389412672426, "grad_norm": 4.515613079071045, "learning_rate": 9.133448968394631e-05, "loss": 2.2219, "step": 2858 }, { "epoch": 0.2152091684073844, "grad_norm": 6.38145112991333, "learning_rate": 9.13276291533966e-05, "loss": 1.9501, "step": 2859 }, { "epoch": 0.21528444268804456, "grad_norm": 10.269143104553223, "learning_rate": 9.132076616603791e-05, "loss": 2.1356, "step": 2860 }, { "epoch": 0.2153597169687047, "grad_norm": 4.484152793884277, "learning_rate": 9.131390072227823e-05, "loss": 2.3876, "step": 2861 }, { "epoch": 0.21543499124936488, "grad_norm": 3.530357837677002, "learning_rate": 9.13070328225257e-05, "loss": 1.8473, "step": 2862 }, { "epoch": 0.21551026553002503, "grad_norm": 4.104977130889893, "learning_rate": 9.13001624671886e-05, "loss": 2.3885, "step": 2863 }, { "epoch": 0.21558553981068518, "grad_norm": 5.4687299728393555, "learning_rate": 9.129328965667534e-05, "loss": 2.3085, "step": 2864 }, { "epoch": 0.21566081409134533, "grad_norm": 6.70385217666626, "learning_rate": 9.128641439139448e-05, "loss": 2.4711, "step": 2865 }, { "epoch": 0.2157360883720055, "grad_norm": 4.164482116699219, "learning_rate": 9.127953667175476e-05, "loss": 1.967, "step": 2866 }, { "epoch": 0.21581136265266565, "grad_norm": 8.239279747009277, "learning_rate": 9.127265649816504e-05, "loss": 1.9405, "step": 2867 }, { "epoch": 0.2158866369333258, "grad_norm": 5.549882411956787, "learning_rate": 9.12657738710343e-05, "loss": 1.9267, "step": 2868 }, { "epoch": 0.21596191121398597, "grad_norm": 4.876378059387207, "learning_rate": 9.12588887907717e-05, "loss": 2.2171, "step": 2869 }, { "epoch": 0.21603718549464612, "grad_norm": 4.760179042816162, "learning_rate": 9.125200125778657e-05, "loss": 1.9083, "step": 2870 }, { "epoch": 0.21611245977530627, "grad_norm": 4.280489921569824, "learning_rate": 9.124511127248832e-05, "loss": 1.9738, "step": 2871 }, { "epoch": 0.21618773405596642, "grad_norm": 4.674266338348389, "learning_rate": 9.123821883528653e-05, "loss": 2.0772, "step": 2872 }, { "epoch": 0.2162630083366266, "grad_norm": 4.658254623413086, "learning_rate": 9.123132394659098e-05, "loss": 2.5261, "step": 2873 }, { "epoch": 0.21633828261728674, "grad_norm": 4.049352645874023, "learning_rate": 9.122442660681153e-05, "loss": 2.1706, "step": 2874 }, { "epoch": 0.2164135568979469, "grad_norm": 3.8615479469299316, "learning_rate": 9.12175268163582e-05, "loss": 2.0985, "step": 2875 }, { "epoch": 0.21648883117860704, "grad_norm": 4.295295715332031, "learning_rate": 9.121062457564115e-05, "loss": 2.2902, "step": 2876 }, { "epoch": 0.21656410545926721, "grad_norm": 4.983851909637451, "learning_rate": 9.120371988507073e-05, "loss": 2.0875, "step": 2877 }, { "epoch": 0.21663937973992736, "grad_norm": 4.82524299621582, "learning_rate": 9.11968127450574e-05, "loss": 2.1808, "step": 2878 }, { "epoch": 0.2167146540205875, "grad_norm": 3.676748752593994, "learning_rate": 9.118990315601175e-05, "loss": 1.6644, "step": 2879 }, { "epoch": 0.21678992830124766, "grad_norm": 6.100306510925293, "learning_rate": 9.118299111834454e-05, "loss": 2.0209, "step": 2880 }, { "epoch": 0.21686520258190783, "grad_norm": 3.7935211658477783, "learning_rate": 9.117607663246668e-05, "loss": 2.1315, "step": 2881 }, { "epoch": 0.21694047686256798, "grad_norm": 6.240111827850342, "learning_rate": 9.116915969878921e-05, "loss": 2.0366, "step": 2882 }, { "epoch": 0.21701575114322813, "grad_norm": 5.97177267074585, "learning_rate": 9.116224031772331e-05, "loss": 2.0558, "step": 2883 }, { "epoch": 0.2170910254238883, "grad_norm": 4.682295799255371, "learning_rate": 9.115531848968035e-05, "loss": 2.5279, "step": 2884 }, { "epoch": 0.21716629970454845, "grad_norm": 5.760948657989502, "learning_rate": 9.114839421507179e-05, "loss": 2.1068, "step": 2885 }, { "epoch": 0.2172415739852086, "grad_norm": 3.304828405380249, "learning_rate": 9.114146749430926e-05, "loss": 1.9157, "step": 2886 }, { "epoch": 0.21731684826586875, "grad_norm": 4.40485143661499, "learning_rate": 9.113453832780453e-05, "loss": 2.0141, "step": 2887 }, { "epoch": 0.21739212254652893, "grad_norm": 4.610844612121582, "learning_rate": 9.112760671596953e-05, "loss": 2.394, "step": 2888 }, { "epoch": 0.21746739682718907, "grad_norm": 9.409422874450684, "learning_rate": 9.112067265921633e-05, "loss": 2.0827, "step": 2889 }, { "epoch": 0.21754267110784922, "grad_norm": 4.706923484802246, "learning_rate": 9.111373615795711e-05, "loss": 1.9139, "step": 2890 }, { "epoch": 0.21761794538850937, "grad_norm": 4.656936168670654, "learning_rate": 9.110679721260424e-05, "loss": 1.9504, "step": 2891 }, { "epoch": 0.21769321966916955, "grad_norm": 4.418980121612549, "learning_rate": 9.109985582357024e-05, "loss": 1.9901, "step": 2892 }, { "epoch": 0.2177684939498297, "grad_norm": 5.177255153656006, "learning_rate": 9.109291199126774e-05, "loss": 2.331, "step": 2893 }, { "epoch": 0.21784376823048984, "grad_norm": 6.097462177276611, "learning_rate": 9.108596571610952e-05, "loss": 1.9097, "step": 2894 }, { "epoch": 0.21791904251115, "grad_norm": 5.419043064117432, "learning_rate": 9.107901699850853e-05, "loss": 2.1482, "step": 2895 }, { "epoch": 0.21799431679181017, "grad_norm": 4.536962985992432, "learning_rate": 9.107206583887786e-05, "loss": 2.1616, "step": 2896 }, { "epoch": 0.21806959107247031, "grad_norm": 4.24278450012207, "learning_rate": 9.106511223763073e-05, "loss": 2.313, "step": 2897 }, { "epoch": 0.21814486535313046, "grad_norm": 4.091751575469971, "learning_rate": 9.105815619518048e-05, "loss": 2.0003, "step": 2898 }, { "epoch": 0.2182201396337906, "grad_norm": 5.067935943603516, "learning_rate": 9.105119771194066e-05, "loss": 2.1062, "step": 2899 }, { "epoch": 0.2182954139144508, "grad_norm": 6.963771820068359, "learning_rate": 9.104423678832494e-05, "loss": 2.3203, "step": 2900 }, { "epoch": 0.21837068819511093, "grad_norm": 4.553853988647461, "learning_rate": 9.103727342474711e-05, "loss": 2.0733, "step": 2901 }, { "epoch": 0.21844596247577108, "grad_norm": 4.527489185333252, "learning_rate": 9.103030762162112e-05, "loss": 1.8942, "step": 2902 }, { "epoch": 0.21852123675643126, "grad_norm": 5.539656162261963, "learning_rate": 9.102333937936107e-05, "loss": 1.6873, "step": 2903 }, { "epoch": 0.2185965110370914, "grad_norm": 5.065639495849609, "learning_rate": 9.101636869838119e-05, "loss": 2.0554, "step": 2904 }, { "epoch": 0.21867178531775155, "grad_norm": 5.687830448150635, "learning_rate": 9.10093955790959e-05, "loss": 2.0371, "step": 2905 }, { "epoch": 0.2187470595984117, "grad_norm": 4.382602214813232, "learning_rate": 9.10024200219197e-05, "loss": 2.0513, "step": 2906 }, { "epoch": 0.21882233387907188, "grad_norm": 4.833364486694336, "learning_rate": 9.099544202726728e-05, "loss": 1.9644, "step": 2907 }, { "epoch": 0.21889760815973203, "grad_norm": 5.483856201171875, "learning_rate": 9.098846159555346e-05, "loss": 2.0015, "step": 2908 }, { "epoch": 0.21897288244039217, "grad_norm": 5.608835697174072, "learning_rate": 9.098147872719321e-05, "loss": 1.7457, "step": 2909 }, { "epoch": 0.21904815672105232, "grad_norm": 5.9853949546813965, "learning_rate": 9.097449342260165e-05, "loss": 2.6678, "step": 2910 }, { "epoch": 0.2191234310017125, "grad_norm": 4.045847415924072, "learning_rate": 9.0967505682194e-05, "loss": 1.9462, "step": 2911 }, { "epoch": 0.21919870528237265, "grad_norm": 5.401683807373047, "learning_rate": 9.096051550638572e-05, "loss": 2.281, "step": 2912 }, { "epoch": 0.2192739795630328, "grad_norm": 6.834976673126221, "learning_rate": 9.095352289559228e-05, "loss": 2.0505, "step": 2913 }, { "epoch": 0.21934925384369294, "grad_norm": 5.75833797454834, "learning_rate": 9.094652785022945e-05, "loss": 1.8767, "step": 2914 }, { "epoch": 0.21942452812435312, "grad_norm": 3.7877635955810547, "learning_rate": 9.093953037071301e-05, "loss": 2.2009, "step": 2915 }, { "epoch": 0.21949980240501327, "grad_norm": 5.442332744598389, "learning_rate": 9.093253045745895e-05, "loss": 1.9677, "step": 2916 }, { "epoch": 0.21957507668567341, "grad_norm": 4.543300628662109, "learning_rate": 9.092552811088342e-05, "loss": 1.8725, "step": 2917 }, { "epoch": 0.2196503509663336, "grad_norm": 6.140537261962891, "learning_rate": 9.091852333140267e-05, "loss": 1.9658, "step": 2918 }, { "epoch": 0.21972562524699374, "grad_norm": 6.194490432739258, "learning_rate": 9.091151611943312e-05, "loss": 2.2637, "step": 2919 }, { "epoch": 0.2198008995276539, "grad_norm": 5.404478073120117, "learning_rate": 9.09045064753913e-05, "loss": 1.8551, "step": 2920 }, { "epoch": 0.21987617380831403, "grad_norm": 4.314300537109375, "learning_rate": 9.089749439969398e-05, "loss": 1.8512, "step": 2921 }, { "epoch": 0.2199514480889742, "grad_norm": 4.811122894287109, "learning_rate": 9.089047989275793e-05, "loss": 1.7737, "step": 2922 }, { "epoch": 0.22002672236963436, "grad_norm": 4.302212238311768, "learning_rate": 9.088346295500019e-05, "loss": 2.1659, "step": 2923 }, { "epoch": 0.2201019966502945, "grad_norm": 5.338231563568115, "learning_rate": 9.087644358683789e-05, "loss": 2.0363, "step": 2924 }, { "epoch": 0.22017727093095465, "grad_norm": 5.035805702209473, "learning_rate": 9.086942178868829e-05, "loss": 2.1317, "step": 2925 }, { "epoch": 0.22025254521161483, "grad_norm": 5.151651382446289, "learning_rate": 9.086239756096886e-05, "loss": 2.3377, "step": 2926 }, { "epoch": 0.22032781949227498, "grad_norm": 4.626562595367432, "learning_rate": 9.085537090409712e-05, "loss": 2.0775, "step": 2927 }, { "epoch": 0.22040309377293513, "grad_norm": 6.436605930328369, "learning_rate": 9.084834181849081e-05, "loss": 1.9339, "step": 2928 }, { "epoch": 0.22047836805359528, "grad_norm": 5.9805450439453125, "learning_rate": 9.08413103045678e-05, "loss": 2.0937, "step": 2929 }, { "epoch": 0.22055364233425545, "grad_norm": 4.39972448348999, "learning_rate": 9.083427636274606e-05, "loss": 1.9688, "step": 2930 }, { "epoch": 0.2206289166149156, "grad_norm": 4.032160758972168, "learning_rate": 9.082723999344377e-05, "loss": 2.2035, "step": 2931 }, { "epoch": 0.22070419089557575, "grad_norm": 7.017184734344482, "learning_rate": 9.082020119707921e-05, "loss": 1.9799, "step": 2932 }, { "epoch": 0.22077946517623592, "grad_norm": 5.398980617523193, "learning_rate": 9.08131599740708e-05, "loss": 2.1431, "step": 2933 }, { "epoch": 0.22085473945689607, "grad_norm": 4.855225563049316, "learning_rate": 9.080611632483714e-05, "loss": 2.2772, "step": 2934 }, { "epoch": 0.22093001373755622, "grad_norm": 5.293283462524414, "learning_rate": 9.079907024979696e-05, "loss": 2.1082, "step": 2935 }, { "epoch": 0.22100528801821637, "grad_norm": 5.850619316101074, "learning_rate": 9.079202174936912e-05, "loss": 1.7753, "step": 2936 }, { "epoch": 0.22108056229887654, "grad_norm": 4.3017191886901855, "learning_rate": 9.078497082397262e-05, "loss": 2.2775, "step": 2937 }, { "epoch": 0.2211558365795367, "grad_norm": 4.076857566833496, "learning_rate": 9.077791747402664e-05, "loss": 2.1259, "step": 2938 }, { "epoch": 0.22123111086019684, "grad_norm": 4.446213722229004, "learning_rate": 9.077086169995048e-05, "loss": 2.1826, "step": 2939 }, { "epoch": 0.221306385140857, "grad_norm": 4.013009548187256, "learning_rate": 9.076380350216355e-05, "loss": 2.0302, "step": 2940 }, { "epoch": 0.22138165942151716, "grad_norm": 4.595061302185059, "learning_rate": 9.075674288108549e-05, "loss": 1.7048, "step": 2941 }, { "epoch": 0.2214569337021773, "grad_norm": 8.099377632141113, "learning_rate": 9.0749679837136e-05, "loss": 2.5522, "step": 2942 }, { "epoch": 0.22153220798283746, "grad_norm": 8.246782302856445, "learning_rate": 9.074261437073497e-05, "loss": 2.1225, "step": 2943 }, { "epoch": 0.2216074822634976, "grad_norm": 4.502189636230469, "learning_rate": 9.073554648230241e-05, "loss": 2.2436, "step": 2944 }, { "epoch": 0.22168275654415778, "grad_norm": 5.9204816818237305, "learning_rate": 9.07284761722585e-05, "loss": 2.0374, "step": 2945 }, { "epoch": 0.22175803082481793, "grad_norm": 4.8395538330078125, "learning_rate": 9.072140344102355e-05, "loss": 2.0299, "step": 2946 }, { "epoch": 0.22183330510547808, "grad_norm": 6.0673675537109375, "learning_rate": 9.0714328289018e-05, "loss": 2.2062, "step": 2947 }, { "epoch": 0.22190857938613825, "grad_norm": 4.742865562438965, "learning_rate": 9.070725071666248e-05, "loss": 1.9603, "step": 2948 }, { "epoch": 0.2219838536667984, "grad_norm": 5.113400459289551, "learning_rate": 9.070017072437769e-05, "loss": 2.0069, "step": 2949 }, { "epoch": 0.22205912794745855, "grad_norm": 4.952245712280273, "learning_rate": 9.069308831258452e-05, "loss": 2.2253, "step": 2950 }, { "epoch": 0.2221344022281187, "grad_norm": 5.182546615600586, "learning_rate": 9.068600348170401e-05, "loss": 1.9472, "step": 2951 }, { "epoch": 0.22220967650877888, "grad_norm": 9.822628021240234, "learning_rate": 9.067891623215737e-05, "loss": 2.3668, "step": 2952 }, { "epoch": 0.22228495078943902, "grad_norm": 6.70409631729126, "learning_rate": 9.067182656436585e-05, "loss": 2.1276, "step": 2953 }, { "epoch": 0.22236022507009917, "grad_norm": 5.011364459991455, "learning_rate": 9.066473447875094e-05, "loss": 2.1604, "step": 2954 }, { "epoch": 0.22243549935075932, "grad_norm": 6.013547420501709, "learning_rate": 9.065763997573427e-05, "loss": 1.9803, "step": 2955 }, { "epoch": 0.2225107736314195, "grad_norm": 5.591979026794434, "learning_rate": 9.065054305573755e-05, "loss": 1.8404, "step": 2956 }, { "epoch": 0.22258604791207964, "grad_norm": 5.186059474945068, "learning_rate": 9.064344371918267e-05, "loss": 1.9471, "step": 2957 }, { "epoch": 0.2226613221927398, "grad_norm": 5.612191200256348, "learning_rate": 9.06363419664917e-05, "loss": 2.2213, "step": 2958 }, { "epoch": 0.22273659647339994, "grad_norm": 4.949213027954102, "learning_rate": 9.062923779808678e-05, "loss": 1.7411, "step": 2959 }, { "epoch": 0.22281187075406012, "grad_norm": 4.895546913146973, "learning_rate": 9.062213121439027e-05, "loss": 2.1532, "step": 2960 }, { "epoch": 0.22288714503472026, "grad_norm": 6.915483474731445, "learning_rate": 9.061502221582461e-05, "loss": 2.4376, "step": 2961 }, { "epoch": 0.2229624193153804, "grad_norm": 4.932807445526123, "learning_rate": 9.060791080281241e-05, "loss": 1.9923, "step": 2962 }, { "epoch": 0.22303769359604056, "grad_norm": 6.073930263519287, "learning_rate": 9.060079697577644e-05, "loss": 1.876, "step": 2963 }, { "epoch": 0.22311296787670074, "grad_norm": 5.532196998596191, "learning_rate": 9.059368073513958e-05, "loss": 2.0465, "step": 2964 }, { "epoch": 0.22318824215736088, "grad_norm": 4.24959135055542, "learning_rate": 9.058656208132487e-05, "loss": 2.2441, "step": 2965 }, { "epoch": 0.22326351643802103, "grad_norm": 4.8348236083984375, "learning_rate": 9.057944101475553e-05, "loss": 2.1498, "step": 2966 }, { "epoch": 0.2233387907186812, "grad_norm": 4.467230319976807, "learning_rate": 9.057231753585483e-05, "loss": 1.8951, "step": 2967 }, { "epoch": 0.22341406499934136, "grad_norm": 4.355803966522217, "learning_rate": 9.056519164504627e-05, "loss": 2.3531, "step": 2968 }, { "epoch": 0.2234893392800015, "grad_norm": 5.565178394317627, "learning_rate": 9.055806334275347e-05, "loss": 2.0837, "step": 2969 }, { "epoch": 0.22356461356066165, "grad_norm": 3.871276617050171, "learning_rate": 9.055093262940017e-05, "loss": 2.2883, "step": 2970 }, { "epoch": 0.22363988784132183, "grad_norm": 4.407426357269287, "learning_rate": 9.054379950541028e-05, "loss": 1.9799, "step": 2971 }, { "epoch": 0.22371516212198198, "grad_norm": 7.752662181854248, "learning_rate": 9.053666397120786e-05, "loss": 2.3833, "step": 2972 }, { "epoch": 0.22379043640264212, "grad_norm": 8.185113906860352, "learning_rate": 9.052952602721706e-05, "loss": 2.2716, "step": 2973 }, { "epoch": 0.22386571068330227, "grad_norm": 5.350937366485596, "learning_rate": 9.052238567386224e-05, "loss": 2.2734, "step": 2974 }, { "epoch": 0.22394098496396245, "grad_norm": 4.131227970123291, "learning_rate": 9.051524291156785e-05, "loss": 2.2203, "step": 2975 }, { "epoch": 0.2240162592446226, "grad_norm": 3.5607070922851562, "learning_rate": 9.050809774075853e-05, "loss": 2.0199, "step": 2976 }, { "epoch": 0.22409153352528274, "grad_norm": 4.787525653839111, "learning_rate": 9.050095016185903e-05, "loss": 1.9822, "step": 2977 }, { "epoch": 0.2241668078059429, "grad_norm": 5.674337863922119, "learning_rate": 9.049380017529426e-05, "loss": 2.0224, "step": 2978 }, { "epoch": 0.22424208208660307, "grad_norm": 3.7880828380584717, "learning_rate": 9.048664778148924e-05, "loss": 2.1697, "step": 2979 }, { "epoch": 0.22431735636726322, "grad_norm": 5.682531356811523, "learning_rate": 9.047949298086919e-05, "loss": 2.4035, "step": 2980 }, { "epoch": 0.22439263064792336, "grad_norm": 4.9303178787231445, "learning_rate": 9.047233577385944e-05, "loss": 2.0629, "step": 2981 }, { "epoch": 0.22446790492858354, "grad_norm": 5.042529582977295, "learning_rate": 9.046517616088545e-05, "loss": 2.221, "step": 2982 }, { "epoch": 0.2245431792092437, "grad_norm": 4.45997953414917, "learning_rate": 9.045801414237284e-05, "loss": 1.9773, "step": 2983 }, { "epoch": 0.22461845348990384, "grad_norm": 4.140063285827637, "learning_rate": 9.045084971874738e-05, "loss": 1.9895, "step": 2984 }, { "epoch": 0.22469372777056398, "grad_norm": 5.330562591552734, "learning_rate": 9.044368289043497e-05, "loss": 2.411, "step": 2985 }, { "epoch": 0.22476900205122416, "grad_norm": 4.238677024841309, "learning_rate": 9.043651365786166e-05, "loss": 2.2728, "step": 2986 }, { "epoch": 0.2248442763318843, "grad_norm": 4.427063465118408, "learning_rate": 9.042934202145364e-05, "loss": 2.1105, "step": 2987 }, { "epoch": 0.22491955061254446, "grad_norm": 4.295475959777832, "learning_rate": 9.042216798163722e-05, "loss": 1.9844, "step": 2988 }, { "epoch": 0.2249948248932046, "grad_norm": 4.440731048583984, "learning_rate": 9.041499153883893e-05, "loss": 2.2493, "step": 2989 }, { "epoch": 0.22507009917386478, "grad_norm": 5.3758063316345215, "learning_rate": 9.040781269348533e-05, "loss": 2.4395, "step": 2990 }, { "epoch": 0.22514537345452493, "grad_norm": 4.190469264984131, "learning_rate": 9.040063144600322e-05, "loss": 2.3388, "step": 2991 }, { "epoch": 0.22522064773518508, "grad_norm": 4.746899127960205, "learning_rate": 9.03934477968195e-05, "loss": 1.9926, "step": 2992 }, { "epoch": 0.22529592201584522, "grad_norm": 4.665939807891846, "learning_rate": 9.03862617463612e-05, "loss": 2.0094, "step": 2993 }, { "epoch": 0.2253711962965054, "grad_norm": 3.8885819911956787, "learning_rate": 9.037907329505553e-05, "loss": 1.9834, "step": 2994 }, { "epoch": 0.22544647057716555, "grad_norm": 5.015994548797607, "learning_rate": 9.03718824433298e-05, "loss": 2.1075, "step": 2995 }, { "epoch": 0.2255217448578257, "grad_norm": 6.6822333335876465, "learning_rate": 9.036468919161151e-05, "loss": 2.2278, "step": 2996 }, { "epoch": 0.22559701913848587, "grad_norm": 4.36100435256958, "learning_rate": 9.035749354032825e-05, "loss": 1.9734, "step": 2997 }, { "epoch": 0.22567229341914602, "grad_norm": 3.667426586151123, "learning_rate": 9.035029548990782e-05, "loss": 2.008, "step": 2998 }, { "epoch": 0.22574756769980617, "grad_norm": 4.259113788604736, "learning_rate": 9.034309504077809e-05, "loss": 1.6353, "step": 2999 }, { "epoch": 0.22582284198046632, "grad_norm": 3.9176275730133057, "learning_rate": 9.033589219336711e-05, "loss": 1.97, "step": 3000 }, { "epoch": 0.2258981162611265, "grad_norm": 6.052648544311523, "learning_rate": 9.03286869481031e-05, "loss": 2.7073, "step": 3001 }, { "epoch": 0.22597339054178664, "grad_norm": 6.663978576660156, "learning_rate": 9.032147930541435e-05, "loss": 2.4969, "step": 3002 }, { "epoch": 0.2260486648224468, "grad_norm": 8.080892562866211, "learning_rate": 9.031426926572935e-05, "loss": 2.1463, "step": 3003 }, { "epoch": 0.22612393910310694, "grad_norm": 6.703983306884766, "learning_rate": 9.030705682947671e-05, "loss": 1.9333, "step": 3004 }, { "epoch": 0.2261992133837671, "grad_norm": 7.132099151611328, "learning_rate": 9.029984199708521e-05, "loss": 2.0013, "step": 3005 }, { "epoch": 0.22627448766442726, "grad_norm": 4.324505805969238, "learning_rate": 9.029262476898372e-05, "loss": 1.8208, "step": 3006 }, { "epoch": 0.2263497619450874, "grad_norm": 4.36423921585083, "learning_rate": 9.028540514560132e-05, "loss": 2.0609, "step": 3007 }, { "epoch": 0.22642503622574756, "grad_norm": 6.873592376708984, "learning_rate": 9.027818312736717e-05, "loss": 1.9287, "step": 3008 }, { "epoch": 0.22650031050640773, "grad_norm": 6.495251655578613, "learning_rate": 9.02709587147106e-05, "loss": 2.2841, "step": 3009 }, { "epoch": 0.22657558478706788, "grad_norm": 4.629186153411865, "learning_rate": 9.026373190806108e-05, "loss": 2.2215, "step": 3010 }, { "epoch": 0.22665085906772803, "grad_norm": 4.751518249511719, "learning_rate": 9.025650270784823e-05, "loss": 1.9765, "step": 3011 }, { "epoch": 0.22672613334838818, "grad_norm": 4.488245964050293, "learning_rate": 9.024927111450182e-05, "loss": 2.1709, "step": 3012 }, { "epoch": 0.22680140762904835, "grad_norm": 4.00231409072876, "learning_rate": 9.02420371284517e-05, "loss": 2.157, "step": 3013 }, { "epoch": 0.2268766819097085, "grad_norm": 7.076344966888428, "learning_rate": 9.023480075012795e-05, "loss": 2.1229, "step": 3014 }, { "epoch": 0.22695195619036865, "grad_norm": 6.816217422485352, "learning_rate": 9.022756197996077e-05, "loss": 2.0886, "step": 3015 }, { "epoch": 0.22702723047102882, "grad_norm": 4.8998260498046875, "learning_rate": 9.022032081838042e-05, "loss": 2.1428, "step": 3016 }, { "epoch": 0.22710250475168897, "grad_norm": 4.486428737640381, "learning_rate": 9.021307726581743e-05, "loss": 1.7001, "step": 3017 }, { "epoch": 0.22717777903234912, "grad_norm": 5.397170543670654, "learning_rate": 9.020583132270235e-05, "loss": 2.277, "step": 3018 }, { "epoch": 0.22725305331300927, "grad_norm": 4.8494791984558105, "learning_rate": 9.019858298946598e-05, "loss": 2.0772, "step": 3019 }, { "epoch": 0.22732832759366944, "grad_norm": 6.169023036956787, "learning_rate": 9.01913322665392e-05, "loss": 2.2601, "step": 3020 }, { "epoch": 0.2274036018743296, "grad_norm": 4.9365997314453125, "learning_rate": 9.018407915435302e-05, "loss": 1.977, "step": 3021 }, { "epoch": 0.22747887615498974, "grad_norm": 6.302662372589111, "learning_rate": 9.017682365333866e-05, "loss": 2.5235, "step": 3022 }, { "epoch": 0.2275541504356499, "grad_norm": 8.961931228637695, "learning_rate": 9.01695657639274e-05, "loss": 2.1748, "step": 3023 }, { "epoch": 0.22762942471631006, "grad_norm": 4.763588905334473, "learning_rate": 9.016230548655071e-05, "loss": 1.9832, "step": 3024 }, { "epoch": 0.2277046989969702, "grad_norm": 6.08983039855957, "learning_rate": 9.01550428216402e-05, "loss": 2.0133, "step": 3025 }, { "epoch": 0.22777997327763036, "grad_norm": 4.464398384094238, "learning_rate": 9.014777776962762e-05, "loss": 2.1519, "step": 3026 }, { "epoch": 0.2278552475582905, "grad_norm": 4.90877628326416, "learning_rate": 9.014051033094484e-05, "loss": 2.276, "step": 3027 }, { "epoch": 0.22793052183895068, "grad_norm": 3.8603129386901855, "learning_rate": 9.013324050602391e-05, "loss": 1.9222, "step": 3028 }, { "epoch": 0.22800579611961083, "grad_norm": 4.96784782409668, "learning_rate": 9.012596829529697e-05, "loss": 2.118, "step": 3029 }, { "epoch": 0.22808107040027098, "grad_norm": 7.061264514923096, "learning_rate": 9.011869369919636e-05, "loss": 2.3798, "step": 3030 }, { "epoch": 0.22815634468093116, "grad_norm": 4.7067766189575195, "learning_rate": 9.011141671815452e-05, "loss": 2.07, "step": 3031 }, { "epoch": 0.2282316189615913, "grad_norm": 4.883420467376709, "learning_rate": 9.010413735260404e-05, "loss": 2.2647, "step": 3032 }, { "epoch": 0.22830689324225145, "grad_norm": 5.399378776550293, "learning_rate": 9.009685560297768e-05, "loss": 2.0837, "step": 3033 }, { "epoch": 0.2283821675229116, "grad_norm": 4.923521041870117, "learning_rate": 9.00895714697083e-05, "loss": 1.9732, "step": 3034 }, { "epoch": 0.22845744180357178, "grad_norm": 5.1057939529418945, "learning_rate": 9.008228495322892e-05, "loss": 2.117, "step": 3035 }, { "epoch": 0.22853271608423192, "grad_norm": 3.8005971908569336, "learning_rate": 9.007499605397272e-05, "loss": 2.1132, "step": 3036 }, { "epoch": 0.22860799036489207, "grad_norm": 4.073781967163086, "learning_rate": 9.006770477237298e-05, "loss": 2.2012, "step": 3037 }, { "epoch": 0.22868326464555222, "grad_norm": 4.875649452209473, "learning_rate": 9.006041110886317e-05, "loss": 1.9903, "step": 3038 }, { "epoch": 0.2287585389262124, "grad_norm": 4.754485607147217, "learning_rate": 9.005311506387686e-05, "loss": 2.1516, "step": 3039 }, { "epoch": 0.22883381320687254, "grad_norm": 4.883895397186279, "learning_rate": 9.00458166378478e-05, "loss": 2.2613, "step": 3040 }, { "epoch": 0.2289090874875327, "grad_norm": 4.565127849578857, "learning_rate": 9.003851583120981e-05, "loss": 2.1924, "step": 3041 }, { "epoch": 0.22898436176819284, "grad_norm": 5.656824111938477, "learning_rate": 9.003121264439697e-05, "loss": 2.3375, "step": 3042 }, { "epoch": 0.22905963604885302, "grad_norm": 4.560620307922363, "learning_rate": 9.002390707784338e-05, "loss": 2.3094, "step": 3043 }, { "epoch": 0.22913491032951316, "grad_norm": 5.578507423400879, "learning_rate": 9.001659913198336e-05, "loss": 1.891, "step": 3044 }, { "epoch": 0.2292101846101733, "grad_norm": 4.357603549957275, "learning_rate": 9.000928880725136e-05, "loss": 2.0809, "step": 3045 }, { "epoch": 0.2292854588908335, "grad_norm": 3.7798662185668945, "learning_rate": 9.000197610408192e-05, "loss": 1.7311, "step": 3046 }, { "epoch": 0.22936073317149364, "grad_norm": 4.265188694000244, "learning_rate": 8.999466102290978e-05, "loss": 2.3679, "step": 3047 }, { "epoch": 0.22943600745215378, "grad_norm": 7.112925052642822, "learning_rate": 8.998734356416981e-05, "loss": 2.2524, "step": 3048 }, { "epoch": 0.22951128173281393, "grad_norm": 3.864196300506592, "learning_rate": 8.998002372829699e-05, "loss": 2.159, "step": 3049 }, { "epoch": 0.2295865560134741, "grad_norm": 4.573678970336914, "learning_rate": 8.997270151572649e-05, "loss": 1.925, "step": 3050 }, { "epoch": 0.22966183029413426, "grad_norm": 4.890881538391113, "learning_rate": 8.996537692689357e-05, "loss": 1.9123, "step": 3051 }, { "epoch": 0.2297371045747944, "grad_norm": 5.622613430023193, "learning_rate": 8.995804996223367e-05, "loss": 1.9269, "step": 3052 }, { "epoch": 0.22981237885545455, "grad_norm": 6.246424674987793, "learning_rate": 8.995072062218235e-05, "loss": 2.2979, "step": 3053 }, { "epoch": 0.22988765313611473, "grad_norm": 4.573148250579834, "learning_rate": 8.99433889071753e-05, "loss": 2.0967, "step": 3054 }, { "epoch": 0.22996292741677488, "grad_norm": 4.3591179847717285, "learning_rate": 8.993605481764841e-05, "loss": 2.2305, "step": 3055 }, { "epoch": 0.23003820169743502, "grad_norm": 5.699021339416504, "learning_rate": 8.992871835403767e-05, "loss": 1.93, "step": 3056 }, { "epoch": 0.23011347597809517, "grad_norm": 4.548457622528076, "learning_rate": 8.992137951677916e-05, "loss": 2.0882, "step": 3057 }, { "epoch": 0.23018875025875535, "grad_norm": 6.617466449737549, "learning_rate": 8.99140383063092e-05, "loss": 1.6243, "step": 3058 }, { "epoch": 0.2302640245394155, "grad_norm": 4.906440734863281, "learning_rate": 8.99066947230642e-05, "loss": 2.2427, "step": 3059 }, { "epoch": 0.23033929882007564, "grad_norm": 4.505703926086426, "learning_rate": 8.989934876748068e-05, "loss": 2.402, "step": 3060 }, { "epoch": 0.2304145731007358, "grad_norm": 5.4420928955078125, "learning_rate": 8.989200043999537e-05, "loss": 2.0293, "step": 3061 }, { "epoch": 0.23048984738139597, "grad_norm": 4.512494087219238, "learning_rate": 8.988464974104509e-05, "loss": 1.9191, "step": 3062 }, { "epoch": 0.23056512166205612, "grad_norm": 4.833025932312012, "learning_rate": 8.987729667106683e-05, "loss": 2.0507, "step": 3063 }, { "epoch": 0.23064039594271626, "grad_norm": 5.658215045928955, "learning_rate": 8.98699412304977e-05, "loss": 2.0965, "step": 3064 }, { "epoch": 0.23071567022337644, "grad_norm": 6.153744697570801, "learning_rate": 8.986258341977498e-05, "loss": 2.4311, "step": 3065 }, { "epoch": 0.2307909445040366, "grad_norm": 5.608066558837891, "learning_rate": 8.985522323933603e-05, "loss": 2.1879, "step": 3066 }, { "epoch": 0.23086621878469674, "grad_norm": 4.241041660308838, "learning_rate": 8.984786068961843e-05, "loss": 2.2134, "step": 3067 }, { "epoch": 0.23094149306535688, "grad_norm": 5.847661018371582, "learning_rate": 8.984049577105985e-05, "loss": 2.1156, "step": 3068 }, { "epoch": 0.23101676734601706, "grad_norm": 4.819242000579834, "learning_rate": 8.983312848409811e-05, "loss": 2.2144, "step": 3069 }, { "epoch": 0.2310920416266772, "grad_norm": 4.251035690307617, "learning_rate": 8.982575882917119e-05, "loss": 2.1086, "step": 3070 }, { "epoch": 0.23116731590733736, "grad_norm": 5.436933994293213, "learning_rate": 8.981838680671715e-05, "loss": 1.9431, "step": 3071 }, { "epoch": 0.2312425901879975, "grad_norm": 4.8714399337768555, "learning_rate": 8.981101241717427e-05, "loss": 2.123, "step": 3072 }, { "epoch": 0.23131786446865768, "grad_norm": 4.5080671310424805, "learning_rate": 8.980363566098096e-05, "loss": 2.2184, "step": 3073 }, { "epoch": 0.23139313874931783, "grad_norm": 5.637725830078125, "learning_rate": 8.97962565385757e-05, "loss": 2.7501, "step": 3074 }, { "epoch": 0.23146841302997798, "grad_norm": 4.687811374664307, "learning_rate": 8.978887505039717e-05, "loss": 2.127, "step": 3075 }, { "epoch": 0.23154368731063812, "grad_norm": 4.3032355308532715, "learning_rate": 8.97814911968842e-05, "loss": 1.8478, "step": 3076 }, { "epoch": 0.2316189615912983, "grad_norm": 6.097776889801025, "learning_rate": 8.977410497847571e-05, "loss": 2.492, "step": 3077 }, { "epoch": 0.23169423587195845, "grad_norm": 5.614762783050537, "learning_rate": 8.976671639561082e-05, "loss": 1.8767, "step": 3078 }, { "epoch": 0.2317695101526186, "grad_norm": 7.298680782318115, "learning_rate": 8.975932544872874e-05, "loss": 2.2084, "step": 3079 }, { "epoch": 0.23184478443327877, "grad_norm": 5.520145416259766, "learning_rate": 8.975193213826885e-05, "loss": 1.9535, "step": 3080 }, { "epoch": 0.23192005871393892, "grad_norm": 4.96815824508667, "learning_rate": 8.974453646467063e-05, "loss": 1.8722, "step": 3081 }, { "epoch": 0.23199533299459907, "grad_norm": 4.702606201171875, "learning_rate": 8.973713842837378e-05, "loss": 2.298, "step": 3082 }, { "epoch": 0.23207060727525922, "grad_norm": 6.2534284591674805, "learning_rate": 8.972973802981805e-05, "loss": 1.5298, "step": 3083 }, { "epoch": 0.2321458815559194, "grad_norm": 4.1786394119262695, "learning_rate": 8.972233526944343e-05, "loss": 1.8238, "step": 3084 }, { "epoch": 0.23222115583657954, "grad_norm": 5.316036701202393, "learning_rate": 8.971493014768991e-05, "loss": 1.9646, "step": 3085 }, { "epoch": 0.2322964301172397, "grad_norm": 3.139407157897949, "learning_rate": 8.970752266499778e-05, "loss": 2.2641, "step": 3086 }, { "epoch": 0.23237170439789984, "grad_norm": 5.529141902923584, "learning_rate": 8.970011282180734e-05, "loss": 2.1762, "step": 3087 }, { "epoch": 0.23244697867856, "grad_norm": 4.312731742858887, "learning_rate": 8.969270061855911e-05, "loss": 2.1871, "step": 3088 }, { "epoch": 0.23252225295922016, "grad_norm": 4.658344745635986, "learning_rate": 8.968528605569373e-05, "loss": 2.0451, "step": 3089 }, { "epoch": 0.2325975272398803, "grad_norm": 5.684056282043457, "learning_rate": 8.967786913365195e-05, "loss": 2.4996, "step": 3090 }, { "epoch": 0.23267280152054046, "grad_norm": 5.3817458152771, "learning_rate": 8.96704498528747e-05, "loss": 2.2046, "step": 3091 }, { "epoch": 0.23274807580120063, "grad_norm": 5.300045967102051, "learning_rate": 8.966302821380304e-05, "loss": 1.9514, "step": 3092 }, { "epoch": 0.23282335008186078, "grad_norm": 3.844289541244507, "learning_rate": 8.965560421687816e-05, "loss": 1.7736, "step": 3093 }, { "epoch": 0.23289862436252093, "grad_norm": 4.255853652954102, "learning_rate": 8.964817786254138e-05, "loss": 1.8253, "step": 3094 }, { "epoch": 0.2329738986431811, "grad_norm": 6.417258262634277, "learning_rate": 8.964074915123421e-05, "loss": 2.1792, "step": 3095 }, { "epoch": 0.23304917292384125, "grad_norm": 5.512781143188477, "learning_rate": 8.963331808339823e-05, "loss": 2.0537, "step": 3096 }, { "epoch": 0.2331244472045014, "grad_norm": 4.924642562866211, "learning_rate": 8.962588465947522e-05, "loss": 2.0586, "step": 3097 }, { "epoch": 0.23319972148516155, "grad_norm": 4.606882095336914, "learning_rate": 8.961844887990706e-05, "loss": 2.0768, "step": 3098 }, { "epoch": 0.23327499576582172, "grad_norm": 4.865094184875488, "learning_rate": 8.96110107451358e-05, "loss": 1.9203, "step": 3099 }, { "epoch": 0.23335027004648187, "grad_norm": 4.5715837478637695, "learning_rate": 8.960357025560359e-05, "loss": 2.0566, "step": 3100 }, { "epoch": 0.23342554432714202, "grad_norm": 3.477773904800415, "learning_rate": 8.959612741175278e-05, "loss": 1.776, "step": 3101 }, { "epoch": 0.23350081860780217, "grad_norm": 4.101693153381348, "learning_rate": 8.958868221402581e-05, "loss": 2.4927, "step": 3102 }, { "epoch": 0.23357609288846234, "grad_norm": 4.088027000427246, "learning_rate": 8.958123466286528e-05, "loss": 2.0329, "step": 3103 }, { "epoch": 0.2336513671691225, "grad_norm": 4.115071773529053, "learning_rate": 8.95737847587139e-05, "loss": 1.8868, "step": 3104 }, { "epoch": 0.23372664144978264, "grad_norm": 3.3214151859283447, "learning_rate": 8.956633250201457e-05, "loss": 2.2385, "step": 3105 }, { "epoch": 0.2338019157304428, "grad_norm": 4.605666637420654, "learning_rate": 8.955887789321031e-05, "loss": 1.8063, "step": 3106 }, { "epoch": 0.23387719001110296, "grad_norm": 4.58540678024292, "learning_rate": 8.955142093274426e-05, "loss": 2.394, "step": 3107 }, { "epoch": 0.2339524642917631, "grad_norm": 3.4408278465270996, "learning_rate": 8.954396162105974e-05, "loss": 1.9515, "step": 3108 }, { "epoch": 0.23402773857242326, "grad_norm": 6.240958213806152, "learning_rate": 8.953649995860015e-05, "loss": 2.2987, "step": 3109 }, { "epoch": 0.23410301285308344, "grad_norm": 4.513462066650391, "learning_rate": 8.952903594580908e-05, "loss": 2.0325, "step": 3110 }, { "epoch": 0.23417828713374358, "grad_norm": 4.148117542266846, "learning_rate": 8.952156958313025e-05, "loss": 2.3954, "step": 3111 }, { "epoch": 0.23425356141440373, "grad_norm": 4.6144609451293945, "learning_rate": 8.95141008710075e-05, "loss": 2.0167, "step": 3112 }, { "epoch": 0.23432883569506388, "grad_norm": 3.2475714683532715, "learning_rate": 8.950662980988483e-05, "loss": 1.8625, "step": 3113 }, { "epoch": 0.23440410997572406, "grad_norm": 3.3102614879608154, "learning_rate": 8.949915640020639e-05, "loss": 1.84, "step": 3114 }, { "epoch": 0.2344793842563842, "grad_norm": 4.453986644744873, "learning_rate": 8.949168064241643e-05, "loss": 2.0672, "step": 3115 }, { "epoch": 0.23455465853704435, "grad_norm": 5.168568134307861, "learning_rate": 8.948420253695937e-05, "loss": 1.8874, "step": 3116 }, { "epoch": 0.2346299328177045, "grad_norm": 5.015261173248291, "learning_rate": 8.947672208427976e-05, "loss": 2.2946, "step": 3117 }, { "epoch": 0.23470520709836468, "grad_norm": 6.297513961791992, "learning_rate": 8.94692392848223e-05, "loss": 1.9489, "step": 3118 }, { "epoch": 0.23478048137902482, "grad_norm": 4.652873992919922, "learning_rate": 8.946175413903181e-05, "loss": 1.7196, "step": 3119 }, { "epoch": 0.23485575565968497, "grad_norm": 5.195075511932373, "learning_rate": 8.945426664735326e-05, "loss": 2.1111, "step": 3120 }, { "epoch": 0.23493102994034512, "grad_norm": 4.507716178894043, "learning_rate": 8.944677681023176e-05, "loss": 2.232, "step": 3121 }, { "epoch": 0.2350063042210053, "grad_norm": 5.152048110961914, "learning_rate": 8.943928462811259e-05, "loss": 2.3662, "step": 3122 }, { "epoch": 0.23508157850166544, "grad_norm": 6.105501174926758, "learning_rate": 8.943179010144109e-05, "loss": 2.0616, "step": 3123 }, { "epoch": 0.2351568527823256, "grad_norm": 4.871524810791016, "learning_rate": 8.942429323066282e-05, "loss": 2.1278, "step": 3124 }, { "epoch": 0.23523212706298574, "grad_norm": 6.164617538452148, "learning_rate": 8.941679401622344e-05, "loss": 1.8989, "step": 3125 }, { "epoch": 0.23530740134364592, "grad_norm": 5.468104839324951, "learning_rate": 8.940929245856875e-05, "loss": 2.038, "step": 3126 }, { "epoch": 0.23538267562430606, "grad_norm": 4.448368072509766, "learning_rate": 8.940178855814469e-05, "loss": 1.8849, "step": 3127 }, { "epoch": 0.2354579499049662, "grad_norm": 6.080448150634766, "learning_rate": 8.939428231539738e-05, "loss": 1.8613, "step": 3128 }, { "epoch": 0.2355332241856264, "grad_norm": 6.833067893981934, "learning_rate": 8.938677373077302e-05, "loss": 2.4226, "step": 3129 }, { "epoch": 0.23560849846628654, "grad_norm": 9.775557518005371, "learning_rate": 8.937926280471796e-05, "loss": 2.1885, "step": 3130 }, { "epoch": 0.23568377274694668, "grad_norm": 4.300649166107178, "learning_rate": 8.937174953767872e-05, "loss": 1.9099, "step": 3131 }, { "epoch": 0.23575904702760683, "grad_norm": 4.808737754821777, "learning_rate": 8.936423393010194e-05, "loss": 1.9384, "step": 3132 }, { "epoch": 0.235834321308267, "grad_norm": 5.373995304107666, "learning_rate": 8.935671598243441e-05, "loss": 2.1004, "step": 3133 }, { "epoch": 0.23590959558892716, "grad_norm": 5.3765645027160645, "learning_rate": 8.934919569512303e-05, "loss": 2.1998, "step": 3134 }, { "epoch": 0.2359848698695873, "grad_norm": 4.715597629547119, "learning_rate": 8.934167306861488e-05, "loss": 2.1741, "step": 3135 }, { "epoch": 0.23606014415024745, "grad_norm": 5.471828937530518, "learning_rate": 8.933414810335716e-05, "loss": 2.1155, "step": 3136 }, { "epoch": 0.23613541843090763, "grad_norm": 5.577611923217773, "learning_rate": 8.932662079979718e-05, "loss": 2.01, "step": 3137 }, { "epoch": 0.23621069271156778, "grad_norm": 4.3506059646606445, "learning_rate": 8.931909115838244e-05, "loss": 2.1198, "step": 3138 }, { "epoch": 0.23628596699222792, "grad_norm": 3.760789632797241, "learning_rate": 8.931155917956055e-05, "loss": 2.2164, "step": 3139 }, { "epoch": 0.23636124127288807, "grad_norm": 5.615194320678711, "learning_rate": 8.930402486377927e-05, "loss": 2.0517, "step": 3140 }, { "epoch": 0.23643651555354825, "grad_norm": 4.123208999633789, "learning_rate": 8.929648821148648e-05, "loss": 1.8561, "step": 3141 }, { "epoch": 0.2365117898342084, "grad_norm": 4.531689167022705, "learning_rate": 8.928894922313022e-05, "loss": 2.0198, "step": 3142 }, { "epoch": 0.23658706411486854, "grad_norm": 6.550711154937744, "learning_rate": 8.928140789915866e-05, "loss": 2.0334, "step": 3143 }, { "epoch": 0.23666233839552872, "grad_norm": 4.847607135772705, "learning_rate": 8.927386424002011e-05, "loss": 2.4329, "step": 3144 }, { "epoch": 0.23673761267618887, "grad_norm": 4.96705436706543, "learning_rate": 8.926631824616303e-05, "loss": 2.1111, "step": 3145 }, { "epoch": 0.23681288695684902, "grad_norm": 5.4653120040893555, "learning_rate": 8.925876991803599e-05, "loss": 1.7965, "step": 3146 }, { "epoch": 0.23688816123750916, "grad_norm": 3.8454763889312744, "learning_rate": 8.925121925608772e-05, "loss": 2.2273, "step": 3147 }, { "epoch": 0.23696343551816934, "grad_norm": 5.1777663230896, "learning_rate": 8.924366626076711e-05, "loss": 2.1696, "step": 3148 }, { "epoch": 0.2370387097988295, "grad_norm": 4.664035320281982, "learning_rate": 8.923611093252313e-05, "loss": 1.9461, "step": 3149 }, { "epoch": 0.23711398407948964, "grad_norm": 4.680776596069336, "learning_rate": 8.922855327180494e-05, "loss": 2.1509, "step": 3150 }, { "epoch": 0.23718925836014979, "grad_norm": 4.545822620391846, "learning_rate": 8.922099327906181e-05, "loss": 2.1153, "step": 3151 }, { "epoch": 0.23726453264080996, "grad_norm": 4.652801036834717, "learning_rate": 8.921343095474317e-05, "loss": 1.9073, "step": 3152 }, { "epoch": 0.2373398069214701, "grad_norm": 3.912487506866455, "learning_rate": 8.920586629929858e-05, "loss": 2.0896, "step": 3153 }, { "epoch": 0.23741508120213026, "grad_norm": 3.8954620361328125, "learning_rate": 8.919829931317774e-05, "loss": 2.1027, "step": 3154 }, { "epoch": 0.2374903554827904, "grad_norm": 5.0088982582092285, "learning_rate": 8.919072999683046e-05, "loss": 2.1641, "step": 3155 }, { "epoch": 0.23756562976345058, "grad_norm": 4.781058311462402, "learning_rate": 8.918315835070674e-05, "loss": 2.0671, "step": 3156 }, { "epoch": 0.23764090404411073, "grad_norm": 3.9386327266693115, "learning_rate": 8.917558437525668e-05, "loss": 2.2063, "step": 3157 }, { "epoch": 0.23771617832477088, "grad_norm": 4.667530059814453, "learning_rate": 8.916800807093056e-05, "loss": 1.9672, "step": 3158 }, { "epoch": 0.23779145260543105, "grad_norm": 3.55936598777771, "learning_rate": 8.916042943817872e-05, "loss": 2.3099, "step": 3159 }, { "epoch": 0.2378667268860912, "grad_norm": 5.442907333374023, "learning_rate": 8.915284847745171e-05, "loss": 2.1016, "step": 3160 }, { "epoch": 0.23794200116675135, "grad_norm": 5.0765252113342285, "learning_rate": 8.914526518920022e-05, "loss": 2.1358, "step": 3161 }, { "epoch": 0.2380172754474115, "grad_norm": 5.0043559074401855, "learning_rate": 8.913767957387502e-05, "loss": 1.9611, "step": 3162 }, { "epoch": 0.23809254972807167, "grad_norm": 4.878602981567383, "learning_rate": 8.913009163192707e-05, "loss": 2.0599, "step": 3163 }, { "epoch": 0.23816782400873182, "grad_norm": 5.485286235809326, "learning_rate": 8.912250136380746e-05, "loss": 1.6888, "step": 3164 }, { "epoch": 0.23824309828939197, "grad_norm": 4.859185695648193, "learning_rate": 8.91149087699674e-05, "loss": 1.624, "step": 3165 }, { "epoch": 0.23831837257005212, "grad_norm": 4.253639221191406, "learning_rate": 8.910731385085824e-05, "loss": 1.9612, "step": 3166 }, { "epoch": 0.2383936468507123, "grad_norm": 5.439131259918213, "learning_rate": 8.909971660693148e-05, "loss": 1.9577, "step": 3167 }, { "epoch": 0.23846892113137244, "grad_norm": 6.699115753173828, "learning_rate": 8.909211703863876e-05, "loss": 1.7809, "step": 3168 }, { "epoch": 0.2385441954120326, "grad_norm": 5.474216938018799, "learning_rate": 8.908451514643185e-05, "loss": 2.4552, "step": 3169 }, { "epoch": 0.23861946969269274, "grad_norm": 4.317144870758057, "learning_rate": 8.907691093076266e-05, "loss": 1.899, "step": 3170 }, { "epoch": 0.2386947439733529, "grad_norm": 5.524565696716309, "learning_rate": 8.906930439208323e-05, "loss": 1.7165, "step": 3171 }, { "epoch": 0.23877001825401306, "grad_norm": 5.491680145263672, "learning_rate": 8.906169553084577e-05, "loss": 2.2379, "step": 3172 }, { "epoch": 0.2388452925346732, "grad_norm": 4.068700790405273, "learning_rate": 8.905408434750259e-05, "loss": 1.9099, "step": 3173 }, { "epoch": 0.23892056681533336, "grad_norm": 5.242240905761719, "learning_rate": 8.904647084250615e-05, "loss": 1.7492, "step": 3174 }, { "epoch": 0.23899584109599353, "grad_norm": 5.466667652130127, "learning_rate": 8.903885501630907e-05, "loss": 2.2747, "step": 3175 }, { "epoch": 0.23907111537665368, "grad_norm": 5.807854175567627, "learning_rate": 8.903123686936406e-05, "loss": 1.9065, "step": 3176 }, { "epoch": 0.23914638965731383, "grad_norm": 5.579805850982666, "learning_rate": 8.9023616402124e-05, "loss": 1.7062, "step": 3177 }, { "epoch": 0.239221663937974, "grad_norm": 4.477570056915283, "learning_rate": 8.901599361504194e-05, "loss": 2.6109, "step": 3178 }, { "epoch": 0.23929693821863415, "grad_norm": 3.5368828773498535, "learning_rate": 8.900836850857099e-05, "loss": 1.9125, "step": 3179 }, { "epoch": 0.2393722124992943, "grad_norm": 5.75269079208374, "learning_rate": 8.900074108316448e-05, "loss": 1.7974, "step": 3180 }, { "epoch": 0.23944748677995445, "grad_norm": 4.291672706604004, "learning_rate": 8.899311133927579e-05, "loss": 2.0582, "step": 3181 }, { "epoch": 0.23952276106061463, "grad_norm": 3.6255385875701904, "learning_rate": 8.898547927735854e-05, "loss": 2.144, "step": 3182 }, { "epoch": 0.23959803534127477, "grad_norm": 6.381038188934326, "learning_rate": 8.89778448978664e-05, "loss": 1.8225, "step": 3183 }, { "epoch": 0.23967330962193492, "grad_norm": 4.766451358795166, "learning_rate": 8.897020820125322e-05, "loss": 2.016, "step": 3184 }, { "epoch": 0.23974858390259507, "grad_norm": 5.317236423492432, "learning_rate": 8.896256918797298e-05, "loss": 1.9968, "step": 3185 }, { "epoch": 0.23982385818325525, "grad_norm": 5.136428356170654, "learning_rate": 8.89549278584798e-05, "loss": 2.1573, "step": 3186 }, { "epoch": 0.2398991324639154, "grad_norm": 6.299529552459717, "learning_rate": 8.894728421322793e-05, "loss": 2.4789, "step": 3187 }, { "epoch": 0.23997440674457554, "grad_norm": 6.992971420288086, "learning_rate": 8.893963825267177e-05, "loss": 2.433, "step": 3188 }, { "epoch": 0.2400496810252357, "grad_norm": 4.698974609375, "learning_rate": 8.893198997726583e-05, "loss": 2.0936, "step": 3189 }, { "epoch": 0.24012495530589587, "grad_norm": 5.291773319244385, "learning_rate": 8.892433938746483e-05, "loss": 1.9175, "step": 3190 }, { "epoch": 0.240200229586556, "grad_norm": 7.637009620666504, "learning_rate": 8.89166864837235e-05, "loss": 2.3605, "step": 3191 }, { "epoch": 0.24027550386721616, "grad_norm": 4.988747596740723, "learning_rate": 8.890903126649685e-05, "loss": 1.9909, "step": 3192 }, { "epoch": 0.24035077814787634, "grad_norm": 4.667901039123535, "learning_rate": 8.890137373623992e-05, "loss": 2.093, "step": 3193 }, { "epoch": 0.24042605242853649, "grad_norm": 4.370298862457275, "learning_rate": 8.889371389340795e-05, "loss": 1.8265, "step": 3194 }, { "epoch": 0.24050132670919663, "grad_norm": 5.158820152282715, "learning_rate": 8.888605173845628e-05, "loss": 2.4405, "step": 3195 }, { "epoch": 0.24057660098985678, "grad_norm": 4.049282550811768, "learning_rate": 8.887838727184041e-05, "loss": 2.0967, "step": 3196 }, { "epoch": 0.24065187527051696, "grad_norm": 5.029526710510254, "learning_rate": 8.887072049401599e-05, "loss": 2.2693, "step": 3197 }, { "epoch": 0.2407271495511771, "grad_norm": 4.659379482269287, "learning_rate": 8.886305140543876e-05, "loss": 2.2833, "step": 3198 }, { "epoch": 0.24080242383183725, "grad_norm": 7.6814141273498535, "learning_rate": 8.885538000656462e-05, "loss": 2.7273, "step": 3199 }, { "epoch": 0.2408776981124974, "grad_norm": 4.647086143493652, "learning_rate": 8.884770629784965e-05, "loss": 2.2066, "step": 3200 }, { "epoch": 0.24095297239315758, "grad_norm": 6.569813251495361, "learning_rate": 8.884003027974999e-05, "loss": 2.1592, "step": 3201 }, { "epoch": 0.24102824667381773, "grad_norm": 5.138356685638428, "learning_rate": 8.883235195272196e-05, "loss": 1.9341, "step": 3202 }, { "epoch": 0.24110352095447787, "grad_norm": 4.743891716003418, "learning_rate": 8.882467131722206e-05, "loss": 2.1359, "step": 3203 }, { "epoch": 0.24117879523513802, "grad_norm": 3.7779014110565186, "learning_rate": 8.881698837370685e-05, "loss": 1.9735, "step": 3204 }, { "epoch": 0.2412540695157982, "grad_norm": 6.693047046661377, "learning_rate": 8.880930312263304e-05, "loss": 2.2381, "step": 3205 }, { "epoch": 0.24132934379645835, "grad_norm": 7.864299774169922, "learning_rate": 8.880161556445751e-05, "loss": 2.1537, "step": 3206 }, { "epoch": 0.2414046180771185, "grad_norm": 4.908127307891846, "learning_rate": 8.87939256996373e-05, "loss": 2.1698, "step": 3207 }, { "epoch": 0.24147989235777867, "grad_norm": 4.617619037628174, "learning_rate": 8.87862335286295e-05, "loss": 2.33, "step": 3208 }, { "epoch": 0.24155516663843882, "grad_norm": 5.731082916259766, "learning_rate": 8.877853905189139e-05, "loss": 1.8775, "step": 3209 }, { "epoch": 0.24163044091909897, "grad_norm": 5.916258811950684, "learning_rate": 8.877084226988043e-05, "loss": 2.1458, "step": 3210 }, { "epoch": 0.2417057151997591, "grad_norm": 4.0186920166015625, "learning_rate": 8.876314318305415e-05, "loss": 1.8395, "step": 3211 }, { "epoch": 0.2417809894804193, "grad_norm": 4.932983875274658, "learning_rate": 8.87554417918702e-05, "loss": 2.1925, "step": 3212 }, { "epoch": 0.24185626376107944, "grad_norm": 4.3356194496154785, "learning_rate": 8.874773809678644e-05, "loss": 2.1817, "step": 3213 }, { "epoch": 0.24193153804173959, "grad_norm": 5.016956806182861, "learning_rate": 8.874003209826087e-05, "loss": 2.0678, "step": 3214 }, { "epoch": 0.24200681232239973, "grad_norm": 4.181794166564941, "learning_rate": 8.873232379675151e-05, "loss": 1.9835, "step": 3215 }, { "epoch": 0.2420820866030599, "grad_norm": 5.459475040435791, "learning_rate": 8.872461319271665e-05, "loss": 2.3716, "step": 3216 }, { "epoch": 0.24215736088372006, "grad_norm": 5.499965667724609, "learning_rate": 8.871690028661465e-05, "loss": 2.4414, "step": 3217 }, { "epoch": 0.2422326351643802, "grad_norm": 4.132140159606934, "learning_rate": 8.8709185078904e-05, "loss": 2.0363, "step": 3218 }, { "epoch": 0.24230790944504035, "grad_norm": 6.4035258293151855, "learning_rate": 8.870146757004338e-05, "loss": 1.8633, "step": 3219 }, { "epoch": 0.24238318372570053, "grad_norm": 3.8113491535186768, "learning_rate": 8.869374776049157e-05, "loss": 2.4163, "step": 3220 }, { "epoch": 0.24245845800636068, "grad_norm": 3.9124670028686523, "learning_rate": 8.868602565070747e-05, "loss": 1.9247, "step": 3221 }, { "epoch": 0.24253373228702083, "grad_norm": 4.697439670562744, "learning_rate": 8.867830124115016e-05, "loss": 1.9543, "step": 3222 }, { "epoch": 0.242609006567681, "grad_norm": 5.2570271492004395, "learning_rate": 8.867057453227881e-05, "loss": 1.7818, "step": 3223 }, { "epoch": 0.24268428084834115, "grad_norm": 4.84550142288208, "learning_rate": 8.866284552455276e-05, "loss": 2.4094, "step": 3224 }, { "epoch": 0.2427595551290013, "grad_norm": 4.9503912925720215, "learning_rate": 8.865511421843149e-05, "loss": 2.222, "step": 3225 }, { "epoch": 0.24283482940966145, "grad_norm": 5.553617000579834, "learning_rate": 8.864738061437459e-05, "loss": 2.0724, "step": 3226 }, { "epoch": 0.24291010369032162, "grad_norm": 4.538109302520752, "learning_rate": 8.863964471284179e-05, "loss": 2.1492, "step": 3227 }, { "epoch": 0.24298537797098177, "grad_norm": 3.849200487136841, "learning_rate": 8.8631906514293e-05, "loss": 2.0135, "step": 3228 }, { "epoch": 0.24306065225164192, "grad_norm": 5.269128322601318, "learning_rate": 8.862416601918822e-05, "loss": 2.5036, "step": 3229 }, { "epoch": 0.24313592653230207, "grad_norm": 4.566773891448975, "learning_rate": 8.861642322798757e-05, "loss": 2.2918, "step": 3230 }, { "epoch": 0.24321120081296224, "grad_norm": 5.218326568603516, "learning_rate": 8.860867814115138e-05, "loss": 2.1897, "step": 3231 }, { "epoch": 0.2432864750936224, "grad_norm": 5.25179386138916, "learning_rate": 8.860093075914005e-05, "loss": 1.9741, "step": 3232 }, { "epoch": 0.24336174937428254, "grad_norm": 4.57682991027832, "learning_rate": 8.859318108241414e-05, "loss": 2.5103, "step": 3233 }, { "epoch": 0.24343702365494269, "grad_norm": 3.9245426654815674, "learning_rate": 8.858542911143437e-05, "loss": 2.0015, "step": 3234 }, { "epoch": 0.24351229793560286, "grad_norm": 5.288936138153076, "learning_rate": 8.857767484666153e-05, "loss": 2.2405, "step": 3235 }, { "epoch": 0.243587572216263, "grad_norm": 4.212207317352295, "learning_rate": 8.856991828855663e-05, "loss": 1.9943, "step": 3236 }, { "epoch": 0.24366284649692316, "grad_norm": 4.822325229644775, "learning_rate": 8.856215943758076e-05, "loss": 2.3207, "step": 3237 }, { "epoch": 0.2437381207775833, "grad_norm": 4.394848823547363, "learning_rate": 8.855439829419514e-05, "loss": 2.0547, "step": 3238 }, { "epoch": 0.24381339505824348, "grad_norm": 6.581297397613525, "learning_rate": 8.854663485886116e-05, "loss": 1.9425, "step": 3239 }, { "epoch": 0.24388866933890363, "grad_norm": 3.85432767868042, "learning_rate": 8.853886913204037e-05, "loss": 1.9968, "step": 3240 }, { "epoch": 0.24396394361956378, "grad_norm": 5.349886417388916, "learning_rate": 8.853110111419437e-05, "loss": 2.0066, "step": 3241 }, { "epoch": 0.24403921790022395, "grad_norm": 4.470263481140137, "learning_rate": 8.852333080578497e-05, "loss": 1.9458, "step": 3242 }, { "epoch": 0.2441144921808841, "grad_norm": 6.385467052459717, "learning_rate": 8.85155582072741e-05, "loss": 2.3326, "step": 3243 }, { "epoch": 0.24418976646154425, "grad_norm": 6.3552422523498535, "learning_rate": 8.85077833191238e-05, "loss": 2.4346, "step": 3244 }, { "epoch": 0.2442650407422044, "grad_norm": 5.7520060539245605, "learning_rate": 8.850000614179627e-05, "loss": 1.9974, "step": 3245 }, { "epoch": 0.24434031502286457, "grad_norm": 5.259435176849365, "learning_rate": 8.849222667575384e-05, "loss": 2.2463, "step": 3246 }, { "epoch": 0.24441558930352472, "grad_norm": 6.514979839324951, "learning_rate": 8.8484444921459e-05, "loss": 2.0718, "step": 3247 }, { "epoch": 0.24449086358418487, "grad_norm": 4.0444560050964355, "learning_rate": 8.847666087937432e-05, "loss": 2.1929, "step": 3248 }, { "epoch": 0.24456613786484502, "grad_norm": 6.167499542236328, "learning_rate": 8.846887454996254e-05, "loss": 1.8931, "step": 3249 }, { "epoch": 0.2446414121455052, "grad_norm": 5.100831031799316, "learning_rate": 8.846108593368656e-05, "loss": 2.031, "step": 3250 }, { "epoch": 0.24471668642616534, "grad_norm": 4.207341194152832, "learning_rate": 8.845329503100939e-05, "loss": 2.1415, "step": 3251 }, { "epoch": 0.2447919607068255, "grad_norm": 5.961008071899414, "learning_rate": 8.844550184239415e-05, "loss": 2.0773, "step": 3252 }, { "epoch": 0.24486723498748564, "grad_norm": 4.484932899475098, "learning_rate": 8.843770636830415e-05, "loss": 1.792, "step": 3253 }, { "epoch": 0.2449425092681458, "grad_norm": 6.538995265960693, "learning_rate": 8.842990860920279e-05, "loss": 1.9059, "step": 3254 }, { "epoch": 0.24501778354880596, "grad_norm": 5.3386149406433105, "learning_rate": 8.842210856555362e-05, "loss": 2.0881, "step": 3255 }, { "epoch": 0.2450930578294661, "grad_norm": 3.9098312854766846, "learning_rate": 8.841430623782035e-05, "loss": 1.9012, "step": 3256 }, { "epoch": 0.24516833211012629, "grad_norm": 4.072517395019531, "learning_rate": 8.84065016264668e-05, "loss": 2.1567, "step": 3257 }, { "epoch": 0.24524360639078643, "grad_norm": 3.917480707168579, "learning_rate": 8.839869473195693e-05, "loss": 1.7459, "step": 3258 }, { "epoch": 0.24531888067144658, "grad_norm": 4.662839412689209, "learning_rate": 8.839088555475484e-05, "loss": 1.9131, "step": 3259 }, { "epoch": 0.24539415495210673, "grad_norm": 4.041963577270508, "learning_rate": 8.838307409532475e-05, "loss": 2.0014, "step": 3260 }, { "epoch": 0.2454694292327669, "grad_norm": 5.345216751098633, "learning_rate": 8.837526035413104e-05, "loss": 2.0544, "step": 3261 }, { "epoch": 0.24554470351342705, "grad_norm": 4.985344409942627, "learning_rate": 8.836744433163822e-05, "loss": 2.9741, "step": 3262 }, { "epoch": 0.2456199777940872, "grad_norm": 6.469967365264893, "learning_rate": 8.835962602831092e-05, "loss": 2.3676, "step": 3263 }, { "epoch": 0.24569525207474735, "grad_norm": 3.80484938621521, "learning_rate": 8.835180544461391e-05, "loss": 1.828, "step": 3264 }, { "epoch": 0.24577052635540753, "grad_norm": 3.9377801418304443, "learning_rate": 8.834398258101212e-05, "loss": 2.4294, "step": 3265 }, { "epoch": 0.24584580063606767, "grad_norm": 5.02417516708374, "learning_rate": 8.833615743797058e-05, "loss": 2.1795, "step": 3266 }, { "epoch": 0.24592107491672782, "grad_norm": 3.6834230422973633, "learning_rate": 8.832833001595449e-05, "loss": 1.9889, "step": 3267 }, { "epoch": 0.24599634919738797, "grad_norm": 4.532587051391602, "learning_rate": 8.832050031542915e-05, "loss": 2.2274, "step": 3268 }, { "epoch": 0.24607162347804815, "grad_norm": 3.6254334449768066, "learning_rate": 8.831266833686001e-05, "loss": 1.7468, "step": 3269 }, { "epoch": 0.2461468977587083, "grad_norm": 5.678330421447754, "learning_rate": 8.830483408071269e-05, "loss": 2.2031, "step": 3270 }, { "epoch": 0.24622217203936844, "grad_norm": 4.265519142150879, "learning_rate": 8.829699754745287e-05, "loss": 2.078, "step": 3271 }, { "epoch": 0.24629744632002862, "grad_norm": 7.205012321472168, "learning_rate": 8.828915873754644e-05, "loss": 2.0402, "step": 3272 }, { "epoch": 0.24637272060068877, "grad_norm": 6.666203498840332, "learning_rate": 8.82813176514594e-05, "loss": 2.0742, "step": 3273 }, { "epoch": 0.24644799488134891, "grad_norm": 6.356449604034424, "learning_rate": 8.827347428965785e-05, "loss": 2.1263, "step": 3274 }, { "epoch": 0.24652326916200906, "grad_norm": 4.000171661376953, "learning_rate": 8.826562865260807e-05, "loss": 2.2328, "step": 3275 }, { "epoch": 0.24659854344266924, "grad_norm": 5.625492572784424, "learning_rate": 8.825778074077649e-05, "loss": 2.0938, "step": 3276 }, { "epoch": 0.24667381772332939, "grad_norm": 5.216203212738037, "learning_rate": 8.82499305546296e-05, "loss": 2.0113, "step": 3277 }, { "epoch": 0.24674909200398953, "grad_norm": 5.553715229034424, "learning_rate": 8.82420780946341e-05, "loss": 2.0522, "step": 3278 }, { "epoch": 0.24682436628464968, "grad_norm": 4.141896724700928, "learning_rate": 8.823422336125677e-05, "loss": 2.1696, "step": 3279 }, { "epoch": 0.24689964056530986, "grad_norm": 4.694346904754639, "learning_rate": 8.822636635496457e-05, "loss": 2.2712, "step": 3280 }, { "epoch": 0.24697491484597, "grad_norm": 5.814883708953857, "learning_rate": 8.821850707622458e-05, "loss": 2.3358, "step": 3281 }, { "epoch": 0.24705018912663015, "grad_norm": 5.676123142242432, "learning_rate": 8.821064552550399e-05, "loss": 2.1686, "step": 3282 }, { "epoch": 0.2471254634072903, "grad_norm": 4.168556213378906, "learning_rate": 8.820278170327018e-05, "loss": 2.0238, "step": 3283 }, { "epoch": 0.24720073768795048, "grad_norm": 5.1784257888793945, "learning_rate": 8.819491560999062e-05, "loss": 2.2653, "step": 3284 }, { "epoch": 0.24727601196861063, "grad_norm": 6.038859844207764, "learning_rate": 8.818704724613291e-05, "loss": 2.0224, "step": 3285 }, { "epoch": 0.24735128624927077, "grad_norm": 4.4210686683654785, "learning_rate": 8.817917661216482e-05, "loss": 2.1923, "step": 3286 }, { "epoch": 0.24742656052993092, "grad_norm": 4.8354411125183105, "learning_rate": 8.817130370855422e-05, "loss": 2.2924, "step": 3287 }, { "epoch": 0.2475018348105911, "grad_norm": 4.830492973327637, "learning_rate": 8.816342853576913e-05, "loss": 1.903, "step": 3288 }, { "epoch": 0.24757710909125125, "grad_norm": 4.170045375823975, "learning_rate": 8.815555109427773e-05, "loss": 1.7754, "step": 3289 }, { "epoch": 0.2476523833719114, "grad_norm": 4.566800594329834, "learning_rate": 8.814767138454832e-05, "loss": 2.1097, "step": 3290 }, { "epoch": 0.24772765765257157, "grad_norm": 4.780071258544922, "learning_rate": 8.813978940704927e-05, "loss": 1.6647, "step": 3291 }, { "epoch": 0.24780293193323172, "grad_norm": 5.604640483856201, "learning_rate": 8.813190516224919e-05, "loss": 1.8964, "step": 3292 }, { "epoch": 0.24787820621389187, "grad_norm": 6.145547389984131, "learning_rate": 8.812401865061675e-05, "loss": 2.3407, "step": 3293 }, { "epoch": 0.24795348049455201, "grad_norm": 6.518482208251953, "learning_rate": 8.811612987262079e-05, "loss": 2.0306, "step": 3294 }, { "epoch": 0.2480287547752122, "grad_norm": 6.609365940093994, "learning_rate": 8.810823882873029e-05, "loss": 2.6311, "step": 3295 }, { "epoch": 0.24810402905587234, "grad_norm": 6.722716808319092, "learning_rate": 8.810034551941432e-05, "loss": 2.2381, "step": 3296 }, { "epoch": 0.2481793033365325, "grad_norm": 4.61924409866333, "learning_rate": 8.809244994514216e-05, "loss": 1.8746, "step": 3297 }, { "epoch": 0.24825457761719263, "grad_norm": 4.667189121246338, "learning_rate": 8.808455210638313e-05, "loss": 2.0654, "step": 3298 }, { "epoch": 0.2483298518978528, "grad_norm": 4.460404396057129, "learning_rate": 8.807665200360675e-05, "loss": 2.2116, "step": 3299 }, { "epoch": 0.24840512617851296, "grad_norm": 3.8346314430236816, "learning_rate": 8.806874963728267e-05, "loss": 2.0176, "step": 3300 }, { "epoch": 0.2484804004591731, "grad_norm": 4.841716289520264, "learning_rate": 8.806084500788065e-05, "loss": 1.6538, "step": 3301 }, { "epoch": 0.24855567473983325, "grad_norm": 6.082234859466553, "learning_rate": 8.80529381158706e-05, "loss": 2.4185, "step": 3302 }, { "epoch": 0.24863094902049343, "grad_norm": 5.5185394287109375, "learning_rate": 8.80450289617226e-05, "loss": 2.261, "step": 3303 }, { "epoch": 0.24870622330115358, "grad_norm": 5.971257209777832, "learning_rate": 8.803711754590676e-05, "loss": 2.0064, "step": 3304 }, { "epoch": 0.24878149758181373, "grad_norm": 4.2381744384765625, "learning_rate": 8.802920386889341e-05, "loss": 2.1358, "step": 3305 }, { "epoch": 0.2488567718624739, "grad_norm": 5.927640914916992, "learning_rate": 8.802128793115303e-05, "loss": 2.4818, "step": 3306 }, { "epoch": 0.24893204614313405, "grad_norm": 3.640798568725586, "learning_rate": 8.801336973315619e-05, "loss": 1.7848, "step": 3307 }, { "epoch": 0.2490073204237942, "grad_norm": 3.7607762813568115, "learning_rate": 8.800544927537356e-05, "loss": 2.4525, "step": 3308 }, { "epoch": 0.24908259470445435, "grad_norm": 4.451687812805176, "learning_rate": 8.799752655827604e-05, "loss": 1.9191, "step": 3309 }, { "epoch": 0.24915786898511452, "grad_norm": 4.012941360473633, "learning_rate": 8.798960158233458e-05, "loss": 2.0683, "step": 3310 }, { "epoch": 0.24923314326577467, "grad_norm": 2.915116310119629, "learning_rate": 8.798167434802033e-05, "loss": 2.0742, "step": 3311 }, { "epoch": 0.24930841754643482, "grad_norm": 3.7754080295562744, "learning_rate": 8.797374485580451e-05, "loss": 1.7844, "step": 3312 }, { "epoch": 0.24938369182709497, "grad_norm": 3.72605299949646, "learning_rate": 8.796581310615851e-05, "loss": 2.0949, "step": 3313 }, { "epoch": 0.24945896610775514, "grad_norm": 4.392784118652344, "learning_rate": 8.795787909955387e-05, "loss": 2.0711, "step": 3314 }, { "epoch": 0.2495342403884153, "grad_norm": 4.005949974060059, "learning_rate": 8.794994283646223e-05, "loss": 1.9918, "step": 3315 }, { "epoch": 0.24960951466907544, "grad_norm": 5.2048845291137695, "learning_rate": 8.794200431735537e-05, "loss": 2.0352, "step": 3316 }, { "epoch": 0.2496847889497356, "grad_norm": 3.9257380962371826, "learning_rate": 8.793406354270523e-05, "loss": 1.9416, "step": 3317 }, { "epoch": 0.24976006323039576, "grad_norm": 6.32564640045166, "learning_rate": 8.792612051298385e-05, "loss": 2.3855, "step": 3318 }, { "epoch": 0.2498353375110559, "grad_norm": 4.4817891120910645, "learning_rate": 8.791817522866344e-05, "loss": 2.1471, "step": 3319 }, { "epoch": 0.24991061179171606, "grad_norm": 4.599247455596924, "learning_rate": 8.79102276902163e-05, "loss": 2.0043, "step": 3320 }, { "epoch": 0.24998588607237623, "grad_norm": 5.344282627105713, "learning_rate": 8.790227789811489e-05, "loss": 2.2064, "step": 3321 }, { "epoch": 0.2500611603530364, "grad_norm": 4.306174278259277, "learning_rate": 8.789432585283183e-05, "loss": 1.9538, "step": 3322 }, { "epoch": 0.25013643463369656, "grad_norm": 5.088106632232666, "learning_rate": 8.788637155483982e-05, "loss": 1.8962, "step": 3323 }, { "epoch": 0.2502117089143567, "grad_norm": 3.599647283554077, "learning_rate": 8.787841500461173e-05, "loss": 2.264, "step": 3324 }, { "epoch": 0.25028698319501685, "grad_norm": 3.4975473880767822, "learning_rate": 8.787045620262057e-05, "loss": 1.9251, "step": 3325 }, { "epoch": 0.250362257475677, "grad_norm": 4.483765602111816, "learning_rate": 8.786249514933944e-05, "loss": 2.0298, "step": 3326 }, { "epoch": 0.25043753175633715, "grad_norm": 5.014871597290039, "learning_rate": 8.785453184524161e-05, "loss": 1.9818, "step": 3327 }, { "epoch": 0.2505128060369973, "grad_norm": 5.549981117248535, "learning_rate": 8.784656629080048e-05, "loss": 1.9732, "step": 3328 }, { "epoch": 0.25058808031765745, "grad_norm": 3.8736355304718018, "learning_rate": 8.783859848648958e-05, "loss": 1.9212, "step": 3329 }, { "epoch": 0.2506633545983176, "grad_norm": 5.429457664489746, "learning_rate": 8.783062843278257e-05, "loss": 1.8222, "step": 3330 }, { "epoch": 0.2507386288789778, "grad_norm": 5.866755962371826, "learning_rate": 8.782265613015325e-05, "loss": 2.2229, "step": 3331 }, { "epoch": 0.2508139031596379, "grad_norm": 4.262095928192139, "learning_rate": 8.781468157907555e-05, "loss": 2.2646, "step": 3332 }, { "epoch": 0.2508891774402981, "grad_norm": 4.359179973602295, "learning_rate": 8.780670478002353e-05, "loss": 2.0286, "step": 3333 }, { "epoch": 0.2509644517209582, "grad_norm": 4.44980525970459, "learning_rate": 8.779872573347138e-05, "loss": 2.1966, "step": 3334 }, { "epoch": 0.2510397260016184, "grad_norm": 4.281689167022705, "learning_rate": 8.779074443989346e-05, "loss": 2.0202, "step": 3335 }, { "epoch": 0.25111500028227857, "grad_norm": 4.681268692016602, "learning_rate": 8.778276089976421e-05, "loss": 1.9536, "step": 3336 }, { "epoch": 0.2511902745629387, "grad_norm": 3.7675046920776367, "learning_rate": 8.777477511355822e-05, "loss": 1.8321, "step": 3337 }, { "epoch": 0.25126554884359886, "grad_norm": 4.948992729187012, "learning_rate": 8.776678708175025e-05, "loss": 1.9032, "step": 3338 }, { "epoch": 0.25134082312425904, "grad_norm": 5.282908916473389, "learning_rate": 8.775879680481516e-05, "loss": 1.8272, "step": 3339 }, { "epoch": 0.25141609740491916, "grad_norm": 5.23888635635376, "learning_rate": 8.775080428322794e-05, "loss": 2.1043, "step": 3340 }, { "epoch": 0.25149137168557933, "grad_norm": 3.9256439208984375, "learning_rate": 8.774280951746372e-05, "loss": 2.1799, "step": 3341 }, { "epoch": 0.2515666459662395, "grad_norm": 3.6742818355560303, "learning_rate": 8.773481250799777e-05, "loss": 2.0432, "step": 3342 }, { "epoch": 0.25164192024689963, "grad_norm": 4.291045188903809, "learning_rate": 8.77268132553055e-05, "loss": 2.2712, "step": 3343 }, { "epoch": 0.2517171945275598, "grad_norm": 4.5728678703308105, "learning_rate": 8.77188117598624e-05, "loss": 2.0552, "step": 3344 }, { "epoch": 0.2517924688082199, "grad_norm": 3.9421279430389404, "learning_rate": 8.77108080221442e-05, "loss": 2.1023, "step": 3345 }, { "epoch": 0.2518677430888801, "grad_norm": 5.261859893798828, "learning_rate": 8.770280204262666e-05, "loss": 2.2573, "step": 3346 }, { "epoch": 0.2519430173695403, "grad_norm": 4.453205108642578, "learning_rate": 8.769479382178572e-05, "loss": 1.7899, "step": 3347 }, { "epoch": 0.2520182916502004, "grad_norm": 4.459711074829102, "learning_rate": 8.768678336009745e-05, "loss": 1.8279, "step": 3348 }, { "epoch": 0.2520935659308606, "grad_norm": 3.706254482269287, "learning_rate": 8.767877065803803e-05, "loss": 1.9549, "step": 3349 }, { "epoch": 0.25216884021152075, "grad_norm": 3.9402592182159424, "learning_rate": 8.767075571608383e-05, "loss": 1.971, "step": 3350 }, { "epoch": 0.25224411449218087, "grad_norm": 4.983983516693115, "learning_rate": 8.766273853471128e-05, "loss": 1.9869, "step": 3351 }, { "epoch": 0.25231938877284105, "grad_norm": 6.2729315757751465, "learning_rate": 8.765471911439697e-05, "loss": 2.2604, "step": 3352 }, { "epoch": 0.2523946630535012, "grad_norm": 4.499894618988037, "learning_rate": 8.764669745561768e-05, "loss": 1.7951, "step": 3353 }, { "epoch": 0.25246993733416134, "grad_norm": 3.6646013259887695, "learning_rate": 8.763867355885023e-05, "loss": 1.9109, "step": 3354 }, { "epoch": 0.2525452116148215, "grad_norm": 3.9780445098876953, "learning_rate": 8.763064742457165e-05, "loss": 2.2108, "step": 3355 }, { "epoch": 0.25262048589548164, "grad_norm": 5.8783721923828125, "learning_rate": 8.762261905325905e-05, "loss": 2.2732, "step": 3356 }, { "epoch": 0.2526957601761418, "grad_norm": 5.000054836273193, "learning_rate": 8.76145884453897e-05, "loss": 2.1449, "step": 3357 }, { "epoch": 0.252771034456802, "grad_norm": 6.021858215332031, "learning_rate": 8.760655560144098e-05, "loss": 2.1501, "step": 3358 }, { "epoch": 0.2528463087374621, "grad_norm": 5.94951868057251, "learning_rate": 8.759852052189045e-05, "loss": 2.8932, "step": 3359 }, { "epoch": 0.2529215830181223, "grad_norm": 4.1834845542907715, "learning_rate": 8.759048320721576e-05, "loss": 2.1667, "step": 3360 }, { "epoch": 0.25299685729878246, "grad_norm": 6.927559852600098, "learning_rate": 8.75824436578947e-05, "loss": 2.1306, "step": 3361 }, { "epoch": 0.2530721315794426, "grad_norm": 3.9181342124938965, "learning_rate": 8.75744018744052e-05, "loss": 2.539, "step": 3362 }, { "epoch": 0.25314740586010276, "grad_norm": 4.221496105194092, "learning_rate": 8.75663578572253e-05, "loss": 1.9139, "step": 3363 }, { "epoch": 0.2532226801407629, "grad_norm": 6.087464332580566, "learning_rate": 8.755831160683325e-05, "loss": 2.0362, "step": 3364 }, { "epoch": 0.25329795442142305, "grad_norm": 4.116762638092041, "learning_rate": 8.755026312370733e-05, "loss": 1.8901, "step": 3365 }, { "epoch": 0.25337322870208323, "grad_norm": 5.038180351257324, "learning_rate": 8.754221240832599e-05, "loss": 2.2415, "step": 3366 }, { "epoch": 0.25344850298274335, "grad_norm": 4.341926097869873, "learning_rate": 8.753415946116787e-05, "loss": 2.4661, "step": 3367 }, { "epoch": 0.2535237772634035, "grad_norm": 4.785672187805176, "learning_rate": 8.752610428271165e-05, "loss": 1.9654, "step": 3368 }, { "epoch": 0.2535990515440637, "grad_norm": 5.808557510375977, "learning_rate": 8.751804687343621e-05, "loss": 2.1148, "step": 3369 }, { "epoch": 0.2536743258247238, "grad_norm": 5.877425670623779, "learning_rate": 8.750998723382055e-05, "loss": 2.0915, "step": 3370 }, { "epoch": 0.253749600105384, "grad_norm": 4.9935994148254395, "learning_rate": 8.750192536434376e-05, "loss": 2.1862, "step": 3371 }, { "epoch": 0.2538248743860442, "grad_norm": 9.6919527053833, "learning_rate": 8.749386126548512e-05, "loss": 2.2075, "step": 3372 }, { "epoch": 0.2539001486667043, "grad_norm": 4.741062164306641, "learning_rate": 8.7485794937724e-05, "loss": 1.7482, "step": 3373 }, { "epoch": 0.25397542294736447, "grad_norm": 5.2496442794799805, "learning_rate": 8.747772638153992e-05, "loss": 1.8747, "step": 3374 }, { "epoch": 0.2540506972280246, "grad_norm": 4.504026889801025, "learning_rate": 8.746965559741257e-05, "loss": 1.9155, "step": 3375 }, { "epoch": 0.25412597150868477, "grad_norm": 4.862549304962158, "learning_rate": 8.74615825858217e-05, "loss": 1.9825, "step": 3376 }, { "epoch": 0.25420124578934494, "grad_norm": 5.344687461853027, "learning_rate": 8.745350734724723e-05, "loss": 1.8682, "step": 3377 }, { "epoch": 0.25427652007000506, "grad_norm": 5.085996627807617, "learning_rate": 8.744542988216922e-05, "loss": 2.223, "step": 3378 }, { "epoch": 0.25435179435066524, "grad_norm": 4.684676170349121, "learning_rate": 8.743735019106782e-05, "loss": 2.1211, "step": 3379 }, { "epoch": 0.2544270686313254, "grad_norm": 4.374932289123535, "learning_rate": 8.74292682744234e-05, "loss": 2.1404, "step": 3380 }, { "epoch": 0.25450234291198554, "grad_norm": 6.876060962677002, "learning_rate": 8.742118413271637e-05, "loss": 2.1856, "step": 3381 }, { "epoch": 0.2545776171926457, "grad_norm": 5.487707614898682, "learning_rate": 8.741309776642732e-05, "loss": 1.9215, "step": 3382 }, { "epoch": 0.25465289147330583, "grad_norm": 5.342986106872559, "learning_rate": 8.740500917603696e-05, "loss": 2.2173, "step": 3383 }, { "epoch": 0.254728165753966, "grad_norm": 5.712308883666992, "learning_rate": 8.739691836202613e-05, "loss": 2.436, "step": 3384 }, { "epoch": 0.2548034400346262, "grad_norm": 4.376852035522461, "learning_rate": 8.73888253248758e-05, "loss": 2.094, "step": 3385 }, { "epoch": 0.2548787143152863, "grad_norm": 4.699362754821777, "learning_rate": 8.738073006506708e-05, "loss": 1.9965, "step": 3386 }, { "epoch": 0.2549539885959465, "grad_norm": 5.363994121551514, "learning_rate": 8.737263258308122e-05, "loss": 1.822, "step": 3387 }, { "epoch": 0.25502926287660665, "grad_norm": 5.242997169494629, "learning_rate": 8.736453287939958e-05, "loss": 2.462, "step": 3388 }, { "epoch": 0.2551045371572668, "grad_norm": 7.264503479003906, "learning_rate": 8.73564309545037e-05, "loss": 2.1427, "step": 3389 }, { "epoch": 0.25517981143792695, "grad_norm": 5.701240062713623, "learning_rate": 8.734832680887517e-05, "loss": 2.3662, "step": 3390 }, { "epoch": 0.2552550857185871, "grad_norm": 6.550506591796875, "learning_rate": 8.734022044299577e-05, "loss": 2.2641, "step": 3391 }, { "epoch": 0.25533035999924725, "grad_norm": 4.799621105194092, "learning_rate": 8.73321118573474e-05, "loss": 1.7875, "step": 3392 }, { "epoch": 0.2554056342799074, "grad_norm": 6.353862762451172, "learning_rate": 8.732400105241211e-05, "loss": 2.4473, "step": 3393 }, { "epoch": 0.25548090856056754, "grad_norm": 6.884500026702881, "learning_rate": 8.731588802867205e-05, "loss": 2.0167, "step": 3394 }, { "epoch": 0.2555561828412277, "grad_norm": 6.473781585693359, "learning_rate": 8.730777278660951e-05, "loss": 2.4102, "step": 3395 }, { "epoch": 0.2556314571218879, "grad_norm": 9.139266967773438, "learning_rate": 8.729965532670694e-05, "loss": 2.3445, "step": 3396 }, { "epoch": 0.255706731402548, "grad_norm": 4.931876182556152, "learning_rate": 8.729153564944688e-05, "loss": 2.0216, "step": 3397 }, { "epoch": 0.2557820056832082, "grad_norm": 3.9296631813049316, "learning_rate": 8.728341375531203e-05, "loss": 1.9161, "step": 3398 }, { "epoch": 0.25585727996386837, "grad_norm": 4.26688289642334, "learning_rate": 8.72752896447852e-05, "loss": 2.0306, "step": 3399 }, { "epoch": 0.2559325542445285, "grad_norm": 4.134580135345459, "learning_rate": 8.726716331834937e-05, "loss": 2.3585, "step": 3400 }, { "epoch": 0.25600782852518866, "grad_norm": 5.008721351623535, "learning_rate": 8.72590347764876e-05, "loss": 2.2548, "step": 3401 }, { "epoch": 0.25608310280584884, "grad_norm": 4.195300102233887, "learning_rate": 8.725090401968312e-05, "loss": 1.9368, "step": 3402 }, { "epoch": 0.25615837708650896, "grad_norm": 5.666923999786377, "learning_rate": 8.724277104841929e-05, "loss": 2.0947, "step": 3403 }, { "epoch": 0.25623365136716914, "grad_norm": 4.095748424530029, "learning_rate": 8.723463586317957e-05, "loss": 2.4304, "step": 3404 }, { "epoch": 0.25630892564782926, "grad_norm": 4.958882808685303, "learning_rate": 8.72264984644476e-05, "loss": 2.4885, "step": 3405 }, { "epoch": 0.25638419992848943, "grad_norm": 4.125391483306885, "learning_rate": 8.72183588527071e-05, "loss": 2.1755, "step": 3406 }, { "epoch": 0.2564594742091496, "grad_norm": 5.554819107055664, "learning_rate": 8.721021702844197e-05, "loss": 2.2817, "step": 3407 }, { "epoch": 0.2565347484898097, "grad_norm": 3.444105386734009, "learning_rate": 8.72020729921362e-05, "loss": 2.3285, "step": 3408 }, { "epoch": 0.2566100227704699, "grad_norm": 4.626033782958984, "learning_rate": 8.719392674427394e-05, "loss": 2.104, "step": 3409 }, { "epoch": 0.2566852970511301, "grad_norm": 3.7324564456939697, "learning_rate": 8.718577828533944e-05, "loss": 1.9851, "step": 3410 }, { "epoch": 0.2567605713317902, "grad_norm": 4.411436557769775, "learning_rate": 8.717762761581714e-05, "loss": 2.301, "step": 3411 }, { "epoch": 0.2568358456124504, "grad_norm": 3.9267332553863525, "learning_rate": 8.716947473619154e-05, "loss": 2.0016, "step": 3412 }, { "epoch": 0.2569111198931105, "grad_norm": 4.398698329925537, "learning_rate": 8.716131964694731e-05, "loss": 2.0531, "step": 3413 }, { "epoch": 0.25698639417377067, "grad_norm": 4.13516092300415, "learning_rate": 8.715316234856928e-05, "loss": 1.8714, "step": 3414 }, { "epoch": 0.25706166845443085, "grad_norm": 4.921574115753174, "learning_rate": 8.714500284154232e-05, "loss": 1.9893, "step": 3415 }, { "epoch": 0.25713694273509097, "grad_norm": 4.164416790008545, "learning_rate": 8.713684112635154e-05, "loss": 2.0498, "step": 3416 }, { "epoch": 0.25721221701575114, "grad_norm": 4.605637073516846, "learning_rate": 8.712867720348212e-05, "loss": 1.9556, "step": 3417 }, { "epoch": 0.2572874912964113, "grad_norm": 3.7386839389801025, "learning_rate": 8.712051107341936e-05, "loss": 2.0976, "step": 3418 }, { "epoch": 0.25736276557707144, "grad_norm": 5.0655035972595215, "learning_rate": 8.711234273664874e-05, "loss": 1.9368, "step": 3419 }, { "epoch": 0.2574380398577316, "grad_norm": 5.053414344787598, "learning_rate": 8.710417219365583e-05, "loss": 1.9514, "step": 3420 }, { "epoch": 0.2575133141383918, "grad_norm": 4.10591983795166, "learning_rate": 8.709599944492635e-05, "loss": 2.2746, "step": 3421 }, { "epoch": 0.2575885884190519, "grad_norm": 4.1164960861206055, "learning_rate": 8.708782449094613e-05, "loss": 2.0249, "step": 3422 }, { "epoch": 0.2576638626997121, "grad_norm": 5.9446492195129395, "learning_rate": 8.707964733220116e-05, "loss": 1.8944, "step": 3423 }, { "epoch": 0.2577391369803722, "grad_norm": 4.235020637512207, "learning_rate": 8.707146796917756e-05, "loss": 2.0211, "step": 3424 }, { "epoch": 0.2578144112610324, "grad_norm": 4.341794013977051, "learning_rate": 8.706328640236156e-05, "loss": 2.1195, "step": 3425 }, { "epoch": 0.25788968554169256, "grad_norm": 4.268988609313965, "learning_rate": 8.705510263223953e-05, "loss": 2.0087, "step": 3426 }, { "epoch": 0.2579649598223527, "grad_norm": 4.792752742767334, "learning_rate": 8.704691665929797e-05, "loss": 2.2867, "step": 3427 }, { "epoch": 0.25804023410301286, "grad_norm": 5.365433216094971, "learning_rate": 8.70387284840235e-05, "loss": 1.9162, "step": 3428 }, { "epoch": 0.25811550838367303, "grad_norm": 4.760412693023682, "learning_rate": 8.703053810690292e-05, "loss": 2.1497, "step": 3429 }, { "epoch": 0.25819078266433315, "grad_norm": 3.640106201171875, "learning_rate": 8.702234552842307e-05, "loss": 1.9194, "step": 3430 }, { "epoch": 0.2582660569449933, "grad_norm": 6.287134170532227, "learning_rate": 8.701415074907104e-05, "loss": 1.9811, "step": 3431 }, { "epoch": 0.25834133122565345, "grad_norm": 4.32661247253418, "learning_rate": 8.700595376933394e-05, "loss": 2.1024, "step": 3432 }, { "epoch": 0.2584166055063136, "grad_norm": 5.4799933433532715, "learning_rate": 8.699775458969908e-05, "loss": 2.1458, "step": 3433 }, { "epoch": 0.2584918797869738, "grad_norm": 5.0961809158325195, "learning_rate": 8.698955321065386e-05, "loss": 2.0579, "step": 3434 }, { "epoch": 0.2585671540676339, "grad_norm": 5.822303295135498, "learning_rate": 8.698134963268583e-05, "loss": 2.2369, "step": 3435 }, { "epoch": 0.2586424283482941, "grad_norm": 4.892825126647949, "learning_rate": 8.697314385628268e-05, "loss": 1.9262, "step": 3436 }, { "epoch": 0.25871770262895427, "grad_norm": 5.237296104431152, "learning_rate": 8.696493588193222e-05, "loss": 2.1281, "step": 3437 }, { "epoch": 0.2587929769096144, "grad_norm": 4.93479061126709, "learning_rate": 8.695672571012239e-05, "loss": 1.9472, "step": 3438 }, { "epoch": 0.25886825119027457, "grad_norm": 5.76990270614624, "learning_rate": 8.694851334134125e-05, "loss": 1.9941, "step": 3439 }, { "epoch": 0.25894352547093474, "grad_norm": 4.854992389678955, "learning_rate": 8.694029877607702e-05, "loss": 2.1369, "step": 3440 }, { "epoch": 0.25901879975159486, "grad_norm": 3.966226816177368, "learning_rate": 8.693208201481799e-05, "loss": 2.0117, "step": 3441 }, { "epoch": 0.25909407403225504, "grad_norm": 6.378432273864746, "learning_rate": 8.692386305805269e-05, "loss": 2.1988, "step": 3442 }, { "epoch": 0.25916934831291516, "grad_norm": 4.8376898765563965, "learning_rate": 8.691564190626967e-05, "loss": 1.618, "step": 3443 }, { "epoch": 0.25924462259357534, "grad_norm": 4.570425987243652, "learning_rate": 8.690741855995765e-05, "loss": 2.0759, "step": 3444 }, { "epoch": 0.2593198968742355, "grad_norm": 4.492746353149414, "learning_rate": 8.689919301960549e-05, "loss": 1.854, "step": 3445 }, { "epoch": 0.25939517115489563, "grad_norm": 5.882372856140137, "learning_rate": 8.68909652857022e-05, "loss": 2.1512, "step": 3446 }, { "epoch": 0.2594704454355558, "grad_norm": 4.4794793128967285, "learning_rate": 8.688273535873686e-05, "loss": 1.6465, "step": 3447 }, { "epoch": 0.259545719716216, "grad_norm": 3.931464195251465, "learning_rate": 8.687450323919875e-05, "loss": 1.864, "step": 3448 }, { "epoch": 0.2596209939968761, "grad_norm": 4.325509071350098, "learning_rate": 8.68662689275772e-05, "loss": 1.903, "step": 3449 }, { "epoch": 0.2596962682775363, "grad_norm": 4.37838077545166, "learning_rate": 8.685803242436177e-05, "loss": 1.6749, "step": 3450 }, { "epoch": 0.25977154255819646, "grad_norm": 4.602680206298828, "learning_rate": 8.684979373004205e-05, "loss": 2.022, "step": 3451 }, { "epoch": 0.2598468168388566, "grad_norm": 3.8039469718933105, "learning_rate": 8.684155284510785e-05, "loss": 2.1983, "step": 3452 }, { "epoch": 0.25992209111951675, "grad_norm": 7.304414749145508, "learning_rate": 8.683330977004902e-05, "loss": 2.0093, "step": 3453 }, { "epoch": 0.25999736540017687, "grad_norm": 6.7230329513549805, "learning_rate": 8.682506450535563e-05, "loss": 2.6703, "step": 3454 }, { "epoch": 0.26007263968083705, "grad_norm": 10.431265830993652, "learning_rate": 8.681681705151781e-05, "loss": 2.2693, "step": 3455 }, { "epoch": 0.2601479139614972, "grad_norm": 5.38029146194458, "learning_rate": 8.680856740902585e-05, "loss": 2.18, "step": 3456 }, { "epoch": 0.26022318824215734, "grad_norm": 3.5006840229034424, "learning_rate": 8.680031557837018e-05, "loss": 1.7489, "step": 3457 }, { "epoch": 0.2602984625228175, "grad_norm": 4.128906726837158, "learning_rate": 8.679206156004134e-05, "loss": 1.8874, "step": 3458 }, { "epoch": 0.2603737368034777, "grad_norm": 5.0077314376831055, "learning_rate": 8.678380535453e-05, "loss": 2.1523, "step": 3459 }, { "epoch": 0.2604490110841378, "grad_norm": 5.8345818519592285, "learning_rate": 8.677554696232699e-05, "loss": 2.1446, "step": 3460 }, { "epoch": 0.260524285364798, "grad_norm": 5.284924507141113, "learning_rate": 8.676728638392321e-05, "loss": 1.9388, "step": 3461 }, { "epoch": 0.2605995596454581, "grad_norm": 5.802793502807617, "learning_rate": 8.675902361980979e-05, "loss": 2.5715, "step": 3462 }, { "epoch": 0.2606748339261183, "grad_norm": 4.506278038024902, "learning_rate": 8.675075867047786e-05, "loss": 2.046, "step": 3463 }, { "epoch": 0.26075010820677846, "grad_norm": 6.851126670837402, "learning_rate": 8.674249153641878e-05, "loss": 2.4547, "step": 3464 }, { "epoch": 0.2608253824874386, "grad_norm": 4.001614093780518, "learning_rate": 8.673422221812402e-05, "loss": 1.8197, "step": 3465 }, { "epoch": 0.26090065676809876, "grad_norm": 5.8435797691345215, "learning_rate": 8.672595071608513e-05, "loss": 2.684, "step": 3466 }, { "epoch": 0.26097593104875894, "grad_norm": 5.1660943031311035, "learning_rate": 8.671767703079387e-05, "loss": 1.7656, "step": 3467 }, { "epoch": 0.26105120532941906, "grad_norm": 4.083738803863525, "learning_rate": 8.670940116274205e-05, "loss": 1.8932, "step": 3468 }, { "epoch": 0.26112647961007923, "grad_norm": 4.045709133148193, "learning_rate": 8.670112311242166e-05, "loss": 2.1297, "step": 3469 }, { "epoch": 0.2612017538907394, "grad_norm": 3.737746000289917, "learning_rate": 8.669284288032482e-05, "loss": 2.1864, "step": 3470 }, { "epoch": 0.26127702817139953, "grad_norm": 4.691811561584473, "learning_rate": 8.668456046694376e-05, "loss": 2.2395, "step": 3471 }, { "epoch": 0.2613523024520597, "grad_norm": 4.320754051208496, "learning_rate": 8.667627587277082e-05, "loss": 1.904, "step": 3472 }, { "epoch": 0.2614275767327198, "grad_norm": 6.436110019683838, "learning_rate": 8.666798909829852e-05, "loss": 2.1441, "step": 3473 }, { "epoch": 0.26150285101338, "grad_norm": 4.605068683624268, "learning_rate": 8.66597001440195e-05, "loss": 2.2468, "step": 3474 }, { "epoch": 0.2615781252940402, "grad_norm": 4.703996658325195, "learning_rate": 8.665140901042647e-05, "loss": 1.8968, "step": 3475 }, { "epoch": 0.2616533995747003, "grad_norm": 4.316329002380371, "learning_rate": 8.664311569801235e-05, "loss": 1.7415, "step": 3476 }, { "epoch": 0.26172867385536047, "grad_norm": 4.848236560821533, "learning_rate": 8.663482020727014e-05, "loss": 1.9466, "step": 3477 }, { "epoch": 0.26180394813602065, "grad_norm": 3.2008492946624756, "learning_rate": 8.662652253869299e-05, "loss": 2.1173, "step": 3478 }, { "epoch": 0.26187922241668077, "grad_norm": 5.0430073738098145, "learning_rate": 8.661822269277416e-05, "loss": 2.2131, "step": 3479 }, { "epoch": 0.26195449669734094, "grad_norm": 5.203665733337402, "learning_rate": 8.660992067000707e-05, "loss": 1.938, "step": 3480 }, { "epoch": 0.26202977097800106, "grad_norm": 6.658170700073242, "learning_rate": 8.660161647088524e-05, "loss": 2.6746, "step": 3481 }, { "epoch": 0.26210504525866124, "grad_norm": 5.447011470794678, "learning_rate": 8.659331009590233e-05, "loss": 2.0946, "step": 3482 }, { "epoch": 0.2621803195393214, "grad_norm": 4.5602641105651855, "learning_rate": 8.658500154555214e-05, "loss": 1.882, "step": 3483 }, { "epoch": 0.26225559381998154, "grad_norm": 6.496964454650879, "learning_rate": 8.657669082032856e-05, "loss": 2.3845, "step": 3484 }, { "epoch": 0.2623308681006417, "grad_norm": 4.06599235534668, "learning_rate": 8.656837792072567e-05, "loss": 2.2797, "step": 3485 }, { "epoch": 0.2624061423813019, "grad_norm": 4.25238561630249, "learning_rate": 8.656006284723765e-05, "loss": 2.2503, "step": 3486 }, { "epoch": 0.262481416661962, "grad_norm": 3.3910272121429443, "learning_rate": 8.655174560035879e-05, "loss": 1.9028, "step": 3487 }, { "epoch": 0.2625566909426222, "grad_norm": 5.794406414031982, "learning_rate": 8.654342618058354e-05, "loss": 2.0506, "step": 3488 }, { "epoch": 0.26263196522328236, "grad_norm": 4.569392681121826, "learning_rate": 8.653510458840645e-05, "loss": 2.0771, "step": 3489 }, { "epoch": 0.2627072395039425, "grad_norm": 4.954533576965332, "learning_rate": 8.652678082432224e-05, "loss": 2.2721, "step": 3490 }, { "epoch": 0.26278251378460266, "grad_norm": 3.7053730487823486, "learning_rate": 8.651845488882569e-05, "loss": 2.2615, "step": 3491 }, { "epoch": 0.2628577880652628, "grad_norm": 4.0285325050354, "learning_rate": 8.65101267824118e-05, "loss": 2.0601, "step": 3492 }, { "epoch": 0.26293306234592295, "grad_norm": 4.186265468597412, "learning_rate": 8.650179650557561e-05, "loss": 1.749, "step": 3493 }, { "epoch": 0.26300833662658313, "grad_norm": 6.563839912414551, "learning_rate": 8.649346405881237e-05, "loss": 2.0697, "step": 3494 }, { "epoch": 0.26308361090724325, "grad_norm": 4.802326202392578, "learning_rate": 8.648512944261741e-05, "loss": 2.0839, "step": 3495 }, { "epoch": 0.2631588851879034, "grad_norm": 5.419128894805908, "learning_rate": 8.647679265748619e-05, "loss": 1.7791, "step": 3496 }, { "epoch": 0.2632341594685636, "grad_norm": 4.4524431228637695, "learning_rate": 8.64684537039143e-05, "loss": 1.9806, "step": 3497 }, { "epoch": 0.2633094337492237, "grad_norm": 7.896785259246826, "learning_rate": 8.646011258239747e-05, "loss": 1.9088, "step": 3498 }, { "epoch": 0.2633847080298839, "grad_norm": 4.490203380584717, "learning_rate": 8.645176929343159e-05, "loss": 1.8399, "step": 3499 }, { "epoch": 0.26345998231054407, "grad_norm": 6.67177152633667, "learning_rate": 8.64434238375126e-05, "loss": 2.4361, "step": 3500 }, { "epoch": 0.2635352565912042, "grad_norm": 4.74786376953125, "learning_rate": 8.643507621513663e-05, "loss": 1.8581, "step": 3501 }, { "epoch": 0.26361053087186437, "grad_norm": 4.057636260986328, "learning_rate": 8.642672642679991e-05, "loss": 1.8163, "step": 3502 }, { "epoch": 0.2636858051525245, "grad_norm": 6.529108047485352, "learning_rate": 8.641837447299886e-05, "loss": 2.1189, "step": 3503 }, { "epoch": 0.26376107943318466, "grad_norm": 5.8578619956970215, "learning_rate": 8.641002035422992e-05, "loss": 1.8417, "step": 3504 }, { "epoch": 0.26383635371384484, "grad_norm": 4.551418304443359, "learning_rate": 8.640166407098974e-05, "loss": 2.0364, "step": 3505 }, { "epoch": 0.26391162799450496, "grad_norm": 4.891948699951172, "learning_rate": 8.639330562377508e-05, "loss": 2.0196, "step": 3506 }, { "epoch": 0.26398690227516514, "grad_norm": 4.2468671798706055, "learning_rate": 8.638494501308283e-05, "loss": 2.0781, "step": 3507 }, { "epoch": 0.2640621765558253, "grad_norm": 4.8525285720825195, "learning_rate": 8.637658223940999e-05, "loss": 2.0506, "step": 3508 }, { "epoch": 0.26413745083648543, "grad_norm": 6.0103631019592285, "learning_rate": 8.636821730325373e-05, "loss": 2.0097, "step": 3509 }, { "epoch": 0.2642127251171456, "grad_norm": 4.095498561859131, "learning_rate": 8.635985020511129e-05, "loss": 1.8606, "step": 3510 }, { "epoch": 0.26428799939780573, "grad_norm": 6.583241939544678, "learning_rate": 8.635148094548008e-05, "loss": 2.2384, "step": 3511 }, { "epoch": 0.2643632736784659, "grad_norm": 6.626463890075684, "learning_rate": 8.634310952485764e-05, "loss": 1.849, "step": 3512 }, { "epoch": 0.2644385479591261, "grad_norm": 4.025700092315674, "learning_rate": 8.633473594374162e-05, "loss": 2.4964, "step": 3513 }, { "epoch": 0.2645138222397862, "grad_norm": 9.353435516357422, "learning_rate": 8.632636020262981e-05, "loss": 2.3538, "step": 3514 }, { "epoch": 0.2645890965204464, "grad_norm": 5.180330753326416, "learning_rate": 8.63179823020201e-05, "loss": 2.0134, "step": 3515 }, { "epoch": 0.26466437080110655, "grad_norm": 4.322362422943115, "learning_rate": 8.630960224241055e-05, "loss": 2.0556, "step": 3516 }, { "epoch": 0.2647396450817667, "grad_norm": 5.330920219421387, "learning_rate": 8.630122002429934e-05, "loss": 2.0454, "step": 3517 }, { "epoch": 0.26481491936242685, "grad_norm": 4.6195759773254395, "learning_rate": 8.629283564818478e-05, "loss": 1.915, "step": 3518 }, { "epoch": 0.264890193643087, "grad_norm": 5.498222827911377, "learning_rate": 8.628444911456524e-05, "loss": 1.8984, "step": 3519 }, { "epoch": 0.26496546792374714, "grad_norm": 5.760746002197266, "learning_rate": 8.627606042393934e-05, "loss": 2.4864, "step": 3520 }, { "epoch": 0.2650407422044073, "grad_norm": 5.845561504364014, "learning_rate": 8.626766957680572e-05, "loss": 2.2566, "step": 3521 }, { "epoch": 0.26511601648506744, "grad_norm": 7.052467346191406, "learning_rate": 8.625927657366321e-05, "loss": 2.1368, "step": 3522 }, { "epoch": 0.2651912907657276, "grad_norm": 4.2269768714904785, "learning_rate": 8.625088141501074e-05, "loss": 2.1631, "step": 3523 }, { "epoch": 0.2652665650463878, "grad_norm": 4.330885887145996, "learning_rate": 8.624248410134739e-05, "loss": 2.4266, "step": 3524 }, { "epoch": 0.2653418393270479, "grad_norm": 4.239013671875, "learning_rate": 8.623408463317237e-05, "loss": 1.9877, "step": 3525 }, { "epoch": 0.2654171136077081, "grad_norm": 5.516305446624756, "learning_rate": 8.622568301098496e-05, "loss": 1.7916, "step": 3526 }, { "epoch": 0.26549238788836826, "grad_norm": 5.711477279663086, "learning_rate": 8.621727923528465e-05, "loss": 1.8021, "step": 3527 }, { "epoch": 0.2655676621690284, "grad_norm": 6.629284381866455, "learning_rate": 8.620887330657102e-05, "loss": 1.9709, "step": 3528 }, { "epoch": 0.26564293644968856, "grad_norm": 4.86653470993042, "learning_rate": 8.620046522534376e-05, "loss": 2.1331, "step": 3529 }, { "epoch": 0.2657182107303487, "grad_norm": 7.280300140380859, "learning_rate": 8.619205499210269e-05, "loss": 2.4143, "step": 3530 }, { "epoch": 0.26579348501100886, "grad_norm": 6.372810363769531, "learning_rate": 8.618364260734781e-05, "loss": 2.0764, "step": 3531 }, { "epoch": 0.26586875929166903, "grad_norm": 4.045786380767822, "learning_rate": 8.617522807157921e-05, "loss": 2.0302, "step": 3532 }, { "epoch": 0.26594403357232915, "grad_norm": 4.7104291915893555, "learning_rate": 8.61668113852971e-05, "loss": 2.0957, "step": 3533 }, { "epoch": 0.26601930785298933, "grad_norm": 4.618283748626709, "learning_rate": 8.615839254900182e-05, "loss": 1.9602, "step": 3534 }, { "epoch": 0.2660945821336495, "grad_norm": 3.7331349849700928, "learning_rate": 8.614997156319385e-05, "loss": 1.8533, "step": 3535 }, { "epoch": 0.2661698564143096, "grad_norm": 3.600839853286743, "learning_rate": 8.61415484283738e-05, "loss": 2.2455, "step": 3536 }, { "epoch": 0.2662451306949698, "grad_norm": 4.755758285522461, "learning_rate": 8.613312314504239e-05, "loss": 2.0226, "step": 3537 }, { "epoch": 0.26632040497563, "grad_norm": 4.29447603225708, "learning_rate": 8.61246957137005e-05, "loss": 2.1633, "step": 3538 }, { "epoch": 0.2663956792562901, "grad_norm": 7.697479248046875, "learning_rate": 8.61162661348491e-05, "loss": 2.0379, "step": 3539 }, { "epoch": 0.2664709535369503, "grad_norm": 4.47981595993042, "learning_rate": 8.610783440898931e-05, "loss": 1.8475, "step": 3540 }, { "epoch": 0.2665462278176104, "grad_norm": 5.289915084838867, "learning_rate": 8.609940053662236e-05, "loss": 2.1634, "step": 3541 }, { "epoch": 0.26662150209827057, "grad_norm": 7.253913879394531, "learning_rate": 8.609096451824962e-05, "loss": 1.8922, "step": 3542 }, { "epoch": 0.26669677637893074, "grad_norm": 3.510399341583252, "learning_rate": 8.608252635437261e-05, "loss": 1.8736, "step": 3543 }, { "epoch": 0.26677205065959086, "grad_norm": 6.3173604011535645, "learning_rate": 8.607408604549295e-05, "loss": 2.3701, "step": 3544 }, { "epoch": 0.26684732494025104, "grad_norm": 5.826380729675293, "learning_rate": 8.606564359211238e-05, "loss": 1.9431, "step": 3545 }, { "epoch": 0.2669225992209112, "grad_norm": 5.121246814727783, "learning_rate": 8.605719899473277e-05, "loss": 2.0404, "step": 3546 }, { "epoch": 0.26699787350157134, "grad_norm": 4.491133689880371, "learning_rate": 8.604875225385613e-05, "loss": 2.1306, "step": 3547 }, { "epoch": 0.2670731477822315, "grad_norm": 5.903306484222412, "learning_rate": 8.604030336998463e-05, "loss": 2.4523, "step": 3548 }, { "epoch": 0.2671484220628917, "grad_norm": 7.432837009429932, "learning_rate": 8.60318523436205e-05, "loss": 1.9176, "step": 3549 }, { "epoch": 0.2672236963435518, "grad_norm": 5.928546905517578, "learning_rate": 8.602339917526612e-05, "loss": 2.1684, "step": 3550 }, { "epoch": 0.267298970624212, "grad_norm": 6.407994270324707, "learning_rate": 8.601494386542403e-05, "loss": 2.2811, "step": 3551 }, { "epoch": 0.2673742449048721, "grad_norm": 5.582583904266357, "learning_rate": 8.600648641459688e-05, "loss": 2.3498, "step": 3552 }, { "epoch": 0.2674495191855323, "grad_norm": 9.848580360412598, "learning_rate": 8.599802682328742e-05, "loss": 2.0346, "step": 3553 }, { "epoch": 0.26752479346619246, "grad_norm": 7.439355850219727, "learning_rate": 8.598956509199854e-05, "loss": 2.2476, "step": 3554 }, { "epoch": 0.2676000677468526, "grad_norm": 4.979750156402588, "learning_rate": 8.598110122123329e-05, "loss": 1.8053, "step": 3555 }, { "epoch": 0.26767534202751275, "grad_norm": 4.2381510734558105, "learning_rate": 8.597263521149481e-05, "loss": 2.0136, "step": 3556 }, { "epoch": 0.26775061630817293, "grad_norm": 3.3402321338653564, "learning_rate": 8.596416706328639e-05, "loss": 2.2251, "step": 3557 }, { "epoch": 0.26782589058883305, "grad_norm": 4.999721527099609, "learning_rate": 8.595569677711143e-05, "loss": 1.7848, "step": 3558 }, { "epoch": 0.2679011648694932, "grad_norm": 6.978513240814209, "learning_rate": 8.594722435347347e-05, "loss": 2.1537, "step": 3559 }, { "epoch": 0.26797643915015334, "grad_norm": 5.452956199645996, "learning_rate": 8.593874979287615e-05, "loss": 2.7631, "step": 3560 }, { "epoch": 0.2680517134308135, "grad_norm": 3.072632312774658, "learning_rate": 8.593027309582328e-05, "loss": 2.0021, "step": 3561 }, { "epoch": 0.2681269877114737, "grad_norm": 5.195108413696289, "learning_rate": 8.592179426281878e-05, "loss": 2.1097, "step": 3562 }, { "epoch": 0.2682022619921338, "grad_norm": 6.555400371551514, "learning_rate": 8.591331329436668e-05, "loss": 1.9364, "step": 3563 }, { "epoch": 0.268277536272794, "grad_norm": 8.0423002243042, "learning_rate": 8.590483019097114e-05, "loss": 2.3436, "step": 3564 }, { "epoch": 0.26835281055345417, "grad_norm": 4.884520053863525, "learning_rate": 8.589634495313648e-05, "loss": 2.1912, "step": 3565 }, { "epoch": 0.2684280848341143, "grad_norm": 4.626718521118164, "learning_rate": 8.58878575813671e-05, "loss": 2.5206, "step": 3566 }, { "epoch": 0.26850335911477446, "grad_norm": 3.974411725997925, "learning_rate": 8.587936807616756e-05, "loss": 2.1137, "step": 3567 }, { "epoch": 0.26857863339543464, "grad_norm": 4.4176483154296875, "learning_rate": 8.587087643804256e-05, "loss": 2.0215, "step": 3568 }, { "epoch": 0.26865390767609476, "grad_norm": 5.508111953735352, "learning_rate": 8.586238266749686e-05, "loss": 1.8853, "step": 3569 }, { "epoch": 0.26872918195675494, "grad_norm": 4.688547134399414, "learning_rate": 8.585388676503543e-05, "loss": 2.128, "step": 3570 }, { "epoch": 0.26880445623741506, "grad_norm": 3.828927993774414, "learning_rate": 8.584538873116328e-05, "loss": 1.676, "step": 3571 }, { "epoch": 0.26887973051807523, "grad_norm": 6.4143290519714355, "learning_rate": 8.583688856638563e-05, "loss": 1.9208, "step": 3572 }, { "epoch": 0.2689550047987354, "grad_norm": 5.6260905265808105, "learning_rate": 8.58283862712078e-05, "loss": 2.5867, "step": 3573 }, { "epoch": 0.26903027907939553, "grad_norm": 6.218117713928223, "learning_rate": 8.58198818461352e-05, "loss": 2.0441, "step": 3574 }, { "epoch": 0.2691055533600557, "grad_norm": 7.069939136505127, "learning_rate": 8.58113752916734e-05, "loss": 1.9179, "step": 3575 }, { "epoch": 0.2691808276407159, "grad_norm": 5.20361328125, "learning_rate": 8.58028666083281e-05, "loss": 2.1202, "step": 3576 }, { "epoch": 0.269256101921376, "grad_norm": 5.410080432891846, "learning_rate": 8.57943557966051e-05, "loss": 2.0853, "step": 3577 }, { "epoch": 0.2693313762020362, "grad_norm": 4.13230037689209, "learning_rate": 8.578584285701036e-05, "loss": 1.792, "step": 3578 }, { "epoch": 0.2694066504826963, "grad_norm": 4.529636859893799, "learning_rate": 8.577732779004995e-05, "loss": 2.1396, "step": 3579 }, { "epoch": 0.2694819247633565, "grad_norm": 4.05285120010376, "learning_rate": 8.576881059623003e-05, "loss": 2.2955, "step": 3580 }, { "epoch": 0.26955719904401665, "grad_norm": 4.4058685302734375, "learning_rate": 8.576029127605698e-05, "loss": 2.061, "step": 3581 }, { "epoch": 0.26963247332467677, "grad_norm": 5.240222454071045, "learning_rate": 8.575176983003721e-05, "loss": 2.3837, "step": 3582 }, { "epoch": 0.26970774760533694, "grad_norm": 4.010353088378906, "learning_rate": 8.574324625867732e-05, "loss": 1.7741, "step": 3583 }, { "epoch": 0.2697830218859971, "grad_norm": 6.166420936584473, "learning_rate": 8.573472056248399e-05, "loss": 2.0576, "step": 3584 }, { "epoch": 0.26985829616665724, "grad_norm": 4.106327056884766, "learning_rate": 8.572619274196406e-05, "loss": 2.069, "step": 3585 }, { "epoch": 0.2699335704473174, "grad_norm": 3.5963661670684814, "learning_rate": 8.571766279762448e-05, "loss": 2.0419, "step": 3586 }, { "epoch": 0.2700088447279776, "grad_norm": 4.46986198425293, "learning_rate": 8.570913072997233e-05, "loss": 2.2244, "step": 3587 }, { "epoch": 0.2700841190086377, "grad_norm": 6.361734867095947, "learning_rate": 8.570059653951482e-05, "loss": 2.4293, "step": 3588 }, { "epoch": 0.2701593932892979, "grad_norm": 4.877951622009277, "learning_rate": 8.569206022675927e-05, "loss": 1.6311, "step": 3589 }, { "epoch": 0.270234667569958, "grad_norm": 7.91477108001709, "learning_rate": 8.568352179221317e-05, "loss": 2.2059, "step": 3590 }, { "epoch": 0.2703099418506182, "grad_norm": 6.4916090965271, "learning_rate": 8.567498123638408e-05, "loss": 2.0631, "step": 3591 }, { "epoch": 0.27038521613127836, "grad_norm": 6.365734577178955, "learning_rate": 8.566643855977972e-05, "loss": 2.248, "step": 3592 }, { "epoch": 0.2704604904119385, "grad_norm": 3.8709022998809814, "learning_rate": 8.565789376290793e-05, "loss": 1.9263, "step": 3593 }, { "epoch": 0.27053576469259866, "grad_norm": 5.647452354431152, "learning_rate": 8.564934684627665e-05, "loss": 2.0827, "step": 3594 }, { "epoch": 0.27061103897325883, "grad_norm": 8.403019905090332, "learning_rate": 8.564079781039402e-05, "loss": 1.8089, "step": 3595 }, { "epoch": 0.27068631325391895, "grad_norm": 7.931789875030518, "learning_rate": 8.56322466557682e-05, "loss": 2.3045, "step": 3596 }, { "epoch": 0.27076158753457913, "grad_norm": 7.294676303863525, "learning_rate": 8.562369338290757e-05, "loss": 2.3273, "step": 3597 }, { "epoch": 0.2708368618152393, "grad_norm": 6.213169574737549, "learning_rate": 8.561513799232057e-05, "loss": 2.2608, "step": 3598 }, { "epoch": 0.2709121360958994, "grad_norm": 6.195013046264648, "learning_rate": 8.560658048451584e-05, "loss": 1.9316, "step": 3599 }, { "epoch": 0.2709874103765596, "grad_norm": 4.21322774887085, "learning_rate": 8.559802086000204e-05, "loss": 2.1797, "step": 3600 }, { "epoch": 0.2710626846572197, "grad_norm": 4.740867614746094, "learning_rate": 8.558945911928804e-05, "loss": 1.993, "step": 3601 }, { "epoch": 0.2711379589378799, "grad_norm": 5.991433143615723, "learning_rate": 8.558089526288282e-05, "loss": 1.7565, "step": 3602 }, { "epoch": 0.2712132332185401, "grad_norm": 4.477705955505371, "learning_rate": 8.557232929129547e-05, "loss": 1.9191, "step": 3603 }, { "epoch": 0.2712885074992002, "grad_norm": 4.275289535522461, "learning_rate": 8.556376120503522e-05, "loss": 2.0778, "step": 3604 }, { "epoch": 0.27136378177986037, "grad_norm": 7.188881874084473, "learning_rate": 8.55551910046114e-05, "loss": 2.343, "step": 3605 }, { "epoch": 0.27143905606052054, "grad_norm": 4.844945430755615, "learning_rate": 8.554661869053348e-05, "loss": 2.0136, "step": 3606 }, { "epoch": 0.27151433034118067, "grad_norm": 4.660032272338867, "learning_rate": 8.55380442633111e-05, "loss": 2.0537, "step": 3607 }, { "epoch": 0.27158960462184084, "grad_norm": 4.035862922668457, "learning_rate": 8.552946772345393e-05, "loss": 2.0713, "step": 3608 }, { "epoch": 0.27166487890250096, "grad_norm": 4.965854644775391, "learning_rate": 8.552088907147189e-05, "loss": 2.03, "step": 3609 }, { "epoch": 0.27174015318316114, "grad_norm": 5.072443962097168, "learning_rate": 8.55123083078749e-05, "loss": 1.9861, "step": 3610 }, { "epoch": 0.2718154274638213, "grad_norm": 5.708595275878906, "learning_rate": 8.550372543317307e-05, "loss": 2.65, "step": 3611 }, { "epoch": 0.27189070174448143, "grad_norm": 5.120248317718506, "learning_rate": 8.549514044787663e-05, "loss": 2.047, "step": 3612 }, { "epoch": 0.2719659760251416, "grad_norm": 4.392369270324707, "learning_rate": 8.548655335249593e-05, "loss": 2.5038, "step": 3613 }, { "epoch": 0.2720412503058018, "grad_norm": 5.40472412109375, "learning_rate": 8.547796414754148e-05, "loss": 2.2558, "step": 3614 }, { "epoch": 0.2721165245864619, "grad_norm": 6.653119087219238, "learning_rate": 8.546937283352384e-05, "loss": 1.9401, "step": 3615 }, { "epoch": 0.2721917988671221, "grad_norm": 6.963906764984131, "learning_rate": 8.546077941095376e-05, "loss": 2.2188, "step": 3616 }, { "epoch": 0.27226707314778226, "grad_norm": 3.746129035949707, "learning_rate": 8.545218388034211e-05, "loss": 2.3688, "step": 3617 }, { "epoch": 0.2723423474284424, "grad_norm": 4.360978603363037, "learning_rate": 8.544358624219983e-05, "loss": 1.9599, "step": 3618 }, { "epoch": 0.27241762170910255, "grad_norm": 5.566946029663086, "learning_rate": 8.543498649703807e-05, "loss": 1.9973, "step": 3619 }, { "epoch": 0.2724928959897627, "grad_norm": 4.58626127243042, "learning_rate": 8.542638464536801e-05, "loss": 1.924, "step": 3620 }, { "epoch": 0.27256817027042285, "grad_norm": 5.034262180328369, "learning_rate": 8.541778068770105e-05, "loss": 2.2181, "step": 3621 }, { "epoch": 0.272643444551083, "grad_norm": 3.571824550628662, "learning_rate": 8.540917462454865e-05, "loss": 2.126, "step": 3622 }, { "epoch": 0.27271871883174315, "grad_norm": 5.0311503410339355, "learning_rate": 8.540056645642242e-05, "loss": 2.2951, "step": 3623 }, { "epoch": 0.2727939931124033, "grad_norm": 4.832629680633545, "learning_rate": 8.539195618383409e-05, "loss": 2.0846, "step": 3624 }, { "epoch": 0.2728692673930635, "grad_norm": 5.541370391845703, "learning_rate": 8.538334380729552e-05, "loss": 1.9308, "step": 3625 }, { "epoch": 0.2729445416737236, "grad_norm": 4.064426898956299, "learning_rate": 8.537472932731867e-05, "loss": 1.9538, "step": 3626 }, { "epoch": 0.2730198159543838, "grad_norm": 4.208832263946533, "learning_rate": 8.536611274441567e-05, "loss": 2.1032, "step": 3627 }, { "epoch": 0.27309509023504397, "grad_norm": 3.8191335201263428, "learning_rate": 8.535749405909876e-05, "loss": 2.1858, "step": 3628 }, { "epoch": 0.2731703645157041, "grad_norm": 6.1279520988464355, "learning_rate": 8.534887327188026e-05, "loss": 2.2174, "step": 3629 }, { "epoch": 0.27324563879636427, "grad_norm": 7.215233325958252, "learning_rate": 8.534025038327267e-05, "loss": 2.6492, "step": 3630 }, { "epoch": 0.2733209130770244, "grad_norm": 3.9133596420288086, "learning_rate": 8.533162539378861e-05, "loss": 2.0271, "step": 3631 }, { "epoch": 0.27339618735768456, "grad_norm": 5.657726287841797, "learning_rate": 8.53229983039408e-05, "loss": 1.6412, "step": 3632 }, { "epoch": 0.27347146163834474, "grad_norm": 5.177445411682129, "learning_rate": 8.531436911424209e-05, "loss": 2.0748, "step": 3633 }, { "epoch": 0.27354673591900486, "grad_norm": 4.1661696434021, "learning_rate": 8.530573782520546e-05, "loss": 2.3361, "step": 3634 }, { "epoch": 0.27362201019966503, "grad_norm": 6.5816874504089355, "learning_rate": 8.529710443734402e-05, "loss": 2.3101, "step": 3635 }, { "epoch": 0.2736972844803252, "grad_norm": 4.619372367858887, "learning_rate": 8.5288468951171e-05, "loss": 1.917, "step": 3636 }, { "epoch": 0.27377255876098533, "grad_norm": 3.834791660308838, "learning_rate": 8.527983136719977e-05, "loss": 1.8601, "step": 3637 }, { "epoch": 0.2738478330416455, "grad_norm": 6.1451215744018555, "learning_rate": 8.527119168594377e-05, "loss": 2.1909, "step": 3638 }, { "epoch": 0.2739231073223056, "grad_norm": 6.895201206207275, "learning_rate": 8.526254990791665e-05, "loss": 2.3125, "step": 3639 }, { "epoch": 0.2739983816029658, "grad_norm": 4.202020168304443, "learning_rate": 8.52539060336321e-05, "loss": 2.054, "step": 3640 }, { "epoch": 0.274073655883626, "grad_norm": 5.718653202056885, "learning_rate": 8.524526006360401e-05, "loss": 2.4088, "step": 3641 }, { "epoch": 0.2741489301642861, "grad_norm": 5.545661449432373, "learning_rate": 8.523661199834631e-05, "loss": 2.1962, "step": 3642 }, { "epoch": 0.2742242044449463, "grad_norm": 3.9309816360473633, "learning_rate": 8.522796183837316e-05, "loss": 1.969, "step": 3643 }, { "epoch": 0.27429947872560645, "grad_norm": 5.553758144378662, "learning_rate": 8.521930958419874e-05, "loss": 2.3714, "step": 3644 }, { "epoch": 0.27437475300626657, "grad_norm": 4.314330101013184, "learning_rate": 8.521065523633744e-05, "loss": 2.1054, "step": 3645 }, { "epoch": 0.27445002728692675, "grad_norm": 5.760187149047852, "learning_rate": 8.52019987953037e-05, "loss": 1.8984, "step": 3646 }, { "epoch": 0.2745253015675869, "grad_norm": 4.427959442138672, "learning_rate": 8.519334026161215e-05, "loss": 1.9055, "step": 3647 }, { "epoch": 0.27460057584824704, "grad_norm": 4.975903511047363, "learning_rate": 8.51846796357775e-05, "loss": 1.8556, "step": 3648 }, { "epoch": 0.2746758501289072, "grad_norm": 4.018494129180908, "learning_rate": 8.517601691831461e-05, "loss": 1.9764, "step": 3649 }, { "epoch": 0.27475112440956734, "grad_norm": 4.055217266082764, "learning_rate": 8.516735210973841e-05, "loss": 2.4727, "step": 3650 }, { "epoch": 0.2748263986902275, "grad_norm": 4.514999866485596, "learning_rate": 8.515868521056407e-05, "loss": 2.041, "step": 3651 }, { "epoch": 0.2749016729708877, "grad_norm": 5.443676948547363, "learning_rate": 8.515001622130676e-05, "loss": 2.4615, "step": 3652 }, { "epoch": 0.2749769472515478, "grad_norm": 3.3675954341888428, "learning_rate": 8.514134514248185e-05, "loss": 2.0067, "step": 3653 }, { "epoch": 0.275052221532208, "grad_norm": 4.267234802246094, "learning_rate": 8.51326719746048e-05, "loss": 2.1175, "step": 3654 }, { "epoch": 0.27512749581286816, "grad_norm": 4.728448867797852, "learning_rate": 8.51239967181912e-05, "loss": 1.9433, "step": 3655 }, { "epoch": 0.2752027700935283, "grad_norm": 3.8933749198913574, "learning_rate": 8.511531937375678e-05, "loss": 2.2744, "step": 3656 }, { "epoch": 0.27527804437418846, "grad_norm": 4.720108985900879, "learning_rate": 8.510663994181739e-05, "loss": 2.1416, "step": 3657 }, { "epoch": 0.2753533186548486, "grad_norm": 4.744030952453613, "learning_rate": 8.509795842288897e-05, "loss": 2.0895, "step": 3658 }, { "epoch": 0.27542859293550875, "grad_norm": 4.030458450317383, "learning_rate": 8.508927481748765e-05, "loss": 2.0314, "step": 3659 }, { "epoch": 0.27550386721616893, "grad_norm": 4.617257595062256, "learning_rate": 8.508058912612961e-05, "loss": 2.7968, "step": 3660 }, { "epoch": 0.27557914149682905, "grad_norm": 5.676261901855469, "learning_rate": 8.507190134933122e-05, "loss": 2.2711, "step": 3661 }, { "epoch": 0.2756544157774892, "grad_norm": 4.699383735656738, "learning_rate": 8.506321148760891e-05, "loss": 2.1165, "step": 3662 }, { "epoch": 0.2757296900581494, "grad_norm": 4.686986923217773, "learning_rate": 8.50545195414793e-05, "loss": 2.1272, "step": 3663 }, { "epoch": 0.2758049643388095, "grad_norm": 7.716921806335449, "learning_rate": 8.504582551145907e-05, "loss": 2.843, "step": 3664 }, { "epoch": 0.2758802386194697, "grad_norm": 7.023846626281738, "learning_rate": 8.503712939806509e-05, "loss": 2.061, "step": 3665 }, { "epoch": 0.2759555129001299, "grad_norm": 4.08076810836792, "learning_rate": 8.50284312018143e-05, "loss": 1.8785, "step": 3666 }, { "epoch": 0.27603078718079, "grad_norm": 6.744572639465332, "learning_rate": 8.501973092322377e-05, "loss": 2.2279, "step": 3667 }, { "epoch": 0.27610606146145017, "grad_norm": 5.839383125305176, "learning_rate": 8.501102856281072e-05, "loss": 1.923, "step": 3668 }, { "epoch": 0.2761813357421103, "grad_norm": 5.463138103485107, "learning_rate": 8.500232412109248e-05, "loss": 1.9304, "step": 3669 }, { "epoch": 0.27625661002277047, "grad_norm": 4.074203968048096, "learning_rate": 8.499361759858651e-05, "loss": 2.0525, "step": 3670 }, { "epoch": 0.27633188430343064, "grad_norm": 4.893757343292236, "learning_rate": 8.498490899581037e-05, "loss": 2.1287, "step": 3671 }, { "epoch": 0.27640715858409076, "grad_norm": 3.9854490756988525, "learning_rate": 8.497619831328178e-05, "loss": 2.0592, "step": 3672 }, { "epoch": 0.27648243286475094, "grad_norm": 4.770266532897949, "learning_rate": 8.496748555151855e-05, "loss": 1.9451, "step": 3673 }, { "epoch": 0.2765577071454111, "grad_norm": 4.641678333282471, "learning_rate": 8.495877071103864e-05, "loss": 2.0983, "step": 3674 }, { "epoch": 0.27663298142607123, "grad_norm": 3.688652515411377, "learning_rate": 8.495005379236012e-05, "loss": 2.0546, "step": 3675 }, { "epoch": 0.2767082557067314, "grad_norm": 4.314660549163818, "learning_rate": 8.494133479600121e-05, "loss": 2.5765, "step": 3676 }, { "epoch": 0.2767835299873916, "grad_norm": 5.093321323394775, "learning_rate": 8.493261372248018e-05, "loss": 1.8994, "step": 3677 }, { "epoch": 0.2768588042680517, "grad_norm": 6.219925403594971, "learning_rate": 8.492389057231549e-05, "loss": 2.0182, "step": 3678 }, { "epoch": 0.2769340785487119, "grad_norm": 4.212336540222168, "learning_rate": 8.491516534602573e-05, "loss": 2.0456, "step": 3679 }, { "epoch": 0.277009352829372, "grad_norm": 5.203505992889404, "learning_rate": 8.490643804412956e-05, "loss": 1.9374, "step": 3680 }, { "epoch": 0.2770846271100322, "grad_norm": 4.704999923706055, "learning_rate": 8.489770866714582e-05, "loss": 1.6675, "step": 3681 }, { "epoch": 0.27715990139069235, "grad_norm": 5.450031280517578, "learning_rate": 8.488897721559343e-05, "loss": 2.1451, "step": 3682 }, { "epoch": 0.2772351756713525, "grad_norm": 6.107594966888428, "learning_rate": 8.488024368999144e-05, "loss": 2.1799, "step": 3683 }, { "epoch": 0.27731044995201265, "grad_norm": 6.630837440490723, "learning_rate": 8.487150809085907e-05, "loss": 2.6763, "step": 3684 }, { "epoch": 0.2773857242326728, "grad_norm": 4.827672481536865, "learning_rate": 8.486277041871557e-05, "loss": 1.8602, "step": 3685 }, { "epoch": 0.27746099851333295, "grad_norm": 3.5622708797454834, "learning_rate": 8.485403067408042e-05, "loss": 1.9435, "step": 3686 }, { "epoch": 0.2775362727939931, "grad_norm": 4.234933376312256, "learning_rate": 8.484528885747316e-05, "loss": 1.861, "step": 3687 }, { "epoch": 0.27761154707465324, "grad_norm": 4.909501552581787, "learning_rate": 8.483654496941344e-05, "loss": 1.9997, "step": 3688 }, { "epoch": 0.2776868213553134, "grad_norm": 5.760375499725342, "learning_rate": 8.482779901042108e-05, "loss": 2.3467, "step": 3689 }, { "epoch": 0.2777620956359736, "grad_norm": 6.31289529800415, "learning_rate": 8.481905098101601e-05, "loss": 2.2078, "step": 3690 }, { "epoch": 0.2778373699166337, "grad_norm": 4.647454261779785, "learning_rate": 8.481030088171827e-05, "loss": 1.9932, "step": 3691 }, { "epoch": 0.2779126441972939, "grad_norm": 5.118434906005859, "learning_rate": 8.480154871304802e-05, "loss": 2.2114, "step": 3692 }, { "epoch": 0.27798791847795407, "grad_norm": 5.353497505187988, "learning_rate": 8.479279447552557e-05, "loss": 2.077, "step": 3693 }, { "epoch": 0.2780631927586142, "grad_norm": 5.585944652557373, "learning_rate": 8.47840381696713e-05, "loss": 2.3376, "step": 3694 }, { "epoch": 0.27813846703927436, "grad_norm": 4.524191379547119, "learning_rate": 8.477527979600577e-05, "loss": 2.2071, "step": 3695 }, { "epoch": 0.27821374131993454, "grad_norm": 4.1141581535339355, "learning_rate": 8.476651935504964e-05, "loss": 1.8742, "step": 3696 }, { "epoch": 0.27828901560059466, "grad_norm": 4.320529937744141, "learning_rate": 8.475775684732369e-05, "loss": 1.821, "step": 3697 }, { "epoch": 0.27836428988125483, "grad_norm": 6.32377290725708, "learning_rate": 8.474899227334884e-05, "loss": 2.3346, "step": 3698 }, { "epoch": 0.27843956416191495, "grad_norm": 4.690769672393799, "learning_rate": 8.47402256336461e-05, "loss": 2.014, "step": 3699 }, { "epoch": 0.27851483844257513, "grad_norm": 4.876758098602295, "learning_rate": 8.473145692873661e-05, "loss": 1.8336, "step": 3700 }, { "epoch": 0.2785901127232353, "grad_norm": 3.833322763442993, "learning_rate": 8.472268615914168e-05, "loss": 1.9416, "step": 3701 }, { "epoch": 0.2786653870038954, "grad_norm": 3.9122402667999268, "learning_rate": 8.471391332538268e-05, "loss": 2.0796, "step": 3702 }, { "epoch": 0.2787406612845556, "grad_norm": 4.947221279144287, "learning_rate": 8.470513842798114e-05, "loss": 2.0523, "step": 3703 }, { "epoch": 0.2788159355652158, "grad_norm": 5.889840126037598, "learning_rate": 8.46963614674587e-05, "loss": 2.1409, "step": 3704 }, { "epoch": 0.2788912098458759, "grad_norm": 6.707186698913574, "learning_rate": 8.468758244433712e-05, "loss": 2.2097, "step": 3705 }, { "epoch": 0.2789664841265361, "grad_norm": 6.559521198272705, "learning_rate": 8.46788013591383e-05, "loss": 1.7735, "step": 3706 }, { "epoch": 0.2790417584071962, "grad_norm": 5.264425754547119, "learning_rate": 8.467001821238422e-05, "loss": 2.0271, "step": 3707 }, { "epoch": 0.27911703268785637, "grad_norm": 4.324186325073242, "learning_rate": 8.466123300459707e-05, "loss": 1.9381, "step": 3708 }, { "epoch": 0.27919230696851655, "grad_norm": 4.952320575714111, "learning_rate": 8.465244573629907e-05, "loss": 1.9165, "step": 3709 }, { "epoch": 0.27926758124917667, "grad_norm": 4.5556182861328125, "learning_rate": 8.464365640801259e-05, "loss": 2.3048, "step": 3710 }, { "epoch": 0.27934285552983684, "grad_norm": 4.639623641967773, "learning_rate": 8.463486502026015e-05, "loss": 2.4237, "step": 3711 }, { "epoch": 0.279418129810497, "grad_norm": 4.315785884857178, "learning_rate": 8.462607157356435e-05, "loss": 1.9636, "step": 3712 }, { "epoch": 0.27949340409115714, "grad_norm": 3.648529052734375, "learning_rate": 8.461727606844796e-05, "loss": 1.7965, "step": 3713 }, { "epoch": 0.2795686783718173, "grad_norm": 5.383721828460693, "learning_rate": 8.460847850543382e-05, "loss": 2.2302, "step": 3714 }, { "epoch": 0.2796439526524775, "grad_norm": 3.9807448387145996, "learning_rate": 8.459967888504494e-05, "loss": 2.054, "step": 3715 }, { "epoch": 0.2797192269331376, "grad_norm": 5.0536017417907715, "learning_rate": 8.459087720780443e-05, "loss": 2.2137, "step": 3716 }, { "epoch": 0.2797945012137978, "grad_norm": 5.559696197509766, "learning_rate": 8.458207347423554e-05, "loss": 2.1715, "step": 3717 }, { "epoch": 0.2798697754944579, "grad_norm": 4.65275239944458, "learning_rate": 8.457326768486159e-05, "loss": 2.2653, "step": 3718 }, { "epoch": 0.2799450497751181, "grad_norm": 5.178258895874023, "learning_rate": 8.456445984020607e-05, "loss": 1.9544, "step": 3719 }, { "epoch": 0.28002032405577826, "grad_norm": 4.327636241912842, "learning_rate": 8.455564994079261e-05, "loss": 2.1668, "step": 3720 }, { "epoch": 0.2800955983364384, "grad_norm": 4.348104476928711, "learning_rate": 8.45468379871449e-05, "loss": 1.9429, "step": 3721 }, { "epoch": 0.28017087261709855, "grad_norm": 5.069277286529541, "learning_rate": 8.453802397978681e-05, "loss": 2.3457, "step": 3722 }, { "epoch": 0.28024614689775873, "grad_norm": 4.621848106384277, "learning_rate": 8.452920791924228e-05, "loss": 2.2044, "step": 3723 }, { "epoch": 0.28032142117841885, "grad_norm": 5.038558483123779, "learning_rate": 8.452038980603543e-05, "loss": 2.4712, "step": 3724 }, { "epoch": 0.280396695459079, "grad_norm": 5.322809219360352, "learning_rate": 8.451156964069043e-05, "loss": 2.0299, "step": 3725 }, { "epoch": 0.2804719697397392, "grad_norm": 3.8125722408294678, "learning_rate": 8.450274742373167e-05, "loss": 2.2145, "step": 3726 }, { "epoch": 0.2805472440203993, "grad_norm": 5.403464317321777, "learning_rate": 8.449392315568356e-05, "loss": 2.3177, "step": 3727 }, { "epoch": 0.2806225183010595, "grad_norm": 4.7079854011535645, "learning_rate": 8.448509683707069e-05, "loss": 1.9227, "step": 3728 }, { "epoch": 0.2806977925817196, "grad_norm": 5.48919153213501, "learning_rate": 8.447626846841777e-05, "loss": 2.0462, "step": 3729 }, { "epoch": 0.2807730668623798, "grad_norm": 4.870682716369629, "learning_rate": 8.44674380502496e-05, "loss": 2.2351, "step": 3730 }, { "epoch": 0.28084834114303997, "grad_norm": 4.92102575302124, "learning_rate": 8.445860558309116e-05, "loss": 1.9376, "step": 3731 }, { "epoch": 0.2809236154237001, "grad_norm": 3.806340456008911, "learning_rate": 8.444977106746748e-05, "loss": 1.9041, "step": 3732 }, { "epoch": 0.28099888970436027, "grad_norm": 4.492342472076416, "learning_rate": 8.444093450390373e-05, "loss": 2.0126, "step": 3733 }, { "epoch": 0.28107416398502044, "grad_norm": 4.713959693908691, "learning_rate": 8.44320958929253e-05, "loss": 2.1837, "step": 3734 }, { "epoch": 0.28114943826568056, "grad_norm": 4.521111011505127, "learning_rate": 8.442325523505752e-05, "loss": 1.8736, "step": 3735 }, { "epoch": 0.28122471254634074, "grad_norm": 3.839892864227295, "learning_rate": 8.441441253082601e-05, "loss": 2.0659, "step": 3736 }, { "epoch": 0.28129998682700086, "grad_norm": 5.555484771728516, "learning_rate": 8.440556778075641e-05, "loss": 1.7145, "step": 3737 }, { "epoch": 0.28137526110766103, "grad_norm": 4.659602642059326, "learning_rate": 8.439672098537452e-05, "loss": 2.2435, "step": 3738 }, { "epoch": 0.2814505353883212, "grad_norm": 5.173230171203613, "learning_rate": 8.438787214520626e-05, "loss": 2.0285, "step": 3739 }, { "epoch": 0.28152580966898133, "grad_norm": 4.528423309326172, "learning_rate": 8.43790212607777e-05, "loss": 2.591, "step": 3740 }, { "epoch": 0.2816010839496415, "grad_norm": 5.994011878967285, "learning_rate": 8.437016833261496e-05, "loss": 1.8713, "step": 3741 }, { "epoch": 0.2816763582303017, "grad_norm": 4.154655456542969, "learning_rate": 8.436131336124432e-05, "loss": 2.0949, "step": 3742 }, { "epoch": 0.2817516325109618, "grad_norm": 4.845334529876709, "learning_rate": 8.43524563471922e-05, "loss": 1.9901, "step": 3743 }, { "epoch": 0.281826906791622, "grad_norm": 5.715048313140869, "learning_rate": 8.43435972909851e-05, "loss": 1.8887, "step": 3744 }, { "epoch": 0.28190218107228215, "grad_norm": 3.6951839923858643, "learning_rate": 8.433473619314972e-05, "loss": 1.9527, "step": 3745 }, { "epoch": 0.2819774553529423, "grad_norm": 4.321444034576416, "learning_rate": 8.432587305421278e-05, "loss": 1.9878, "step": 3746 }, { "epoch": 0.28205272963360245, "grad_norm": 4.282951354980469, "learning_rate": 8.431700787470118e-05, "loss": 2.0862, "step": 3747 }, { "epoch": 0.28212800391426257, "grad_norm": 5.607655048370361, "learning_rate": 8.43081406551419e-05, "loss": 2.1657, "step": 3748 }, { "epoch": 0.28220327819492275, "grad_norm": 5.064688682556152, "learning_rate": 8.429927139606213e-05, "loss": 2.1957, "step": 3749 }, { "epoch": 0.2822785524755829, "grad_norm": 5.386007308959961, "learning_rate": 8.429040009798908e-05, "loss": 2.0302, "step": 3750 }, { "epoch": 0.28235382675624304, "grad_norm": 6.051460266113281, "learning_rate": 8.428152676145013e-05, "loss": 2.2934, "step": 3751 }, { "epoch": 0.2824291010369032, "grad_norm": 5.103939056396484, "learning_rate": 8.427265138697279e-05, "loss": 2.291, "step": 3752 }, { "epoch": 0.2825043753175634, "grad_norm": 5.415882587432861, "learning_rate": 8.426377397508465e-05, "loss": 2.2452, "step": 3753 }, { "epoch": 0.2825796495982235, "grad_norm": 6.235812187194824, "learning_rate": 8.425489452631348e-05, "loss": 1.9409, "step": 3754 }, { "epoch": 0.2826549238788837, "grad_norm": 5.436208248138428, "learning_rate": 8.424601304118711e-05, "loss": 2.2726, "step": 3755 }, { "epoch": 0.2827301981595438, "grad_norm": 4.568819046020508, "learning_rate": 8.423712952023353e-05, "loss": 2.0822, "step": 3756 }, { "epoch": 0.282805472440204, "grad_norm": 4.734124183654785, "learning_rate": 8.422824396398082e-05, "loss": 1.9301, "step": 3757 }, { "epoch": 0.28288074672086416, "grad_norm": 5.557109355926514, "learning_rate": 8.421935637295724e-05, "loss": 1.9187, "step": 3758 }, { "epoch": 0.2829560210015243, "grad_norm": 4.806347846984863, "learning_rate": 8.42104667476911e-05, "loss": 2.0105, "step": 3759 }, { "epoch": 0.28303129528218446, "grad_norm": 7.374860763549805, "learning_rate": 8.420157508871087e-05, "loss": 2.0197, "step": 3760 }, { "epoch": 0.28310656956284463, "grad_norm": 5.27902889251709, "learning_rate": 8.419268139654514e-05, "loss": 2.4943, "step": 3761 }, { "epoch": 0.28318184384350475, "grad_norm": 4.831271648406982, "learning_rate": 8.418378567172262e-05, "loss": 2.3162, "step": 3762 }, { "epoch": 0.28325711812416493, "grad_norm": 4.960517883300781, "learning_rate": 8.41748879147721e-05, "loss": 2.2269, "step": 3763 }, { "epoch": 0.2833323924048251, "grad_norm": 6.046313285827637, "learning_rate": 8.416598812622259e-05, "loss": 1.7854, "step": 3764 }, { "epoch": 0.2834076666854852, "grad_norm": 5.182833671569824, "learning_rate": 8.415708630660309e-05, "loss": 2.4509, "step": 3765 }, { "epoch": 0.2834829409661454, "grad_norm": 4.593726634979248, "learning_rate": 8.414818245644283e-05, "loss": 1.7849, "step": 3766 }, { "epoch": 0.2835582152468055, "grad_norm": 5.483936786651611, "learning_rate": 8.41392765762711e-05, "loss": 2.188, "step": 3767 }, { "epoch": 0.2836334895274657, "grad_norm": 3.8305039405822754, "learning_rate": 8.413036866661733e-05, "loss": 1.6908, "step": 3768 }, { "epoch": 0.2837087638081259, "grad_norm": 4.9925432205200195, "learning_rate": 8.412145872801107e-05, "loss": 2.1793, "step": 3769 }, { "epoch": 0.283784038088786, "grad_norm": 4.195642471313477, "learning_rate": 8.4112546760982e-05, "loss": 1.9961, "step": 3770 }, { "epoch": 0.28385931236944617, "grad_norm": 4.224948406219482, "learning_rate": 8.410363276605992e-05, "loss": 2.0325, "step": 3771 }, { "epoch": 0.28393458665010635, "grad_norm": 3.92268443107605, "learning_rate": 8.409471674377471e-05, "loss": 2.0449, "step": 3772 }, { "epoch": 0.28400986093076647, "grad_norm": 10.248950004577637, "learning_rate": 8.408579869465641e-05, "loss": 2.6393, "step": 3773 }, { "epoch": 0.28408513521142664, "grad_norm": 5.0474467277526855, "learning_rate": 8.40768786192352e-05, "loss": 2.6215, "step": 3774 }, { "epoch": 0.2841604094920868, "grad_norm": 6.735265254974365, "learning_rate": 8.406795651804132e-05, "loss": 1.957, "step": 3775 }, { "epoch": 0.28423568377274694, "grad_norm": 6.065521240234375, "learning_rate": 8.40590323916052e-05, "loss": 1.916, "step": 3776 }, { "epoch": 0.2843109580534071, "grad_norm": 4.69582462310791, "learning_rate": 8.40501062404573e-05, "loss": 2.258, "step": 3777 }, { "epoch": 0.28438623233406723, "grad_norm": 3.578555107116699, "learning_rate": 8.40411780651283e-05, "loss": 2.1657, "step": 3778 }, { "epoch": 0.2844615066147274, "grad_norm": 4.259191989898682, "learning_rate": 8.403224786614894e-05, "loss": 2.0313, "step": 3779 }, { "epoch": 0.2845367808953876, "grad_norm": 3.659712076187134, "learning_rate": 8.402331564405009e-05, "loss": 2.2301, "step": 3780 }, { "epoch": 0.2846120551760477, "grad_norm": 4.338321685791016, "learning_rate": 8.401438139936275e-05, "loss": 1.9096, "step": 3781 }, { "epoch": 0.2846873294567079, "grad_norm": 5.078385353088379, "learning_rate": 8.400544513261803e-05, "loss": 1.8121, "step": 3782 }, { "epoch": 0.28476260373736806, "grad_norm": 7.463127613067627, "learning_rate": 8.399650684434719e-05, "loss": 2.3373, "step": 3783 }, { "epoch": 0.2848378780180282, "grad_norm": 4.69392728805542, "learning_rate": 8.398756653508154e-05, "loss": 2.2265, "step": 3784 }, { "epoch": 0.28491315229868835, "grad_norm": 6.1369733810424805, "learning_rate": 8.397862420535258e-05, "loss": 2.0806, "step": 3785 }, { "epoch": 0.2849884265793485, "grad_norm": 5.64315938949585, "learning_rate": 8.396967985569192e-05, "loss": 2.4179, "step": 3786 }, { "epoch": 0.28506370086000865, "grad_norm": 4.775173664093018, "learning_rate": 8.396073348663126e-05, "loss": 1.7027, "step": 3787 }, { "epoch": 0.2851389751406688, "grad_norm": 3.9329686164855957, "learning_rate": 8.395178509870244e-05, "loss": 2.1827, "step": 3788 }, { "epoch": 0.28521424942132895, "grad_norm": 3.681978940963745, "learning_rate": 8.39428346924374e-05, "loss": 1.6844, "step": 3789 }, { "epoch": 0.2852895237019891, "grad_norm": 4.753894329071045, "learning_rate": 8.393388226836825e-05, "loss": 1.8726, "step": 3790 }, { "epoch": 0.2853647979826493, "grad_norm": 4.579850196838379, "learning_rate": 8.392492782702717e-05, "loss": 2.0369, "step": 3791 }, { "epoch": 0.2854400722633094, "grad_norm": 6.411602020263672, "learning_rate": 8.391597136894645e-05, "loss": 2.2986, "step": 3792 }, { "epoch": 0.2855153465439696, "grad_norm": 6.695382595062256, "learning_rate": 8.390701289465856e-05, "loss": 2.0889, "step": 3793 }, { "epoch": 0.28559062082462977, "grad_norm": 7.453019142150879, "learning_rate": 8.389805240469604e-05, "loss": 2.0118, "step": 3794 }, { "epoch": 0.2856658951052899, "grad_norm": 6.5703206062316895, "learning_rate": 8.388908989959156e-05, "loss": 1.8328, "step": 3795 }, { "epoch": 0.28574116938595007, "grad_norm": 4.400692939758301, "learning_rate": 8.388012537987795e-05, "loss": 1.791, "step": 3796 }, { "epoch": 0.2858164436666102, "grad_norm": 4.215025901794434, "learning_rate": 8.387115884608808e-05, "loss": 2.4272, "step": 3797 }, { "epoch": 0.28589171794727036, "grad_norm": 4.3364338874816895, "learning_rate": 8.386219029875502e-05, "loss": 2.0135, "step": 3798 }, { "epoch": 0.28596699222793054, "grad_norm": 4.847531318664551, "learning_rate": 8.385321973841189e-05, "loss": 1.9906, "step": 3799 }, { "epoch": 0.28604226650859066, "grad_norm": 4.136904716491699, "learning_rate": 8.384424716559198e-05, "loss": 2.1823, "step": 3800 }, { "epoch": 0.28611754078925083, "grad_norm": 4.262138366699219, "learning_rate": 8.38352725808287e-05, "loss": 2.1362, "step": 3801 }, { "epoch": 0.286192815069911, "grad_norm": 3.498966932296753, "learning_rate": 8.382629598465555e-05, "loss": 1.8396, "step": 3802 }, { "epoch": 0.28626808935057113, "grad_norm": 4.954427242279053, "learning_rate": 8.381731737760614e-05, "loss": 1.9453, "step": 3803 }, { "epoch": 0.2863433636312313, "grad_norm": 4.326083660125732, "learning_rate": 8.380833676021427e-05, "loss": 1.7592, "step": 3804 }, { "epoch": 0.2864186379118914, "grad_norm": 5.140336036682129, "learning_rate": 8.379935413301378e-05, "loss": 2.3247, "step": 3805 }, { "epoch": 0.2864939121925516, "grad_norm": 3.8094191551208496, "learning_rate": 8.379036949653867e-05, "loss": 2.1055, "step": 3806 }, { "epoch": 0.2865691864732118, "grad_norm": 4.065224647521973, "learning_rate": 8.378138285132305e-05, "loss": 2.0397, "step": 3807 }, { "epoch": 0.2866444607538719, "grad_norm": 5.094583034515381, "learning_rate": 8.377239419790112e-05, "loss": 2.0621, "step": 3808 }, { "epoch": 0.2867197350345321, "grad_norm": 4.215206146240234, "learning_rate": 8.37634035368073e-05, "loss": 2.0845, "step": 3809 }, { "epoch": 0.28679500931519225, "grad_norm": 5.886216163635254, "learning_rate": 8.3754410868576e-05, "loss": 2.1409, "step": 3810 }, { "epoch": 0.28687028359585237, "grad_norm": 4.233997344970703, "learning_rate": 8.374541619374182e-05, "loss": 2.0214, "step": 3811 }, { "epoch": 0.28694555787651255, "grad_norm": 4.8626909255981445, "learning_rate": 8.373641951283948e-05, "loss": 1.8788, "step": 3812 }, { "epoch": 0.2870208321571727, "grad_norm": 3.4109678268432617, "learning_rate": 8.37274208264038e-05, "loss": 1.9001, "step": 3813 }, { "epoch": 0.28709610643783284, "grad_norm": 4.922783374786377, "learning_rate": 8.371842013496973e-05, "loss": 1.6678, "step": 3814 }, { "epoch": 0.287171380718493, "grad_norm": 5.090536117553711, "learning_rate": 8.370941743907233e-05, "loss": 2.2334, "step": 3815 }, { "epoch": 0.28724665499915314, "grad_norm": 4.591648578643799, "learning_rate": 8.370041273924678e-05, "loss": 2.0385, "step": 3816 }, { "epoch": 0.2873219292798133, "grad_norm": 4.662242412567139, "learning_rate": 8.36914060360284e-05, "loss": 1.9126, "step": 3817 }, { "epoch": 0.2873972035604735, "grad_norm": 4.626654148101807, "learning_rate": 8.36823973299526e-05, "loss": 2.0897, "step": 3818 }, { "epoch": 0.2874724778411336, "grad_norm": 3.912527322769165, "learning_rate": 8.367338662155493e-05, "loss": 2.136, "step": 3819 }, { "epoch": 0.2875477521217938, "grad_norm": 4.67393159866333, "learning_rate": 8.366437391137103e-05, "loss": 2.1881, "step": 3820 }, { "epoch": 0.28762302640245396, "grad_norm": 4.673575401306152, "learning_rate": 8.36553591999367e-05, "loss": 2.0971, "step": 3821 }, { "epoch": 0.2876983006831141, "grad_norm": 4.837671279907227, "learning_rate": 8.364634248778784e-05, "loss": 2.1025, "step": 3822 }, { "epoch": 0.28777357496377426, "grad_norm": 4.0610785484313965, "learning_rate": 8.363732377546045e-05, "loss": 2.1179, "step": 3823 }, { "epoch": 0.28784884924443443, "grad_norm": 4.773620128631592, "learning_rate": 8.36283030634907e-05, "loss": 2.0521, "step": 3824 }, { "epoch": 0.28792412352509456, "grad_norm": 5.910046100616455, "learning_rate": 8.36192803524148e-05, "loss": 2.0464, "step": 3825 }, { "epoch": 0.28799939780575473, "grad_norm": 4.970592975616455, "learning_rate": 8.361025564276917e-05, "loss": 2.2635, "step": 3826 }, { "epoch": 0.28807467208641485, "grad_norm": 5.785548686981201, "learning_rate": 8.360122893509026e-05, "loss": 2.0841, "step": 3827 }, { "epoch": 0.288149946367075, "grad_norm": 4.501217842102051, "learning_rate": 8.35922002299147e-05, "loss": 2.3656, "step": 3828 }, { "epoch": 0.2882252206477352, "grad_norm": 4.644845485687256, "learning_rate": 8.358316952777925e-05, "loss": 2.2716, "step": 3829 }, { "epoch": 0.2883004949283953, "grad_norm": 4.391740322113037, "learning_rate": 8.357413682922071e-05, "loss": 1.7138, "step": 3830 }, { "epoch": 0.2883757692090555, "grad_norm": 5.401401042938232, "learning_rate": 8.356510213477606e-05, "loss": 2.0579, "step": 3831 }, { "epoch": 0.2884510434897157, "grad_norm": 5.10807466506958, "learning_rate": 8.355606544498243e-05, "loss": 2.3897, "step": 3832 }, { "epoch": 0.2885263177703758, "grad_norm": 5.3836493492126465, "learning_rate": 8.354702676037697e-05, "loss": 2.1215, "step": 3833 }, { "epoch": 0.28860159205103597, "grad_norm": 3.7758097648620605, "learning_rate": 8.353798608149704e-05, "loss": 1.7526, "step": 3834 }, { "epoch": 0.2886768663316961, "grad_norm": 6.716507434844971, "learning_rate": 8.352894340888005e-05, "loss": 2.0362, "step": 3835 }, { "epoch": 0.28875214061235627, "grad_norm": 5.690709590911865, "learning_rate": 8.351989874306358e-05, "loss": 1.9573, "step": 3836 }, { "epoch": 0.28882741489301644, "grad_norm": 4.09605598449707, "learning_rate": 8.351085208458534e-05, "loss": 2.1092, "step": 3837 }, { "epoch": 0.28890268917367656, "grad_norm": 4.334811210632324, "learning_rate": 8.350180343398307e-05, "loss": 2.2697, "step": 3838 }, { "epoch": 0.28897796345433674, "grad_norm": 5.101006507873535, "learning_rate": 8.349275279179471e-05, "loss": 2.0569, "step": 3839 }, { "epoch": 0.2890532377349969, "grad_norm": 3.7114970684051514, "learning_rate": 8.348370015855831e-05, "loss": 2.1174, "step": 3840 }, { "epoch": 0.28912851201565704, "grad_norm": 4.862429618835449, "learning_rate": 8.347464553481201e-05, "loss": 2.0633, "step": 3841 }, { "epoch": 0.2892037862963172, "grad_norm": 4.648384094238281, "learning_rate": 8.346558892109409e-05, "loss": 2.0382, "step": 3842 }, { "epoch": 0.2892790605769774, "grad_norm": 3.9312198162078857, "learning_rate": 8.345653031794292e-05, "loss": 1.9585, "step": 3843 }, { "epoch": 0.2893543348576375, "grad_norm": 4.682003498077393, "learning_rate": 8.344746972589702e-05, "loss": 1.9414, "step": 3844 }, { "epoch": 0.2894296091382977, "grad_norm": 4.717390060424805, "learning_rate": 8.343840714549502e-05, "loss": 1.9941, "step": 3845 }, { "epoch": 0.2895048834189578, "grad_norm": 8.018993377685547, "learning_rate": 8.342934257727566e-05, "loss": 2.0114, "step": 3846 }, { "epoch": 0.289580157699618, "grad_norm": 4.741693019866943, "learning_rate": 8.342027602177779e-05, "loss": 1.5556, "step": 3847 }, { "epoch": 0.28965543198027816, "grad_norm": 5.723662376403809, "learning_rate": 8.341120747954043e-05, "loss": 2.2284, "step": 3848 }, { "epoch": 0.2897307062609383, "grad_norm": 3.611459255218506, "learning_rate": 8.340213695110264e-05, "loss": 2.0701, "step": 3849 }, { "epoch": 0.28980598054159845, "grad_norm": 4.317535400390625, "learning_rate": 8.339306443700366e-05, "loss": 2.0111, "step": 3850 }, { "epoch": 0.2898812548222586, "grad_norm": 4.843621253967285, "learning_rate": 8.33839899377828e-05, "loss": 2.1514, "step": 3851 }, { "epoch": 0.28995652910291875, "grad_norm": 5.786384105682373, "learning_rate": 8.337491345397953e-05, "loss": 2.1834, "step": 3852 }, { "epoch": 0.2900318033835789, "grad_norm": 5.805883884429932, "learning_rate": 8.336583498613342e-05, "loss": 2.3418, "step": 3853 }, { "epoch": 0.29010707766423904, "grad_norm": 4.941905975341797, "learning_rate": 8.335675453478416e-05, "loss": 2.0093, "step": 3854 }, { "epoch": 0.2901823519448992, "grad_norm": 4.880833148956299, "learning_rate": 8.334767210047155e-05, "loss": 2.1342, "step": 3855 }, { "epoch": 0.2902576262255594, "grad_norm": 5.400149822235107, "learning_rate": 8.333858768373552e-05, "loss": 2.247, "step": 3856 }, { "epoch": 0.2903329005062195, "grad_norm": 4.728665828704834, "learning_rate": 8.332950128511612e-05, "loss": 1.9277, "step": 3857 }, { "epoch": 0.2904081747868797, "grad_norm": 4.006535530090332, "learning_rate": 8.332041290515349e-05, "loss": 2.048, "step": 3858 }, { "epoch": 0.29048344906753987, "grad_norm": 5.73699951171875, "learning_rate": 8.331132254438793e-05, "loss": 2.4664, "step": 3859 }, { "epoch": 0.2905587233482, "grad_norm": 4.849059104919434, "learning_rate": 8.330223020335982e-05, "loss": 1.9854, "step": 3860 }, { "epoch": 0.29063399762886016, "grad_norm": 4.367104530334473, "learning_rate": 8.329313588260968e-05, "loss": 2.075, "step": 3861 }, { "epoch": 0.29070927190952034, "grad_norm": 6.0469560623168945, "learning_rate": 8.328403958267814e-05, "loss": 1.9521, "step": 3862 }, { "epoch": 0.29078454619018046, "grad_norm": 4.310194492340088, "learning_rate": 8.327494130410595e-05, "loss": 1.9982, "step": 3863 }, { "epoch": 0.29085982047084064, "grad_norm": 5.066767692565918, "learning_rate": 8.326584104743398e-05, "loss": 1.9326, "step": 3864 }, { "epoch": 0.29093509475150076, "grad_norm": 6.129978656768799, "learning_rate": 8.325673881320319e-05, "loss": 1.8982, "step": 3865 }, { "epoch": 0.29101036903216093, "grad_norm": 4.554304599761963, "learning_rate": 8.324763460195472e-05, "loss": 1.9654, "step": 3866 }, { "epoch": 0.2910856433128211, "grad_norm": 4.857050895690918, "learning_rate": 8.323852841422976e-05, "loss": 2.1171, "step": 3867 }, { "epoch": 0.2911609175934812, "grad_norm": 5.321245193481445, "learning_rate": 8.322942025056968e-05, "loss": 2.0208, "step": 3868 }, { "epoch": 0.2912361918741414, "grad_norm": 5.192969799041748, "learning_rate": 8.32203101115159e-05, "loss": 2.2445, "step": 3869 }, { "epoch": 0.2913114661548016, "grad_norm": 7.29888916015625, "learning_rate": 8.321119799760999e-05, "loss": 2.1943, "step": 3870 }, { "epoch": 0.2913867404354617, "grad_norm": 5.519054412841797, "learning_rate": 8.320208390939368e-05, "loss": 2.2425, "step": 3871 }, { "epoch": 0.2914620147161219, "grad_norm": 5.460089683532715, "learning_rate": 8.319296784740873e-05, "loss": 2.0266, "step": 3872 }, { "epoch": 0.29153728899678205, "grad_norm": 6.9658203125, "learning_rate": 8.318384981219707e-05, "loss": 1.7287, "step": 3873 }, { "epoch": 0.29161256327744217, "grad_norm": 4.43731689453125, "learning_rate": 8.317472980430079e-05, "loss": 1.8946, "step": 3874 }, { "epoch": 0.29168783755810235, "grad_norm": 3.5282974243164062, "learning_rate": 8.316560782426199e-05, "loss": 1.8885, "step": 3875 }, { "epoch": 0.29176311183876247, "grad_norm": 3.992851734161377, "learning_rate": 8.315648387262296e-05, "loss": 1.8385, "step": 3876 }, { "epoch": 0.29183838611942264, "grad_norm": 4.900772571563721, "learning_rate": 8.314735794992612e-05, "loss": 2.0619, "step": 3877 }, { "epoch": 0.2919136604000828, "grad_norm": 4.7528977394104, "learning_rate": 8.313823005671396e-05, "loss": 2.067, "step": 3878 }, { "epoch": 0.29198893468074294, "grad_norm": 4.103478908538818, "learning_rate": 8.312910019352909e-05, "loss": 2.0187, "step": 3879 }, { "epoch": 0.2920642089614031, "grad_norm": 4.086658954620361, "learning_rate": 8.311996836091427e-05, "loss": 2.0212, "step": 3880 }, { "epoch": 0.2921394832420633, "grad_norm": 3.4795148372650146, "learning_rate": 8.311083455941238e-05, "loss": 1.9303, "step": 3881 }, { "epoch": 0.2922147575227234, "grad_norm": 3.36259388923645, "learning_rate": 8.310169878956638e-05, "loss": 1.8607, "step": 3882 }, { "epoch": 0.2922900318033836, "grad_norm": 3.9996771812438965, "learning_rate": 8.309256105191934e-05, "loss": 2.0218, "step": 3883 }, { "epoch": 0.2923653060840437, "grad_norm": 4.0418009757995605, "learning_rate": 8.308342134701452e-05, "loss": 2.3364, "step": 3884 }, { "epoch": 0.2924405803647039, "grad_norm": 3.996657609939575, "learning_rate": 8.307427967539523e-05, "loss": 2.1476, "step": 3885 }, { "epoch": 0.29251585464536406, "grad_norm": 3.3307595252990723, "learning_rate": 8.30651360376049e-05, "loss": 2.1576, "step": 3886 }, { "epoch": 0.2925911289260242, "grad_norm": 3.8001067638397217, "learning_rate": 8.305599043418712e-05, "loss": 2.344, "step": 3887 }, { "epoch": 0.29266640320668436, "grad_norm": 3.9414944648742676, "learning_rate": 8.304684286568556e-05, "loss": 2.0307, "step": 3888 }, { "epoch": 0.29274167748734453, "grad_norm": 5.2964348793029785, "learning_rate": 8.303769333264402e-05, "loss": 1.8305, "step": 3889 }, { "epoch": 0.29281695176800465, "grad_norm": 4.15656852722168, "learning_rate": 8.302854183560639e-05, "loss": 2.107, "step": 3890 }, { "epoch": 0.2928922260486648, "grad_norm": 5.4341325759887695, "learning_rate": 8.30193883751167e-05, "loss": 1.9815, "step": 3891 }, { "epoch": 0.292967500329325, "grad_norm": 3.7687902450561523, "learning_rate": 8.301023295171916e-05, "loss": 2.0175, "step": 3892 }, { "epoch": 0.2930427746099851, "grad_norm": 4.858816623687744, "learning_rate": 8.300107556595796e-05, "loss": 2.3404, "step": 3893 }, { "epoch": 0.2931180488906453, "grad_norm": 5.89325475692749, "learning_rate": 8.299191621837751e-05, "loss": 2.329, "step": 3894 }, { "epoch": 0.2931933231713054, "grad_norm": 4.135165691375732, "learning_rate": 8.298275490952232e-05, "loss": 1.7683, "step": 3895 }, { "epoch": 0.2932685974519656, "grad_norm": 3.811086654663086, "learning_rate": 8.297359163993697e-05, "loss": 1.8282, "step": 3896 }, { "epoch": 0.29334387173262577, "grad_norm": 5.477607727050781, "learning_rate": 8.296442641016622e-05, "loss": 2.4105, "step": 3897 }, { "epoch": 0.2934191460132859, "grad_norm": 5.256962776184082, "learning_rate": 8.29552592207549e-05, "loss": 1.8768, "step": 3898 }, { "epoch": 0.29349442029394607, "grad_norm": 5.292032241821289, "learning_rate": 8.294609007224797e-05, "loss": 2.0916, "step": 3899 }, { "epoch": 0.29356969457460624, "grad_norm": 5.821744918823242, "learning_rate": 8.293691896519053e-05, "loss": 1.9116, "step": 3900 }, { "epoch": 0.29364496885526636, "grad_norm": 5.460099220275879, "learning_rate": 8.292774590012777e-05, "loss": 2.1305, "step": 3901 }, { "epoch": 0.29372024313592654, "grad_norm": 4.575508117675781, "learning_rate": 8.291857087760499e-05, "loss": 2.1291, "step": 3902 }, { "epoch": 0.29379551741658666, "grad_norm": 5.542977333068848, "learning_rate": 8.290939389816763e-05, "loss": 2.0904, "step": 3903 }, { "epoch": 0.29387079169724684, "grad_norm": 5.345617771148682, "learning_rate": 8.290021496236122e-05, "loss": 1.8152, "step": 3904 }, { "epoch": 0.293946065977907, "grad_norm": 4.5423784255981445, "learning_rate": 8.289103407073143e-05, "loss": 1.9969, "step": 3905 }, { "epoch": 0.29402134025856713, "grad_norm": 4.751160621643066, "learning_rate": 8.288185122382405e-05, "loss": 1.6903, "step": 3906 }, { "epoch": 0.2940966145392273, "grad_norm": 5.917749404907227, "learning_rate": 8.287266642218496e-05, "loss": 2.1263, "step": 3907 }, { "epoch": 0.2941718888198875, "grad_norm": 4.357872486114502, "learning_rate": 8.286347966636016e-05, "loss": 1.9129, "step": 3908 }, { "epoch": 0.2942471631005476, "grad_norm": 6.8260650634765625, "learning_rate": 8.285429095689581e-05, "loss": 2.4091, "step": 3909 }, { "epoch": 0.2943224373812078, "grad_norm": 9.451496124267578, "learning_rate": 8.284510029433813e-05, "loss": 2.3008, "step": 3910 }, { "epoch": 0.29439771166186796, "grad_norm": 4.148449897766113, "learning_rate": 8.283590767923347e-05, "loss": 1.9655, "step": 3911 }, { "epoch": 0.2944729859425281, "grad_norm": 4.271643161773682, "learning_rate": 8.282671311212833e-05, "loss": 1.9186, "step": 3912 }, { "epoch": 0.29454826022318825, "grad_norm": 4.254988193511963, "learning_rate": 8.281751659356926e-05, "loss": 2.1378, "step": 3913 }, { "epoch": 0.29462353450384837, "grad_norm": 4.778247356414795, "learning_rate": 8.280831812410301e-05, "loss": 2.0003, "step": 3914 }, { "epoch": 0.29469880878450855, "grad_norm": 5.063285827636719, "learning_rate": 8.279911770427638e-05, "loss": 2.2523, "step": 3915 }, { "epoch": 0.2947740830651687, "grad_norm": 3.722421884536743, "learning_rate": 8.278991533463633e-05, "loss": 2.4155, "step": 3916 }, { "epoch": 0.29484935734582884, "grad_norm": 5.895635604858398, "learning_rate": 8.278071101572988e-05, "loss": 2.0962, "step": 3917 }, { "epoch": 0.294924631626489, "grad_norm": 4.910404682159424, "learning_rate": 8.277150474810422e-05, "loss": 2.0297, "step": 3918 }, { "epoch": 0.2949999059071492, "grad_norm": 4.950763702392578, "learning_rate": 8.276229653230665e-05, "loss": 2.1183, "step": 3919 }, { "epoch": 0.2950751801878093, "grad_norm": 4.1348419189453125, "learning_rate": 8.275308636888455e-05, "loss": 2.3009, "step": 3920 }, { "epoch": 0.2951504544684695, "grad_norm": 3.937880277633667, "learning_rate": 8.274387425838543e-05, "loss": 2.1799, "step": 3921 }, { "epoch": 0.29522572874912967, "grad_norm": 4.720341682434082, "learning_rate": 8.273466020135695e-05, "loss": 2.0935, "step": 3922 }, { "epoch": 0.2953010030297898, "grad_norm": 4.910231590270996, "learning_rate": 8.272544419834685e-05, "loss": 2.0942, "step": 3923 }, { "epoch": 0.29537627731044996, "grad_norm": 4.611443519592285, "learning_rate": 8.271622624990298e-05, "loss": 1.8372, "step": 3924 }, { "epoch": 0.2954515515911101, "grad_norm": 5.8901143074035645, "learning_rate": 8.270700635657334e-05, "loss": 2.3443, "step": 3925 }, { "epoch": 0.29552682587177026, "grad_norm": 4.361662864685059, "learning_rate": 8.269778451890603e-05, "loss": 1.7892, "step": 3926 }, { "epoch": 0.29560210015243044, "grad_norm": 4.859438896179199, "learning_rate": 8.268856073744924e-05, "loss": 2.0249, "step": 3927 }, { "epoch": 0.29567737443309056, "grad_norm": 6.353734016418457, "learning_rate": 8.267933501275132e-05, "loss": 2.0772, "step": 3928 }, { "epoch": 0.29575264871375073, "grad_norm": 4.042829990386963, "learning_rate": 8.267010734536067e-05, "loss": 2.0966, "step": 3929 }, { "epoch": 0.2958279229944109, "grad_norm": 5.235006809234619, "learning_rate": 8.266087773582591e-05, "loss": 2.1075, "step": 3930 }, { "epoch": 0.29590319727507103, "grad_norm": 6.5151448249816895, "learning_rate": 8.265164618469567e-05, "loss": 2.1292, "step": 3931 }, { "epoch": 0.2959784715557312, "grad_norm": 5.033535480499268, "learning_rate": 8.264241269251876e-05, "loss": 1.837, "step": 3932 }, { "epoch": 0.2960537458363913, "grad_norm": 5.000491619110107, "learning_rate": 8.263317725984406e-05, "loss": 1.8514, "step": 3933 }, { "epoch": 0.2961290201170515, "grad_norm": 5.6992669105529785, "learning_rate": 8.262393988722061e-05, "loss": 2.3944, "step": 3934 }, { "epoch": 0.2962042943977117, "grad_norm": 6.751134395599365, "learning_rate": 8.261470057519755e-05, "loss": 2.3705, "step": 3935 }, { "epoch": 0.2962795686783718, "grad_norm": 4.841224670410156, "learning_rate": 8.260545932432413e-05, "loss": 1.7609, "step": 3936 }, { "epoch": 0.29635484295903197, "grad_norm": 4.674503803253174, "learning_rate": 8.25962161351497e-05, "loss": 2.2483, "step": 3937 }, { "epoch": 0.29643011723969215, "grad_norm": 6.809699535369873, "learning_rate": 8.258697100822376e-05, "loss": 2.394, "step": 3938 }, { "epoch": 0.29650539152035227, "grad_norm": 4.707481384277344, "learning_rate": 8.257772394409589e-05, "loss": 2.2682, "step": 3939 }, { "epoch": 0.29658066580101244, "grad_norm": 4.450935363769531, "learning_rate": 8.256847494331581e-05, "loss": 2.2971, "step": 3940 }, { "epoch": 0.2966559400816726, "grad_norm": 6.442799091339111, "learning_rate": 8.255922400643335e-05, "loss": 2.1986, "step": 3941 }, { "epoch": 0.29673121436233274, "grad_norm": 4.342957019805908, "learning_rate": 8.254997113399842e-05, "loss": 1.8359, "step": 3942 }, { "epoch": 0.2968064886429929, "grad_norm": 4.233155250549316, "learning_rate": 8.254071632656113e-05, "loss": 2.1798, "step": 3943 }, { "epoch": 0.29688176292365304, "grad_norm": 4.547600746154785, "learning_rate": 8.253145958467162e-05, "loss": 1.9461, "step": 3944 }, { "epoch": 0.2969570372043132, "grad_norm": 5.658600330352783, "learning_rate": 8.252220090888017e-05, "loss": 2.2108, "step": 3945 }, { "epoch": 0.2970323114849734, "grad_norm": 4.1030192375183105, "learning_rate": 8.251294029973719e-05, "loss": 1.8221, "step": 3946 }, { "epoch": 0.2971075857656335, "grad_norm": 5.113779067993164, "learning_rate": 8.25036777577932e-05, "loss": 2.3138, "step": 3947 }, { "epoch": 0.2971828600462937, "grad_norm": 5.227575778961182, "learning_rate": 8.249441328359883e-05, "loss": 1.8268, "step": 3948 }, { "epoch": 0.29725813432695386, "grad_norm": 4.392734050750732, "learning_rate": 8.248514687770483e-05, "loss": 2.0917, "step": 3949 }, { "epoch": 0.297333408607614, "grad_norm": 4.64680814743042, "learning_rate": 8.247587854066207e-05, "loss": 2.0378, "step": 3950 }, { "epoch": 0.29740868288827416, "grad_norm": 6.5726423263549805, "learning_rate": 8.24666082730215e-05, "loss": 1.8285, "step": 3951 }, { "epoch": 0.29748395716893433, "grad_norm": 5.788646697998047, "learning_rate": 8.245733607533423e-05, "loss": 2.1962, "step": 3952 }, { "epoch": 0.29755923144959445, "grad_norm": 5.60465669631958, "learning_rate": 8.244806194815146e-05, "loss": 1.8553, "step": 3953 }, { "epoch": 0.29763450573025463, "grad_norm": 3.659080982208252, "learning_rate": 8.243878589202451e-05, "loss": 2.0637, "step": 3954 }, { "epoch": 0.29770978001091475, "grad_norm": 4.029128551483154, "learning_rate": 8.242950790750484e-05, "loss": 2.5333, "step": 3955 }, { "epoch": 0.2977850542915749, "grad_norm": 6.846841812133789, "learning_rate": 8.242022799514395e-05, "loss": 1.9938, "step": 3956 }, { "epoch": 0.2978603285722351, "grad_norm": 4.650421619415283, "learning_rate": 8.241094615549353e-05, "loss": 1.8515, "step": 3957 }, { "epoch": 0.2979356028528952, "grad_norm": 4.748678684234619, "learning_rate": 8.240166238910536e-05, "loss": 2.25, "step": 3958 }, { "epoch": 0.2980108771335554, "grad_norm": 3.416999340057373, "learning_rate": 8.239237669653134e-05, "loss": 2.2199, "step": 3959 }, { "epoch": 0.29808615141421557, "grad_norm": 4.143542766571045, "learning_rate": 8.238308907832347e-05, "loss": 2.2873, "step": 3960 }, { "epoch": 0.2981614256948757, "grad_norm": 6.439436435699463, "learning_rate": 8.237379953503386e-05, "loss": 2.4096, "step": 3961 }, { "epoch": 0.29823669997553587, "grad_norm": 4.001052379608154, "learning_rate": 8.236450806721476e-05, "loss": 1.9737, "step": 3962 }, { "epoch": 0.298311974256196, "grad_norm": 7.826466083526611, "learning_rate": 8.235521467541852e-05, "loss": 3.3204, "step": 3963 }, { "epoch": 0.29838724853685616, "grad_norm": 4.821740627288818, "learning_rate": 8.234591936019762e-05, "loss": 2.0892, "step": 3964 }, { "epoch": 0.29846252281751634, "grad_norm": 4.715775012969971, "learning_rate": 8.233662212210461e-05, "loss": 1.5054, "step": 3965 }, { "epoch": 0.29853779709817646, "grad_norm": 4.925595283508301, "learning_rate": 8.23273229616922e-05, "loss": 2.6118, "step": 3966 }, { "epoch": 0.29861307137883664, "grad_norm": 5.416860580444336, "learning_rate": 8.231802187951321e-05, "loss": 2.0367, "step": 3967 }, { "epoch": 0.2986883456594968, "grad_norm": 4.345701694488525, "learning_rate": 8.230871887612054e-05, "loss": 2.4256, "step": 3968 }, { "epoch": 0.29876361994015693, "grad_norm": 3.9337241649627686, "learning_rate": 8.229941395206724e-05, "loss": 1.9631, "step": 3969 }, { "epoch": 0.2988388942208171, "grad_norm": 3.07364821434021, "learning_rate": 8.229010710790646e-05, "loss": 1.9359, "step": 3970 }, { "epoch": 0.2989141685014773, "grad_norm": 5.827083110809326, "learning_rate": 8.228079834419145e-05, "loss": 1.8755, "step": 3971 }, { "epoch": 0.2989894427821374, "grad_norm": 3.811436891555786, "learning_rate": 8.227148766147561e-05, "loss": 1.9425, "step": 3972 }, { "epoch": 0.2990647170627976, "grad_norm": 3.9005682468414307, "learning_rate": 8.226217506031243e-05, "loss": 1.7227, "step": 3973 }, { "epoch": 0.2991399913434577, "grad_norm": 4.720652103424072, "learning_rate": 8.225286054125552e-05, "loss": 2.0385, "step": 3974 }, { "epoch": 0.2992152656241179, "grad_norm": 5.91552734375, "learning_rate": 8.224354410485857e-05, "loss": 1.8691, "step": 3975 }, { "epoch": 0.29929053990477805, "grad_norm": 4.188562393188477, "learning_rate": 8.223422575167546e-05, "loss": 1.7629, "step": 3976 }, { "epoch": 0.2993658141854382, "grad_norm": 3.4777920246124268, "learning_rate": 8.222490548226011e-05, "loss": 2.0878, "step": 3977 }, { "epoch": 0.29944108846609835, "grad_norm": 5.007256031036377, "learning_rate": 8.221558329716661e-05, "loss": 2.2845, "step": 3978 }, { "epoch": 0.2995163627467585, "grad_norm": 6.493385314941406, "learning_rate": 8.220625919694909e-05, "loss": 1.9871, "step": 3979 }, { "epoch": 0.29959163702741864, "grad_norm": 3.5783050060272217, "learning_rate": 8.21969331821619e-05, "loss": 1.9078, "step": 3980 }, { "epoch": 0.2996669113080788, "grad_norm": 3.807652235031128, "learning_rate": 8.21876052533594e-05, "loss": 1.7729, "step": 3981 }, { "epoch": 0.29974218558873894, "grad_norm": 4.732152938842773, "learning_rate": 8.217827541109612e-05, "loss": 2.0942, "step": 3982 }, { "epoch": 0.2998174598693991, "grad_norm": 5.426209926605225, "learning_rate": 8.21689436559267e-05, "loss": 2.2869, "step": 3983 }, { "epoch": 0.2998927341500593, "grad_norm": 5.603438854217529, "learning_rate": 8.215960998840591e-05, "loss": 2.0067, "step": 3984 }, { "epoch": 0.2999680084307194, "grad_norm": 8.527303695678711, "learning_rate": 8.215027440908856e-05, "loss": 2.1007, "step": 3985 }, { "epoch": 0.3000432827113796, "grad_norm": 4.717874050140381, "learning_rate": 8.214093691852963e-05, "loss": 2.2188, "step": 3986 }, { "epoch": 0.30011855699203976, "grad_norm": 4.590381622314453, "learning_rate": 8.213159751728424e-05, "loss": 1.9118, "step": 3987 }, { "epoch": 0.3001938312726999, "grad_norm": 3.9194700717926025, "learning_rate": 8.212225620590757e-05, "loss": 2.1885, "step": 3988 }, { "epoch": 0.30026910555336006, "grad_norm": 6.475456714630127, "learning_rate": 8.211291298495493e-05, "loss": 2.0232, "step": 3989 }, { "epoch": 0.30034437983402024, "grad_norm": 4.130027770996094, "learning_rate": 8.210356785498178e-05, "loss": 2.1068, "step": 3990 }, { "epoch": 0.30041965411468036, "grad_norm": 5.896829605102539, "learning_rate": 8.209422081654362e-05, "loss": 2.5909, "step": 3991 }, { "epoch": 0.30049492839534053, "grad_norm": 4.915403366088867, "learning_rate": 8.208487187019614e-05, "loss": 1.8748, "step": 3992 }, { "epoch": 0.30057020267600065, "grad_norm": 6.936680316925049, "learning_rate": 8.207552101649506e-05, "loss": 2.5195, "step": 3993 }, { "epoch": 0.30064547695666083, "grad_norm": 5.8221259117126465, "learning_rate": 8.20661682559963e-05, "loss": 2.1315, "step": 3994 }, { "epoch": 0.300720751237321, "grad_norm": 6.580040454864502, "learning_rate": 8.205681358925585e-05, "loss": 1.6014, "step": 3995 }, { "epoch": 0.3007960255179811, "grad_norm": 3.8563807010650635, "learning_rate": 8.204745701682981e-05, "loss": 2.0251, "step": 3996 }, { "epoch": 0.3008712997986413, "grad_norm": 9.69757080078125, "learning_rate": 8.20380985392744e-05, "loss": 2.3376, "step": 3997 }, { "epoch": 0.3009465740793015, "grad_norm": 5.109683513641357, "learning_rate": 8.202873815714596e-05, "loss": 2.4456, "step": 3998 }, { "epoch": 0.3010218483599616, "grad_norm": 4.2439351081848145, "learning_rate": 8.201937587100095e-05, "loss": 1.9209, "step": 3999 }, { "epoch": 0.3010971226406218, "grad_norm": 5.059563636779785, "learning_rate": 8.20100116813959e-05, "loss": 2.0953, "step": 4000 }, { "epoch": 0.30117239692128195, "grad_norm": 4.447500228881836, "learning_rate": 8.200064558888753e-05, "loss": 2.2077, "step": 4001 }, { "epoch": 0.30124767120194207, "grad_norm": 5.637600421905518, "learning_rate": 8.199127759403258e-05, "loss": 1.6544, "step": 4002 }, { "epoch": 0.30132294548260224, "grad_norm": 4.083150386810303, "learning_rate": 8.198190769738797e-05, "loss": 2.1035, "step": 4003 }, { "epoch": 0.30139821976326236, "grad_norm": 5.844494342803955, "learning_rate": 8.197253589951073e-05, "loss": 1.9702, "step": 4004 }, { "epoch": 0.30147349404392254, "grad_norm": 3.5449769496917725, "learning_rate": 8.196316220095795e-05, "loss": 1.8437, "step": 4005 }, { "epoch": 0.3015487683245827, "grad_norm": 3.938633918762207, "learning_rate": 8.19537866022869e-05, "loss": 2.2647, "step": 4006 }, { "epoch": 0.30162404260524284, "grad_norm": 4.594608306884766, "learning_rate": 8.194440910405491e-05, "loss": 2.1227, "step": 4007 }, { "epoch": 0.301699316885903, "grad_norm": 4.835352420806885, "learning_rate": 8.193502970681948e-05, "loss": 2.04, "step": 4008 }, { "epoch": 0.3017745911665632, "grad_norm": 4.093810558319092, "learning_rate": 8.192564841113815e-05, "loss": 1.6861, "step": 4009 }, { "epoch": 0.3018498654472233, "grad_norm": 4.382516384124756, "learning_rate": 8.191626521756863e-05, "loss": 2.6152, "step": 4010 }, { "epoch": 0.3019251397278835, "grad_norm": 4.418197154998779, "learning_rate": 8.190688012666871e-05, "loss": 2.3922, "step": 4011 }, { "epoch": 0.3020004140085436, "grad_norm": 3.890061616897583, "learning_rate": 8.189749313899633e-05, "loss": 2.067, "step": 4012 }, { "epoch": 0.3020756882892038, "grad_norm": 5.5481953620910645, "learning_rate": 8.188810425510951e-05, "loss": 2.3599, "step": 4013 }, { "epoch": 0.30215096256986396, "grad_norm": 5.106924057006836, "learning_rate": 8.187871347556638e-05, "loss": 2.2143, "step": 4014 }, { "epoch": 0.3022262368505241, "grad_norm": 4.832911968231201, "learning_rate": 8.186932080092521e-05, "loss": 1.9913, "step": 4015 }, { "epoch": 0.30230151113118425, "grad_norm": 5.2827677726745605, "learning_rate": 8.185992623174437e-05, "loss": 1.8147, "step": 4016 }, { "epoch": 0.30237678541184443, "grad_norm": 4.7907023429870605, "learning_rate": 8.185052976858231e-05, "loss": 2.3666, "step": 4017 }, { "epoch": 0.30245205969250455, "grad_norm": 4.429478168487549, "learning_rate": 8.184113141199766e-05, "loss": 2.1214, "step": 4018 }, { "epoch": 0.3025273339731647, "grad_norm": 5.64173698425293, "learning_rate": 8.183173116254912e-05, "loss": 1.7993, "step": 4019 }, { "epoch": 0.3026026082538249, "grad_norm": 4.29542350769043, "learning_rate": 8.182232902079548e-05, "loss": 1.8399, "step": 4020 }, { "epoch": 0.302677882534485, "grad_norm": 6.586565017700195, "learning_rate": 8.18129249872957e-05, "loss": 1.7283, "step": 4021 }, { "epoch": 0.3027531568151452, "grad_norm": 4.294366359710693, "learning_rate": 8.180351906260879e-05, "loss": 2.7436, "step": 4022 }, { "epoch": 0.3028284310958053, "grad_norm": 4.890934467315674, "learning_rate": 8.179411124729395e-05, "loss": 2.5896, "step": 4023 }, { "epoch": 0.3029037053764655, "grad_norm": 2.9330251216888428, "learning_rate": 8.178470154191041e-05, "loss": 2.0704, "step": 4024 }, { "epoch": 0.30297897965712567, "grad_norm": 4.444950103759766, "learning_rate": 8.177528994701758e-05, "loss": 2.2147, "step": 4025 }, { "epoch": 0.3030542539377858, "grad_norm": 3.5154833793640137, "learning_rate": 8.17658764631749e-05, "loss": 1.7689, "step": 4026 }, { "epoch": 0.30312952821844596, "grad_norm": 5.468313217163086, "learning_rate": 8.175646109094205e-05, "loss": 2.2068, "step": 4027 }, { "epoch": 0.30320480249910614, "grad_norm": 5.573143482208252, "learning_rate": 8.174704383087868e-05, "loss": 1.9208, "step": 4028 }, { "epoch": 0.30328007677976626, "grad_norm": 4.63287353515625, "learning_rate": 8.173762468354467e-05, "loss": 2.0323, "step": 4029 }, { "epoch": 0.30335535106042644, "grad_norm": 4.022366046905518, "learning_rate": 8.172820364949992e-05, "loss": 2.3292, "step": 4030 }, { "epoch": 0.30343062534108656, "grad_norm": 3.474485397338867, "learning_rate": 8.171878072930451e-05, "loss": 2.1012, "step": 4031 }, { "epoch": 0.30350589962174673, "grad_norm": 4.458362579345703, "learning_rate": 8.17093559235186e-05, "loss": 2.0234, "step": 4032 }, { "epoch": 0.3035811739024069, "grad_norm": 5.3365397453308105, "learning_rate": 8.169992923270247e-05, "loss": 1.8182, "step": 4033 }, { "epoch": 0.30365644818306703, "grad_norm": 4.710349082946777, "learning_rate": 8.169050065741648e-05, "loss": 1.9494, "step": 4034 }, { "epoch": 0.3037317224637272, "grad_norm": 4.603903293609619, "learning_rate": 8.168107019822117e-05, "loss": 1.6216, "step": 4035 }, { "epoch": 0.3038069967443874, "grad_norm": 4.38110876083374, "learning_rate": 8.167163785567716e-05, "loss": 1.9569, "step": 4036 }, { "epoch": 0.3038822710250475, "grad_norm": 5.198643684387207, "learning_rate": 8.166220363034513e-05, "loss": 1.9497, "step": 4037 }, { "epoch": 0.3039575453057077, "grad_norm": 6.538378715515137, "learning_rate": 8.165276752278594e-05, "loss": 2.3177, "step": 4038 }, { "epoch": 0.30403281958636785, "grad_norm": 3.8377621173858643, "learning_rate": 8.164332953356056e-05, "loss": 2.2456, "step": 4039 }, { "epoch": 0.304108093867028, "grad_norm": 3.472177505493164, "learning_rate": 8.163388966323002e-05, "loss": 1.6802, "step": 4040 }, { "epoch": 0.30418336814768815, "grad_norm": 3.353297233581543, "learning_rate": 8.162444791235552e-05, "loss": 2.1592, "step": 4041 }, { "epoch": 0.30425864242834827, "grad_norm": 4.183670520782471, "learning_rate": 8.161500428149833e-05, "loss": 1.9398, "step": 4042 }, { "epoch": 0.30433391670900845, "grad_norm": 5.333328723907471, "learning_rate": 8.160555877121985e-05, "loss": 1.9265, "step": 4043 }, { "epoch": 0.3044091909896686, "grad_norm": 5.0812907218933105, "learning_rate": 8.159611138208157e-05, "loss": 1.8548, "step": 4044 }, { "epoch": 0.30448446527032874, "grad_norm": 4.2538909912109375, "learning_rate": 8.158666211464514e-05, "loss": 1.9934, "step": 4045 }, { "epoch": 0.3045597395509889, "grad_norm": 5.934788703918457, "learning_rate": 8.157721096947229e-05, "loss": 1.842, "step": 4046 }, { "epoch": 0.3046350138316491, "grad_norm": 4.272953510284424, "learning_rate": 8.156775794712485e-05, "loss": 2.1654, "step": 4047 }, { "epoch": 0.3047102881123092, "grad_norm": 4.149127960205078, "learning_rate": 8.155830304816476e-05, "loss": 2.0902, "step": 4048 }, { "epoch": 0.3047855623929694, "grad_norm": 5.6880598068237305, "learning_rate": 8.154884627315412e-05, "loss": 1.9925, "step": 4049 }, { "epoch": 0.30486083667362956, "grad_norm": 4.037545680999756, "learning_rate": 8.153938762265509e-05, "loss": 2.0993, "step": 4050 }, { "epoch": 0.3049361109542897, "grad_norm": 4.277050495147705, "learning_rate": 8.152992709722996e-05, "loss": 2.2155, "step": 4051 }, { "epoch": 0.30501138523494986, "grad_norm": 4.304996967315674, "learning_rate": 8.152046469744115e-05, "loss": 1.8723, "step": 4052 }, { "epoch": 0.30508665951561, "grad_norm": 3.7426083087921143, "learning_rate": 8.151100042385114e-05, "loss": 1.9863, "step": 4053 }, { "epoch": 0.30516193379627016, "grad_norm": 3.8212766647338867, "learning_rate": 8.150153427702256e-05, "loss": 2.0204, "step": 4054 }, { "epoch": 0.30523720807693033, "grad_norm": 3.710747003555298, "learning_rate": 8.149206625751817e-05, "loss": 2.0361, "step": 4055 }, { "epoch": 0.30531248235759045, "grad_norm": 4.176271438598633, "learning_rate": 8.14825963659008e-05, "loss": 2.0997, "step": 4056 }, { "epoch": 0.30538775663825063, "grad_norm": 4.722878932952881, "learning_rate": 8.14731246027334e-05, "loss": 2.0295, "step": 4057 }, { "epoch": 0.3054630309189108, "grad_norm": 4.019648551940918, "learning_rate": 8.146365096857907e-05, "loss": 1.4909, "step": 4058 }, { "epoch": 0.3055383051995709, "grad_norm": 5.106976509094238, "learning_rate": 8.145417546400096e-05, "loss": 2.1997, "step": 4059 }, { "epoch": 0.3056135794802311, "grad_norm": 6.042627811431885, "learning_rate": 8.144469808956235e-05, "loss": 1.7898, "step": 4060 }, { "epoch": 0.3056888537608912, "grad_norm": 3.81748628616333, "learning_rate": 8.143521884582668e-05, "loss": 1.9809, "step": 4061 }, { "epoch": 0.3057641280415514, "grad_norm": 6.871935844421387, "learning_rate": 8.142573773335745e-05, "loss": 1.9752, "step": 4062 }, { "epoch": 0.3058394023222116, "grad_norm": 3.6774086952209473, "learning_rate": 8.141625475271828e-05, "loss": 2.0542, "step": 4063 }, { "epoch": 0.3059146766028717, "grad_norm": 5.08273983001709, "learning_rate": 8.140676990447291e-05, "loss": 2.0862, "step": 4064 }, { "epoch": 0.30598995088353187, "grad_norm": 4.048882484436035, "learning_rate": 8.139728318918518e-05, "loss": 1.9852, "step": 4065 }, { "epoch": 0.30606522516419205, "grad_norm": 4.194674968719482, "learning_rate": 8.138779460741906e-05, "loss": 1.9804, "step": 4066 }, { "epoch": 0.30614049944485217, "grad_norm": 4.660661697387695, "learning_rate": 8.137830415973862e-05, "loss": 1.7398, "step": 4067 }, { "epoch": 0.30621577372551234, "grad_norm": 4.434919834136963, "learning_rate": 8.136881184670802e-05, "loss": 2.2419, "step": 4068 }, { "epoch": 0.3062910480061725, "grad_norm": 5.208791732788086, "learning_rate": 8.135931766889156e-05, "loss": 2.0253, "step": 4069 }, { "epoch": 0.30636632228683264, "grad_norm": 4.941207408905029, "learning_rate": 8.134982162685365e-05, "loss": 2.0767, "step": 4070 }, { "epoch": 0.3064415965674928, "grad_norm": 3.9456770420074463, "learning_rate": 8.134032372115881e-05, "loss": 1.9677, "step": 4071 }, { "epoch": 0.30651687084815293, "grad_norm": 4.965690612792969, "learning_rate": 8.133082395237163e-05, "loss": 1.8762, "step": 4072 }, { "epoch": 0.3065921451288131, "grad_norm": 6.1574907302856445, "learning_rate": 8.132132232105686e-05, "loss": 1.9853, "step": 4073 }, { "epoch": 0.3066674194094733, "grad_norm": 5.204733371734619, "learning_rate": 8.131181882777938e-05, "loss": 2.0672, "step": 4074 }, { "epoch": 0.3067426936901334, "grad_norm": 5.487968921661377, "learning_rate": 8.130231347310408e-05, "loss": 2.6792, "step": 4075 }, { "epoch": 0.3068179679707936, "grad_norm": 4.597421646118164, "learning_rate": 8.129280625759608e-05, "loss": 2.1712, "step": 4076 }, { "epoch": 0.30689324225145376, "grad_norm": 4.876674175262451, "learning_rate": 8.128329718182052e-05, "loss": 2.0335, "step": 4077 }, { "epoch": 0.3069685165321139, "grad_norm": 4.4856438636779785, "learning_rate": 8.127378624634272e-05, "loss": 2.1583, "step": 4078 }, { "epoch": 0.30704379081277405, "grad_norm": 4.017351150512695, "learning_rate": 8.126427345172806e-05, "loss": 1.8377, "step": 4079 }, { "epoch": 0.3071190650934342, "grad_norm": 6.131873607635498, "learning_rate": 8.125475879854206e-05, "loss": 2.1403, "step": 4080 }, { "epoch": 0.30719433937409435, "grad_norm": 5.1919846534729, "learning_rate": 8.124524228735031e-05, "loss": 2.3897, "step": 4081 }, { "epoch": 0.3072696136547545, "grad_norm": 6.324389934539795, "learning_rate": 8.123572391871857e-05, "loss": 1.6265, "step": 4082 }, { "epoch": 0.30734488793541465, "grad_norm": 5.253021717071533, "learning_rate": 8.122620369321267e-05, "loss": 1.7724, "step": 4083 }, { "epoch": 0.3074201622160748, "grad_norm": 4.45188045501709, "learning_rate": 8.121668161139855e-05, "loss": 1.9286, "step": 4084 }, { "epoch": 0.307495436496735, "grad_norm": 4.46063756942749, "learning_rate": 8.120715767384228e-05, "loss": 2.0184, "step": 4085 }, { "epoch": 0.3075707107773951, "grad_norm": 4.661442756652832, "learning_rate": 8.119763188111002e-05, "loss": 2.1171, "step": 4086 }, { "epoch": 0.3076459850580553, "grad_norm": 5.498867511749268, "learning_rate": 8.118810423376808e-05, "loss": 2.0624, "step": 4087 }, { "epoch": 0.30772125933871547, "grad_norm": 5.268554210662842, "learning_rate": 8.117857473238283e-05, "loss": 1.779, "step": 4088 }, { "epoch": 0.3077965336193756, "grad_norm": 5.048746585845947, "learning_rate": 8.116904337752077e-05, "loss": 1.8637, "step": 4089 }, { "epoch": 0.30787180790003577, "grad_norm": 6.81883430480957, "learning_rate": 8.115951016974851e-05, "loss": 2.3365, "step": 4090 }, { "epoch": 0.3079470821806959, "grad_norm": 4.037649154663086, "learning_rate": 8.114997510963277e-05, "loss": 2.0255, "step": 4091 }, { "epoch": 0.30802235646135606, "grad_norm": 3.8580470085144043, "learning_rate": 8.114043819774041e-05, "loss": 1.952, "step": 4092 }, { "epoch": 0.30809763074201624, "grad_norm": 4.869329452514648, "learning_rate": 8.113089943463834e-05, "loss": 2.0638, "step": 4093 }, { "epoch": 0.30817290502267636, "grad_norm": 5.133235454559326, "learning_rate": 8.112135882089362e-05, "loss": 1.8439, "step": 4094 }, { "epoch": 0.30824817930333653, "grad_norm": 4.648326873779297, "learning_rate": 8.111181635707343e-05, "loss": 1.982, "step": 4095 }, { "epoch": 0.3083234535839967, "grad_norm": 5.285988807678223, "learning_rate": 8.1102272043745e-05, "loss": 2.0807, "step": 4096 }, { "epoch": 0.30839872786465683, "grad_norm": 11.501988410949707, "learning_rate": 8.109272588147578e-05, "loss": 2.2605, "step": 4097 }, { "epoch": 0.308474002145317, "grad_norm": 5.610404014587402, "learning_rate": 8.10831778708332e-05, "loss": 2.1319, "step": 4098 }, { "epoch": 0.3085492764259772, "grad_norm": 3.634601593017578, "learning_rate": 8.107362801238487e-05, "loss": 1.9752, "step": 4099 }, { "epoch": 0.3086245507066373, "grad_norm": 5.04384708404541, "learning_rate": 8.106407630669852e-05, "loss": 1.9854, "step": 4100 }, { "epoch": 0.3086998249872975, "grad_norm": 4.8440165519714355, "learning_rate": 8.105452275434197e-05, "loss": 2.0274, "step": 4101 }, { "epoch": 0.3087750992679576, "grad_norm": 5.094618797302246, "learning_rate": 8.104496735588316e-05, "loss": 1.8071, "step": 4102 }, { "epoch": 0.3088503735486178, "grad_norm": 5.863461017608643, "learning_rate": 8.103541011189011e-05, "loss": 1.9727, "step": 4103 }, { "epoch": 0.30892564782927795, "grad_norm": 5.042271137237549, "learning_rate": 8.102585102293098e-05, "loss": 1.9689, "step": 4104 }, { "epoch": 0.30900092210993807, "grad_norm": 4.370942115783691, "learning_rate": 8.101629008957402e-05, "loss": 1.776, "step": 4105 }, { "epoch": 0.30907619639059825, "grad_norm": 3.9975955486297607, "learning_rate": 8.100672731238762e-05, "loss": 1.9453, "step": 4106 }, { "epoch": 0.3091514706712584, "grad_norm": 4.6842827796936035, "learning_rate": 8.099716269194025e-05, "loss": 1.9941, "step": 4107 }, { "epoch": 0.30922674495191854, "grad_norm": 5.521533489227295, "learning_rate": 8.09875962288005e-05, "loss": 2.2095, "step": 4108 }, { "epoch": 0.3093020192325787, "grad_norm": 5.3865532875061035, "learning_rate": 8.097802792353705e-05, "loss": 1.6124, "step": 4109 }, { "epoch": 0.30937729351323884, "grad_norm": 5.3589959144592285, "learning_rate": 8.096845777671874e-05, "loss": 2.208, "step": 4110 }, { "epoch": 0.309452567793899, "grad_norm": 5.392484664916992, "learning_rate": 8.095888578891449e-05, "loss": 1.8472, "step": 4111 }, { "epoch": 0.3095278420745592, "grad_norm": 3.5484778881073, "learning_rate": 8.094931196069328e-05, "loss": 1.8095, "step": 4112 }, { "epoch": 0.3096031163552193, "grad_norm": 4.021432876586914, "learning_rate": 8.09397362926243e-05, "loss": 2.2879, "step": 4113 }, { "epoch": 0.3096783906358795, "grad_norm": 4.4004740715026855, "learning_rate": 8.093015878527676e-05, "loss": 2.1758, "step": 4114 }, { "epoch": 0.30975366491653966, "grad_norm": 5.061720848083496, "learning_rate": 8.092057943922004e-05, "loss": 1.8712, "step": 4115 }, { "epoch": 0.3098289391971998, "grad_norm": 5.847341537475586, "learning_rate": 8.091099825502359e-05, "loss": 1.6718, "step": 4116 }, { "epoch": 0.30990421347785996, "grad_norm": 3.808504104614258, "learning_rate": 8.090141523325699e-05, "loss": 2.0764, "step": 4117 }, { "epoch": 0.30997948775852013, "grad_norm": 3.72623348236084, "learning_rate": 8.089183037448991e-05, "loss": 1.9831, "step": 4118 }, { "epoch": 0.31005476203918025, "grad_norm": 4.513221740722656, "learning_rate": 8.088224367929216e-05, "loss": 2.1058, "step": 4119 }, { "epoch": 0.31013003631984043, "grad_norm": 6.3050537109375, "learning_rate": 8.087265514823362e-05, "loss": 1.9353, "step": 4120 }, { "epoch": 0.31020531060050055, "grad_norm": 3.6257219314575195, "learning_rate": 8.086306478188434e-05, "loss": 1.8618, "step": 4121 }, { "epoch": 0.3102805848811607, "grad_norm": 4.7520318031311035, "learning_rate": 8.08534725808144e-05, "loss": 2.5045, "step": 4122 }, { "epoch": 0.3103558591618209, "grad_norm": 5.053914546966553, "learning_rate": 8.084387854559403e-05, "loss": 2.275, "step": 4123 }, { "epoch": 0.310431133442481, "grad_norm": 6.50734806060791, "learning_rate": 8.08342826767936e-05, "loss": 2.1574, "step": 4124 }, { "epoch": 0.3105064077231412, "grad_norm": 5.083341121673584, "learning_rate": 8.082468497498355e-05, "loss": 2.0956, "step": 4125 }, { "epoch": 0.3105816820038014, "grad_norm": 5.281416416168213, "learning_rate": 8.08150854407344e-05, "loss": 2.0642, "step": 4126 }, { "epoch": 0.3106569562844615, "grad_norm": 5.110672950744629, "learning_rate": 8.080548407461684e-05, "loss": 2.1145, "step": 4127 }, { "epoch": 0.31073223056512167, "grad_norm": 4.969967842102051, "learning_rate": 8.079588087720165e-05, "loss": 1.9353, "step": 4128 }, { "epoch": 0.3108075048457818, "grad_norm": 4.673458576202393, "learning_rate": 8.078627584905971e-05, "loss": 1.7939, "step": 4129 }, { "epoch": 0.31088277912644197, "grad_norm": 3.638911724090576, "learning_rate": 8.077666899076201e-05, "loss": 1.7281, "step": 4130 }, { "epoch": 0.31095805340710214, "grad_norm": 3.6680829524993896, "learning_rate": 8.076706030287964e-05, "loss": 2.1524, "step": 4131 }, { "epoch": 0.31103332768776226, "grad_norm": 5.044182300567627, "learning_rate": 8.075744978598381e-05, "loss": 2.6038, "step": 4132 }, { "epoch": 0.31110860196842244, "grad_norm": 7.607592582702637, "learning_rate": 8.074783744064585e-05, "loss": 2.198, "step": 4133 }, { "epoch": 0.3111838762490826, "grad_norm": 5.536702632904053, "learning_rate": 8.073822326743718e-05, "loss": 2.6092, "step": 4134 }, { "epoch": 0.31125915052974273, "grad_norm": 4.153436183929443, "learning_rate": 8.072860726692933e-05, "loss": 2.2134, "step": 4135 }, { "epoch": 0.3113344248104029, "grad_norm": 4.976628303527832, "learning_rate": 8.071898943969396e-05, "loss": 2.0248, "step": 4136 }, { "epoch": 0.3114096990910631, "grad_norm": 4.188265323638916, "learning_rate": 8.070936978630279e-05, "loss": 1.7563, "step": 4137 }, { "epoch": 0.3114849733717232, "grad_norm": 3.808957815170288, "learning_rate": 8.069974830732772e-05, "loss": 1.7866, "step": 4138 }, { "epoch": 0.3115602476523834, "grad_norm": 5.6623663902282715, "learning_rate": 8.069012500334069e-05, "loss": 1.7498, "step": 4139 }, { "epoch": 0.3116355219330435, "grad_norm": 6.884983539581299, "learning_rate": 8.06804998749138e-05, "loss": 2.1193, "step": 4140 }, { "epoch": 0.3117107962137037, "grad_norm": 5.9430670738220215, "learning_rate": 8.067087292261921e-05, "loss": 2.1311, "step": 4141 }, { "epoch": 0.31178607049436385, "grad_norm": 4.839221954345703, "learning_rate": 8.066124414702922e-05, "loss": 2.2644, "step": 4142 }, { "epoch": 0.311861344775024, "grad_norm": 6.93126106262207, "learning_rate": 8.065161354871627e-05, "loss": 1.9878, "step": 4143 }, { "epoch": 0.31193661905568415, "grad_norm": 5.780520915985107, "learning_rate": 8.064198112825284e-05, "loss": 1.7907, "step": 4144 }, { "epoch": 0.3120118933363443, "grad_norm": 4.934045314788818, "learning_rate": 8.063234688621152e-05, "loss": 2.1539, "step": 4145 }, { "epoch": 0.31208716761700445, "grad_norm": 4.8335418701171875, "learning_rate": 8.062271082316511e-05, "loss": 1.994, "step": 4146 }, { "epoch": 0.3121624418976646, "grad_norm": 6.246216297149658, "learning_rate": 8.06130729396864e-05, "loss": 1.7197, "step": 4147 }, { "epoch": 0.3122377161783248, "grad_norm": 4.342348098754883, "learning_rate": 8.060343323634834e-05, "loss": 2.2097, "step": 4148 }, { "epoch": 0.3123129904589849, "grad_norm": 4.31889009475708, "learning_rate": 8.059379171372399e-05, "loss": 1.8451, "step": 4149 }, { "epoch": 0.3123882647396451, "grad_norm": 4.057023525238037, "learning_rate": 8.058414837238648e-05, "loss": 2.3338, "step": 4150 }, { "epoch": 0.3124635390203052, "grad_norm": 5.637147903442383, "learning_rate": 8.057450321290913e-05, "loss": 1.9821, "step": 4151 }, { "epoch": 0.3125388133009654, "grad_norm": 4.549032211303711, "learning_rate": 8.056485623586529e-05, "loss": 1.602, "step": 4152 }, { "epoch": 0.31261408758162557, "grad_norm": 3.76389479637146, "learning_rate": 8.055520744182845e-05, "loss": 1.8877, "step": 4153 }, { "epoch": 0.3126893618622857, "grad_norm": 4.166214466094971, "learning_rate": 8.05455568313722e-05, "loss": 1.9226, "step": 4154 }, { "epoch": 0.31276463614294586, "grad_norm": 4.176894664764404, "learning_rate": 8.053590440507022e-05, "loss": 1.8303, "step": 4155 }, { "epoch": 0.31283991042360604, "grad_norm": 3.733010768890381, "learning_rate": 8.052625016349636e-05, "loss": 2.0695, "step": 4156 }, { "epoch": 0.31291518470426616, "grad_norm": 4.667023658752441, "learning_rate": 8.05165941072245e-05, "loss": 2.3617, "step": 4157 }, { "epoch": 0.31299045898492633, "grad_norm": 4.891637802124023, "learning_rate": 8.05069362368287e-05, "loss": 2.1035, "step": 4158 }, { "epoch": 0.31306573326558645, "grad_norm": 4.5017571449279785, "learning_rate": 8.049727655288308e-05, "loss": 1.8228, "step": 4159 }, { "epoch": 0.31314100754624663, "grad_norm": 4.297059059143066, "learning_rate": 8.048761505596185e-05, "loss": 1.7543, "step": 4160 }, { "epoch": 0.3132162818269068, "grad_norm": 5.62193489074707, "learning_rate": 8.047795174663941e-05, "loss": 1.8116, "step": 4161 }, { "epoch": 0.3132915561075669, "grad_norm": 5.109541416168213, "learning_rate": 8.046828662549018e-05, "loss": 2.2991, "step": 4162 }, { "epoch": 0.3133668303882271, "grad_norm": 4.738384246826172, "learning_rate": 8.045861969308872e-05, "loss": 1.753, "step": 4163 }, { "epoch": 0.3134421046688873, "grad_norm": 4.469361782073975, "learning_rate": 8.044895095000972e-05, "loss": 2.0392, "step": 4164 }, { "epoch": 0.3135173789495474, "grad_norm": 3.790707588195801, "learning_rate": 8.043928039682798e-05, "loss": 1.7623, "step": 4165 }, { "epoch": 0.3135926532302076, "grad_norm": 5.312924861907959, "learning_rate": 8.042960803411835e-05, "loss": 1.7638, "step": 4166 }, { "epoch": 0.31366792751086775, "grad_norm": 5.138605117797852, "learning_rate": 8.041993386245581e-05, "loss": 2.1812, "step": 4167 }, { "epoch": 0.31374320179152787, "grad_norm": 4.705393314361572, "learning_rate": 8.04102578824155e-05, "loss": 2.1203, "step": 4168 }, { "epoch": 0.31381847607218805, "grad_norm": 4.142406940460205, "learning_rate": 8.040058009457263e-05, "loss": 2.0457, "step": 4169 }, { "epoch": 0.31389375035284817, "grad_norm": 4.072697162628174, "learning_rate": 8.039090049950249e-05, "loss": 2.3336, "step": 4170 }, { "epoch": 0.31396902463350834, "grad_norm": 3.9310832023620605, "learning_rate": 8.038121909778052e-05, "loss": 2.0128, "step": 4171 }, { "epoch": 0.3140442989141685, "grad_norm": 4.50548791885376, "learning_rate": 8.037153588998224e-05, "loss": 2.7424, "step": 4172 }, { "epoch": 0.31411957319482864, "grad_norm": 4.231843948364258, "learning_rate": 8.036185087668329e-05, "loss": 2.1257, "step": 4173 }, { "epoch": 0.3141948474754888, "grad_norm": 7.13316535949707, "learning_rate": 8.035216405845945e-05, "loss": 2.4721, "step": 4174 }, { "epoch": 0.314270121756149, "grad_norm": 5.095510005950928, "learning_rate": 8.034247543588653e-05, "loss": 2.2306, "step": 4175 }, { "epoch": 0.3143453960368091, "grad_norm": 3.729904890060425, "learning_rate": 8.033278500954051e-05, "loss": 2.1525, "step": 4176 }, { "epoch": 0.3144206703174693, "grad_norm": 5.1464762687683105, "learning_rate": 8.032309277999745e-05, "loss": 2.035, "step": 4177 }, { "epoch": 0.3144959445981294, "grad_norm": 6.489736080169678, "learning_rate": 8.031339874783354e-05, "loss": 2.0451, "step": 4178 }, { "epoch": 0.3145712188787896, "grad_norm": 5.9339375495910645, "learning_rate": 8.030370291362506e-05, "loss": 2.6718, "step": 4179 }, { "epoch": 0.31464649315944976, "grad_norm": 4.8780107498168945, "learning_rate": 8.029400527794837e-05, "loss": 2.0106, "step": 4180 }, { "epoch": 0.3147217674401099, "grad_norm": 4.1365485191345215, "learning_rate": 8.028430584138e-05, "loss": 2.0318, "step": 4181 }, { "epoch": 0.31479704172077005, "grad_norm": 4.1941752433776855, "learning_rate": 8.027460460449657e-05, "loss": 2.0775, "step": 4182 }, { "epoch": 0.31487231600143023, "grad_norm": 3.978912353515625, "learning_rate": 8.026490156787473e-05, "loss": 2.0506, "step": 4183 }, { "epoch": 0.31494759028209035, "grad_norm": 3.028184175491333, "learning_rate": 8.025519673209135e-05, "loss": 1.9099, "step": 4184 }, { "epoch": 0.3150228645627505, "grad_norm": 3.51009202003479, "learning_rate": 8.024549009772331e-05, "loss": 2.3928, "step": 4185 }, { "epoch": 0.3150981388434107, "grad_norm": 5.449923038482666, "learning_rate": 8.023578166534769e-05, "loss": 1.7385, "step": 4186 }, { "epoch": 0.3151734131240708, "grad_norm": 5.712917327880859, "learning_rate": 8.022607143554159e-05, "loss": 1.642, "step": 4187 }, { "epoch": 0.315248687404731, "grad_norm": 5.557191848754883, "learning_rate": 8.02163594088823e-05, "loss": 2.0553, "step": 4188 }, { "epoch": 0.3153239616853911, "grad_norm": 4.718916416168213, "learning_rate": 8.020664558594711e-05, "loss": 1.8814, "step": 4189 }, { "epoch": 0.3153992359660513, "grad_norm": 4.241898536682129, "learning_rate": 8.019692996731354e-05, "loss": 2.1256, "step": 4190 }, { "epoch": 0.31547451024671147, "grad_norm": 3.9251842498779297, "learning_rate": 8.018721255355911e-05, "loss": 2.1138, "step": 4191 }, { "epoch": 0.3155497845273716, "grad_norm": 4.508667469024658, "learning_rate": 8.017749334526152e-05, "loss": 1.8397, "step": 4192 }, { "epoch": 0.31562505880803177, "grad_norm": 3.842512607574463, "learning_rate": 8.016777234299854e-05, "loss": 2.0503, "step": 4193 }, { "epoch": 0.31570033308869194, "grad_norm": 5.04013204574585, "learning_rate": 8.015804954734805e-05, "loss": 2.1784, "step": 4194 }, { "epoch": 0.31577560736935206, "grad_norm": 4.8822150230407715, "learning_rate": 8.014832495888804e-05, "loss": 2.0893, "step": 4195 }, { "epoch": 0.31585088165001224, "grad_norm": 6.418948173522949, "learning_rate": 8.013859857819664e-05, "loss": 2.2752, "step": 4196 }, { "epoch": 0.3159261559306724, "grad_norm": 6.8863420486450195, "learning_rate": 8.012887040585202e-05, "loss": 2.0813, "step": 4197 }, { "epoch": 0.31600143021133253, "grad_norm": 4.413151741027832, "learning_rate": 8.01191404424325e-05, "loss": 1.9331, "step": 4198 }, { "epoch": 0.3160767044919927, "grad_norm": 4.351831912994385, "learning_rate": 8.01094086885165e-05, "loss": 2.0356, "step": 4199 }, { "epoch": 0.31615197877265283, "grad_norm": 5.103581428527832, "learning_rate": 8.009967514468254e-05, "loss": 1.8951, "step": 4200 }, { "epoch": 0.316227253053313, "grad_norm": 7.617289066314697, "learning_rate": 8.008993981150928e-05, "loss": 2.0731, "step": 4201 }, { "epoch": 0.3163025273339732, "grad_norm": 3.9347362518310547, "learning_rate": 8.008020268957543e-05, "loss": 1.8671, "step": 4202 }, { "epoch": 0.3163778016146333, "grad_norm": 4.56450080871582, "learning_rate": 8.007046377945983e-05, "loss": 1.8944, "step": 4203 }, { "epoch": 0.3164530758952935, "grad_norm": 4.860941410064697, "learning_rate": 8.006072308174144e-05, "loss": 2.3441, "step": 4204 }, { "epoch": 0.31652835017595365, "grad_norm": 6.801948547363281, "learning_rate": 8.005098059699932e-05, "loss": 2.2835, "step": 4205 }, { "epoch": 0.3166036244566138, "grad_norm": 4.3067626953125, "learning_rate": 8.004123632581264e-05, "loss": 1.7514, "step": 4206 }, { "epoch": 0.31667889873727395, "grad_norm": 5.525414943695068, "learning_rate": 8.003149026876064e-05, "loss": 1.8895, "step": 4207 }, { "epoch": 0.31675417301793407, "grad_norm": 4.888801574707031, "learning_rate": 8.002174242642273e-05, "loss": 1.8697, "step": 4208 }, { "epoch": 0.31682944729859425, "grad_norm": 5.053991794586182, "learning_rate": 8.001199279937837e-05, "loss": 1.9335, "step": 4209 }, { "epoch": 0.3169047215792544, "grad_norm": 5.848512649536133, "learning_rate": 8.000224138820715e-05, "loss": 2.3787, "step": 4210 }, { "epoch": 0.31697999585991454, "grad_norm": 4.121793746948242, "learning_rate": 7.999248819348877e-05, "loss": 1.9153, "step": 4211 }, { "epoch": 0.3170552701405747, "grad_norm": 6.225831031799316, "learning_rate": 7.998273321580302e-05, "loss": 2.1583, "step": 4212 }, { "epoch": 0.3171305444212349, "grad_norm": 4.883932113647461, "learning_rate": 7.99729764557298e-05, "loss": 2.1148, "step": 4213 }, { "epoch": 0.317205818701895, "grad_norm": 5.402492523193359, "learning_rate": 7.996321791384915e-05, "loss": 2.1135, "step": 4214 }, { "epoch": 0.3172810929825552, "grad_norm": 5.1864213943481445, "learning_rate": 7.995345759074116e-05, "loss": 1.8842, "step": 4215 }, { "epoch": 0.31735636726321537, "grad_norm": 4.874420166015625, "learning_rate": 7.994369548698608e-05, "loss": 2.3403, "step": 4216 }, { "epoch": 0.3174316415438755, "grad_norm": 5.139329433441162, "learning_rate": 7.993393160316421e-05, "loss": 2.1243, "step": 4217 }, { "epoch": 0.31750691582453566, "grad_norm": 4.445467948913574, "learning_rate": 7.992416593985599e-05, "loss": 2.1472, "step": 4218 }, { "epoch": 0.3175821901051958, "grad_norm": 5.442739486694336, "learning_rate": 7.991439849764196e-05, "loss": 1.9282, "step": 4219 }, { "epoch": 0.31765746438585596, "grad_norm": 5.267314910888672, "learning_rate": 7.990462927710278e-05, "loss": 2.1721, "step": 4220 }, { "epoch": 0.31773273866651613, "grad_norm": 5.10986852645874, "learning_rate": 7.98948582788192e-05, "loss": 1.9953, "step": 4221 }, { "epoch": 0.31780801294717625, "grad_norm": 4.468688011169434, "learning_rate": 7.988508550337207e-05, "loss": 2.006, "step": 4222 }, { "epoch": 0.31788328722783643, "grad_norm": 6.800841808319092, "learning_rate": 7.987531095134236e-05, "loss": 2.2421, "step": 4223 }, { "epoch": 0.3179585615084966, "grad_norm": 3.708024740219116, "learning_rate": 7.986553462331114e-05, "loss": 1.7357, "step": 4224 }, { "epoch": 0.3180338357891567, "grad_norm": 3.7180466651916504, "learning_rate": 7.985575651985957e-05, "loss": 1.7467, "step": 4225 }, { "epoch": 0.3181091100698169, "grad_norm": 4.065893173217773, "learning_rate": 7.984597664156895e-05, "loss": 1.9881, "step": 4226 }, { "epoch": 0.318184384350477, "grad_norm": 4.427385330200195, "learning_rate": 7.983619498902064e-05, "loss": 1.8517, "step": 4227 }, { "epoch": 0.3182596586311372, "grad_norm": 3.9506874084472656, "learning_rate": 7.982641156279615e-05, "loss": 1.8064, "step": 4228 }, { "epoch": 0.3183349329117974, "grad_norm": 4.5521440505981445, "learning_rate": 7.98166263634771e-05, "loss": 2.0856, "step": 4229 }, { "epoch": 0.3184102071924575, "grad_norm": 5.417686939239502, "learning_rate": 7.980683939164515e-05, "loss": 2.1261, "step": 4230 }, { "epoch": 0.31848548147311767, "grad_norm": 5.204405784606934, "learning_rate": 7.979705064788212e-05, "loss": 2.4627, "step": 4231 }, { "epoch": 0.31856075575377785, "grad_norm": 6.09988260269165, "learning_rate": 7.978726013276994e-05, "loss": 2.1523, "step": 4232 }, { "epoch": 0.31863603003443797, "grad_norm": 4.191976547241211, "learning_rate": 7.97774678468906e-05, "loss": 2.2153, "step": 4233 }, { "epoch": 0.31871130431509814, "grad_norm": 4.253741264343262, "learning_rate": 7.976767379082623e-05, "loss": 2.1283, "step": 4234 }, { "epoch": 0.3187865785957583, "grad_norm": 4.9622039794921875, "learning_rate": 7.975787796515907e-05, "loss": 2.0825, "step": 4235 }, { "epoch": 0.31886185287641844, "grad_norm": 4.996169567108154, "learning_rate": 7.974808037047146e-05, "loss": 1.9932, "step": 4236 }, { "epoch": 0.3189371271570786, "grad_norm": 4.502724647521973, "learning_rate": 7.973828100734584e-05, "loss": 2.1003, "step": 4237 }, { "epoch": 0.31901240143773874, "grad_norm": 4.587053298950195, "learning_rate": 7.972847987636471e-05, "loss": 1.662, "step": 4238 }, { "epoch": 0.3190876757183989, "grad_norm": 3.4886562824249268, "learning_rate": 7.971867697811079e-05, "loss": 2.2299, "step": 4239 }, { "epoch": 0.3191629499990591, "grad_norm": 5.665261268615723, "learning_rate": 7.970887231316677e-05, "loss": 2.0695, "step": 4240 }, { "epoch": 0.3192382242797192, "grad_norm": 4.8837127685546875, "learning_rate": 7.969906588211554e-05, "loss": 1.8784, "step": 4241 }, { "epoch": 0.3193134985603794, "grad_norm": 4.704890251159668, "learning_rate": 7.968925768554005e-05, "loss": 1.7844, "step": 4242 }, { "epoch": 0.31938877284103956, "grad_norm": 5.348260402679443, "learning_rate": 7.96794477240234e-05, "loss": 2.0128, "step": 4243 }, { "epoch": 0.3194640471216997, "grad_norm": 5.229158401489258, "learning_rate": 7.966963599814874e-05, "loss": 2.2811, "step": 4244 }, { "epoch": 0.31953932140235985, "grad_norm": 4.3444013595581055, "learning_rate": 7.965982250849934e-05, "loss": 1.7147, "step": 4245 }, { "epoch": 0.31961459568302003, "grad_norm": 4.520373344421387, "learning_rate": 7.965000725565861e-05, "loss": 2.0335, "step": 4246 }, { "epoch": 0.31968986996368015, "grad_norm": 6.000203609466553, "learning_rate": 7.964019024021001e-05, "loss": 2.2267, "step": 4247 }, { "epoch": 0.3197651442443403, "grad_norm": 3.972568988800049, "learning_rate": 7.963037146273715e-05, "loss": 2.0934, "step": 4248 }, { "epoch": 0.31984041852500045, "grad_norm": 5.49121618270874, "learning_rate": 7.962055092382372e-05, "loss": 1.9397, "step": 4249 }, { "epoch": 0.3199156928056606, "grad_norm": 4.182012557983398, "learning_rate": 7.961072862405354e-05, "loss": 2.0562, "step": 4250 }, { "epoch": 0.3199909670863208, "grad_norm": 4.054929256439209, "learning_rate": 7.96009045640105e-05, "loss": 1.7626, "step": 4251 }, { "epoch": 0.3200662413669809, "grad_norm": 4.9852399826049805, "learning_rate": 7.959107874427863e-05, "loss": 1.8231, "step": 4252 }, { "epoch": 0.3201415156476411, "grad_norm": 4.02760124206543, "learning_rate": 7.958125116544201e-05, "loss": 2.1763, "step": 4253 }, { "epoch": 0.32021678992830127, "grad_norm": 3.0537211894989014, "learning_rate": 7.95714218280849e-05, "loss": 1.9088, "step": 4254 }, { "epoch": 0.3202920642089614, "grad_norm": 5.0345354080200195, "learning_rate": 7.956159073279161e-05, "loss": 2.0425, "step": 4255 }, { "epoch": 0.32036733848962157, "grad_norm": 4.502742290496826, "learning_rate": 7.955175788014658e-05, "loss": 2.0991, "step": 4256 }, { "epoch": 0.3204426127702817, "grad_norm": 3.492851972579956, "learning_rate": 7.954192327073434e-05, "loss": 1.821, "step": 4257 }, { "epoch": 0.32051788705094186, "grad_norm": 4.437711715698242, "learning_rate": 7.953208690513952e-05, "loss": 1.8556, "step": 4258 }, { "epoch": 0.32059316133160204, "grad_norm": 5.508961200714111, "learning_rate": 7.952224878394687e-05, "loss": 1.6512, "step": 4259 }, { "epoch": 0.32066843561226216, "grad_norm": 4.101222991943359, "learning_rate": 7.951240890774124e-05, "loss": 1.6915, "step": 4260 }, { "epoch": 0.32074370989292234, "grad_norm": 3.9297451972961426, "learning_rate": 7.950256727710758e-05, "loss": 2.0412, "step": 4261 }, { "epoch": 0.3208189841735825, "grad_norm": 4.377941131591797, "learning_rate": 7.949272389263096e-05, "loss": 2.2407, "step": 4262 }, { "epoch": 0.32089425845424263, "grad_norm": 4.96002721786499, "learning_rate": 7.948287875489651e-05, "loss": 1.4974, "step": 4263 }, { "epoch": 0.3209695327349028, "grad_norm": 4.096199989318848, "learning_rate": 7.947303186448952e-05, "loss": 2.0694, "step": 4264 }, { "epoch": 0.321044807015563, "grad_norm": 7.235177040100098, "learning_rate": 7.946318322199535e-05, "loss": 2.0124, "step": 4265 }, { "epoch": 0.3211200812962231, "grad_norm": 5.543209552764893, "learning_rate": 7.94533328279995e-05, "loss": 2.1203, "step": 4266 }, { "epoch": 0.3211953555768833, "grad_norm": 4.147661209106445, "learning_rate": 7.94434806830875e-05, "loss": 2.2026, "step": 4267 }, { "epoch": 0.3212706298575434, "grad_norm": 6.708639621734619, "learning_rate": 7.943362678784505e-05, "loss": 2.0444, "step": 4268 }, { "epoch": 0.3213459041382036, "grad_norm": 7.2309370040893555, "learning_rate": 7.942377114285796e-05, "loss": 2.2447, "step": 4269 }, { "epoch": 0.32142117841886375, "grad_norm": 4.966527462005615, "learning_rate": 7.941391374871209e-05, "loss": 1.8573, "step": 4270 }, { "epoch": 0.32149645269952387, "grad_norm": 5.686848163604736, "learning_rate": 7.940405460599345e-05, "loss": 1.696, "step": 4271 }, { "epoch": 0.32157172698018405, "grad_norm": 4.885373592376709, "learning_rate": 7.939419371528813e-05, "loss": 1.7538, "step": 4272 }, { "epoch": 0.3216470012608442, "grad_norm": 6.414916038513184, "learning_rate": 7.938433107718235e-05, "loss": 2.0618, "step": 4273 }, { "epoch": 0.32172227554150434, "grad_norm": 4.447811603546143, "learning_rate": 7.937446669226238e-05, "loss": 2.0703, "step": 4274 }, { "epoch": 0.3217975498221645, "grad_norm": 5.1187744140625, "learning_rate": 7.936460056111468e-05, "loss": 1.9478, "step": 4275 }, { "epoch": 0.3218728241028247, "grad_norm": 3.978109359741211, "learning_rate": 7.93547326843257e-05, "loss": 1.7732, "step": 4276 }, { "epoch": 0.3219480983834848, "grad_norm": 6.079015731811523, "learning_rate": 7.93448630624821e-05, "loss": 2.0366, "step": 4277 }, { "epoch": 0.322023372664145, "grad_norm": 5.9308881759643555, "learning_rate": 7.93349916961706e-05, "loss": 1.8863, "step": 4278 }, { "epoch": 0.3220986469448051, "grad_norm": 4.380959510803223, "learning_rate": 7.932511858597801e-05, "loss": 1.8405, "step": 4279 }, { "epoch": 0.3221739212254653, "grad_norm": 5.676402568817139, "learning_rate": 7.931524373249126e-05, "loss": 1.9023, "step": 4280 }, { "epoch": 0.32224919550612546, "grad_norm": 5.0600433349609375, "learning_rate": 7.93053671362974e-05, "loss": 2.3296, "step": 4281 }, { "epoch": 0.3223244697867856, "grad_norm": 4.345888614654541, "learning_rate": 7.929548879798353e-05, "loss": 2.2065, "step": 4282 }, { "epoch": 0.32239974406744576, "grad_norm": 4.994972229003906, "learning_rate": 7.928560871813692e-05, "loss": 1.7915, "step": 4283 }, { "epoch": 0.32247501834810594, "grad_norm": 4.461777687072754, "learning_rate": 7.927572689734489e-05, "loss": 2.0374, "step": 4284 }, { "epoch": 0.32255029262876606, "grad_norm": 5.133300304412842, "learning_rate": 7.926584333619491e-05, "loss": 1.9424, "step": 4285 }, { "epoch": 0.32262556690942623, "grad_norm": 5.922780513763428, "learning_rate": 7.92559580352745e-05, "loss": 2.175, "step": 4286 }, { "epoch": 0.32270084119008635, "grad_norm": 7.765385150909424, "learning_rate": 7.924607099517135e-05, "loss": 2.184, "step": 4287 }, { "epoch": 0.3227761154707465, "grad_norm": 4.702193737030029, "learning_rate": 7.92361822164732e-05, "loss": 2.1254, "step": 4288 }, { "epoch": 0.3228513897514067, "grad_norm": 3.975552558898926, "learning_rate": 7.922629169976789e-05, "loss": 2.0114, "step": 4289 }, { "epoch": 0.3229266640320668, "grad_norm": 6.022935390472412, "learning_rate": 7.92163994456434e-05, "loss": 2.011, "step": 4290 }, { "epoch": 0.323001938312727, "grad_norm": 4.55320930480957, "learning_rate": 7.92065054546878e-05, "loss": 2.1454, "step": 4291 }, { "epoch": 0.3230772125933872, "grad_norm": 5.387508869171143, "learning_rate": 7.919660972748923e-05, "loss": 2.2658, "step": 4292 }, { "epoch": 0.3231524868740473, "grad_norm": 4.650260925292969, "learning_rate": 7.918671226463601e-05, "loss": 1.8261, "step": 4293 }, { "epoch": 0.32322776115470747, "grad_norm": 6.136397838592529, "learning_rate": 7.91768130667165e-05, "loss": 1.9409, "step": 4294 }, { "epoch": 0.32330303543536765, "grad_norm": 5.892971515655518, "learning_rate": 7.916691213431915e-05, "loss": 2.0142, "step": 4295 }, { "epoch": 0.32337830971602777, "grad_norm": 7.019687175750732, "learning_rate": 7.915700946803257e-05, "loss": 1.9436, "step": 4296 }, { "epoch": 0.32345358399668794, "grad_norm": 3.8222222328186035, "learning_rate": 7.914710506844545e-05, "loss": 2.2068, "step": 4297 }, { "epoch": 0.32352885827734806, "grad_norm": 5.505558013916016, "learning_rate": 7.913719893614655e-05, "loss": 2.126, "step": 4298 }, { "epoch": 0.32360413255800824, "grad_norm": 3.8058364391326904, "learning_rate": 7.912729107172479e-05, "loss": 1.6943, "step": 4299 }, { "epoch": 0.3236794068386684, "grad_norm": 3.583813190460205, "learning_rate": 7.911738147576914e-05, "loss": 2.1404, "step": 4300 }, { "epoch": 0.32375468111932854, "grad_norm": 3.3333823680877686, "learning_rate": 7.91074701488687e-05, "loss": 1.9622, "step": 4301 }, { "epoch": 0.3238299553999887, "grad_norm": 7.203909873962402, "learning_rate": 7.90975570916127e-05, "loss": 2.0116, "step": 4302 }, { "epoch": 0.3239052296806489, "grad_norm": 4.942460060119629, "learning_rate": 7.90876423045904e-05, "loss": 1.9917, "step": 4303 }, { "epoch": 0.323980503961309, "grad_norm": 5.483014106750488, "learning_rate": 7.907772578839125e-05, "loss": 1.832, "step": 4304 }, { "epoch": 0.3240557782419692, "grad_norm": 4.343141555786133, "learning_rate": 7.906780754360472e-05, "loss": 1.7992, "step": 4305 }, { "epoch": 0.3241310525226293, "grad_norm": 4.399363994598389, "learning_rate": 7.905788757082044e-05, "loss": 1.9644, "step": 4306 }, { "epoch": 0.3242063268032895, "grad_norm": 4.299865245819092, "learning_rate": 7.904796587062811e-05, "loss": 2.067, "step": 4307 }, { "epoch": 0.32428160108394966, "grad_norm": 4.725642204284668, "learning_rate": 7.903804244361757e-05, "loss": 1.9544, "step": 4308 }, { "epoch": 0.3243568753646098, "grad_norm": 4.476677417755127, "learning_rate": 7.902811729037873e-05, "loss": 2.3473, "step": 4309 }, { "epoch": 0.32443214964526995, "grad_norm": 3.550751209259033, "learning_rate": 7.90181904115016e-05, "loss": 1.7212, "step": 4310 }, { "epoch": 0.3245074239259301, "grad_norm": 6.090052127838135, "learning_rate": 7.900826180757632e-05, "loss": 1.9578, "step": 4311 }, { "epoch": 0.32458269820659025, "grad_norm": 4.012701511383057, "learning_rate": 7.89983314791931e-05, "loss": 1.9202, "step": 4312 }, { "epoch": 0.3246579724872504, "grad_norm": 4.01508903503418, "learning_rate": 7.898839942694228e-05, "loss": 2.2097, "step": 4313 }, { "epoch": 0.3247332467679106, "grad_norm": 3.751957654953003, "learning_rate": 7.89784656514143e-05, "loss": 1.7587, "step": 4314 }, { "epoch": 0.3248085210485707, "grad_norm": 4.292129993438721, "learning_rate": 7.896853015319967e-05, "loss": 1.8522, "step": 4315 }, { "epoch": 0.3248837953292309, "grad_norm": 6.028865814208984, "learning_rate": 7.895859293288907e-05, "loss": 2.0651, "step": 4316 }, { "epoch": 0.324959069609891, "grad_norm": 5.337474346160889, "learning_rate": 7.894865399107319e-05, "loss": 2.1627, "step": 4317 }, { "epoch": 0.3250343438905512, "grad_norm": 3.9330873489379883, "learning_rate": 7.893871332834288e-05, "loss": 1.8096, "step": 4318 }, { "epoch": 0.32510961817121137, "grad_norm": 4.409187316894531, "learning_rate": 7.892877094528912e-05, "loss": 1.871, "step": 4319 }, { "epoch": 0.3251848924518715, "grad_norm": 3.8573362827301025, "learning_rate": 7.891882684250292e-05, "loss": 2.2731, "step": 4320 }, { "epoch": 0.32526016673253166, "grad_norm": 4.413033962249756, "learning_rate": 7.890888102057545e-05, "loss": 1.9311, "step": 4321 }, { "epoch": 0.32533544101319184, "grad_norm": 5.094873428344727, "learning_rate": 7.889893348009796e-05, "loss": 1.8511, "step": 4322 }, { "epoch": 0.32541071529385196, "grad_norm": 7.153104782104492, "learning_rate": 7.888898422166178e-05, "loss": 1.7481, "step": 4323 }, { "epoch": 0.32548598957451214, "grad_norm": 11.435986518859863, "learning_rate": 7.887903324585839e-05, "loss": 2.1428, "step": 4324 }, { "epoch": 0.3255612638551723, "grad_norm": 3.9848570823669434, "learning_rate": 7.886908055327933e-05, "loss": 1.8565, "step": 4325 }, { "epoch": 0.32563653813583243, "grad_norm": 4.865833282470703, "learning_rate": 7.885912614451627e-05, "loss": 1.708, "step": 4326 }, { "epoch": 0.3257118124164926, "grad_norm": 5.470982551574707, "learning_rate": 7.884917002016097e-05, "loss": 2.0026, "step": 4327 }, { "epoch": 0.32578708669715273, "grad_norm": 3.812516927719116, "learning_rate": 7.883921218080528e-05, "loss": 2.2151, "step": 4328 }, { "epoch": 0.3258623609778129, "grad_norm": 4.403374671936035, "learning_rate": 7.882925262704118e-05, "loss": 1.8582, "step": 4329 }, { "epoch": 0.3259376352584731, "grad_norm": 5.941940784454346, "learning_rate": 7.881929135946073e-05, "loss": 2.3364, "step": 4330 }, { "epoch": 0.3260129095391332, "grad_norm": 4.640542984008789, "learning_rate": 7.88093283786561e-05, "loss": 2.1626, "step": 4331 }, { "epoch": 0.3260881838197934, "grad_norm": 4.178682804107666, "learning_rate": 7.879936368521957e-05, "loss": 2.0245, "step": 4332 }, { "epoch": 0.32616345810045355, "grad_norm": 4.092398643493652, "learning_rate": 7.87893972797435e-05, "loss": 2.0029, "step": 4333 }, { "epoch": 0.32623873238111367, "grad_norm": 4.372684955596924, "learning_rate": 7.877942916282037e-05, "loss": 2.2685, "step": 4334 }, { "epoch": 0.32631400666177385, "grad_norm": 4.049747467041016, "learning_rate": 7.876945933504274e-05, "loss": 2.002, "step": 4335 }, { "epoch": 0.32638928094243397, "grad_norm": 4.097887992858887, "learning_rate": 7.875948779700331e-05, "loss": 1.6293, "step": 4336 }, { "epoch": 0.32646455522309414, "grad_norm": 4.2467851638793945, "learning_rate": 7.874951454929484e-05, "loss": 1.7134, "step": 4337 }, { "epoch": 0.3265398295037543, "grad_norm": 5.490288734436035, "learning_rate": 7.873953959251023e-05, "loss": 1.8483, "step": 4338 }, { "epoch": 0.32661510378441444, "grad_norm": 3.672646999359131, "learning_rate": 7.872956292724247e-05, "loss": 2.2833, "step": 4339 }, { "epoch": 0.3266903780650746, "grad_norm": 5.37131404876709, "learning_rate": 7.871958455408461e-05, "loss": 1.9189, "step": 4340 }, { "epoch": 0.3267656523457348, "grad_norm": 4.298294544219971, "learning_rate": 7.870960447362986e-05, "loss": 1.9717, "step": 4341 }, { "epoch": 0.3268409266263949, "grad_norm": 4.142689228057861, "learning_rate": 7.869962268647149e-05, "loss": 1.9898, "step": 4342 }, { "epoch": 0.3269162009070551, "grad_norm": 3.9931344985961914, "learning_rate": 7.868963919320292e-05, "loss": 2.1516, "step": 4343 }, { "epoch": 0.32699147518771526, "grad_norm": 4.967349529266357, "learning_rate": 7.867965399441762e-05, "loss": 1.7839, "step": 4344 }, { "epoch": 0.3270667494683754, "grad_norm": 6.360227584838867, "learning_rate": 7.866966709070916e-05, "loss": 1.8235, "step": 4345 }, { "epoch": 0.32714202374903556, "grad_norm": 4.461812496185303, "learning_rate": 7.865967848267126e-05, "loss": 2.0104, "step": 4346 }, { "epoch": 0.3272172980296957, "grad_norm": 4.522796154022217, "learning_rate": 7.864968817089772e-05, "loss": 2.2676, "step": 4347 }, { "epoch": 0.32729257231035586, "grad_norm": 4.2580342292785645, "learning_rate": 7.86396961559824e-05, "loss": 2.2547, "step": 4348 }, { "epoch": 0.32736784659101603, "grad_norm": 6.083259105682373, "learning_rate": 7.862970243851935e-05, "loss": 2.2956, "step": 4349 }, { "epoch": 0.32744312087167615, "grad_norm": 4.294934272766113, "learning_rate": 7.861970701910262e-05, "loss": 1.9972, "step": 4350 }, { "epoch": 0.32751839515233633, "grad_norm": 6.339199066162109, "learning_rate": 7.860970989832644e-05, "loss": 2.2726, "step": 4351 }, { "epoch": 0.3275936694329965, "grad_norm": 4.928619861602783, "learning_rate": 7.859971107678507e-05, "loss": 1.7872, "step": 4352 }, { "epoch": 0.3276689437136566, "grad_norm": 4.054974555969238, "learning_rate": 7.858971055507295e-05, "loss": 1.8862, "step": 4353 }, { "epoch": 0.3277442179943168, "grad_norm": 6.087123870849609, "learning_rate": 7.857970833378458e-05, "loss": 2.017, "step": 4354 }, { "epoch": 0.3278194922749769, "grad_norm": 5.132475852966309, "learning_rate": 7.856970441351456e-05, "loss": 1.9775, "step": 4355 }, { "epoch": 0.3278947665556371, "grad_norm": 3.0807714462280273, "learning_rate": 7.855969879485758e-05, "loss": 1.9778, "step": 4356 }, { "epoch": 0.32797004083629727, "grad_norm": 5.419703483581543, "learning_rate": 7.854969147840845e-05, "loss": 1.8723, "step": 4357 }, { "epoch": 0.3280453151169574, "grad_norm": 7.973278522491455, "learning_rate": 7.853968246476209e-05, "loss": 2.0772, "step": 4358 }, { "epoch": 0.32812058939761757, "grad_norm": 7.781949520111084, "learning_rate": 7.852967175451349e-05, "loss": 2.3391, "step": 4359 }, { "epoch": 0.32819586367827774, "grad_norm": 5.441353797912598, "learning_rate": 7.851965934825775e-05, "loss": 1.9057, "step": 4360 }, { "epoch": 0.32827113795893786, "grad_norm": 4.854582786560059, "learning_rate": 7.850964524659013e-05, "loss": 1.6827, "step": 4361 }, { "epoch": 0.32834641223959804, "grad_norm": 5.159985542297363, "learning_rate": 7.849962945010587e-05, "loss": 1.8685, "step": 4362 }, { "epoch": 0.3284216865202582, "grad_norm": 5.116022109985352, "learning_rate": 7.848961195940043e-05, "loss": 1.951, "step": 4363 }, { "epoch": 0.32849696080091834, "grad_norm": 4.6052961349487305, "learning_rate": 7.847959277506928e-05, "loss": 1.9344, "step": 4364 }, { "epoch": 0.3285722350815785, "grad_norm": 7.996417999267578, "learning_rate": 7.846957189770809e-05, "loss": 1.9771, "step": 4365 }, { "epoch": 0.32864750936223863, "grad_norm": 6.167883396148682, "learning_rate": 7.845954932791253e-05, "loss": 1.7924, "step": 4366 }, { "epoch": 0.3287227836428988, "grad_norm": 5.015079021453857, "learning_rate": 7.84495250662784e-05, "loss": 1.9587, "step": 4367 }, { "epoch": 0.328798057923559, "grad_norm": 4.868743419647217, "learning_rate": 7.843949911340168e-05, "loss": 2.056, "step": 4368 }, { "epoch": 0.3288733322042191, "grad_norm": 4.857298374176025, "learning_rate": 7.842947146987829e-05, "loss": 2.1993, "step": 4369 }, { "epoch": 0.3289486064848793, "grad_norm": 5.768919467926025, "learning_rate": 7.841944213630442e-05, "loss": 1.8897, "step": 4370 }, { "epoch": 0.32902388076553946, "grad_norm": 5.21314811706543, "learning_rate": 7.840941111327626e-05, "loss": 1.7522, "step": 4371 }, { "epoch": 0.3290991550461996, "grad_norm": 4.84205436706543, "learning_rate": 7.839937840139013e-05, "loss": 2.7705, "step": 4372 }, { "epoch": 0.32917442932685975, "grad_norm": 4.099056720733643, "learning_rate": 7.838934400124242e-05, "loss": 1.4868, "step": 4373 }, { "epoch": 0.32924970360751993, "grad_norm": 5.3566575050354, "learning_rate": 7.837930791342969e-05, "loss": 2.1252, "step": 4374 }, { "epoch": 0.32932497788818005, "grad_norm": 7.518743515014648, "learning_rate": 7.836927013854853e-05, "loss": 1.8127, "step": 4375 }, { "epoch": 0.3294002521688402, "grad_norm": 6.118736743927002, "learning_rate": 7.835923067719565e-05, "loss": 1.6639, "step": 4376 }, { "epoch": 0.32947552644950034, "grad_norm": 4.957034587860107, "learning_rate": 7.834918952996788e-05, "loss": 1.8331, "step": 4377 }, { "epoch": 0.3295508007301605, "grad_norm": 4.866137981414795, "learning_rate": 7.833914669746214e-05, "loss": 2.2401, "step": 4378 }, { "epoch": 0.3296260750108207, "grad_norm": 7.278197765350342, "learning_rate": 7.832910218027545e-05, "loss": 2.2516, "step": 4379 }, { "epoch": 0.3297013492914808, "grad_norm": 3.633559465408325, "learning_rate": 7.831905597900491e-05, "loss": 2.0158, "step": 4380 }, { "epoch": 0.329776623572141, "grad_norm": 4.701472282409668, "learning_rate": 7.830900809424775e-05, "loss": 2.0728, "step": 4381 }, { "epoch": 0.32985189785280117, "grad_norm": 4.455605983734131, "learning_rate": 7.82989585266013e-05, "loss": 2.0341, "step": 4382 }, { "epoch": 0.3299271721334613, "grad_norm": 4.972047805786133, "learning_rate": 7.828890727666294e-05, "loss": 1.9944, "step": 4383 }, { "epoch": 0.33000244641412146, "grad_norm": 6.96929931640625, "learning_rate": 7.827885434503021e-05, "loss": 2.2406, "step": 4384 }, { "epoch": 0.3300777206947816, "grad_norm": 4.37180757522583, "learning_rate": 7.826879973230074e-05, "loss": 1.949, "step": 4385 }, { "epoch": 0.33015299497544176, "grad_norm": 4.380893230438232, "learning_rate": 7.825874343907226e-05, "loss": 2.113, "step": 4386 }, { "epoch": 0.33022826925610194, "grad_norm": 5.326410293579102, "learning_rate": 7.824868546594255e-05, "loss": 1.7335, "step": 4387 }, { "epoch": 0.33030354353676206, "grad_norm": 4.2704572677612305, "learning_rate": 7.823862581350954e-05, "loss": 2.1107, "step": 4388 }, { "epoch": 0.33037881781742223, "grad_norm": 5.810431480407715, "learning_rate": 7.822856448237126e-05, "loss": 2.1861, "step": 4389 }, { "epoch": 0.3304540920980824, "grad_norm": 6.7572526931762695, "learning_rate": 7.821850147312579e-05, "loss": 2.476, "step": 4390 }, { "epoch": 0.33052936637874253, "grad_norm": 5.573050498962402, "learning_rate": 7.82084367863714e-05, "loss": 2.1025, "step": 4391 }, { "epoch": 0.3306046406594027, "grad_norm": 4.264028549194336, "learning_rate": 7.81983704227064e-05, "loss": 1.7665, "step": 4392 }, { "epoch": 0.3306799149400629, "grad_norm": 5.386504650115967, "learning_rate": 7.818830238272916e-05, "loss": 2.5614, "step": 4393 }, { "epoch": 0.330755189220723, "grad_norm": 4.34447717666626, "learning_rate": 7.817823266703823e-05, "loss": 2.0942, "step": 4394 }, { "epoch": 0.3308304635013832, "grad_norm": 4.112555027008057, "learning_rate": 7.816816127623222e-05, "loss": 2.0292, "step": 4395 }, { "epoch": 0.3309057377820433, "grad_norm": 4.329293727874756, "learning_rate": 7.815808821090986e-05, "loss": 1.6239, "step": 4396 }, { "epoch": 0.3309810120627035, "grad_norm": 4.057832717895508, "learning_rate": 7.814801347166993e-05, "loss": 2.0869, "step": 4397 }, { "epoch": 0.33105628634336365, "grad_norm": 5.131999492645264, "learning_rate": 7.813793705911138e-05, "loss": 1.981, "step": 4398 }, { "epoch": 0.33113156062402377, "grad_norm": 7.1158905029296875, "learning_rate": 7.812785897383319e-05, "loss": 2.1861, "step": 4399 }, { "epoch": 0.33120683490468394, "grad_norm": 4.448178291320801, "learning_rate": 7.811777921643451e-05, "loss": 1.7959, "step": 4400 }, { "epoch": 0.3312821091853441, "grad_norm": 5.340634822845459, "learning_rate": 7.810769778751453e-05, "loss": 2.0081, "step": 4401 }, { "epoch": 0.33135738346600424, "grad_norm": 4.587032318115234, "learning_rate": 7.809761468767256e-05, "loss": 1.9775, "step": 4402 }, { "epoch": 0.3314326577466644, "grad_norm": 7.372905731201172, "learning_rate": 7.808752991750803e-05, "loss": 2.443, "step": 4403 }, { "epoch": 0.33150793202732454, "grad_norm": 5.082029342651367, "learning_rate": 7.807744347762042e-05, "loss": 1.8237, "step": 4404 }, { "epoch": 0.3315832063079847, "grad_norm": 4.510185718536377, "learning_rate": 7.806735536860939e-05, "loss": 1.5911, "step": 4405 }, { "epoch": 0.3316584805886449, "grad_norm": 4.006820201873779, "learning_rate": 7.805726559107458e-05, "loss": 1.7089, "step": 4406 }, { "epoch": 0.331733754869305, "grad_norm": 4.016329765319824, "learning_rate": 7.804717414561586e-05, "loss": 1.7514, "step": 4407 }, { "epoch": 0.3318090291499652, "grad_norm": 4.960211277008057, "learning_rate": 7.803708103283311e-05, "loss": 2.1126, "step": 4408 }, { "epoch": 0.33188430343062536, "grad_norm": 5.330507278442383, "learning_rate": 7.802698625332634e-05, "loss": 2.1649, "step": 4409 }, { "epoch": 0.3319595777112855, "grad_norm": 3.614654302597046, "learning_rate": 7.801688980769564e-05, "loss": 1.6986, "step": 4410 }, { "epoch": 0.33203485199194566, "grad_norm": 3.831610679626465, "learning_rate": 7.800679169654125e-05, "loss": 2.0079, "step": 4411 }, { "epoch": 0.33211012627260583, "grad_norm": 5.059773921966553, "learning_rate": 7.799669192046345e-05, "loss": 2.1007, "step": 4412 }, { "epoch": 0.33218540055326595, "grad_norm": 3.5472607612609863, "learning_rate": 7.798659048006265e-05, "loss": 2.063, "step": 4413 }, { "epoch": 0.33226067483392613, "grad_norm": 4.235814094543457, "learning_rate": 7.797648737593934e-05, "loss": 1.8019, "step": 4414 }, { "epoch": 0.33233594911458625, "grad_norm": 4.2445268630981445, "learning_rate": 7.796638260869412e-05, "loss": 2.2093, "step": 4415 }, { "epoch": 0.3324112233952464, "grad_norm": 3.7013628482818604, "learning_rate": 7.79562761789277e-05, "loss": 1.7717, "step": 4416 }, { "epoch": 0.3324864976759066, "grad_norm": 4.648346424102783, "learning_rate": 7.79461680872409e-05, "loss": 1.9894, "step": 4417 }, { "epoch": 0.3325617719565667, "grad_norm": 4.188632488250732, "learning_rate": 7.793605833423459e-05, "loss": 1.6559, "step": 4418 }, { "epoch": 0.3326370462372269, "grad_norm": 3.7285044193267822, "learning_rate": 7.792594692050975e-05, "loss": 2.1474, "step": 4419 }, { "epoch": 0.3327123205178871, "grad_norm": 5.108846664428711, "learning_rate": 7.791583384666751e-05, "loss": 2.1861, "step": 4420 }, { "epoch": 0.3327875947985472, "grad_norm": 5.324336528778076, "learning_rate": 7.790571911330904e-05, "loss": 2.0839, "step": 4421 }, { "epoch": 0.33286286907920737, "grad_norm": 4.901613235473633, "learning_rate": 7.789560272103564e-05, "loss": 1.6584, "step": 4422 }, { "epoch": 0.33293814335986754, "grad_norm": 4.398468971252441, "learning_rate": 7.788548467044869e-05, "loss": 1.9581, "step": 4423 }, { "epoch": 0.33301341764052766, "grad_norm": 5.463700294494629, "learning_rate": 7.787536496214972e-05, "loss": 2.1776, "step": 4424 }, { "epoch": 0.33308869192118784, "grad_norm": 7.23293399810791, "learning_rate": 7.786524359674026e-05, "loss": 1.9, "step": 4425 }, { "epoch": 0.33316396620184796, "grad_norm": 5.656863212585449, "learning_rate": 7.785512057482202e-05, "loss": 2.1969, "step": 4426 }, { "epoch": 0.33323924048250814, "grad_norm": 5.028671741485596, "learning_rate": 7.784499589699681e-05, "loss": 2.4658, "step": 4427 }, { "epoch": 0.3333145147631683, "grad_norm": 4.938046932220459, "learning_rate": 7.783486956386647e-05, "loss": 2.026, "step": 4428 }, { "epoch": 0.33338978904382843, "grad_norm": 5.654271602630615, "learning_rate": 7.782474157603301e-05, "loss": 2.2582, "step": 4429 }, { "epoch": 0.3334650633244886, "grad_norm": 5.342366695404053, "learning_rate": 7.78146119340985e-05, "loss": 2.1101, "step": 4430 }, { "epoch": 0.3335403376051488, "grad_norm": 6.084756374359131, "learning_rate": 7.780448063866511e-05, "loss": 2.4105, "step": 4431 }, { "epoch": 0.3336156118858089, "grad_norm": 3.4337925910949707, "learning_rate": 7.779434769033514e-05, "loss": 1.8763, "step": 4432 }, { "epoch": 0.3336908861664691, "grad_norm": 5.285945892333984, "learning_rate": 7.778421308971093e-05, "loss": 1.9216, "step": 4433 }, { "epoch": 0.3337661604471292, "grad_norm": 4.035688400268555, "learning_rate": 7.7774076837395e-05, "loss": 1.9087, "step": 4434 }, { "epoch": 0.3338414347277894, "grad_norm": 4.876992225646973, "learning_rate": 7.776393893398988e-05, "loss": 2.0467, "step": 4435 }, { "epoch": 0.33391670900844955, "grad_norm": 4.6616058349609375, "learning_rate": 7.775379938009826e-05, "loss": 2.2517, "step": 4436 }, { "epoch": 0.3339919832891097, "grad_norm": 4.198055267333984, "learning_rate": 7.77436581763229e-05, "loss": 1.5766, "step": 4437 }, { "epoch": 0.33406725756976985, "grad_norm": 4.089532852172852, "learning_rate": 7.773351532326667e-05, "loss": 2.0709, "step": 4438 }, { "epoch": 0.33414253185043, "grad_norm": 4.617081642150879, "learning_rate": 7.772337082153253e-05, "loss": 1.5901, "step": 4439 }, { "epoch": 0.33421780613109014, "grad_norm": 4.185454368591309, "learning_rate": 7.771322467172353e-05, "loss": 1.9904, "step": 4440 }, { "epoch": 0.3342930804117503, "grad_norm": 5.368082046508789, "learning_rate": 7.770307687444284e-05, "loss": 1.6603, "step": 4441 }, { "epoch": 0.3343683546924105, "grad_norm": 4.917660236358643, "learning_rate": 7.769292743029374e-05, "loss": 1.9464, "step": 4442 }, { "epoch": 0.3344436289730706, "grad_norm": 5.311621189117432, "learning_rate": 7.768277633987955e-05, "loss": 2.1194, "step": 4443 }, { "epoch": 0.3345189032537308, "grad_norm": 4.397782325744629, "learning_rate": 7.767262360380374e-05, "loss": 2.0917, "step": 4444 }, { "epoch": 0.3345941775343909, "grad_norm": 4.391879081726074, "learning_rate": 7.766246922266986e-05, "loss": 2.024, "step": 4445 }, { "epoch": 0.3346694518150511, "grad_norm": 4.531358242034912, "learning_rate": 7.765231319708156e-05, "loss": 2.1964, "step": 4446 }, { "epoch": 0.33474472609571126, "grad_norm": 4.115760326385498, "learning_rate": 7.764215552764258e-05, "loss": 2.0223, "step": 4447 }, { "epoch": 0.3348200003763714, "grad_norm": 4.948193073272705, "learning_rate": 7.763199621495676e-05, "loss": 2.297, "step": 4448 }, { "epoch": 0.33489527465703156, "grad_norm": 5.160480499267578, "learning_rate": 7.762183525962804e-05, "loss": 1.8223, "step": 4449 }, { "epoch": 0.33497054893769174, "grad_norm": 4.777421951293945, "learning_rate": 7.76116726622605e-05, "loss": 1.8727, "step": 4450 }, { "epoch": 0.33504582321835186, "grad_norm": 5.0138983726501465, "learning_rate": 7.760150842345823e-05, "loss": 2.3869, "step": 4451 }, { "epoch": 0.33512109749901203, "grad_norm": 5.5878071784973145, "learning_rate": 7.75913425438255e-05, "loss": 2.0247, "step": 4452 }, { "epoch": 0.33519637177967215, "grad_norm": 4.397315502166748, "learning_rate": 7.758117502396661e-05, "loss": 2.0419, "step": 4453 }, { "epoch": 0.33527164606033233, "grad_norm": 7.074615478515625, "learning_rate": 7.7571005864486e-05, "loss": 2.1746, "step": 4454 }, { "epoch": 0.3353469203409925, "grad_norm": 4.192893981933594, "learning_rate": 7.756083506598821e-05, "loss": 1.9827, "step": 4455 }, { "epoch": 0.3354221946216526, "grad_norm": 5.783407688140869, "learning_rate": 7.755066262907786e-05, "loss": 2.0694, "step": 4456 }, { "epoch": 0.3354974689023128, "grad_norm": 4.581982135772705, "learning_rate": 7.754048855435965e-05, "loss": 2.1816, "step": 4457 }, { "epoch": 0.335572743182973, "grad_norm": 3.9422690868377686, "learning_rate": 7.753031284243844e-05, "loss": 2.0383, "step": 4458 }, { "epoch": 0.3356480174636331, "grad_norm": 5.631012439727783, "learning_rate": 7.75201354939191e-05, "loss": 1.934, "step": 4459 }, { "epoch": 0.3357232917442933, "grad_norm": 5.207779884338379, "learning_rate": 7.750995650940669e-05, "loss": 1.761, "step": 4460 }, { "epoch": 0.33579856602495345, "grad_norm": 5.82303524017334, "learning_rate": 7.74997758895063e-05, "loss": 2.1199, "step": 4461 }, { "epoch": 0.33587384030561357, "grad_norm": 4.269281387329102, "learning_rate": 7.748959363482313e-05, "loss": 2.0459, "step": 4462 }, { "epoch": 0.33594911458627374, "grad_norm": 5.640504360198975, "learning_rate": 7.747940974596248e-05, "loss": 2.4645, "step": 4463 }, { "epoch": 0.33602438886693387, "grad_norm": 6.266437530517578, "learning_rate": 7.746922422352978e-05, "loss": 2.3986, "step": 4464 }, { "epoch": 0.33609966314759404, "grad_norm": 4.637019634246826, "learning_rate": 7.745903706813054e-05, "loss": 1.8858, "step": 4465 }, { "epoch": 0.3361749374282542, "grad_norm": 3.3513193130493164, "learning_rate": 7.744884828037031e-05, "loss": 2.1596, "step": 4466 }, { "epoch": 0.33625021170891434, "grad_norm": 4.358898162841797, "learning_rate": 7.743865786085482e-05, "loss": 1.942, "step": 4467 }, { "epoch": 0.3363254859895745, "grad_norm": 4.427700996398926, "learning_rate": 7.742846581018985e-05, "loss": 1.6143, "step": 4468 }, { "epoch": 0.3364007602702347, "grad_norm": 4.402661323547363, "learning_rate": 7.741827212898128e-05, "loss": 2.195, "step": 4469 }, { "epoch": 0.3364760345508948, "grad_norm": 4.933595180511475, "learning_rate": 7.740807681783511e-05, "loss": 1.712, "step": 4470 }, { "epoch": 0.336551308831555, "grad_norm": 3.9877562522888184, "learning_rate": 7.739787987735742e-05, "loss": 1.8227, "step": 4471 }, { "epoch": 0.33662658311221516, "grad_norm": 4.576951026916504, "learning_rate": 7.73876813081544e-05, "loss": 2.0839, "step": 4472 }, { "epoch": 0.3367018573928753, "grad_norm": 4.622626781463623, "learning_rate": 7.737748111083227e-05, "loss": 1.891, "step": 4473 }, { "epoch": 0.33677713167353546, "grad_norm": 5.091668605804443, "learning_rate": 7.736727928599748e-05, "loss": 1.788, "step": 4474 }, { "epoch": 0.3368524059541956, "grad_norm": 8.14338207244873, "learning_rate": 7.735707583425643e-05, "loss": 2.1729, "step": 4475 }, { "epoch": 0.33692768023485575, "grad_norm": 6.775550365447998, "learning_rate": 7.734687075621574e-05, "loss": 2.3048, "step": 4476 }, { "epoch": 0.33700295451551593, "grad_norm": 5.820411205291748, "learning_rate": 7.733666405248205e-05, "loss": 1.712, "step": 4477 }, { "epoch": 0.33707822879617605, "grad_norm": 5.631104946136475, "learning_rate": 7.73264557236621e-05, "loss": 1.9128, "step": 4478 }, { "epoch": 0.3371535030768362, "grad_norm": 4.196645736694336, "learning_rate": 7.731624577036278e-05, "loss": 2.6356, "step": 4479 }, { "epoch": 0.3372287773574964, "grad_norm": 4.481573581695557, "learning_rate": 7.730603419319103e-05, "loss": 2.7323, "step": 4480 }, { "epoch": 0.3373040516381565, "grad_norm": 5.715769290924072, "learning_rate": 7.72958209927539e-05, "loss": 2.0297, "step": 4481 }, { "epoch": 0.3373793259188167, "grad_norm": 4.2328715324401855, "learning_rate": 7.728560616965852e-05, "loss": 2.0633, "step": 4482 }, { "epoch": 0.3374546001994768, "grad_norm": 7.679381847381592, "learning_rate": 7.727538972451216e-05, "loss": 2.1336, "step": 4483 }, { "epoch": 0.337529874480137, "grad_norm": 4.048801422119141, "learning_rate": 7.726517165792213e-05, "loss": 1.7317, "step": 4484 }, { "epoch": 0.33760514876079717, "grad_norm": 4.568599700927734, "learning_rate": 7.725495197049588e-05, "loss": 1.8173, "step": 4485 }, { "epoch": 0.3376804230414573, "grad_norm": 4.221613883972168, "learning_rate": 7.724473066284095e-05, "loss": 2.1084, "step": 4486 }, { "epoch": 0.33775569732211747, "grad_norm": 4.320460796356201, "learning_rate": 7.723450773556494e-05, "loss": 2.2091, "step": 4487 }, { "epoch": 0.33783097160277764, "grad_norm": 4.902789115905762, "learning_rate": 7.72242831892756e-05, "loss": 1.7225, "step": 4488 }, { "epoch": 0.33790624588343776, "grad_norm": 5.744263648986816, "learning_rate": 7.721405702458073e-05, "loss": 2.0202, "step": 4489 }, { "epoch": 0.33798152016409794, "grad_norm": 3.8968923091888428, "learning_rate": 7.720382924208827e-05, "loss": 1.7443, "step": 4490 }, { "epoch": 0.3380567944447581, "grad_norm": 7.999432563781738, "learning_rate": 7.719359984240619e-05, "loss": 2.011, "step": 4491 }, { "epoch": 0.33813206872541823, "grad_norm": 6.596442699432373, "learning_rate": 7.718336882614265e-05, "loss": 1.8791, "step": 4492 }, { "epoch": 0.3382073430060784, "grad_norm": 4.972760200500488, "learning_rate": 7.717313619390583e-05, "loss": 1.9773, "step": 4493 }, { "epoch": 0.33828261728673853, "grad_norm": 3.9295780658721924, "learning_rate": 7.716290194630404e-05, "loss": 1.7177, "step": 4494 }, { "epoch": 0.3383578915673987, "grad_norm": 3.8098621368408203, "learning_rate": 7.715266608394565e-05, "loss": 2.3969, "step": 4495 }, { "epoch": 0.3384331658480589, "grad_norm": 3.7683727741241455, "learning_rate": 7.714242860743916e-05, "loss": 2.2652, "step": 4496 }, { "epoch": 0.338508440128719, "grad_norm": 3.973531484603882, "learning_rate": 7.71321895173932e-05, "loss": 2.1398, "step": 4497 }, { "epoch": 0.3385837144093792, "grad_norm": 5.437339782714844, "learning_rate": 7.712194881441641e-05, "loss": 1.7838, "step": 4498 }, { "epoch": 0.33865898869003935, "grad_norm": 4.454009056091309, "learning_rate": 7.711170649911759e-05, "loss": 1.8643, "step": 4499 }, { "epoch": 0.3387342629706995, "grad_norm": 5.164426803588867, "learning_rate": 7.710146257210561e-05, "loss": 2.2005, "step": 4500 }, { "epoch": 0.33880953725135965, "grad_norm": 4.7237677574157715, "learning_rate": 7.709121703398944e-05, "loss": 2.0712, "step": 4501 }, { "epoch": 0.33888481153201977, "grad_norm": 4.242769241333008, "learning_rate": 7.708096988537815e-05, "loss": 2.41, "step": 4502 }, { "epoch": 0.33896008581267995, "grad_norm": 4.900112152099609, "learning_rate": 7.707072112688091e-05, "loss": 2.1935, "step": 4503 }, { "epoch": 0.3390353600933401, "grad_norm": 3.766782760620117, "learning_rate": 7.706047075910696e-05, "loss": 2.0112, "step": 4504 }, { "epoch": 0.33911063437400024, "grad_norm": 5.16675329208374, "learning_rate": 7.705021878266568e-05, "loss": 2.171, "step": 4505 }, { "epoch": 0.3391859086546604, "grad_norm": 5.00575065612793, "learning_rate": 7.703996519816651e-05, "loss": 2.4108, "step": 4506 }, { "epoch": 0.3392611829353206, "grad_norm": 4.930896759033203, "learning_rate": 7.702971000621899e-05, "loss": 1.863, "step": 4507 }, { "epoch": 0.3393364572159807, "grad_norm": 4.462894439697266, "learning_rate": 7.701945320743278e-05, "loss": 1.5271, "step": 4508 }, { "epoch": 0.3394117314966409, "grad_norm": 5.848621368408203, "learning_rate": 7.70091948024176e-05, "loss": 2.0097, "step": 4509 }, { "epoch": 0.33948700577730107, "grad_norm": 4.751779556274414, "learning_rate": 7.699893479178326e-05, "loss": 1.9456, "step": 4510 }, { "epoch": 0.3395622800579612, "grad_norm": 4.161238670349121, "learning_rate": 7.698867317613974e-05, "loss": 2.0393, "step": 4511 }, { "epoch": 0.33963755433862136, "grad_norm": 4.925955295562744, "learning_rate": 7.697840995609703e-05, "loss": 1.5402, "step": 4512 }, { "epoch": 0.3397128286192815, "grad_norm": 3.698184013366699, "learning_rate": 7.696814513226527e-05, "loss": 2.2706, "step": 4513 }, { "epoch": 0.33978810289994166, "grad_norm": 3.8912553787231445, "learning_rate": 7.695787870525465e-05, "loss": 1.8541, "step": 4514 }, { "epoch": 0.33986337718060183, "grad_norm": 5.740266799926758, "learning_rate": 7.694761067567551e-05, "loss": 1.9408, "step": 4515 }, { "epoch": 0.33993865146126195, "grad_norm": 4.267884731292725, "learning_rate": 7.693734104413821e-05, "loss": 2.411, "step": 4516 }, { "epoch": 0.34001392574192213, "grad_norm": 3.908961534500122, "learning_rate": 7.692706981125329e-05, "loss": 2.0016, "step": 4517 }, { "epoch": 0.3400892000225823, "grad_norm": 7.236872673034668, "learning_rate": 7.691679697763133e-05, "loss": 2.0333, "step": 4518 }, { "epoch": 0.3401644743032424, "grad_norm": 4.250300884246826, "learning_rate": 7.690652254388303e-05, "loss": 1.7263, "step": 4519 }, { "epoch": 0.3402397485839026, "grad_norm": 3.9014570713043213, "learning_rate": 7.689624651061916e-05, "loss": 2.0795, "step": 4520 }, { "epoch": 0.3403150228645628, "grad_norm": 4.842888355255127, "learning_rate": 7.688596887845062e-05, "loss": 1.895, "step": 4521 }, { "epoch": 0.3403902971452229, "grad_norm": 4.448963165283203, "learning_rate": 7.687568964798836e-05, "loss": 2.1164, "step": 4522 }, { "epoch": 0.3404655714258831, "grad_norm": 5.541933536529541, "learning_rate": 7.686540881984347e-05, "loss": 1.9672, "step": 4523 }, { "epoch": 0.3405408457065432, "grad_norm": 3.8231594562530518, "learning_rate": 7.685512639462711e-05, "loss": 1.9295, "step": 4524 }, { "epoch": 0.34061611998720337, "grad_norm": 4.114373683929443, "learning_rate": 7.684484237295055e-05, "loss": 1.8997, "step": 4525 }, { "epoch": 0.34069139426786355, "grad_norm": 3.881075143814087, "learning_rate": 7.683455675542515e-05, "loss": 2.0707, "step": 4526 }, { "epoch": 0.34076666854852367, "grad_norm": 3.963350772857666, "learning_rate": 7.682426954266231e-05, "loss": 1.9434, "step": 4527 }, { "epoch": 0.34084194282918384, "grad_norm": 5.266693592071533, "learning_rate": 7.681398073527364e-05, "loss": 1.7345, "step": 4528 }, { "epoch": 0.340917217109844, "grad_norm": 4.409034252166748, "learning_rate": 7.680369033387073e-05, "loss": 1.6433, "step": 4529 }, { "epoch": 0.34099249139050414, "grad_norm": 4.036330223083496, "learning_rate": 7.679339833906537e-05, "loss": 2.1143, "step": 4530 }, { "epoch": 0.3410677656711643, "grad_norm": 4.3933916091918945, "learning_rate": 7.678310475146935e-05, "loss": 2.0351, "step": 4531 }, { "epoch": 0.34114303995182443, "grad_norm": 4.535825252532959, "learning_rate": 7.67728095716946e-05, "loss": 1.6391, "step": 4532 }, { "epoch": 0.3412183142324846, "grad_norm": 4.306824207305908, "learning_rate": 7.676251280035312e-05, "loss": 2.0512, "step": 4533 }, { "epoch": 0.3412935885131448, "grad_norm": 4.810033321380615, "learning_rate": 7.675221443805706e-05, "loss": 2.2103, "step": 4534 }, { "epoch": 0.3413688627938049, "grad_norm": 5.2636799812316895, "learning_rate": 7.674191448541861e-05, "loss": 2.0217, "step": 4535 }, { "epoch": 0.3414441370744651, "grad_norm": 5.04922342300415, "learning_rate": 7.673161294305008e-05, "loss": 1.7624, "step": 4536 }, { "epoch": 0.34151941135512526, "grad_norm": 3.822277069091797, "learning_rate": 7.672130981156387e-05, "loss": 1.8217, "step": 4537 }, { "epoch": 0.3415946856357854, "grad_norm": 4.553699970245361, "learning_rate": 7.671100509157243e-05, "loss": 2.2154, "step": 4538 }, { "epoch": 0.34166995991644555, "grad_norm": 5.175565719604492, "learning_rate": 7.670069878368842e-05, "loss": 1.622, "step": 4539 }, { "epoch": 0.34174523419710573, "grad_norm": 4.373694896697998, "learning_rate": 7.669039088852446e-05, "loss": 1.7503, "step": 4540 }, { "epoch": 0.34182050847776585, "grad_norm": 5.188920974731445, "learning_rate": 7.668008140669335e-05, "loss": 2.0285, "step": 4541 }, { "epoch": 0.341895782758426, "grad_norm": 4.475624084472656, "learning_rate": 7.666977033880795e-05, "loss": 2.1663, "step": 4542 }, { "epoch": 0.34197105703908615, "grad_norm": 4.608626842498779, "learning_rate": 7.665945768548122e-05, "loss": 1.8493, "step": 4543 }, { "epoch": 0.3420463313197463, "grad_norm": 4.155726432800293, "learning_rate": 7.664914344732623e-05, "loss": 1.9544, "step": 4544 }, { "epoch": 0.3421216056004065, "grad_norm": 6.454883575439453, "learning_rate": 7.663882762495614e-05, "loss": 1.9329, "step": 4545 }, { "epoch": 0.3421968798810666, "grad_norm": 5.580347061157227, "learning_rate": 7.662851021898417e-05, "loss": 1.8874, "step": 4546 }, { "epoch": 0.3422721541617268, "grad_norm": 5.288273334503174, "learning_rate": 7.661819123002367e-05, "loss": 1.9504, "step": 4547 }, { "epoch": 0.34234742844238697, "grad_norm": 3.8523223400115967, "learning_rate": 7.66078706586881e-05, "loss": 2.0412, "step": 4548 }, { "epoch": 0.3424227027230471, "grad_norm": 4.138383388519287, "learning_rate": 7.659754850559095e-05, "loss": 1.9187, "step": 4549 }, { "epoch": 0.34249797700370727, "grad_norm": 4.0213446617126465, "learning_rate": 7.658722477134585e-05, "loss": 1.887, "step": 4550 }, { "epoch": 0.3425732512843674, "grad_norm": 4.166807651519775, "learning_rate": 7.657689945656654e-05, "loss": 1.8533, "step": 4551 }, { "epoch": 0.34264852556502756, "grad_norm": 3.826023578643799, "learning_rate": 7.656657256186681e-05, "loss": 2.1984, "step": 4552 }, { "epoch": 0.34272379984568774, "grad_norm": 6.021687984466553, "learning_rate": 7.655624408786058e-05, "loss": 2.2327, "step": 4553 }, { "epoch": 0.34279907412634786, "grad_norm": 6.18969202041626, "learning_rate": 7.654591403516183e-05, "loss": 2.341, "step": 4554 }, { "epoch": 0.34287434840700803, "grad_norm": 4.900983810424805, "learning_rate": 7.653558240438463e-05, "loss": 2.1054, "step": 4555 }, { "epoch": 0.3429496226876682, "grad_norm": 3.8235034942626953, "learning_rate": 7.652524919614323e-05, "loss": 1.8667, "step": 4556 }, { "epoch": 0.34302489696832833, "grad_norm": 4.521095275878906, "learning_rate": 7.651491441105188e-05, "loss": 1.9447, "step": 4557 }, { "epoch": 0.3431001712489885, "grad_norm": 4.181612014770508, "learning_rate": 7.650457804972493e-05, "loss": 1.8305, "step": 4558 }, { "epoch": 0.3431754455296487, "grad_norm": 3.963656425476074, "learning_rate": 7.649424011277686e-05, "loss": 1.9965, "step": 4559 }, { "epoch": 0.3432507198103088, "grad_norm": 4.6136345863342285, "learning_rate": 7.648390060082225e-05, "loss": 2.0132, "step": 4560 }, { "epoch": 0.343325994090969, "grad_norm": 5.049828052520752, "learning_rate": 7.647355951447572e-05, "loss": 2.218, "step": 4561 }, { "epoch": 0.3434012683716291, "grad_norm": 4.716050624847412, "learning_rate": 7.646321685435205e-05, "loss": 1.9644, "step": 4562 }, { "epoch": 0.3434765426522893, "grad_norm": 3.6872105598449707, "learning_rate": 7.645287262106607e-05, "loss": 1.9582, "step": 4563 }, { "epoch": 0.34355181693294945, "grad_norm": 4.714653491973877, "learning_rate": 7.644252681523271e-05, "loss": 2.0582, "step": 4564 }, { "epoch": 0.34362709121360957, "grad_norm": 4.092118263244629, "learning_rate": 7.643217943746703e-05, "loss": 1.9243, "step": 4565 }, { "epoch": 0.34370236549426975, "grad_norm": 5.0258612632751465, "learning_rate": 7.64218304883841e-05, "loss": 2.0855, "step": 4566 }, { "epoch": 0.3437776397749299, "grad_norm": 3.448729991912842, "learning_rate": 7.641147996859916e-05, "loss": 2.1726, "step": 4567 }, { "epoch": 0.34385291405559004, "grad_norm": 5.362695217132568, "learning_rate": 7.640112787872753e-05, "loss": 1.9428, "step": 4568 }, { "epoch": 0.3439281883362502, "grad_norm": 3.757882595062256, "learning_rate": 7.639077421938459e-05, "loss": 2.0626, "step": 4569 }, { "epoch": 0.3440034626169104, "grad_norm": 3.0293984413146973, "learning_rate": 7.638041899118586e-05, "loss": 1.999, "step": 4570 }, { "epoch": 0.3440787368975705, "grad_norm": 4.279021739959717, "learning_rate": 7.63700621947469e-05, "loss": 2.1412, "step": 4571 }, { "epoch": 0.3441540111782307, "grad_norm": 3.9731411933898926, "learning_rate": 7.635970383068341e-05, "loss": 1.8495, "step": 4572 }, { "epoch": 0.3442292854588908, "grad_norm": 5.1099467277526855, "learning_rate": 7.634934389961116e-05, "loss": 2.0807, "step": 4573 }, { "epoch": 0.344304559739551, "grad_norm": 5.556121349334717, "learning_rate": 7.633898240214603e-05, "loss": 2.0429, "step": 4574 }, { "epoch": 0.34437983402021116, "grad_norm": 4.677093982696533, "learning_rate": 7.632861933890397e-05, "loss": 2.24, "step": 4575 }, { "epoch": 0.3444551083008713, "grad_norm": 3.9654626846313477, "learning_rate": 7.631825471050102e-05, "loss": 1.8898, "step": 4576 }, { "epoch": 0.34453038258153146, "grad_norm": 4.97780704498291, "learning_rate": 7.630788851755336e-05, "loss": 1.795, "step": 4577 }, { "epoch": 0.34460565686219163, "grad_norm": 5.8871941566467285, "learning_rate": 7.62975207606772e-05, "loss": 2.2431, "step": 4578 }, { "epoch": 0.34468093114285175, "grad_norm": 5.159177303314209, "learning_rate": 7.628715144048888e-05, "loss": 1.9894, "step": 4579 }, { "epoch": 0.34475620542351193, "grad_norm": 5.730270862579346, "learning_rate": 7.627678055760485e-05, "loss": 1.8643, "step": 4580 }, { "epoch": 0.34483147970417205, "grad_norm": 4.280642032623291, "learning_rate": 7.626640811264161e-05, "loss": 1.9696, "step": 4581 }, { "epoch": 0.3449067539848322, "grad_norm": 6.338525295257568, "learning_rate": 7.625603410621577e-05, "loss": 1.7777, "step": 4582 }, { "epoch": 0.3449820282654924, "grad_norm": 5.291580677032471, "learning_rate": 7.624565853894405e-05, "loss": 2.5501, "step": 4583 }, { "epoch": 0.3450573025461525, "grad_norm": 4.285087585449219, "learning_rate": 7.623528141144322e-05, "loss": 1.9375, "step": 4584 }, { "epoch": 0.3451325768268127, "grad_norm": 4.013237476348877, "learning_rate": 7.62249027243302e-05, "loss": 1.7449, "step": 4585 }, { "epoch": 0.3452078511074729, "grad_norm": 4.898282051086426, "learning_rate": 7.621452247822197e-05, "loss": 1.9462, "step": 4586 }, { "epoch": 0.345283125388133, "grad_norm": 5.8522047996521, "learning_rate": 7.620414067373558e-05, "loss": 2.1401, "step": 4587 }, { "epoch": 0.34535839966879317, "grad_norm": 4.723256587982178, "learning_rate": 7.619375731148823e-05, "loss": 1.6346, "step": 4588 }, { "epoch": 0.34543367394945335, "grad_norm": 4.570127010345459, "learning_rate": 7.618337239209715e-05, "loss": 2.1056, "step": 4589 }, { "epoch": 0.34550894823011347, "grad_norm": 6.95283842086792, "learning_rate": 7.617298591617971e-05, "loss": 2.4376, "step": 4590 }, { "epoch": 0.34558422251077364, "grad_norm": 4.839108467102051, "learning_rate": 7.616259788435337e-05, "loss": 1.8667, "step": 4591 }, { "epoch": 0.34565949679143376, "grad_norm": 4.085836887359619, "learning_rate": 7.615220829723563e-05, "loss": 1.8209, "step": 4592 }, { "epoch": 0.34573477107209394, "grad_norm": 5.696686267852783, "learning_rate": 7.614181715544417e-05, "loss": 2.0996, "step": 4593 }, { "epoch": 0.3458100453527541, "grad_norm": 6.192338943481445, "learning_rate": 7.613142445959668e-05, "loss": 2.3157, "step": 4594 }, { "epoch": 0.34588531963341423, "grad_norm": 5.837879180908203, "learning_rate": 7.612103021031099e-05, "loss": 1.8426, "step": 4595 }, { "epoch": 0.3459605939140744, "grad_norm": 4.4337477684021, "learning_rate": 7.611063440820501e-05, "loss": 2.2347, "step": 4596 }, { "epoch": 0.3460358681947346, "grad_norm": 5.813669204711914, "learning_rate": 7.610023705389673e-05, "loss": 1.9623, "step": 4597 }, { "epoch": 0.3461111424753947, "grad_norm": 4.502193450927734, "learning_rate": 7.608983814800423e-05, "loss": 1.943, "step": 4598 }, { "epoch": 0.3461864167560549, "grad_norm": 5.070720195770264, "learning_rate": 7.607943769114574e-05, "loss": 2.0809, "step": 4599 }, { "epoch": 0.34626169103671506, "grad_norm": 4.427411079406738, "learning_rate": 7.606903568393948e-05, "loss": 2.1432, "step": 4600 }, { "epoch": 0.3463369653173752, "grad_norm": 4.621142387390137, "learning_rate": 7.605863212700385e-05, "loss": 1.5111, "step": 4601 }, { "epoch": 0.34641223959803535, "grad_norm": 3.4134957790374756, "learning_rate": 7.604822702095735e-05, "loss": 1.7918, "step": 4602 }, { "epoch": 0.3464875138786955, "grad_norm": 4.904789924621582, "learning_rate": 7.603782036641846e-05, "loss": 2.0124, "step": 4603 }, { "epoch": 0.34656278815935565, "grad_norm": 4.430422306060791, "learning_rate": 7.602741216400586e-05, "loss": 1.7916, "step": 4604 }, { "epoch": 0.3466380624400158, "grad_norm": 4.909929275512695, "learning_rate": 7.60170024143383e-05, "loss": 2.4277, "step": 4605 }, { "epoch": 0.34671333672067595, "grad_norm": 4.747127532958984, "learning_rate": 7.600659111803458e-05, "loss": 1.8553, "step": 4606 }, { "epoch": 0.3467886110013361, "grad_norm": 6.263697624206543, "learning_rate": 7.599617827571367e-05, "loss": 1.7871, "step": 4607 }, { "epoch": 0.3468638852819963, "grad_norm": 4.54171085357666, "learning_rate": 7.598576388799452e-05, "loss": 1.9016, "step": 4608 }, { "epoch": 0.3469391595626564, "grad_norm": 4.513587951660156, "learning_rate": 7.59753479554963e-05, "loss": 2.1541, "step": 4609 }, { "epoch": 0.3470144338433166, "grad_norm": 5.317783832550049, "learning_rate": 7.596493047883816e-05, "loss": 2.3906, "step": 4610 }, { "epoch": 0.3470897081239767, "grad_norm": 4.195907115936279, "learning_rate": 7.595451145863938e-05, "loss": 1.8432, "step": 4611 }, { "epoch": 0.3471649824046369, "grad_norm": 5.053755760192871, "learning_rate": 7.594409089551941e-05, "loss": 2.0609, "step": 4612 }, { "epoch": 0.34724025668529707, "grad_norm": 3.9988138675689697, "learning_rate": 7.593366879009766e-05, "loss": 2.1742, "step": 4613 }, { "epoch": 0.3473155309659572, "grad_norm": 5.1297502517700195, "learning_rate": 7.592324514299371e-05, "loss": 2.0278, "step": 4614 }, { "epoch": 0.34739080524661736, "grad_norm": 3.50924015045166, "learning_rate": 7.591281995482722e-05, "loss": 2.0818, "step": 4615 }, { "epoch": 0.34746607952727754, "grad_norm": 4.464993953704834, "learning_rate": 7.590239322621792e-05, "loss": 1.7159, "step": 4616 }, { "epoch": 0.34754135380793766, "grad_norm": 3.403712749481201, "learning_rate": 7.58919649577857e-05, "loss": 2.1461, "step": 4617 }, { "epoch": 0.34761662808859783, "grad_norm": 3.7006256580352783, "learning_rate": 7.588153515015043e-05, "loss": 1.7536, "step": 4618 }, { "epoch": 0.347691902369258, "grad_norm": 4.032113552093506, "learning_rate": 7.587110380393216e-05, "loss": 2.2485, "step": 4619 }, { "epoch": 0.34776717664991813, "grad_norm": 4.458015441894531, "learning_rate": 7.586067091975103e-05, "loss": 2.2192, "step": 4620 }, { "epoch": 0.3478424509305783, "grad_norm": 5.377789497375488, "learning_rate": 7.585023649822717e-05, "loss": 2.3237, "step": 4621 }, { "epoch": 0.3479177252112384, "grad_norm": 6.268248558044434, "learning_rate": 7.583980053998095e-05, "loss": 2.5396, "step": 4622 }, { "epoch": 0.3479929994918986, "grad_norm": 5.029088497161865, "learning_rate": 7.582936304563272e-05, "loss": 2.1357, "step": 4623 }, { "epoch": 0.3480682737725588, "grad_norm": 4.130476474761963, "learning_rate": 7.581892401580297e-05, "loss": 1.9216, "step": 4624 }, { "epoch": 0.3481435480532189, "grad_norm": 4.483092308044434, "learning_rate": 7.580848345111228e-05, "loss": 2.3111, "step": 4625 }, { "epoch": 0.3482188223338791, "grad_norm": 4.400412082672119, "learning_rate": 7.579804135218129e-05, "loss": 2.0789, "step": 4626 }, { "epoch": 0.34829409661453925, "grad_norm": 5.589075565338135, "learning_rate": 7.578759771963077e-05, "loss": 1.8212, "step": 4627 }, { "epoch": 0.34836937089519937, "grad_norm": 4.75601863861084, "learning_rate": 7.577715255408155e-05, "loss": 2.0269, "step": 4628 }, { "epoch": 0.34844464517585955, "grad_norm": 3.8390045166015625, "learning_rate": 7.576670585615459e-05, "loss": 1.9359, "step": 4629 }, { "epoch": 0.34851991945651967, "grad_norm": 6.69706916809082, "learning_rate": 7.57562576264709e-05, "loss": 2.0578, "step": 4630 }, { "epoch": 0.34859519373717984, "grad_norm": 7.2338361740112305, "learning_rate": 7.57458078656516e-05, "loss": 2.6355, "step": 4631 }, { "epoch": 0.34867046801784, "grad_norm": 3.920349359512329, "learning_rate": 7.573535657431788e-05, "loss": 1.5752, "step": 4632 }, { "epoch": 0.34874574229850014, "grad_norm": 4.407501220703125, "learning_rate": 7.572490375309105e-05, "loss": 2.514, "step": 4633 }, { "epoch": 0.3488210165791603, "grad_norm": 4.966390132904053, "learning_rate": 7.571444940259252e-05, "loss": 1.7593, "step": 4634 }, { "epoch": 0.3488962908598205, "grad_norm": 4.687565803527832, "learning_rate": 7.570399352344375e-05, "loss": 1.9425, "step": 4635 }, { "epoch": 0.3489715651404806, "grad_norm": 4.8182549476623535, "learning_rate": 7.569353611626633e-05, "loss": 2.1645, "step": 4636 }, { "epoch": 0.3490468394211408, "grad_norm": 3.68569016456604, "learning_rate": 7.568307718168189e-05, "loss": 2.2552, "step": 4637 }, { "epoch": 0.34912211370180096, "grad_norm": 5.435297012329102, "learning_rate": 7.56726167203122e-05, "loss": 1.8918, "step": 4638 }, { "epoch": 0.3491973879824611, "grad_norm": 4.0888495445251465, "learning_rate": 7.566215473277913e-05, "loss": 2.0708, "step": 4639 }, { "epoch": 0.34927266226312126, "grad_norm": 5.4462761878967285, "learning_rate": 7.565169121970459e-05, "loss": 2.2294, "step": 4640 }, { "epoch": 0.3493479365437814, "grad_norm": 3.784348964691162, "learning_rate": 7.564122618171061e-05, "loss": 1.8248, "step": 4641 }, { "epoch": 0.34942321082444155, "grad_norm": 6.087522029876709, "learning_rate": 7.56307596194193e-05, "loss": 2.1466, "step": 4642 }, { "epoch": 0.34949848510510173, "grad_norm": 4.043786525726318, "learning_rate": 7.56202915334529e-05, "loss": 2.1563, "step": 4643 }, { "epoch": 0.34957375938576185, "grad_norm": 4.114598274230957, "learning_rate": 7.560982192443364e-05, "loss": 1.7694, "step": 4644 }, { "epoch": 0.349649033666422, "grad_norm": 4.336655139923096, "learning_rate": 7.559935079298397e-05, "loss": 1.8078, "step": 4645 }, { "epoch": 0.3497243079470822, "grad_norm": 6.600823879241943, "learning_rate": 7.558887813972635e-05, "loss": 2.3505, "step": 4646 }, { "epoch": 0.3497995822277423, "grad_norm": 5.6425461769104, "learning_rate": 7.557840396528334e-05, "loss": 2.1108, "step": 4647 }, { "epoch": 0.3498748565084025, "grad_norm": 6.038580417633057, "learning_rate": 7.556792827027761e-05, "loss": 2.3126, "step": 4648 }, { "epoch": 0.3499501307890627, "grad_norm": 5.721938133239746, "learning_rate": 7.555745105533191e-05, "loss": 2.0378, "step": 4649 }, { "epoch": 0.3500254050697228, "grad_norm": 5.774184703826904, "learning_rate": 7.554697232106906e-05, "loss": 1.8985, "step": 4650 }, { "epoch": 0.35010067935038297, "grad_norm": 4.349372386932373, "learning_rate": 7.553649206811204e-05, "loss": 1.8721, "step": 4651 }, { "epoch": 0.3501759536310431, "grad_norm": 5.158112049102783, "learning_rate": 7.552601029708382e-05, "loss": 2.0118, "step": 4652 }, { "epoch": 0.35025122791170327, "grad_norm": 4.468225002288818, "learning_rate": 7.551552700860753e-05, "loss": 2.2721, "step": 4653 }, { "epoch": 0.35032650219236344, "grad_norm": 7.925318717956543, "learning_rate": 7.550504220330638e-05, "loss": 2.1947, "step": 4654 }, { "epoch": 0.35040177647302356, "grad_norm": 7.497496604919434, "learning_rate": 7.549455588180363e-05, "loss": 2.2687, "step": 4655 }, { "epoch": 0.35047705075368374, "grad_norm": 4.677943706512451, "learning_rate": 7.548406804472271e-05, "loss": 2.0925, "step": 4656 }, { "epoch": 0.3505523250343439, "grad_norm": 4.890117645263672, "learning_rate": 7.547357869268705e-05, "loss": 1.9201, "step": 4657 }, { "epoch": 0.35062759931500403, "grad_norm": 3.9217960834503174, "learning_rate": 7.546308782632024e-05, "loss": 2.0213, "step": 4658 }, { "epoch": 0.3507028735956642, "grad_norm": 4.779502868652344, "learning_rate": 7.545259544624592e-05, "loss": 1.7539, "step": 4659 }, { "epoch": 0.35077814787632433, "grad_norm": 5.4057841300964355, "learning_rate": 7.544210155308783e-05, "loss": 1.9425, "step": 4660 }, { "epoch": 0.3508534221569845, "grad_norm": 4.38415002822876, "learning_rate": 7.54316061474698e-05, "loss": 2.433, "step": 4661 }, { "epoch": 0.3509286964376447, "grad_norm": 4.677315711975098, "learning_rate": 7.542110923001576e-05, "loss": 1.7816, "step": 4662 }, { "epoch": 0.3510039707183048, "grad_norm": 4.090447902679443, "learning_rate": 7.541061080134972e-05, "loss": 2.3677, "step": 4663 }, { "epoch": 0.351079244998965, "grad_norm": 5.142765998840332, "learning_rate": 7.540011086209578e-05, "loss": 1.8085, "step": 4664 }, { "epoch": 0.35115451927962515, "grad_norm": 4.226830005645752, "learning_rate": 7.538960941287813e-05, "loss": 2.0493, "step": 4665 }, { "epoch": 0.3512297935602853, "grad_norm": 4.075433731079102, "learning_rate": 7.537910645432105e-05, "loss": 1.8381, "step": 4666 }, { "epoch": 0.35130506784094545, "grad_norm": 4.133872985839844, "learning_rate": 7.53686019870489e-05, "loss": 1.8037, "step": 4667 }, { "epoch": 0.3513803421216056, "grad_norm": 4.133872985839844, "learning_rate": 7.53686019870489e-05, "loss": 2.5777, "step": 4668 }, { "epoch": 0.35145561640226575, "grad_norm": 4.6818528175354, "learning_rate": 7.535809601168617e-05, "loss": 2.0334, "step": 4669 }, { "epoch": 0.3515308906829259, "grad_norm": 3.3950798511505127, "learning_rate": 7.53475885288574e-05, "loss": 1.8075, "step": 4670 }, { "epoch": 0.35160616496358604, "grad_norm": 4.6079912185668945, "learning_rate": 7.53370795391872e-05, "loss": 1.8669, "step": 4671 }, { "epoch": 0.3516814392442462, "grad_norm": 4.347660541534424, "learning_rate": 7.53265690433003e-05, "loss": 1.5897, "step": 4672 }, { "epoch": 0.3517567135249064, "grad_norm": 3.6169469356536865, "learning_rate": 7.531605704182159e-05, "loss": 1.7861, "step": 4673 }, { "epoch": 0.3518319878055665, "grad_norm": 4.170974254608154, "learning_rate": 7.53055435353759e-05, "loss": 1.9276, "step": 4674 }, { "epoch": 0.3519072620862267, "grad_norm": 4.170974254608154, "learning_rate": 7.53055435353759e-05, "loss": 2.5965, "step": 4675 }, { "epoch": 0.35198253636688687, "grad_norm": 4.087090492248535, "learning_rate": 7.529502852458828e-05, "loss": 1.922, "step": 4676 }, { "epoch": 0.352057810647547, "grad_norm": 3.9385740756988525, "learning_rate": 7.528451201008378e-05, "loss": 2.094, "step": 4677 }, { "epoch": 0.35213308492820716, "grad_norm": 4.231057643890381, "learning_rate": 7.527399399248759e-05, "loss": 2.018, "step": 4678 }, { "epoch": 0.3522083592088673, "grad_norm": 4.711199760437012, "learning_rate": 7.526347447242496e-05, "loss": 2.2746, "step": 4679 }, { "epoch": 0.35228363348952746, "grad_norm": 4.021269798278809, "learning_rate": 7.525295345052128e-05, "loss": 2.2216, "step": 4680 }, { "epoch": 0.35235890777018763, "grad_norm": 5.026397228240967, "learning_rate": 7.524243092740198e-05, "loss": 1.7627, "step": 4681 }, { "epoch": 0.35243418205084776, "grad_norm": 6.576305866241455, "learning_rate": 7.523190690369258e-05, "loss": 2.0708, "step": 4682 }, { "epoch": 0.35250945633150793, "grad_norm": 5.01113748550415, "learning_rate": 7.522138138001873e-05, "loss": 2.2132, "step": 4683 }, { "epoch": 0.3525847306121681, "grad_norm": 6.437198162078857, "learning_rate": 7.52108543570061e-05, "loss": 2.2058, "step": 4684 }, { "epoch": 0.3526600048928282, "grad_norm": 4.341702938079834, "learning_rate": 7.520032583528052e-05, "loss": 1.8943, "step": 4685 }, { "epoch": 0.3527352791734884, "grad_norm": 4.824337482452393, "learning_rate": 7.51897958154679e-05, "loss": 2.1713, "step": 4686 }, { "epoch": 0.3528105534541486, "grad_norm": 4.546299934387207, "learning_rate": 7.517926429819418e-05, "loss": 2.385, "step": 4687 }, { "epoch": 0.3528858277348087, "grad_norm": 4.972386360168457, "learning_rate": 7.516873128408546e-05, "loss": 1.7746, "step": 4688 }, { "epoch": 0.3529611020154689, "grad_norm": 3.514876127243042, "learning_rate": 7.515819677376787e-05, "loss": 2.0456, "step": 4689 }, { "epoch": 0.353036376296129, "grad_norm": 4.511981964111328, "learning_rate": 7.514766076786766e-05, "loss": 2.4228, "step": 4690 }, { "epoch": 0.35311165057678917, "grad_norm": 6.4157233238220215, "learning_rate": 7.513712326701116e-05, "loss": 2.1866, "step": 4691 }, { "epoch": 0.35318692485744935, "grad_norm": 5.201791286468506, "learning_rate": 7.512658427182484e-05, "loss": 2.088, "step": 4692 }, { "epoch": 0.35326219913810947, "grad_norm": 4.7506103515625, "learning_rate": 7.511604378293518e-05, "loss": 1.6858, "step": 4693 }, { "epoch": 0.35333747341876964, "grad_norm": 4.93814754486084, "learning_rate": 7.510550180096877e-05, "loss": 2.5642, "step": 4694 }, { "epoch": 0.3534127476994298, "grad_norm": 4.639529705047607, "learning_rate": 7.509495832655232e-05, "loss": 1.907, "step": 4695 }, { "epoch": 0.35348802198008994, "grad_norm": 4.353016376495361, "learning_rate": 7.50844133603126e-05, "loss": 2.1002, "step": 4696 }, { "epoch": 0.3535632962607501, "grad_norm": 5.383945465087891, "learning_rate": 7.507386690287647e-05, "loss": 2.0776, "step": 4697 }, { "epoch": 0.3536385705414103, "grad_norm": 5.4766035079956055, "learning_rate": 7.506331895487092e-05, "loss": 1.9973, "step": 4698 }, { "epoch": 0.3537138448220704, "grad_norm": 4.778066635131836, "learning_rate": 7.505276951692297e-05, "loss": 2.0725, "step": 4699 }, { "epoch": 0.3537891191027306, "grad_norm": 4.152775287628174, "learning_rate": 7.504221858965975e-05, "loss": 1.8939, "step": 4700 }, { "epoch": 0.3538643933833907, "grad_norm": 5.066797733306885, "learning_rate": 7.503166617370849e-05, "loss": 1.5798, "step": 4701 }, { "epoch": 0.3539396676640509, "grad_norm": 4.326906204223633, "learning_rate": 7.50211122696965e-05, "loss": 2.0667, "step": 4702 }, { "epoch": 0.35401494194471106, "grad_norm": 4.983119010925293, "learning_rate": 7.501055687825117e-05, "loss": 2.1523, "step": 4703 }, { "epoch": 0.3540902162253712, "grad_norm": 3.7467000484466553, "learning_rate": 7.500000000000001e-05, "loss": 2.1015, "step": 4704 }, { "epoch": 0.35416549050603136, "grad_norm": 5.698918342590332, "learning_rate": 7.498944163557059e-05, "loss": 1.9026, "step": 4705 }, { "epoch": 0.35424076478669153, "grad_norm": 5.043235778808594, "learning_rate": 7.497888178559055e-05, "loss": 2.2288, "step": 4706 }, { "epoch": 0.35431603906735165, "grad_norm": 4.1288347244262695, "learning_rate": 7.496832045068765e-05, "loss": 2.1109, "step": 4707 }, { "epoch": 0.3543913133480118, "grad_norm": 4.3924241065979, "learning_rate": 7.495775763148975e-05, "loss": 2.0131, "step": 4708 }, { "epoch": 0.35446658762867195, "grad_norm": 4.646059036254883, "learning_rate": 7.494719332862478e-05, "loss": 2.1389, "step": 4709 }, { "epoch": 0.3545418619093321, "grad_norm": 4.2902655601501465, "learning_rate": 7.493662754272075e-05, "loss": 2.2265, "step": 4710 }, { "epoch": 0.3546171361899923, "grad_norm": 5.3461408615112305, "learning_rate": 7.492606027440577e-05, "loss": 1.7097, "step": 4711 }, { "epoch": 0.3546924104706524, "grad_norm": 4.69003963470459, "learning_rate": 7.491549152430801e-05, "loss": 2.0261, "step": 4712 }, { "epoch": 0.3547676847513126, "grad_norm": 3.4113070964813232, "learning_rate": 7.490492129305577e-05, "loss": 1.7582, "step": 4713 }, { "epoch": 0.35484295903197277, "grad_norm": 7.446574687957764, "learning_rate": 7.489434958127744e-05, "loss": 1.665, "step": 4714 }, { "epoch": 0.3549182333126329, "grad_norm": 3.97598934173584, "learning_rate": 7.488377638960143e-05, "loss": 1.671, "step": 4715 }, { "epoch": 0.35499350759329307, "grad_norm": 4.233238220214844, "learning_rate": 7.487320171865633e-05, "loss": 1.8296, "step": 4716 }, { "epoch": 0.35506878187395324, "grad_norm": 6.832796096801758, "learning_rate": 7.486262556907075e-05, "loss": 2.3745, "step": 4717 }, { "epoch": 0.35514405615461336, "grad_norm": 7.490714073181152, "learning_rate": 7.485204794147343e-05, "loss": 1.9779, "step": 4718 }, { "epoch": 0.35521933043527354, "grad_norm": 4.480311393737793, "learning_rate": 7.484146883649316e-05, "loss": 1.93, "step": 4719 }, { "epoch": 0.35529460471593366, "grad_norm": 5.3867082595825195, "learning_rate": 7.483088825475885e-05, "loss": 2.1377, "step": 4720 }, { "epoch": 0.35536987899659384, "grad_norm": 4.348886966705322, "learning_rate": 7.482030619689947e-05, "loss": 1.7696, "step": 4721 }, { "epoch": 0.355445153277254, "grad_norm": 3.7030577659606934, "learning_rate": 7.480972266354411e-05, "loss": 2.0611, "step": 4722 }, { "epoch": 0.35552042755791413, "grad_norm": 6.327174663543701, "learning_rate": 7.479913765532193e-05, "loss": 2.1051, "step": 4723 }, { "epoch": 0.3555957018385743, "grad_norm": 5.268999099731445, "learning_rate": 7.478855117286217e-05, "loss": 1.9332, "step": 4724 }, { "epoch": 0.3556709761192345, "grad_norm": 6.376285076141357, "learning_rate": 7.477796321679416e-05, "loss": 2.3561, "step": 4725 }, { "epoch": 0.3557462503998946, "grad_norm": 4.8138203620910645, "learning_rate": 7.476737378774735e-05, "loss": 1.8648, "step": 4726 }, { "epoch": 0.3558215246805548, "grad_norm": 5.368795394897461, "learning_rate": 7.475678288635122e-05, "loss": 1.8362, "step": 4727 }, { "epoch": 0.3558967989612149, "grad_norm": 4.2373127937316895, "learning_rate": 7.474619051323539e-05, "loss": 2.0507, "step": 4728 }, { "epoch": 0.3559720732418751, "grad_norm": 4.380899906158447, "learning_rate": 7.473559666902954e-05, "loss": 2.0412, "step": 4729 }, { "epoch": 0.35604734752253525, "grad_norm": 4.2086181640625, "learning_rate": 7.472500135436344e-05, "loss": 1.8879, "step": 4730 }, { "epoch": 0.35612262180319537, "grad_norm": 3.9845449924468994, "learning_rate": 7.471440456986695e-05, "loss": 2.2651, "step": 4731 }, { "epoch": 0.35619789608385555, "grad_norm": 3.9845449924468994, "learning_rate": 7.471440456986695e-05, "loss": 2.1018, "step": 4732 }, { "epoch": 0.3562731703645157, "grad_norm": 6.091592311859131, "learning_rate": 7.470380631617002e-05, "loss": 2.0122, "step": 4733 }, { "epoch": 0.35634844464517584, "grad_norm": 6.080043315887451, "learning_rate": 7.46932065939027e-05, "loss": 1.7726, "step": 4734 }, { "epoch": 0.356423718925836, "grad_norm": 4.906828880310059, "learning_rate": 7.46826054036951e-05, "loss": 1.9076, "step": 4735 }, { "epoch": 0.3564989932064962, "grad_norm": 5.630390167236328, "learning_rate": 7.467200274617741e-05, "loss": 2.5238, "step": 4736 }, { "epoch": 0.3565742674871563, "grad_norm": 4.263101577758789, "learning_rate": 7.466139862197996e-05, "loss": 1.9743, "step": 4737 }, { "epoch": 0.3566495417678165, "grad_norm": 4.454501628875732, "learning_rate": 7.465079303173312e-05, "loss": 1.8774, "step": 4738 }, { "epoch": 0.3567248160484766, "grad_norm": 3.741698741912842, "learning_rate": 7.464018597606738e-05, "loss": 1.7884, "step": 4739 }, { "epoch": 0.3568000903291368, "grad_norm": 7.079321384429932, "learning_rate": 7.462957745561328e-05, "loss": 1.8561, "step": 4740 }, { "epoch": 0.35687536460979696, "grad_norm": 3.574429988861084, "learning_rate": 7.461896747100146e-05, "loss": 1.8334, "step": 4741 }, { "epoch": 0.3569506388904571, "grad_norm": 5.020144939422607, "learning_rate": 7.460835602286265e-05, "loss": 2.4262, "step": 4742 }, { "epoch": 0.35702591317111726, "grad_norm": 4.757741451263428, "learning_rate": 7.45977431118277e-05, "loss": 2.2663, "step": 4743 }, { "epoch": 0.35710118745177744, "grad_norm": 4.493041038513184, "learning_rate": 7.458712873852752e-05, "loss": 1.9607, "step": 4744 }, { "epoch": 0.35717646173243756, "grad_norm": 5.57503604888916, "learning_rate": 7.457651290359306e-05, "loss": 2.0441, "step": 4745 }, { "epoch": 0.35725173601309773, "grad_norm": 5.385438919067383, "learning_rate": 7.456589560765545e-05, "loss": 2.1624, "step": 4746 }, { "epoch": 0.3573270102937579, "grad_norm": 6.012648105621338, "learning_rate": 7.455527685134581e-05, "loss": 1.9506, "step": 4747 }, { "epoch": 0.357402284574418, "grad_norm": 4.124917507171631, "learning_rate": 7.454465663529542e-05, "loss": 1.8411, "step": 4748 }, { "epoch": 0.3574775588550782, "grad_norm": 3.7410566806793213, "learning_rate": 7.453403496013563e-05, "loss": 2.1715, "step": 4749 }, { "epoch": 0.3575528331357383, "grad_norm": 4.363630294799805, "learning_rate": 7.452341182649786e-05, "loss": 1.9888, "step": 4750 }, { "epoch": 0.3576281074163985, "grad_norm": 7.150066375732422, "learning_rate": 7.451278723501362e-05, "loss": 1.6745, "step": 4751 }, { "epoch": 0.3577033816970587, "grad_norm": 5.0994977951049805, "learning_rate": 7.45021611863145e-05, "loss": 1.9023, "step": 4752 }, { "epoch": 0.3577786559777188, "grad_norm": 4.080338001251221, "learning_rate": 7.449153368103222e-05, "loss": 2.0436, "step": 4753 }, { "epoch": 0.35785393025837897, "grad_norm": 4.2494378089904785, "learning_rate": 7.448090471979853e-05, "loss": 2.1868, "step": 4754 }, { "epoch": 0.35792920453903915, "grad_norm": 4.448575496673584, "learning_rate": 7.44702743032453e-05, "loss": 1.9114, "step": 4755 }, { "epoch": 0.35800447881969927, "grad_norm": 4.7296857833862305, "learning_rate": 7.445964243200447e-05, "loss": 2.2521, "step": 4756 }, { "epoch": 0.35807975310035944, "grad_norm": 4.6908650398254395, "learning_rate": 7.444900910670808e-05, "loss": 2.5617, "step": 4757 }, { "epoch": 0.35815502738101956, "grad_norm": 7.209279537200928, "learning_rate": 7.443837432798826e-05, "loss": 1.9502, "step": 4758 }, { "epoch": 0.35823030166167974, "grad_norm": 6.864109992980957, "learning_rate": 7.442773809647718e-05, "loss": 2.1742, "step": 4759 }, { "epoch": 0.3583055759423399, "grad_norm": 4.517666816711426, "learning_rate": 7.44171004128072e-05, "loss": 1.9867, "step": 4760 }, { "epoch": 0.35838085022300004, "grad_norm": 5.149838924407959, "learning_rate": 7.440646127761064e-05, "loss": 1.944, "step": 4761 }, { "epoch": 0.3584561245036602, "grad_norm": 4.458034038543701, "learning_rate": 7.439582069151999e-05, "loss": 2.2805, "step": 4762 }, { "epoch": 0.3585313987843204, "grad_norm": 4.561699867248535, "learning_rate": 7.43851786551678e-05, "loss": 1.8461, "step": 4763 }, { "epoch": 0.3586066730649805, "grad_norm": 3.4224936962127686, "learning_rate": 7.437453516918669e-05, "loss": 1.9924, "step": 4764 }, { "epoch": 0.3586819473456407, "grad_norm": 4.27512788772583, "learning_rate": 7.436389023420944e-05, "loss": 2.0595, "step": 4765 }, { "epoch": 0.35875722162630086, "grad_norm": 4.82543420791626, "learning_rate": 7.43532438508688e-05, "loss": 1.9961, "step": 4766 }, { "epoch": 0.358832495906961, "grad_norm": 4.925774097442627, "learning_rate": 7.43425960197977e-05, "loss": 2.0342, "step": 4767 }, { "epoch": 0.35890777018762116, "grad_norm": 4.324056625366211, "learning_rate": 7.433194674162911e-05, "loss": 1.6868, "step": 4768 }, { "epoch": 0.3589830444682813, "grad_norm": 4.3465986251831055, "learning_rate": 7.43212960169961e-05, "loss": 1.7335, "step": 4769 }, { "epoch": 0.35905831874894145, "grad_norm": 3.428480863571167, "learning_rate": 7.431064384653182e-05, "loss": 2.0071, "step": 4770 }, { "epoch": 0.3591335930296016, "grad_norm": 4.085512638092041, "learning_rate": 7.429999023086953e-05, "loss": 2.1411, "step": 4771 }, { "epoch": 0.35920886731026175, "grad_norm": 4.5069732666015625, "learning_rate": 7.428933517064254e-05, "loss": 2.1567, "step": 4772 }, { "epoch": 0.3592841415909219, "grad_norm": 3.8591883182525635, "learning_rate": 7.427867866648425e-05, "loss": 1.9823, "step": 4773 }, { "epoch": 0.3593594158715821, "grad_norm": 5.22625732421875, "learning_rate": 7.42680207190282e-05, "loss": 1.8327, "step": 4774 }, { "epoch": 0.3594346901522422, "grad_norm": 4.900472164154053, "learning_rate": 7.425736132890795e-05, "loss": 1.9832, "step": 4775 }, { "epoch": 0.3595099644329024, "grad_norm": 9.567375183105469, "learning_rate": 7.424670049675716e-05, "loss": 2.1592, "step": 4776 }, { "epoch": 0.3595852387135625, "grad_norm": 5.523016452789307, "learning_rate": 7.423603822320962e-05, "loss": 2.0211, "step": 4777 }, { "epoch": 0.3596605129942227, "grad_norm": 3.923966407775879, "learning_rate": 7.422537450889913e-05, "loss": 2.0995, "step": 4778 }, { "epoch": 0.35973578727488287, "grad_norm": 5.271427631378174, "learning_rate": 7.421470935445964e-05, "loss": 2.1583, "step": 4779 }, { "epoch": 0.359811061555543, "grad_norm": 5.400697708129883, "learning_rate": 7.420404276052517e-05, "loss": 1.9825, "step": 4780 }, { "epoch": 0.35988633583620316, "grad_norm": 5.789221286773682, "learning_rate": 7.419337472772978e-05, "loss": 2.4105, "step": 4781 }, { "epoch": 0.35996161011686334, "grad_norm": 13.104486465454102, "learning_rate": 7.418270525670769e-05, "loss": 2.3911, "step": 4782 }, { "epoch": 0.36003688439752346, "grad_norm": 3.88413143157959, "learning_rate": 7.417203434809317e-05, "loss": 1.6194, "step": 4783 }, { "epoch": 0.36011215867818364, "grad_norm": 6.00800085067749, "learning_rate": 7.416136200252057e-05, "loss": 1.7539, "step": 4784 }, { "epoch": 0.3601874329588438, "grad_norm": 4.076695442199707, "learning_rate": 7.415068822062432e-05, "loss": 1.8827, "step": 4785 }, { "epoch": 0.36026270723950393, "grad_norm": 5.769009113311768, "learning_rate": 7.414001300303894e-05, "loss": 2.3875, "step": 4786 }, { "epoch": 0.3603379815201641, "grad_norm": 3.7072229385375977, "learning_rate": 7.412933635039907e-05, "loss": 1.8795, "step": 4787 }, { "epoch": 0.36041325580082423, "grad_norm": 5.090272903442383, "learning_rate": 7.411865826333937e-05, "loss": 2.3674, "step": 4788 }, { "epoch": 0.3604885300814844, "grad_norm": 5.507866382598877, "learning_rate": 7.410797874249464e-05, "loss": 2.1187, "step": 4789 }, { "epoch": 0.3605638043621446, "grad_norm": 5.066542625427246, "learning_rate": 7.409729778849977e-05, "loss": 1.8951, "step": 4790 }, { "epoch": 0.3606390786428047, "grad_norm": 4.192557334899902, "learning_rate": 7.408661540198968e-05, "loss": 2.0987, "step": 4791 }, { "epoch": 0.3607143529234649, "grad_norm": 4.773987293243408, "learning_rate": 7.40759315835994e-05, "loss": 1.8078, "step": 4792 }, { "epoch": 0.36078962720412505, "grad_norm": 6.124429702758789, "learning_rate": 7.40652463339641e-05, "loss": 2.1519, "step": 4793 }, { "epoch": 0.36086490148478517, "grad_norm": 5.819246768951416, "learning_rate": 7.405455965371892e-05, "loss": 1.7901, "step": 4794 }, { "epoch": 0.36094017576544535, "grad_norm": 5.987430095672607, "learning_rate": 7.40438715434992e-05, "loss": 2.6114, "step": 4795 }, { "epoch": 0.3610154500461055, "grad_norm": 5.041569709777832, "learning_rate": 7.403318200394033e-05, "loss": 2.4454, "step": 4796 }, { "epoch": 0.36109072432676564, "grad_norm": 7.414455413818359, "learning_rate": 7.402249103567773e-05, "loss": 2.1914, "step": 4797 }, { "epoch": 0.3611659986074258, "grad_norm": 4.2949724197387695, "learning_rate": 7.401179863934698e-05, "loss": 1.9993, "step": 4798 }, { "epoch": 0.36124127288808594, "grad_norm": 7.248776435852051, "learning_rate": 7.400110481558367e-05, "loss": 2.0786, "step": 4799 }, { "epoch": 0.3613165471687461, "grad_norm": 8.821529388427734, "learning_rate": 7.399040956502358e-05, "loss": 2.57, "step": 4800 }, { "epoch": 0.3613918214494063, "grad_norm": 4.257993221282959, "learning_rate": 7.397971288830246e-05, "loss": 1.808, "step": 4801 }, { "epoch": 0.3614670957300664, "grad_norm": 4.006655216217041, "learning_rate": 7.39690147860562e-05, "loss": 1.9007, "step": 4802 }, { "epoch": 0.3615423700107266, "grad_norm": 3.936253309249878, "learning_rate": 7.39583152589208e-05, "loss": 1.9579, "step": 4803 }, { "epoch": 0.36161764429138676, "grad_norm": 5.082552909851074, "learning_rate": 7.394761430753232e-05, "loss": 2.0267, "step": 4804 }, { "epoch": 0.3616929185720469, "grad_norm": 4.073530197143555, "learning_rate": 7.393691193252685e-05, "loss": 2.0529, "step": 4805 }, { "epoch": 0.36176819285270706, "grad_norm": 4.454007148742676, "learning_rate": 7.392620813454066e-05, "loss": 1.7266, "step": 4806 }, { "epoch": 0.3618434671333672, "grad_norm": 6.2713303565979, "learning_rate": 7.391550291421007e-05, "loss": 1.9563, "step": 4807 }, { "epoch": 0.36191874141402736, "grad_norm": 3.697896718978882, "learning_rate": 7.390479627217143e-05, "loss": 2.1485, "step": 4808 }, { "epoch": 0.36199401569468753, "grad_norm": 5.898225784301758, "learning_rate": 7.389408820906124e-05, "loss": 2.2685, "step": 4809 }, { "epoch": 0.36206928997534765, "grad_norm": 2.9236931800842285, "learning_rate": 7.388337872551607e-05, "loss": 2.0541, "step": 4810 }, { "epoch": 0.36214456425600783, "grad_norm": 4.811728477478027, "learning_rate": 7.387266782217258e-05, "loss": 2.3104, "step": 4811 }, { "epoch": 0.362219838536668, "grad_norm": 5.527030944824219, "learning_rate": 7.386195549966747e-05, "loss": 1.7537, "step": 4812 }, { "epoch": 0.3622951128173281, "grad_norm": 5.188048362731934, "learning_rate": 7.385124175863759e-05, "loss": 1.8926, "step": 4813 }, { "epoch": 0.3623703870979883, "grad_norm": 4.454374313354492, "learning_rate": 7.38405265997198e-05, "loss": 2.1972, "step": 4814 }, { "epoch": 0.3624456613786485, "grad_norm": 3.2439000606536865, "learning_rate": 7.382981002355111e-05, "loss": 1.8493, "step": 4815 }, { "epoch": 0.3625209356593086, "grad_norm": 4.143496990203857, "learning_rate": 7.38190920307686e-05, "loss": 2.1858, "step": 4816 }, { "epoch": 0.36259620993996877, "grad_norm": 3.6470460891723633, "learning_rate": 7.380837262200943e-05, "loss": 2.0283, "step": 4817 }, { "epoch": 0.3626714842206289, "grad_norm": 4.135168552398682, "learning_rate": 7.379765179791081e-05, "loss": 1.8709, "step": 4818 }, { "epoch": 0.36274675850128907, "grad_norm": 5.41266393661499, "learning_rate": 7.378692955911005e-05, "loss": 1.7939, "step": 4819 }, { "epoch": 0.36282203278194924, "grad_norm": 3.172822952270508, "learning_rate": 7.377620590624462e-05, "loss": 2.0273, "step": 4820 }, { "epoch": 0.36289730706260936, "grad_norm": 4.549415588378906, "learning_rate": 7.376548083995193e-05, "loss": 1.5833, "step": 4821 }, { "epoch": 0.36297258134326954, "grad_norm": 4.41801118850708, "learning_rate": 7.375475436086961e-05, "loss": 1.758, "step": 4822 }, { "epoch": 0.3630478556239297, "grad_norm": 4.3426194190979, "learning_rate": 7.374402646963533e-05, "loss": 1.9815, "step": 4823 }, { "epoch": 0.36312312990458984, "grad_norm": 6.006479740142822, "learning_rate": 7.373329716688677e-05, "loss": 1.9615, "step": 4824 }, { "epoch": 0.36319840418525, "grad_norm": 4.682624340057373, "learning_rate": 7.37225664532618e-05, "loss": 2.045, "step": 4825 }, { "epoch": 0.36327367846591013, "grad_norm": 3.200746774673462, "learning_rate": 7.371183432939832e-05, "loss": 2.1439, "step": 4826 }, { "epoch": 0.3633489527465703, "grad_norm": 4.339603900909424, "learning_rate": 7.370110079593435e-05, "loss": 2.1029, "step": 4827 }, { "epoch": 0.3634242270272305, "grad_norm": 3.5030617713928223, "learning_rate": 7.369036585350792e-05, "loss": 1.6845, "step": 4828 }, { "epoch": 0.3634995013078906, "grad_norm": 4.315023422241211, "learning_rate": 7.367962950275724e-05, "loss": 1.8469, "step": 4829 }, { "epoch": 0.3635747755885508, "grad_norm": 4.554084300994873, "learning_rate": 7.36688917443205e-05, "loss": 2.2038, "step": 4830 }, { "epoch": 0.36365004986921096, "grad_norm": 5.289467811584473, "learning_rate": 7.365815257883607e-05, "loss": 2.2925, "step": 4831 }, { "epoch": 0.3637253241498711, "grad_norm": 4.193687438964844, "learning_rate": 7.364741200694235e-05, "loss": 1.7365, "step": 4832 }, { "epoch": 0.36380059843053125, "grad_norm": 4.348536968231201, "learning_rate": 7.363667002927785e-05, "loss": 1.8362, "step": 4833 }, { "epoch": 0.36387587271119143, "grad_norm": 6.698772430419922, "learning_rate": 7.362592664648113e-05, "loss": 1.9214, "step": 4834 }, { "epoch": 0.36395114699185155, "grad_norm": 5.859483242034912, "learning_rate": 7.361518185919087e-05, "loss": 1.7184, "step": 4835 }, { "epoch": 0.3640264212725117, "grad_norm": 4.6047797203063965, "learning_rate": 7.360443566804578e-05, "loss": 2.2897, "step": 4836 }, { "epoch": 0.36410169555317184, "grad_norm": 4.616860389709473, "learning_rate": 7.359368807368474e-05, "loss": 1.8805, "step": 4837 }, { "epoch": 0.364176969833832, "grad_norm": 4.212845802307129, "learning_rate": 7.358293907674664e-05, "loss": 1.9099, "step": 4838 }, { "epoch": 0.3642522441144922, "grad_norm": 5.893839359283447, "learning_rate": 7.357218867787048e-05, "loss": 2.2213, "step": 4839 }, { "epoch": 0.3643275183951523, "grad_norm": 5.683198928833008, "learning_rate": 7.356143687769534e-05, "loss": 1.9078, "step": 4840 }, { "epoch": 0.3644027926758125, "grad_norm": 5.062298774719238, "learning_rate": 7.355068367686039e-05, "loss": 1.7167, "step": 4841 }, { "epoch": 0.36447806695647267, "grad_norm": 4.512243747711182, "learning_rate": 7.353992907600484e-05, "loss": 1.9748, "step": 4842 }, { "epoch": 0.3645533412371328, "grad_norm": 4.125036716461182, "learning_rate": 7.352917307576808e-05, "loss": 1.7362, "step": 4843 }, { "epoch": 0.36462861551779296, "grad_norm": 4.952609539031982, "learning_rate": 7.351841567678948e-05, "loss": 1.9814, "step": 4844 }, { "epoch": 0.36470388979845314, "grad_norm": 3.766040086746216, "learning_rate": 7.350765687970856e-05, "loss": 1.6756, "step": 4845 }, { "epoch": 0.36477916407911326, "grad_norm": 4.0204057693481445, "learning_rate": 7.349689668516488e-05, "loss": 2.078, "step": 4846 }, { "epoch": 0.36485443835977344, "grad_norm": 6.277435302734375, "learning_rate": 7.348613509379811e-05, "loss": 1.9333, "step": 4847 }, { "epoch": 0.36492971264043356, "grad_norm": 5.5256147384643555, "learning_rate": 7.347537210624802e-05, "loss": 2.0258, "step": 4848 }, { "epoch": 0.36500498692109373, "grad_norm": 6.066572666168213, "learning_rate": 7.346460772315439e-05, "loss": 2.072, "step": 4849 }, { "epoch": 0.3650802612017539, "grad_norm": 6.329031944274902, "learning_rate": 7.345384194515719e-05, "loss": 2.0047, "step": 4850 }, { "epoch": 0.36515553548241403, "grad_norm": 4.246005058288574, "learning_rate": 7.344307477289637e-05, "loss": 1.9237, "step": 4851 }, { "epoch": 0.3652308097630742, "grad_norm": 5.608550548553467, "learning_rate": 7.343230620701199e-05, "loss": 1.9446, "step": 4852 }, { "epoch": 0.3653060840437344, "grad_norm": 4.107571601867676, "learning_rate": 7.342153624814427e-05, "loss": 2.1045, "step": 4853 }, { "epoch": 0.3653813583243945, "grad_norm": 4.538722991943359, "learning_rate": 7.34107648969334e-05, "loss": 2.1323, "step": 4854 }, { "epoch": 0.3654566326050547, "grad_norm": 4.807298183441162, "learning_rate": 7.339999215401975e-05, "loss": 1.8293, "step": 4855 }, { "epoch": 0.3655319068857148, "grad_norm": 4.328993320465088, "learning_rate": 7.338921802004372e-05, "loss": 2.0214, "step": 4856 }, { "epoch": 0.365607181166375, "grad_norm": 5.195735454559326, "learning_rate": 7.337844249564577e-05, "loss": 2.1436, "step": 4857 }, { "epoch": 0.36568245544703515, "grad_norm": 5.643425941467285, "learning_rate": 7.33676655814665e-05, "loss": 1.7839, "step": 4858 }, { "epoch": 0.36575772972769527, "grad_norm": 4.731564998626709, "learning_rate": 7.335688727814655e-05, "loss": 1.998, "step": 4859 }, { "epoch": 0.36583300400835544, "grad_norm": 3.9718198776245117, "learning_rate": 7.33461075863267e-05, "loss": 2.0454, "step": 4860 }, { "epoch": 0.3659082782890156, "grad_norm": 6.705219268798828, "learning_rate": 7.333532650664772e-05, "loss": 1.8717, "step": 4861 }, { "epoch": 0.36598355256967574, "grad_norm": 4.640828609466553, "learning_rate": 7.332454403975054e-05, "loss": 1.9267, "step": 4862 }, { "epoch": 0.3660588268503359, "grad_norm": 4.463264465332031, "learning_rate": 7.331376018627613e-05, "loss": 1.8131, "step": 4863 }, { "epoch": 0.3661341011309961, "grad_norm": 5.110785007476807, "learning_rate": 7.330297494686557e-05, "loss": 1.8752, "step": 4864 }, { "epoch": 0.3662093754116562, "grad_norm": 5.874404430389404, "learning_rate": 7.329218832216003e-05, "loss": 1.7848, "step": 4865 }, { "epoch": 0.3662846496923164, "grad_norm": 4.12038516998291, "learning_rate": 7.328140031280074e-05, "loss": 1.8644, "step": 4866 }, { "epoch": 0.3663599239729765, "grad_norm": 4.457777500152588, "learning_rate": 7.327061091942897e-05, "loss": 1.969, "step": 4867 }, { "epoch": 0.3664351982536367, "grad_norm": 5.2491655349731445, "learning_rate": 7.325982014268617e-05, "loss": 1.7904, "step": 4868 }, { "epoch": 0.36651047253429686, "grad_norm": 4.683348655700684, "learning_rate": 7.324902798321379e-05, "loss": 1.7999, "step": 4869 }, { "epoch": 0.366585746814957, "grad_norm": 4.114802837371826, "learning_rate": 7.32382344416534e-05, "loss": 2.0892, "step": 4870 }, { "epoch": 0.36666102109561716, "grad_norm": 7.05025053024292, "learning_rate": 7.322743951864668e-05, "loss": 2.295, "step": 4871 }, { "epoch": 0.36673629537627733, "grad_norm": 5.158146381378174, "learning_rate": 7.321664321483531e-05, "loss": 2.2242, "step": 4872 }, { "epoch": 0.36681156965693745, "grad_norm": 5.335732460021973, "learning_rate": 7.320584553086113e-05, "loss": 1.8312, "step": 4873 }, { "epoch": 0.36688684393759763, "grad_norm": 4.165846824645996, "learning_rate": 7.3195046467366e-05, "loss": 2.162, "step": 4874 }, { "epoch": 0.3669621182182578, "grad_norm": 4.397825717926025, "learning_rate": 7.318424602499192e-05, "loss": 1.9227, "step": 4875 }, { "epoch": 0.3670373924989179, "grad_norm": 4.312918663024902, "learning_rate": 7.317344420438093e-05, "loss": 1.7857, "step": 4876 }, { "epoch": 0.3671126667795781, "grad_norm": 6.843772888183594, "learning_rate": 7.316264100617518e-05, "loss": 2.1434, "step": 4877 }, { "epoch": 0.3671879410602382, "grad_norm": 5.819020748138428, "learning_rate": 7.315183643101689e-05, "loss": 1.9415, "step": 4878 }, { "epoch": 0.3672632153408984, "grad_norm": 4.090846061706543, "learning_rate": 7.314103047954834e-05, "loss": 1.468, "step": 4879 }, { "epoch": 0.3673384896215586, "grad_norm": 4.501558780670166, "learning_rate": 7.313022315241195e-05, "loss": 1.9053, "step": 4880 }, { "epoch": 0.3674137639022187, "grad_norm": 4.205174446105957, "learning_rate": 7.311941445025014e-05, "loss": 2.0861, "step": 4881 }, { "epoch": 0.36748903818287887, "grad_norm": 6.205946445465088, "learning_rate": 7.310860437370548e-05, "loss": 2.1394, "step": 4882 }, { "epoch": 0.36756431246353904, "grad_norm": 4.712464332580566, "learning_rate": 7.30977929234206e-05, "loss": 1.8987, "step": 4883 }, { "epoch": 0.36763958674419916, "grad_norm": 5.140176773071289, "learning_rate": 7.308698010003822e-05, "loss": 1.8801, "step": 4884 }, { "epoch": 0.36771486102485934, "grad_norm": 5.020617485046387, "learning_rate": 7.30761659042011e-05, "loss": 1.82, "step": 4885 }, { "epoch": 0.36779013530551946, "grad_norm": 6.558064937591553, "learning_rate": 7.306535033655212e-05, "loss": 2.1496, "step": 4886 }, { "epoch": 0.36786540958617964, "grad_norm": 5.348294258117676, "learning_rate": 7.305453339773425e-05, "loss": 1.9108, "step": 4887 }, { "epoch": 0.3679406838668398, "grad_norm": 4.863654136657715, "learning_rate": 7.304371508839053e-05, "loss": 1.825, "step": 4888 }, { "epoch": 0.36801595814749993, "grad_norm": 4.5959978103637695, "learning_rate": 7.303289540916407e-05, "loss": 2.1451, "step": 4889 }, { "epoch": 0.3680912324281601, "grad_norm": 5.211095333099365, "learning_rate": 7.302207436069807e-05, "loss": 1.8705, "step": 4890 }, { "epoch": 0.3681665067088203, "grad_norm": 5.763791561126709, "learning_rate": 7.301125194363579e-05, "loss": 2.188, "step": 4891 }, { "epoch": 0.3682417809894804, "grad_norm": 5.282546520233154, "learning_rate": 7.300042815862062e-05, "loss": 2.1692, "step": 4892 }, { "epoch": 0.3683170552701406, "grad_norm": 5.3406171798706055, "learning_rate": 7.298960300629598e-05, "loss": 1.9179, "step": 4893 }, { "epoch": 0.36839232955080076, "grad_norm": 4.139369010925293, "learning_rate": 7.297877648730542e-05, "loss": 2.382, "step": 4894 }, { "epoch": 0.3684676038314609, "grad_norm": 4.6781463623046875, "learning_rate": 7.296794860229254e-05, "loss": 2.3396, "step": 4895 }, { "epoch": 0.36854287811212105, "grad_norm": 6.445566177368164, "learning_rate": 7.295711935190099e-05, "loss": 2.2569, "step": 4896 }, { "epoch": 0.3686181523927812, "grad_norm": 3.5880117416381836, "learning_rate": 7.294628873677458e-05, "loss": 1.8082, "step": 4897 }, { "epoch": 0.36869342667344135, "grad_norm": 4.24821138381958, "learning_rate": 7.293545675755716e-05, "loss": 1.7728, "step": 4898 }, { "epoch": 0.3687687009541015, "grad_norm": 5.272346496582031, "learning_rate": 7.292462341489262e-05, "loss": 1.9196, "step": 4899 }, { "epoch": 0.36884397523476165, "grad_norm": 5.66727352142334, "learning_rate": 7.291378870942501e-05, "loss": 1.9229, "step": 4900 }, { "epoch": 0.3689192495154218, "grad_norm": 5.6897969245910645, "learning_rate": 7.29029526417984e-05, "loss": 1.8551, "step": 4901 }, { "epoch": 0.368994523796082, "grad_norm": 5.403607368469238, "learning_rate": 7.289211521265698e-05, "loss": 1.6613, "step": 4902 }, { "epoch": 0.3690697980767421, "grad_norm": 4.262795925140381, "learning_rate": 7.288127642264497e-05, "loss": 2.139, "step": 4903 }, { "epoch": 0.3691450723574023, "grad_norm": 4.469027519226074, "learning_rate": 7.287043627240676e-05, "loss": 2.1908, "step": 4904 }, { "epoch": 0.3692203466380624, "grad_norm": 4.615016460418701, "learning_rate": 7.285959476258673e-05, "loss": 1.8332, "step": 4905 }, { "epoch": 0.3692956209187226, "grad_norm": 6.115482807159424, "learning_rate": 7.284875189382938e-05, "loss": 2.1107, "step": 4906 }, { "epoch": 0.36937089519938276, "grad_norm": 4.915789604187012, "learning_rate": 7.283790766677931e-05, "loss": 2.0013, "step": 4907 }, { "epoch": 0.3694461694800429, "grad_norm": 9.588419914245605, "learning_rate": 7.282706208208113e-05, "loss": 2.1525, "step": 4908 }, { "epoch": 0.36952144376070306, "grad_norm": 4.847698211669922, "learning_rate": 7.281621514037962e-05, "loss": 1.915, "step": 4909 }, { "epoch": 0.36959671804136324, "grad_norm": 4.7174153327941895, "learning_rate": 7.280536684231957e-05, "loss": 1.8076, "step": 4910 }, { "epoch": 0.36967199232202336, "grad_norm": 4.227524280548096, "learning_rate": 7.279451718854592e-05, "loss": 1.8091, "step": 4911 }, { "epoch": 0.36974726660268353, "grad_norm": 3.4831960201263428, "learning_rate": 7.278366617970363e-05, "loss": 1.7343, "step": 4912 }, { "epoch": 0.3698225408833437, "grad_norm": 4.319570541381836, "learning_rate": 7.277281381643774e-05, "loss": 1.9639, "step": 4913 }, { "epoch": 0.36989781516400383, "grad_norm": 4.9830241203308105, "learning_rate": 7.276196009939342e-05, "loss": 2.3083, "step": 4914 }, { "epoch": 0.369973089444664, "grad_norm": 5.01857328414917, "learning_rate": 7.275110502921588e-05, "loss": 1.7981, "step": 4915 }, { "epoch": 0.3700483637253241, "grad_norm": 4.583183288574219, "learning_rate": 7.274024860655044e-05, "loss": 2.1572, "step": 4916 }, { "epoch": 0.3701236380059843, "grad_norm": 4.341670036315918, "learning_rate": 7.272939083204246e-05, "loss": 2.2375, "step": 4917 }, { "epoch": 0.3701989122866445, "grad_norm": 5.428600311279297, "learning_rate": 7.27185317063374e-05, "loss": 2.0468, "step": 4918 }, { "epoch": 0.3702741865673046, "grad_norm": 5.212686538696289, "learning_rate": 7.270767123008083e-05, "loss": 1.6357, "step": 4919 }, { "epoch": 0.3703494608479648, "grad_norm": 4.537408828735352, "learning_rate": 7.269680940391836e-05, "loss": 1.9553, "step": 4920 }, { "epoch": 0.37042473512862495, "grad_norm": 3.0378222465515137, "learning_rate": 7.268594622849569e-05, "loss": 1.8631, "step": 4921 }, { "epoch": 0.37050000940928507, "grad_norm": 3.8827826976776123, "learning_rate": 7.267508170445862e-05, "loss": 2.206, "step": 4922 }, { "epoch": 0.37057528368994525, "grad_norm": 4.087047100067139, "learning_rate": 7.266421583245299e-05, "loss": 1.6895, "step": 4923 }, { "epoch": 0.3706505579706054, "grad_norm": 4.350297927856445, "learning_rate": 7.265334861312479e-05, "loss": 1.7875, "step": 4924 }, { "epoch": 0.37072583225126554, "grad_norm": 5.5706963539123535, "learning_rate": 7.264248004711998e-05, "loss": 2.1399, "step": 4925 }, { "epoch": 0.3708011065319257, "grad_norm": 6.357446193695068, "learning_rate": 7.263161013508471e-05, "loss": 1.9069, "step": 4926 }, { "epoch": 0.37087638081258584, "grad_norm": 6.167360782623291, "learning_rate": 7.262073887766517e-05, "loss": 2.0014, "step": 4927 }, { "epoch": 0.370951655093246, "grad_norm": 4.467043399810791, "learning_rate": 7.26098662755076e-05, "loss": 1.9529, "step": 4928 }, { "epoch": 0.3710269293739062, "grad_norm": 4.538745403289795, "learning_rate": 7.259899232925835e-05, "loss": 1.9351, "step": 4929 }, { "epoch": 0.3711022036545663, "grad_norm": 5.637248516082764, "learning_rate": 7.258811703956385e-05, "loss": 2.1126, "step": 4930 }, { "epoch": 0.3711774779352265, "grad_norm": 5.919388771057129, "learning_rate": 7.257724040707061e-05, "loss": 1.7308, "step": 4931 }, { "epoch": 0.37125275221588666, "grad_norm": 4.361879348754883, "learning_rate": 7.25663624324252e-05, "loss": 1.9224, "step": 4932 }, { "epoch": 0.3713280264965468, "grad_norm": 7.6469502449035645, "learning_rate": 7.255548311627432e-05, "loss": 1.7658, "step": 4933 }, { "epoch": 0.37140330077720696, "grad_norm": 7.319008827209473, "learning_rate": 7.254460245926466e-05, "loss": 1.9378, "step": 4934 }, { "epoch": 0.3714785750578671, "grad_norm": 5.513526916503906, "learning_rate": 7.253372046204307e-05, "loss": 2.2914, "step": 4935 }, { "epoch": 0.37155384933852725, "grad_norm": 4.749660015106201, "learning_rate": 7.252283712525646e-05, "loss": 1.8299, "step": 4936 }, { "epoch": 0.37162912361918743, "grad_norm": 4.704840660095215, "learning_rate": 7.251195244955183e-05, "loss": 1.9976, "step": 4937 }, { "epoch": 0.37170439789984755, "grad_norm": 5.760531425476074, "learning_rate": 7.250106643557619e-05, "loss": 2.0084, "step": 4938 }, { "epoch": 0.3717796721805077, "grad_norm": 4.350567817687988, "learning_rate": 7.249017908397673e-05, "loss": 1.8266, "step": 4939 }, { "epoch": 0.3718549464611679, "grad_norm": 4.014101028442383, "learning_rate": 7.247929039540066e-05, "loss": 1.7896, "step": 4940 }, { "epoch": 0.371930220741828, "grad_norm": 3.99737286567688, "learning_rate": 7.246840037049527e-05, "loss": 2.1688, "step": 4941 }, { "epoch": 0.3720054950224882, "grad_norm": 3.815800666809082, "learning_rate": 7.245750900990793e-05, "loss": 2.1723, "step": 4942 }, { "epoch": 0.3720807693031484, "grad_norm": 3.4326460361480713, "learning_rate": 7.244661631428614e-05, "loss": 2.0385, "step": 4943 }, { "epoch": 0.3721560435838085, "grad_norm": 4.556941986083984, "learning_rate": 7.243572228427743e-05, "loss": 2.0194, "step": 4944 }, { "epoch": 0.37223131786446867, "grad_norm": 6.778387069702148, "learning_rate": 7.242482692052936e-05, "loss": 1.8463, "step": 4945 }, { "epoch": 0.3723065921451288, "grad_norm": 5.627053737640381, "learning_rate": 7.24139302236897e-05, "loss": 2.1858, "step": 4946 }, { "epoch": 0.37238186642578897, "grad_norm": 5.661882400512695, "learning_rate": 7.24030321944062e-05, "loss": 1.9663, "step": 4947 }, { "epoch": 0.37245714070644914, "grad_norm": 4.480603218078613, "learning_rate": 7.239213283332672e-05, "loss": 2.0683, "step": 4948 }, { "epoch": 0.37253241498710926, "grad_norm": 4.722218036651611, "learning_rate": 7.23812321410992e-05, "loss": 1.6676, "step": 4949 }, { "epoch": 0.37260768926776944, "grad_norm": 5.633545875549316, "learning_rate": 7.237033011837162e-05, "loss": 1.8249, "step": 4950 }, { "epoch": 0.3726829635484296, "grad_norm": 4.879673957824707, "learning_rate": 7.235942676579213e-05, "loss": 2.2057, "step": 4951 }, { "epoch": 0.37275823782908973, "grad_norm": 4.61749267578125, "learning_rate": 7.234852208400886e-05, "loss": 2.0422, "step": 4952 }, { "epoch": 0.3728335121097499, "grad_norm": 4.231074810028076, "learning_rate": 7.233761607367009e-05, "loss": 1.8792, "step": 4953 }, { "epoch": 0.37290878639041003, "grad_norm": 4.710064888000488, "learning_rate": 7.232670873542413e-05, "loss": 1.9134, "step": 4954 }, { "epoch": 0.3729840606710702, "grad_norm": 4.5765461921691895, "learning_rate": 7.23158000699194e-05, "loss": 1.7344, "step": 4955 }, { "epoch": 0.3730593349517304, "grad_norm": 4.981764316558838, "learning_rate": 7.230489007780439e-05, "loss": 1.8601, "step": 4956 }, { "epoch": 0.3731346092323905, "grad_norm": 7.854146480560303, "learning_rate": 7.229397875972765e-05, "loss": 2.4389, "step": 4957 }, { "epoch": 0.3732098835130507, "grad_norm": 6.466365814208984, "learning_rate": 7.228306611633787e-05, "loss": 2.0474, "step": 4958 }, { "epoch": 0.37328515779371085, "grad_norm": 5.906894207000732, "learning_rate": 7.22721521482837e-05, "loss": 2.0779, "step": 4959 }, { "epoch": 0.373360432074371, "grad_norm": 5.88653039932251, "learning_rate": 7.226123685621405e-05, "loss": 2.1126, "step": 4960 }, { "epoch": 0.37343570635503115, "grad_norm": 4.718963146209717, "learning_rate": 7.22503202407777e-05, "loss": 2.0624, "step": 4961 }, { "epoch": 0.3735109806356913, "grad_norm": 5.176740646362305, "learning_rate": 7.223940230262367e-05, "loss": 1.7592, "step": 4962 }, { "epoch": 0.37358625491635145, "grad_norm": 5.126104354858398, "learning_rate": 7.2228483042401e-05, "loss": 1.7519, "step": 4963 }, { "epoch": 0.3736615291970116, "grad_norm": 5.948572158813477, "learning_rate": 7.221756246075877e-05, "loss": 2.3341, "step": 4964 }, { "epoch": 0.37373680347767174, "grad_norm": 4.3009467124938965, "learning_rate": 7.22066405583462e-05, "loss": 1.8655, "step": 4965 }, { "epoch": 0.3738120777583319, "grad_norm": 5.706455707550049, "learning_rate": 7.219571733581257e-05, "loss": 1.9716, "step": 4966 }, { "epoch": 0.3738873520389921, "grad_norm": 6.1628642082214355, "learning_rate": 7.218479279380725e-05, "loss": 1.9441, "step": 4967 }, { "epoch": 0.3739626263196522, "grad_norm": 4.59248161315918, "learning_rate": 7.217386693297963e-05, "loss": 2.3536, "step": 4968 }, { "epoch": 0.3740379006003124, "grad_norm": 6.029731273651123, "learning_rate": 7.216293975397927e-05, "loss": 2.0032, "step": 4969 }, { "epoch": 0.37411317488097257, "grad_norm": 5.294279098510742, "learning_rate": 7.21520112574557e-05, "loss": 2.138, "step": 4970 }, { "epoch": 0.3741884491616327, "grad_norm": 5.968223571777344, "learning_rate": 7.214108144405865e-05, "loss": 2.0692, "step": 4971 }, { "epoch": 0.37426372344229286, "grad_norm": 4.473196506500244, "learning_rate": 7.213015031443783e-05, "loss": 2.1575, "step": 4972 }, { "epoch": 0.37433899772295304, "grad_norm": 4.75431489944458, "learning_rate": 7.211921786924308e-05, "loss": 1.882, "step": 4973 }, { "epoch": 0.37441427200361316, "grad_norm": 4.734861850738525, "learning_rate": 7.210828410912428e-05, "loss": 2.0258, "step": 4974 }, { "epoch": 0.37448954628427333, "grad_norm": 3.701101064682007, "learning_rate": 7.209734903473143e-05, "loss": 1.7323, "step": 4975 }, { "epoch": 0.37456482056493345, "grad_norm": 4.051206111907959, "learning_rate": 7.208641264671458e-05, "loss": 1.8825, "step": 4976 }, { "epoch": 0.37464009484559363, "grad_norm": 3.509989023208618, "learning_rate": 7.207547494572388e-05, "loss": 1.7323, "step": 4977 }, { "epoch": 0.3747153691262538, "grad_norm": 4.238576412200928, "learning_rate": 7.206453593240954e-05, "loss": 2.0529, "step": 4978 }, { "epoch": 0.3747906434069139, "grad_norm": 3.843618631362915, "learning_rate": 7.205359560742185e-05, "loss": 2.1025, "step": 4979 }, { "epoch": 0.3748659176875741, "grad_norm": 5.472354888916016, "learning_rate": 7.204265397141116e-05, "loss": 1.9685, "step": 4980 }, { "epoch": 0.3749411919682343, "grad_norm": 4.376461505889893, "learning_rate": 7.203171102502795e-05, "loss": 2.2941, "step": 4981 }, { "epoch": 0.3750164662488944, "grad_norm": 4.474477291107178, "learning_rate": 7.202076676892273e-05, "loss": 1.9242, "step": 4982 }, { "epoch": 0.3750917405295546, "grad_norm": 6.944291591644287, "learning_rate": 7.200982120374613e-05, "loss": 1.8588, "step": 4983 }, { "epoch": 0.3751670148102147, "grad_norm": 4.876765251159668, "learning_rate": 7.199887433014879e-05, "loss": 1.9065, "step": 4984 }, { "epoch": 0.37524228909087487, "grad_norm": 4.21694278717041, "learning_rate": 7.19879261487815e-05, "loss": 2.1048, "step": 4985 }, { "epoch": 0.37531756337153505, "grad_norm": 4.962738990783691, "learning_rate": 7.197697666029511e-05, "loss": 1.9615, "step": 4986 }, { "epoch": 0.37539283765219517, "grad_norm": 4.720092296600342, "learning_rate": 7.196602586534049e-05, "loss": 2.2484, "step": 4987 }, { "epoch": 0.37546811193285534, "grad_norm": 3.698617696762085, "learning_rate": 7.195507376456867e-05, "loss": 2.1332, "step": 4988 }, { "epoch": 0.3755433862135155, "grad_norm": 4.769763469696045, "learning_rate": 7.194412035863072e-05, "loss": 1.8165, "step": 4989 }, { "epoch": 0.37561866049417564, "grad_norm": 4.7708845138549805, "learning_rate": 7.193316564817776e-05, "loss": 1.7809, "step": 4990 }, { "epoch": 0.3756939347748358, "grad_norm": 5.258187294006348, "learning_rate": 7.192220963386104e-05, "loss": 2.0299, "step": 4991 }, { "epoch": 0.375769209055496, "grad_norm": 4.5305047035217285, "learning_rate": 7.191125231633187e-05, "loss": 1.7322, "step": 4992 }, { "epoch": 0.3758444833361561, "grad_norm": 5.102001190185547, "learning_rate": 7.190029369624162e-05, "loss": 1.6057, "step": 4993 }, { "epoch": 0.3759197576168163, "grad_norm": 4.977925777435303, "learning_rate": 7.188933377424174e-05, "loss": 2.2238, "step": 4994 }, { "epoch": 0.3759950318974764, "grad_norm": 3.880150079727173, "learning_rate": 7.187837255098379e-05, "loss": 1.8831, "step": 4995 }, { "epoch": 0.3760703061781366, "grad_norm": 5.856607437133789, "learning_rate": 7.186741002711935e-05, "loss": 1.826, "step": 4996 }, { "epoch": 0.37614558045879676, "grad_norm": 6.347630977630615, "learning_rate": 7.185644620330014e-05, "loss": 2.368, "step": 4997 }, { "epoch": 0.3762208547394569, "grad_norm": 4.962389945983887, "learning_rate": 7.18454810801779e-05, "loss": 2.0538, "step": 4998 }, { "epoch": 0.37629612902011705, "grad_norm": 5.2876715660095215, "learning_rate": 7.18345146584045e-05, "loss": 2.0556, "step": 4999 }, { "epoch": 0.37637140330077723, "grad_norm": 4.463347911834717, "learning_rate": 7.182354693863186e-05, "loss": 1.8828, "step": 5000 }, { "epoch": 0.37644667758143735, "grad_norm": 5.426022529602051, "learning_rate": 7.181257792151197e-05, "loss": 1.7753, "step": 5001 }, { "epoch": 0.3765219518620975, "grad_norm": 5.392134189605713, "learning_rate": 7.180160760769692e-05, "loss": 1.9247, "step": 5002 }, { "epoch": 0.37659722614275765, "grad_norm": 5.8048577308654785, "learning_rate": 7.179063599783884e-05, "loss": 2.0037, "step": 5003 }, { "epoch": 0.3766725004234178, "grad_norm": 4.083925247192383, "learning_rate": 7.177966309258998e-05, "loss": 1.9497, "step": 5004 }, { "epoch": 0.376747774704078, "grad_norm": 3.546855926513672, "learning_rate": 7.176868889260264e-05, "loss": 1.7008, "step": 5005 }, { "epoch": 0.3768230489847381, "grad_norm": 3.7905898094177246, "learning_rate": 7.17577133985292e-05, "loss": 1.8258, "step": 5006 }, { "epoch": 0.3768983232653983, "grad_norm": 3.6993086338043213, "learning_rate": 7.174673661102214e-05, "loss": 1.7559, "step": 5007 }, { "epoch": 0.37697359754605847, "grad_norm": 4.094613552093506, "learning_rate": 7.173575853073398e-05, "loss": 1.541, "step": 5008 }, { "epoch": 0.3770488718267186, "grad_norm": 5.412510871887207, "learning_rate": 7.172477915831734e-05, "loss": 1.9054, "step": 5009 }, { "epoch": 0.37712414610737877, "grad_norm": 4.654134273529053, "learning_rate": 7.171379849442493e-05, "loss": 1.817, "step": 5010 }, { "epoch": 0.37719942038803894, "grad_norm": 5.500486850738525, "learning_rate": 7.170281653970949e-05, "loss": 2.0739, "step": 5011 }, { "epoch": 0.37727469466869906, "grad_norm": 4.02164363861084, "learning_rate": 7.169183329482388e-05, "loss": 2.0592, "step": 5012 }, { "epoch": 0.37734996894935924, "grad_norm": 6.750339984893799, "learning_rate": 7.168084876042103e-05, "loss": 2.1483, "step": 5013 }, { "epoch": 0.37742524323001936, "grad_norm": 3.388923406600952, "learning_rate": 7.166986293715391e-05, "loss": 1.8918, "step": 5014 }, { "epoch": 0.37750051751067953, "grad_norm": 3.9066715240478516, "learning_rate": 7.165887582567563e-05, "loss": 1.8342, "step": 5015 }, { "epoch": 0.3775757917913397, "grad_norm": 5.191504001617432, "learning_rate": 7.164788742663932e-05, "loss": 2.0172, "step": 5016 }, { "epoch": 0.37765106607199983, "grad_norm": 4.917282581329346, "learning_rate": 7.163689774069823e-05, "loss": 1.8206, "step": 5017 }, { "epoch": 0.37772634035266, "grad_norm": 5.534020900726318, "learning_rate": 7.162590676850565e-05, "loss": 1.9287, "step": 5018 }, { "epoch": 0.3778016146333202, "grad_norm": 4.5132575035095215, "learning_rate": 7.161491451071495e-05, "loss": 1.9113, "step": 5019 }, { "epoch": 0.3778768889139803, "grad_norm": 4.004202365875244, "learning_rate": 7.160392096797963e-05, "loss": 1.9418, "step": 5020 }, { "epoch": 0.3779521631946405, "grad_norm": 4.021640777587891, "learning_rate": 7.159292614095318e-05, "loss": 1.8938, "step": 5021 }, { "epoch": 0.37802743747530065, "grad_norm": 4.470360279083252, "learning_rate": 7.158193003028922e-05, "loss": 1.9444, "step": 5022 }, { "epoch": 0.3781027117559608, "grad_norm": 4.077413082122803, "learning_rate": 7.157093263664147e-05, "loss": 2.1638, "step": 5023 }, { "epoch": 0.37817798603662095, "grad_norm": 4.954759120941162, "learning_rate": 7.155993396066365e-05, "loss": 2.5408, "step": 5024 }, { "epoch": 0.37825326031728107, "grad_norm": 4.298381805419922, "learning_rate": 7.154893400300962e-05, "loss": 1.7462, "step": 5025 }, { "epoch": 0.37832853459794125, "grad_norm": 5.803854942321777, "learning_rate": 7.153793276433329e-05, "loss": 1.8743, "step": 5026 }, { "epoch": 0.3784038088786014, "grad_norm": 4.230853080749512, "learning_rate": 7.152693024528867e-05, "loss": 2.1784, "step": 5027 }, { "epoch": 0.37847908315926154, "grad_norm": 4.332756519317627, "learning_rate": 7.15159264465298e-05, "loss": 2.0783, "step": 5028 }, { "epoch": 0.3785543574399217, "grad_norm": 3.7402727603912354, "learning_rate": 7.150492136871086e-05, "loss": 1.9596, "step": 5029 }, { "epoch": 0.3786296317205819, "grad_norm": 4.28030252456665, "learning_rate": 7.149391501248602e-05, "loss": 1.8501, "step": 5030 }, { "epoch": 0.378704906001242, "grad_norm": 4.2571563720703125, "learning_rate": 7.148290737850963e-05, "loss": 1.6562, "step": 5031 }, { "epoch": 0.3787801802819022, "grad_norm": 4.48539400100708, "learning_rate": 7.147189846743601e-05, "loss": 1.9153, "step": 5032 }, { "epoch": 0.3788554545625623, "grad_norm": 4.535348892211914, "learning_rate": 7.146088827991966e-05, "loss": 1.8151, "step": 5033 }, { "epoch": 0.3789307288432225, "grad_norm": 4.449469089508057, "learning_rate": 7.144987681661508e-05, "loss": 1.9655, "step": 5034 }, { "epoch": 0.37900600312388266, "grad_norm": 4.397324085235596, "learning_rate": 7.143886407817686e-05, "loss": 1.7285, "step": 5035 }, { "epoch": 0.3790812774045428, "grad_norm": 4.513822555541992, "learning_rate": 7.14278500652597e-05, "loss": 2.2549, "step": 5036 }, { "epoch": 0.37915655168520296, "grad_norm": 3.738454818725586, "learning_rate": 7.141683477851832e-05, "loss": 2.0126, "step": 5037 }, { "epoch": 0.37923182596586313, "grad_norm": 6.871589183807373, "learning_rate": 7.140581821860757e-05, "loss": 1.7037, "step": 5038 }, { "epoch": 0.37930710024652325, "grad_norm": 5.357760429382324, "learning_rate": 7.139480038618235e-05, "loss": 1.7613, "step": 5039 }, { "epoch": 0.37938237452718343, "grad_norm": 5.217741012573242, "learning_rate": 7.138378128189763e-05, "loss": 1.7709, "step": 5040 }, { "epoch": 0.3794576488078436, "grad_norm": 5.146366119384766, "learning_rate": 7.137276090640847e-05, "loss": 2.0119, "step": 5041 }, { "epoch": 0.3795329230885037, "grad_norm": 4.0107879638671875, "learning_rate": 7.136173926037e-05, "loss": 1.8021, "step": 5042 }, { "epoch": 0.3796081973691639, "grad_norm": 4.555127143859863, "learning_rate": 7.135071634443744e-05, "loss": 1.8569, "step": 5043 }, { "epoch": 0.379683471649824, "grad_norm": 4.552152633666992, "learning_rate": 7.133969215926604e-05, "loss": 1.6807, "step": 5044 }, { "epoch": 0.3797587459304842, "grad_norm": 4.354506969451904, "learning_rate": 7.132866670551117e-05, "loss": 1.9583, "step": 5045 }, { "epoch": 0.3798340202111444, "grad_norm": 5.571901321411133, "learning_rate": 7.131763998382828e-05, "loss": 1.8813, "step": 5046 }, { "epoch": 0.3799092944918045, "grad_norm": 4.542109966278076, "learning_rate": 7.130661199487283e-05, "loss": 2.0044, "step": 5047 }, { "epoch": 0.37998456877246467, "grad_norm": 4.0436296463012695, "learning_rate": 7.129558273930043e-05, "loss": 1.9924, "step": 5048 }, { "epoch": 0.38005984305312485, "grad_norm": 4.427436828613281, "learning_rate": 7.128455221776677e-05, "loss": 2.292, "step": 5049 }, { "epoch": 0.38013511733378497, "grad_norm": 4.2799506187438965, "learning_rate": 7.127352043092755e-05, "loss": 1.8546, "step": 5050 }, { "epoch": 0.38021039161444514, "grad_norm": 4.1891679763793945, "learning_rate": 7.126248737943858e-05, "loss": 1.6323, "step": 5051 }, { "epoch": 0.38028566589510526, "grad_norm": 3.9905247688293457, "learning_rate": 7.125145306395574e-05, "loss": 2.0276, "step": 5052 }, { "epoch": 0.38036094017576544, "grad_norm": 4.146697044372559, "learning_rate": 7.124041748513498e-05, "loss": 1.8364, "step": 5053 }, { "epoch": 0.3804362144564256, "grad_norm": 4.037591457366943, "learning_rate": 7.122938064363236e-05, "loss": 1.7057, "step": 5054 }, { "epoch": 0.38051148873708573, "grad_norm": 5.675656318664551, "learning_rate": 7.121834254010398e-05, "loss": 1.8862, "step": 5055 }, { "epoch": 0.3805867630177459, "grad_norm": 4.468969821929932, "learning_rate": 7.120730317520601e-05, "loss": 1.8913, "step": 5056 }, { "epoch": 0.3806620372984061, "grad_norm": 5.267385482788086, "learning_rate": 7.119626254959472e-05, "loss": 2.018, "step": 5057 }, { "epoch": 0.3807373115790662, "grad_norm": 4.253632545471191, "learning_rate": 7.118522066392644e-05, "loss": 1.9346, "step": 5058 }, { "epoch": 0.3808125858597264, "grad_norm": 5.13553524017334, "learning_rate": 7.117417751885756e-05, "loss": 2.0081, "step": 5059 }, { "epoch": 0.38088786014038656, "grad_norm": 5.386470317840576, "learning_rate": 7.11631331150446e-05, "loss": 1.7915, "step": 5060 }, { "epoch": 0.3809631344210467, "grad_norm": 4.69920015335083, "learning_rate": 7.11520874531441e-05, "loss": 2.1036, "step": 5061 }, { "epoch": 0.38103840870170685, "grad_norm": 5.338794708251953, "learning_rate": 7.114104053381269e-05, "loss": 1.6762, "step": 5062 }, { "epoch": 0.381113682982367, "grad_norm": 4.164871692657471, "learning_rate": 7.112999235770708e-05, "loss": 2.3391, "step": 5063 }, { "epoch": 0.38118895726302715, "grad_norm": 6.454789161682129, "learning_rate": 7.111894292548405e-05, "loss": 2.1076, "step": 5064 }, { "epoch": 0.3812642315436873, "grad_norm": 5.738858699798584, "learning_rate": 7.110789223780046e-05, "loss": 2.1032, "step": 5065 }, { "epoch": 0.38133950582434745, "grad_norm": 4.800664901733398, "learning_rate": 7.109684029531326e-05, "loss": 1.7477, "step": 5066 }, { "epoch": 0.3814147801050076, "grad_norm": 4.2151994705200195, "learning_rate": 7.108578709867941e-05, "loss": 1.8912, "step": 5067 }, { "epoch": 0.3814900543856678, "grad_norm": 3.88224720954895, "learning_rate": 7.107473264855604e-05, "loss": 1.9579, "step": 5068 }, { "epoch": 0.3815653286663279, "grad_norm": 4.584081172943115, "learning_rate": 7.106367694560028e-05, "loss": 1.923, "step": 5069 }, { "epoch": 0.3816406029469881, "grad_norm": 5.365553379058838, "learning_rate": 7.105261999046935e-05, "loss": 2.3511, "step": 5070 }, { "epoch": 0.38171587722764827, "grad_norm": 3.8131959438323975, "learning_rate": 7.10415617838206e-05, "loss": 1.9687, "step": 5071 }, { "epoch": 0.3817911515083084, "grad_norm": 3.665224552154541, "learning_rate": 7.103050232631134e-05, "loss": 1.8544, "step": 5072 }, { "epoch": 0.38186642578896857, "grad_norm": 4.011844158172607, "learning_rate": 7.101944161859908e-05, "loss": 2.2753, "step": 5073 }, { "epoch": 0.3819417000696287, "grad_norm": 4.224832534790039, "learning_rate": 7.100837966134133e-05, "loss": 1.8325, "step": 5074 }, { "epoch": 0.38201697435028886, "grad_norm": 6.464770317077637, "learning_rate": 7.099731645519568e-05, "loss": 2.0772, "step": 5075 }, { "epoch": 0.38209224863094904, "grad_norm": 6.836861610412598, "learning_rate": 7.098625200081982e-05, "loss": 2.2939, "step": 5076 }, { "epoch": 0.38216752291160916, "grad_norm": 3.214604377746582, "learning_rate": 7.09751862988715e-05, "loss": 1.9385, "step": 5077 }, { "epoch": 0.38224279719226933, "grad_norm": 5.163912773132324, "learning_rate": 7.096411935000853e-05, "loss": 1.881, "step": 5078 }, { "epoch": 0.3823180714729295, "grad_norm": 4.539270401000977, "learning_rate": 7.09530511548888e-05, "loss": 2.0638, "step": 5079 }, { "epoch": 0.38239334575358963, "grad_norm": 3.8244714736938477, "learning_rate": 7.094198171417032e-05, "loss": 1.9458, "step": 5080 }, { "epoch": 0.3824686200342498, "grad_norm": 3.7809698581695557, "learning_rate": 7.093091102851112e-05, "loss": 2.0929, "step": 5081 }, { "epoch": 0.3825438943149099, "grad_norm": 5.210256099700928, "learning_rate": 7.091983909856932e-05, "loss": 1.9799, "step": 5082 }, { "epoch": 0.3826191685955701, "grad_norm": 6.476984024047852, "learning_rate": 7.09087659250031e-05, "loss": 2.1878, "step": 5083 }, { "epoch": 0.3826944428762303, "grad_norm": 3.7477054595947266, "learning_rate": 7.089769150847075e-05, "loss": 2.2522, "step": 5084 }, { "epoch": 0.3827697171568904, "grad_norm": 4.948090553283691, "learning_rate": 7.08866158496306e-05, "loss": 2.1235, "step": 5085 }, { "epoch": 0.3828449914375506, "grad_norm": 4.929243087768555, "learning_rate": 7.087553894914107e-05, "loss": 2.1283, "step": 5086 }, { "epoch": 0.38292026571821075, "grad_norm": 6.278349876403809, "learning_rate": 7.086446080766063e-05, "loss": 2.1767, "step": 5087 }, { "epoch": 0.38299553999887087, "grad_norm": 5.265294075012207, "learning_rate": 7.085338142584788e-05, "loss": 2.2872, "step": 5088 }, { "epoch": 0.38307081427953105, "grad_norm": 4.189830303192139, "learning_rate": 7.084230080436142e-05, "loss": 1.9563, "step": 5089 }, { "epoch": 0.3831460885601912, "grad_norm": 3.9800798892974854, "learning_rate": 7.083121894386e-05, "loss": 1.8569, "step": 5090 }, { "epoch": 0.38322136284085134, "grad_norm": 5.580798625946045, "learning_rate": 7.082013584500238e-05, "loss": 1.9092, "step": 5091 }, { "epoch": 0.3832966371215115, "grad_norm": 4.221299648284912, "learning_rate": 7.080905150844742e-05, "loss": 1.7303, "step": 5092 }, { "epoch": 0.38337191140217164, "grad_norm": 4.8832011222839355, "learning_rate": 7.079796593485405e-05, "loss": 2.0758, "step": 5093 }, { "epoch": 0.3834471856828318, "grad_norm": 7.229682445526123, "learning_rate": 7.07868791248813e-05, "loss": 2.0081, "step": 5094 }, { "epoch": 0.383522459963492, "grad_norm": 6.102149486541748, "learning_rate": 7.077579107918821e-05, "loss": 2.0254, "step": 5095 }, { "epoch": 0.3835977342441521, "grad_norm": 5.362575054168701, "learning_rate": 7.076470179843396e-05, "loss": 2.1112, "step": 5096 }, { "epoch": 0.3836730085248123, "grad_norm": 4.909099102020264, "learning_rate": 7.075361128327777e-05, "loss": 2.067, "step": 5097 }, { "epoch": 0.38374828280547246, "grad_norm": 5.938938140869141, "learning_rate": 7.074251953437893e-05, "loss": 2.3223, "step": 5098 }, { "epoch": 0.3838235570861326, "grad_norm": 5.339906215667725, "learning_rate": 7.073142655239684e-05, "loss": 1.8494, "step": 5099 }, { "epoch": 0.38389883136679276, "grad_norm": 6.356232166290283, "learning_rate": 7.072033233799091e-05, "loss": 1.827, "step": 5100 }, { "epoch": 0.3839741056474529, "grad_norm": 4.398833751678467, "learning_rate": 7.07092368918207e-05, "loss": 2.1328, "step": 5101 }, { "epoch": 0.38404937992811305, "grad_norm": 7.725823879241943, "learning_rate": 7.069814021454576e-05, "loss": 1.9501, "step": 5102 }, { "epoch": 0.38412465420877323, "grad_norm": 4.908069133758545, "learning_rate": 7.068704230682576e-05, "loss": 2.18, "step": 5103 }, { "epoch": 0.38419992848943335, "grad_norm": 4.4095306396484375, "learning_rate": 7.067594316932047e-05, "loss": 1.8836, "step": 5104 }, { "epoch": 0.3842752027700935, "grad_norm": 5.494690895080566, "learning_rate": 7.066484280268968e-05, "loss": 1.8885, "step": 5105 }, { "epoch": 0.3843504770507537, "grad_norm": 4.715152263641357, "learning_rate": 7.06537412075933e-05, "loss": 2.1896, "step": 5106 }, { "epoch": 0.3844257513314138, "grad_norm": 3.8597936630249023, "learning_rate": 7.064263838469124e-05, "loss": 1.6586, "step": 5107 }, { "epoch": 0.384501025612074, "grad_norm": 5.016448020935059, "learning_rate": 7.063153433464356e-05, "loss": 1.9668, "step": 5108 }, { "epoch": 0.3845762998927342, "grad_norm": 6.7499566078186035, "learning_rate": 7.062042905811037e-05, "loss": 2.1402, "step": 5109 }, { "epoch": 0.3846515741733943, "grad_norm": 3.981473445892334, "learning_rate": 7.060932255575183e-05, "loss": 1.9862, "step": 5110 }, { "epoch": 0.38472684845405447, "grad_norm": 4.7544732093811035, "learning_rate": 7.059821482822821e-05, "loss": 1.7379, "step": 5111 }, { "epoch": 0.3848021227347146, "grad_norm": 4.64710807800293, "learning_rate": 7.058710587619982e-05, "loss": 1.9903, "step": 5112 }, { "epoch": 0.38487739701537477, "grad_norm": 4.240914344787598, "learning_rate": 7.057599570032706e-05, "loss": 1.9765, "step": 5113 }, { "epoch": 0.38495267129603494, "grad_norm": 4.636510848999023, "learning_rate": 7.056488430127039e-05, "loss": 1.9417, "step": 5114 }, { "epoch": 0.38502794557669506, "grad_norm": 3.8164968490600586, "learning_rate": 7.055377167969035e-05, "loss": 2.0084, "step": 5115 }, { "epoch": 0.38510321985735524, "grad_norm": 4.97038459777832, "learning_rate": 7.054265783624756e-05, "loss": 2.5179, "step": 5116 }, { "epoch": 0.3851784941380154, "grad_norm": 6.12393045425415, "learning_rate": 7.053154277160272e-05, "loss": 2.1607, "step": 5117 }, { "epoch": 0.38525376841867554, "grad_norm": 6.2277421951293945, "learning_rate": 7.052042648641656e-05, "loss": 2.1662, "step": 5118 }, { "epoch": 0.3853290426993357, "grad_norm": 4.501318454742432, "learning_rate": 7.050930898134993e-05, "loss": 1.7508, "step": 5119 }, { "epoch": 0.3854043169799959, "grad_norm": 4.644231796264648, "learning_rate": 7.049819025706373e-05, "loss": 2.449, "step": 5120 }, { "epoch": 0.385479591260656, "grad_norm": 4.109147548675537, "learning_rate": 7.048707031421893e-05, "loss": 1.8091, "step": 5121 }, { "epoch": 0.3855548655413162, "grad_norm": 4.13007926940918, "learning_rate": 7.047594915347658e-05, "loss": 1.6222, "step": 5122 }, { "epoch": 0.3856301398219763, "grad_norm": 8.340609550476074, "learning_rate": 7.046482677549782e-05, "loss": 1.9621, "step": 5123 }, { "epoch": 0.3857054141026365, "grad_norm": 3.7131779193878174, "learning_rate": 7.045370318094382e-05, "loss": 1.9834, "step": 5124 }, { "epoch": 0.38578068838329665, "grad_norm": 3.881448745727539, "learning_rate": 7.044257837047585e-05, "loss": 1.7421, "step": 5125 }, { "epoch": 0.3858559626639568, "grad_norm": 3.990791082382202, "learning_rate": 7.043145234475526e-05, "loss": 2.2056, "step": 5126 }, { "epoch": 0.38593123694461695, "grad_norm": 5.629392623901367, "learning_rate": 7.042032510444343e-05, "loss": 2.1236, "step": 5127 }, { "epoch": 0.3860065112252771, "grad_norm": 4.396368503570557, "learning_rate": 7.04091966502019e-05, "loss": 1.8319, "step": 5128 }, { "epoch": 0.38608178550593725, "grad_norm": 4.149318218231201, "learning_rate": 7.039806698269216e-05, "loss": 1.922, "step": 5129 }, { "epoch": 0.3861570597865974, "grad_norm": 3.678591251373291, "learning_rate": 7.038693610257588e-05, "loss": 2.5824, "step": 5130 }, { "epoch": 0.38623233406725754, "grad_norm": 3.483721971511841, "learning_rate": 7.037580401051474e-05, "loss": 1.9086, "step": 5131 }, { "epoch": 0.3863076083479177, "grad_norm": 4.043239593505859, "learning_rate": 7.036467070717053e-05, "loss": 2.0304, "step": 5132 }, { "epoch": 0.3863828826285779, "grad_norm": 5.411766529083252, "learning_rate": 7.035353619320507e-05, "loss": 2.0291, "step": 5133 }, { "epoch": 0.386458156909238, "grad_norm": 6.395378589630127, "learning_rate": 7.03424004692803e-05, "loss": 1.912, "step": 5134 }, { "epoch": 0.3865334311898982, "grad_norm": 5.284376621246338, "learning_rate": 7.033126353605817e-05, "loss": 2.0315, "step": 5135 }, { "epoch": 0.38660870547055837, "grad_norm": 11.093127250671387, "learning_rate": 7.032012539420076e-05, "loss": 1.8797, "step": 5136 }, { "epoch": 0.3866839797512185, "grad_norm": 4.60587739944458, "learning_rate": 7.03089860443702e-05, "loss": 2.2172, "step": 5137 }, { "epoch": 0.38675925403187866, "grad_norm": 4.815913200378418, "learning_rate": 7.029784548722871e-05, "loss": 2.1422, "step": 5138 }, { "epoch": 0.38683452831253884, "grad_norm": 3.66540789604187, "learning_rate": 7.028670372343853e-05, "loss": 1.8488, "step": 5139 }, { "epoch": 0.38690980259319896, "grad_norm": 5.9024248123168945, "learning_rate": 7.027556075366202e-05, "loss": 2.1104, "step": 5140 }, { "epoch": 0.38698507687385914, "grad_norm": 3.7674996852874756, "learning_rate": 7.026441657856162e-05, "loss": 1.8183, "step": 5141 }, { "epoch": 0.38706035115451926, "grad_norm": 6.17453670501709, "learning_rate": 7.025327119879979e-05, "loss": 2.0484, "step": 5142 }, { "epoch": 0.38713562543517943, "grad_norm": 3.8538362979888916, "learning_rate": 7.02421246150391e-05, "loss": 1.8584, "step": 5143 }, { "epoch": 0.3872108997158396, "grad_norm": 5.662238597869873, "learning_rate": 7.023097682794217e-05, "loss": 2.1748, "step": 5144 }, { "epoch": 0.3872861739964997, "grad_norm": 3.8990836143493652, "learning_rate": 7.021982783817172e-05, "loss": 1.8932, "step": 5145 }, { "epoch": 0.3873614482771599, "grad_norm": 3.45202374458313, "learning_rate": 7.020867764639054e-05, "loss": 2.1976, "step": 5146 }, { "epoch": 0.3874367225578201, "grad_norm": 7.609501838684082, "learning_rate": 7.019752625326145e-05, "loss": 2.2901, "step": 5147 }, { "epoch": 0.3875119968384802, "grad_norm": 4.57600212097168, "learning_rate": 7.018637365944737e-05, "loss": 1.9127, "step": 5148 }, { "epoch": 0.3875872711191404, "grad_norm": 5.195226192474365, "learning_rate": 7.017521986561131e-05, "loss": 1.7377, "step": 5149 }, { "epoch": 0.3876625453998005, "grad_norm": 4.835358619689941, "learning_rate": 7.016406487241632e-05, "loss": 1.7477, "step": 5150 }, { "epoch": 0.38773781968046067, "grad_norm": 5.3273539543151855, "learning_rate": 7.01529086805255e-05, "loss": 1.6292, "step": 5151 }, { "epoch": 0.38781309396112085, "grad_norm": 4.562398910522461, "learning_rate": 7.014175129060212e-05, "loss": 1.8722, "step": 5152 }, { "epoch": 0.38788836824178097, "grad_norm": 3.4114813804626465, "learning_rate": 7.01305927033094e-05, "loss": 1.9809, "step": 5153 }, { "epoch": 0.38796364252244114, "grad_norm": 5.234886646270752, "learning_rate": 7.011943291931071e-05, "loss": 2.1683, "step": 5154 }, { "epoch": 0.3880389168031013, "grad_norm": 4.3607707023620605, "learning_rate": 7.010827193926947e-05, "loss": 1.7392, "step": 5155 }, { "epoch": 0.38811419108376144, "grad_norm": 4.793964862823486, "learning_rate": 7.009710976384915e-05, "loss": 1.9888, "step": 5156 }, { "epoch": 0.3881894653644216, "grad_norm": 3.9515368938446045, "learning_rate": 7.008594639371333e-05, "loss": 2.1223, "step": 5157 }, { "epoch": 0.3882647396450818, "grad_norm": 4.231078147888184, "learning_rate": 7.00747818295256e-05, "loss": 2.0826, "step": 5158 }, { "epoch": 0.3883400139257419, "grad_norm": 3.7947447299957275, "learning_rate": 7.006361607194972e-05, "loss": 1.8089, "step": 5159 }, { "epoch": 0.3884152882064021, "grad_norm": 5.510124206542969, "learning_rate": 7.005244912164942e-05, "loss": 2.1654, "step": 5160 }, { "epoch": 0.3884905624870622, "grad_norm": 3.541808605194092, "learning_rate": 7.004128097928857e-05, "loss": 1.8131, "step": 5161 }, { "epoch": 0.3885658367677224, "grad_norm": 5.766353607177734, "learning_rate": 7.003011164553107e-05, "loss": 1.8511, "step": 5162 }, { "epoch": 0.38864111104838256, "grad_norm": 4.433131217956543, "learning_rate": 7.00189411210409e-05, "loss": 1.8669, "step": 5163 }, { "epoch": 0.3887163853290427, "grad_norm": 4.663097858428955, "learning_rate": 7.000776940648213e-05, "loss": 2.4336, "step": 5164 }, { "epoch": 0.38879165960970286, "grad_norm": 5.722248077392578, "learning_rate": 6.999659650251885e-05, "loss": 2.1351, "step": 5165 }, { "epoch": 0.38886693389036303, "grad_norm": 3.8639774322509766, "learning_rate": 6.998542240981531e-05, "loss": 1.7186, "step": 5166 }, { "epoch": 0.38894220817102315, "grad_norm": 3.6881942749023438, "learning_rate": 6.997424712903576e-05, "loss": 1.6883, "step": 5167 }, { "epoch": 0.3890174824516833, "grad_norm": 3.686176061630249, "learning_rate": 6.99630706608445e-05, "loss": 2.6687, "step": 5168 }, { "epoch": 0.3890927567323435, "grad_norm": 3.8122544288635254, "learning_rate": 6.995189300590599e-05, "loss": 2.1757, "step": 5169 }, { "epoch": 0.3891680310130036, "grad_norm": 3.9678571224212646, "learning_rate": 6.994071416488468e-05, "loss": 1.5493, "step": 5170 }, { "epoch": 0.3892433052936638, "grad_norm": 4.851013660430908, "learning_rate": 6.992953413844514e-05, "loss": 2.1696, "step": 5171 }, { "epoch": 0.3893185795743239, "grad_norm": 4.309285640716553, "learning_rate": 6.991835292725197e-05, "loss": 1.8684, "step": 5172 }, { "epoch": 0.3893938538549841, "grad_norm": 5.811774253845215, "learning_rate": 6.990717053196987e-05, "loss": 1.7465, "step": 5173 }, { "epoch": 0.38946912813564427, "grad_norm": 5.433722019195557, "learning_rate": 6.989598695326363e-05, "loss": 2.09, "step": 5174 }, { "epoch": 0.3895444024163044, "grad_norm": 4.857303142547607, "learning_rate": 6.988480219179801e-05, "loss": 2.0593, "step": 5175 }, { "epoch": 0.38961967669696457, "grad_norm": 4.678370475769043, "learning_rate": 6.987361624823797e-05, "loss": 2.125, "step": 5176 }, { "epoch": 0.38969495097762474, "grad_norm": 4.956774711608887, "learning_rate": 6.986242912324847e-05, "loss": 1.8494, "step": 5177 }, { "epoch": 0.38977022525828486, "grad_norm": 5.266158103942871, "learning_rate": 6.985124081749457e-05, "loss": 2.9335, "step": 5178 }, { "epoch": 0.38984549953894504, "grad_norm": 4.795161724090576, "learning_rate": 6.984005133164134e-05, "loss": 1.8209, "step": 5179 }, { "epoch": 0.38992077381960516, "grad_norm": 4.719874382019043, "learning_rate": 6.982886066635399e-05, "loss": 2.1454, "step": 5180 }, { "epoch": 0.38999604810026534, "grad_norm": 5.314960956573486, "learning_rate": 6.981766882229777e-05, "loss": 2.0495, "step": 5181 }, { "epoch": 0.3900713223809255, "grad_norm": 5.239182949066162, "learning_rate": 6.9806475800138e-05, "loss": 2.215, "step": 5182 }, { "epoch": 0.39014659666158563, "grad_norm": 6.457382678985596, "learning_rate": 6.979528160054009e-05, "loss": 1.8731, "step": 5183 }, { "epoch": 0.3902218709422458, "grad_norm": 4.513756275177002, "learning_rate": 6.978408622416948e-05, "loss": 2.2722, "step": 5184 }, { "epoch": 0.390297145222906, "grad_norm": 4.637709617614746, "learning_rate": 6.977288967169171e-05, "loss": 1.953, "step": 5185 }, { "epoch": 0.3903724195035661, "grad_norm": 4.369754791259766, "learning_rate": 6.976169194377238e-05, "loss": 2.1536, "step": 5186 }, { "epoch": 0.3904476937842263, "grad_norm": 5.093334197998047, "learning_rate": 6.975049304107716e-05, "loss": 2.1538, "step": 5187 }, { "epoch": 0.39052296806488646, "grad_norm": 3.9879164695739746, "learning_rate": 6.973929296427181e-05, "loss": 1.7484, "step": 5188 }, { "epoch": 0.3905982423455466, "grad_norm": 6.970964431762695, "learning_rate": 6.972809171402213e-05, "loss": 2.1684, "step": 5189 }, { "epoch": 0.39067351662620675, "grad_norm": 6.39085054397583, "learning_rate": 6.9716889290994e-05, "loss": 1.8039, "step": 5190 }, { "epoch": 0.39074879090686687, "grad_norm": 4.7798638343811035, "learning_rate": 6.970568569585338e-05, "loss": 1.8142, "step": 5191 }, { "epoch": 0.39082406518752705, "grad_norm": 4.437145709991455, "learning_rate": 6.969448092926629e-05, "loss": 1.9917, "step": 5192 }, { "epoch": 0.3908993394681872, "grad_norm": 4.2501630783081055, "learning_rate": 6.968327499189879e-05, "loss": 1.9556, "step": 5193 }, { "epoch": 0.39097461374884734, "grad_norm": 4.457305431365967, "learning_rate": 6.96720678844171e-05, "loss": 1.986, "step": 5194 }, { "epoch": 0.3910498880295075, "grad_norm": 4.49616813659668, "learning_rate": 6.966085960748741e-05, "loss": 2.2419, "step": 5195 }, { "epoch": 0.3911251623101677, "grad_norm": 4.975657939910889, "learning_rate": 6.964965016177603e-05, "loss": 1.7282, "step": 5196 }, { "epoch": 0.3912004365908278, "grad_norm": 4.2967987060546875, "learning_rate": 6.963843954794935e-05, "loss": 2.0065, "step": 5197 }, { "epoch": 0.391275710871488, "grad_norm": 4.789773941040039, "learning_rate": 6.962722776667376e-05, "loss": 2.0844, "step": 5198 }, { "epoch": 0.39135098515214817, "grad_norm": 6.244070529937744, "learning_rate": 6.961601481861581e-05, "loss": 1.9259, "step": 5199 }, { "epoch": 0.3914262594328083, "grad_norm": 9.858591079711914, "learning_rate": 6.960480070444205e-05, "loss": 1.8169, "step": 5200 }, { "epoch": 0.39150153371346846, "grad_norm": 4.541567802429199, "learning_rate": 6.959358542481915e-05, "loss": 2.2345, "step": 5201 }, { "epoch": 0.3915768079941286, "grad_norm": 3.4116322994232178, "learning_rate": 6.958236898041382e-05, "loss": 1.9152, "step": 5202 }, { "epoch": 0.39165208227478876, "grad_norm": 6.525388717651367, "learning_rate": 6.957115137189285e-05, "loss": 2.0367, "step": 5203 }, { "epoch": 0.39172735655544894, "grad_norm": 4.803116798400879, "learning_rate": 6.955993259992306e-05, "loss": 2.0156, "step": 5204 }, { "epoch": 0.39180263083610906, "grad_norm": 3.4431774616241455, "learning_rate": 6.954871266517143e-05, "loss": 1.9005, "step": 5205 }, { "epoch": 0.39187790511676923, "grad_norm": 4.246635437011719, "learning_rate": 6.95374915683049e-05, "loss": 2.4181, "step": 5206 }, { "epoch": 0.3919531793974294, "grad_norm": 5.497817039489746, "learning_rate": 6.952626930999058e-05, "loss": 1.8896, "step": 5207 }, { "epoch": 0.39202845367808953, "grad_norm": 3.530252695083618, "learning_rate": 6.951504589089555e-05, "loss": 2.2551, "step": 5208 }, { "epoch": 0.3921037279587497, "grad_norm": 5.155354976654053, "learning_rate": 6.950382131168705e-05, "loss": 1.8505, "step": 5209 }, { "epoch": 0.3921790022394098, "grad_norm": 5.639703750610352, "learning_rate": 6.949259557303233e-05, "loss": 2.0545, "step": 5210 }, { "epoch": 0.39225427652007, "grad_norm": 3.528696060180664, "learning_rate": 6.948136867559874e-05, "loss": 1.894, "step": 5211 }, { "epoch": 0.3923295508007302, "grad_norm": 4.529226779937744, "learning_rate": 6.947014062005368e-05, "loss": 2.0473, "step": 5212 }, { "epoch": 0.3924048250813903, "grad_norm": 3.6226611137390137, "learning_rate": 6.945891140706462e-05, "loss": 1.957, "step": 5213 }, { "epoch": 0.39248009936205047, "grad_norm": 4.451925754547119, "learning_rate": 6.944768103729913e-05, "loss": 2.2745, "step": 5214 }, { "epoch": 0.39255537364271065, "grad_norm": 4.6399054527282715, "learning_rate": 6.943644951142478e-05, "loss": 2.1126, "step": 5215 }, { "epoch": 0.39263064792337077, "grad_norm": 4.229841709136963, "learning_rate": 6.942521683010928e-05, "loss": 2.0751, "step": 5216 }, { "epoch": 0.39270592220403094, "grad_norm": 4.860999584197998, "learning_rate": 6.941398299402039e-05, "loss": 1.9959, "step": 5217 }, { "epoch": 0.3927811964846911, "grad_norm": 6.332437515258789, "learning_rate": 6.940274800382591e-05, "loss": 1.9549, "step": 5218 }, { "epoch": 0.39285647076535124, "grad_norm": 4.228542327880859, "learning_rate": 6.939151186019373e-05, "loss": 1.6997, "step": 5219 }, { "epoch": 0.3929317450460114, "grad_norm": 6.465989589691162, "learning_rate": 6.938027456379182e-05, "loss": 1.8414, "step": 5220 }, { "epoch": 0.39300701932667154, "grad_norm": 3.890904188156128, "learning_rate": 6.936903611528818e-05, "loss": 2.1604, "step": 5221 }, { "epoch": 0.3930822936073317, "grad_norm": 4.503025054931641, "learning_rate": 6.935779651535093e-05, "loss": 2.1503, "step": 5222 }, { "epoch": 0.3931575678879919, "grad_norm": 4.028709888458252, "learning_rate": 6.934655576464822e-05, "loss": 1.9473, "step": 5223 }, { "epoch": 0.393232842168652, "grad_norm": 4.7539591789245605, "learning_rate": 6.933531386384827e-05, "loss": 1.6832, "step": 5224 }, { "epoch": 0.3933081164493122, "grad_norm": 4.165890216827393, "learning_rate": 6.93240708136194e-05, "loss": 1.7192, "step": 5225 }, { "epoch": 0.39338339072997236, "grad_norm": 4.968687534332275, "learning_rate": 6.931282661462997e-05, "loss": 1.9188, "step": 5226 }, { "epoch": 0.3934586650106325, "grad_norm": 4.237826824188232, "learning_rate": 6.93015812675484e-05, "loss": 2.2298, "step": 5227 }, { "epoch": 0.39353393929129266, "grad_norm": 5.553686141967773, "learning_rate": 6.929033477304323e-05, "loss": 2.0493, "step": 5228 }, { "epoch": 0.3936092135719528, "grad_norm": 7.572820663452148, "learning_rate": 6.927908713178299e-05, "loss": 2.5351, "step": 5229 }, { "epoch": 0.39368448785261295, "grad_norm": 3.9996278285980225, "learning_rate": 6.926783834443634e-05, "loss": 1.9888, "step": 5230 }, { "epoch": 0.39375976213327313, "grad_norm": 5.510247707366943, "learning_rate": 6.925658841167197e-05, "loss": 2.1663, "step": 5231 }, { "epoch": 0.39383503641393325, "grad_norm": 6.237199306488037, "learning_rate": 6.92453373341587e-05, "loss": 1.8505, "step": 5232 }, { "epoch": 0.3939103106945934, "grad_norm": 4.817835807800293, "learning_rate": 6.923408511256533e-05, "loss": 1.8843, "step": 5233 }, { "epoch": 0.3939855849752536, "grad_norm": 6.970422267913818, "learning_rate": 6.922283174756081e-05, "loss": 1.7876, "step": 5234 }, { "epoch": 0.3940608592559137, "grad_norm": 4.317952632904053, "learning_rate": 6.921157723981408e-05, "loss": 1.9553, "step": 5235 }, { "epoch": 0.3941361335365739, "grad_norm": 5.635189533233643, "learning_rate": 6.920032158999422e-05, "loss": 1.6901, "step": 5236 }, { "epoch": 0.39421140781723407, "grad_norm": 5.372305870056152, "learning_rate": 6.918906479877032e-05, "loss": 1.9993, "step": 5237 }, { "epoch": 0.3942866820978942, "grad_norm": 4.717647075653076, "learning_rate": 6.917780686681158e-05, "loss": 2.064, "step": 5238 }, { "epoch": 0.39436195637855437, "grad_norm": 5.763543128967285, "learning_rate": 6.916654779478725e-05, "loss": 1.942, "step": 5239 }, { "epoch": 0.3944372306592145, "grad_norm": 4.852224349975586, "learning_rate": 6.915528758336665e-05, "loss": 2.0205, "step": 5240 }, { "epoch": 0.39451250493987466, "grad_norm": 6.8463358879089355, "learning_rate": 6.914402623321916e-05, "loss": 2.2348, "step": 5241 }, { "epoch": 0.39458777922053484, "grad_norm": 6.190590858459473, "learning_rate": 6.913276374501425e-05, "loss": 2.3121, "step": 5242 }, { "epoch": 0.39466305350119496, "grad_norm": 5.485639572143555, "learning_rate": 6.912150011942143e-05, "loss": 1.8485, "step": 5243 }, { "epoch": 0.39473832778185514, "grad_norm": 5.057278156280518, "learning_rate": 6.911023535711029e-05, "loss": 2.0078, "step": 5244 }, { "epoch": 0.3948136020625153, "grad_norm": 4.9163665771484375, "learning_rate": 6.90989694587505e-05, "loss": 1.8428, "step": 5245 }, { "epoch": 0.39488887634317543, "grad_norm": 5.255848407745361, "learning_rate": 6.908770242501176e-05, "loss": 1.9317, "step": 5246 }, { "epoch": 0.3949641506238356, "grad_norm": 4.284176826477051, "learning_rate": 6.90764342565639e-05, "loss": 2.1024, "step": 5247 }, { "epoch": 0.3950394249044958, "grad_norm": 5.583375453948975, "learning_rate": 6.906516495407674e-05, "loss": 1.703, "step": 5248 }, { "epoch": 0.3951146991851559, "grad_norm": 5.231213569641113, "learning_rate": 6.905389451822024e-05, "loss": 2.2031, "step": 5249 }, { "epoch": 0.3951899734658161, "grad_norm": 5.470889091491699, "learning_rate": 6.90426229496644e-05, "loss": 2.0737, "step": 5250 }, { "epoch": 0.3952652477464762, "grad_norm": 6.145623683929443, "learning_rate": 6.903135024907923e-05, "loss": 2.2188, "step": 5251 }, { "epoch": 0.3953405220271364, "grad_norm": 3.9044687747955322, "learning_rate": 6.902007641713492e-05, "loss": 2.0435, "step": 5252 }, { "epoch": 0.39541579630779655, "grad_norm": 3.5779004096984863, "learning_rate": 6.900880145450163e-05, "loss": 1.8906, "step": 5253 }, { "epoch": 0.3954910705884567, "grad_norm": 3.3963522911071777, "learning_rate": 6.899752536184964e-05, "loss": 1.9346, "step": 5254 }, { "epoch": 0.39556634486911685, "grad_norm": 4.952561855316162, "learning_rate": 6.898624813984928e-05, "loss": 2.2904, "step": 5255 }, { "epoch": 0.395641619149777, "grad_norm": 3.7818069458007812, "learning_rate": 6.897496978917093e-05, "loss": 2.1326, "step": 5256 }, { "epoch": 0.39571689343043714, "grad_norm": 5.169959545135498, "learning_rate": 6.896369031048508e-05, "loss": 2.3314, "step": 5257 }, { "epoch": 0.3957921677110973, "grad_norm": 5.590060234069824, "learning_rate": 6.895240970446226e-05, "loss": 2.2577, "step": 5258 }, { "epoch": 0.39586744199175744, "grad_norm": 5.482585906982422, "learning_rate": 6.894112797177306e-05, "loss": 2.1709, "step": 5259 }, { "epoch": 0.3959427162724176, "grad_norm": 5.6133036613464355, "learning_rate": 6.892984511308814e-05, "loss": 1.9704, "step": 5260 }, { "epoch": 0.3960179905530778, "grad_norm": 4.960308074951172, "learning_rate": 6.891856112907827e-05, "loss": 1.8028, "step": 5261 }, { "epoch": 0.3960932648337379, "grad_norm": 5.726239204406738, "learning_rate": 6.89072760204142e-05, "loss": 1.9112, "step": 5262 }, { "epoch": 0.3961685391143981, "grad_norm": 5.848134994506836, "learning_rate": 6.889598978776684e-05, "loss": 1.7987, "step": 5263 }, { "epoch": 0.39624381339505826, "grad_norm": 3.87086820602417, "learning_rate": 6.88847024318071e-05, "loss": 1.9066, "step": 5264 }, { "epoch": 0.3963190876757184, "grad_norm": 4.969845294952393, "learning_rate": 6.887341395320597e-05, "loss": 1.9808, "step": 5265 }, { "epoch": 0.39639436195637856, "grad_norm": 3.9948718547821045, "learning_rate": 6.886212435263455e-05, "loss": 2.1369, "step": 5266 }, { "epoch": 0.39646963623703874, "grad_norm": 4.366805553436279, "learning_rate": 6.885083363076395e-05, "loss": 1.9568, "step": 5267 }, { "epoch": 0.39654491051769886, "grad_norm": 4.778541564941406, "learning_rate": 6.88395417882654e-05, "loss": 1.6151, "step": 5268 }, { "epoch": 0.39662018479835903, "grad_norm": 4.081904888153076, "learning_rate": 6.882824882581013e-05, "loss": 2.0432, "step": 5269 }, { "epoch": 0.39669545907901915, "grad_norm": 6.065483570098877, "learning_rate": 6.881695474406949e-05, "loss": 1.6471, "step": 5270 }, { "epoch": 0.39677073335967933, "grad_norm": 3.967003345489502, "learning_rate": 6.880565954371489e-05, "loss": 1.7362, "step": 5271 }, { "epoch": 0.3968460076403395, "grad_norm": 4.717532157897949, "learning_rate": 6.87943632254178e-05, "loss": 2.0467, "step": 5272 }, { "epoch": 0.3969212819209996, "grad_norm": 4.1001176834106445, "learning_rate": 6.87830657898497e-05, "loss": 1.6473, "step": 5273 }, { "epoch": 0.3969965562016598, "grad_norm": 4.454771995544434, "learning_rate": 6.877176723768228e-05, "loss": 1.6812, "step": 5274 }, { "epoch": 0.39707183048232, "grad_norm": 7.681920051574707, "learning_rate": 6.876046756958715e-05, "loss": 2.2034, "step": 5275 }, { "epoch": 0.3971471047629801, "grad_norm": 5.013684272766113, "learning_rate": 6.874916678623603e-05, "loss": 1.9127, "step": 5276 }, { "epoch": 0.3972223790436403, "grad_norm": 6.265958786010742, "learning_rate": 6.873786488830076e-05, "loss": 2.0918, "step": 5277 }, { "epoch": 0.3972976533243004, "grad_norm": 4.134488105773926, "learning_rate": 6.87265618764532e-05, "loss": 1.9597, "step": 5278 }, { "epoch": 0.39737292760496057, "grad_norm": 5.5006103515625, "learning_rate": 6.871525775136525e-05, "loss": 1.9306, "step": 5279 }, { "epoch": 0.39744820188562074, "grad_norm": 6.579580783843994, "learning_rate": 6.870395251370894e-05, "loss": 2.1004, "step": 5280 }, { "epoch": 0.39752347616628086, "grad_norm": 3.9398036003112793, "learning_rate": 6.869264616415629e-05, "loss": 1.7839, "step": 5281 }, { "epoch": 0.39759875044694104, "grad_norm": 5.8119401931762695, "learning_rate": 6.86813387033795e-05, "loss": 2.0025, "step": 5282 }, { "epoch": 0.3976740247276012, "grad_norm": 4.4750494956970215, "learning_rate": 6.86700301320507e-05, "loss": 2.4783, "step": 5283 }, { "epoch": 0.39774929900826134, "grad_norm": 5.226348876953125, "learning_rate": 6.865872045084219e-05, "loss": 2.0954, "step": 5284 }, { "epoch": 0.3978245732889215, "grad_norm": 4.446264266967773, "learning_rate": 6.864740966042629e-05, "loss": 1.9985, "step": 5285 }, { "epoch": 0.3978998475695817, "grad_norm": 5.967496395111084, "learning_rate": 6.863609776147539e-05, "loss": 1.678, "step": 5286 }, { "epoch": 0.3979751218502418, "grad_norm": 3.8630638122558594, "learning_rate": 6.862478475466194e-05, "loss": 1.8381, "step": 5287 }, { "epoch": 0.398050396130902, "grad_norm": 4.604032516479492, "learning_rate": 6.861347064065849e-05, "loss": 2.0495, "step": 5288 }, { "epoch": 0.3981256704115621, "grad_norm": 7.21726131439209, "learning_rate": 6.860215542013761e-05, "loss": 1.9833, "step": 5289 }, { "epoch": 0.3982009446922223, "grad_norm": 9.447072982788086, "learning_rate": 6.859083909377197e-05, "loss": 2.147, "step": 5290 }, { "epoch": 0.39827621897288246, "grad_norm": 5.315767765045166, "learning_rate": 6.857952166223429e-05, "loss": 2.1883, "step": 5291 }, { "epoch": 0.3983514932535426, "grad_norm": 7.571106910705566, "learning_rate": 6.856820312619735e-05, "loss": 1.9042, "step": 5292 }, { "epoch": 0.39842676753420275, "grad_norm": 3.8130648136138916, "learning_rate": 6.855688348633402e-05, "loss": 1.8368, "step": 5293 }, { "epoch": 0.39850204181486293, "grad_norm": 4.890517711639404, "learning_rate": 6.85455627433172e-05, "loss": 1.7831, "step": 5294 }, { "epoch": 0.39857731609552305, "grad_norm": 5.495864391326904, "learning_rate": 6.85342408978199e-05, "loss": 1.6298, "step": 5295 }, { "epoch": 0.3986525903761832, "grad_norm": 5.847315788269043, "learning_rate": 6.852291795051518e-05, "loss": 2.0748, "step": 5296 }, { "epoch": 0.3987278646568434, "grad_norm": 4.205320835113525, "learning_rate": 6.851159390207611e-05, "loss": 2.2681, "step": 5297 }, { "epoch": 0.3988031389375035, "grad_norm": 4.053877830505371, "learning_rate": 6.85002687531759e-05, "loss": 1.7144, "step": 5298 }, { "epoch": 0.3988784132181637, "grad_norm": 5.00118350982666, "learning_rate": 6.848894250448778e-05, "loss": 1.7858, "step": 5299 }, { "epoch": 0.3989536874988238, "grad_norm": 4.14704704284668, "learning_rate": 6.847761515668511e-05, "loss": 1.6874, "step": 5300 }, { "epoch": 0.399028961779484, "grad_norm": 3.2100651264190674, "learning_rate": 6.846628671044121e-05, "loss": 1.7546, "step": 5301 }, { "epoch": 0.39910423606014417, "grad_norm": 5.690414905548096, "learning_rate": 6.845495716642958e-05, "loss": 2.5446, "step": 5302 }, { "epoch": 0.3991795103408043, "grad_norm": 7.436687469482422, "learning_rate": 6.844362652532368e-05, "loss": 1.8027, "step": 5303 }, { "epoch": 0.39925478462146446, "grad_norm": 4.503210067749023, "learning_rate": 6.843229478779712e-05, "loss": 2.0429, "step": 5304 }, { "epoch": 0.39933005890212464, "grad_norm": 4.755377769470215, "learning_rate": 6.84209619545235e-05, "loss": 2.2132, "step": 5305 }, { "epoch": 0.39940533318278476, "grad_norm": 3.7669692039489746, "learning_rate": 6.840962802617656e-05, "loss": 1.5255, "step": 5306 }, { "epoch": 0.39948060746344494, "grad_norm": 4.223198413848877, "learning_rate": 6.839829300343006e-05, "loss": 2.0126, "step": 5307 }, { "epoch": 0.39955588174410506, "grad_norm": 4.116264820098877, "learning_rate": 6.838695688695782e-05, "loss": 1.6213, "step": 5308 }, { "epoch": 0.39963115602476523, "grad_norm": 8.226816177368164, "learning_rate": 6.837561967743375e-05, "loss": 2.1123, "step": 5309 }, { "epoch": 0.3997064303054254, "grad_norm": 4.63575553894043, "learning_rate": 6.836428137553184e-05, "loss": 1.7832, "step": 5310 }, { "epoch": 0.39978170458608553, "grad_norm": 5.650008678436279, "learning_rate": 6.835294198192608e-05, "loss": 1.6598, "step": 5311 }, { "epoch": 0.3998569788667457, "grad_norm": 6.670443534851074, "learning_rate": 6.834160149729058e-05, "loss": 1.846, "step": 5312 }, { "epoch": 0.3999322531474059, "grad_norm": 4.246932029724121, "learning_rate": 6.83302599222995e-05, "loss": 2.2101, "step": 5313 }, { "epoch": 0.400007527428066, "grad_norm": 5.359315872192383, "learning_rate": 6.831891725762706e-05, "loss": 2.1341, "step": 5314 }, { "epoch": 0.4000828017087262, "grad_norm": 4.920767307281494, "learning_rate": 6.830757350394755e-05, "loss": 2.1021, "step": 5315 }, { "epoch": 0.40015807598938635, "grad_norm": 5.670407772064209, "learning_rate": 6.829622866193532e-05, "loss": 1.9613, "step": 5316 }, { "epoch": 0.4002333502700465, "grad_norm": 4.997689247131348, "learning_rate": 6.82848827322648e-05, "loss": 2.076, "step": 5317 }, { "epoch": 0.40030862455070665, "grad_norm": 3.870497703552246, "learning_rate": 6.827353571561048e-05, "loss": 1.705, "step": 5318 }, { "epoch": 0.40038389883136677, "grad_norm": 3.82179594039917, "learning_rate": 6.826218761264689e-05, "loss": 1.7637, "step": 5319 }, { "epoch": 0.40045917311202694, "grad_norm": 3.977510690689087, "learning_rate": 6.825083842404862e-05, "loss": 2.0026, "step": 5320 }, { "epoch": 0.4005344473926871, "grad_norm": 3.6797702312469482, "learning_rate": 6.823948815049039e-05, "loss": 1.9066, "step": 5321 }, { "epoch": 0.40060972167334724, "grad_norm": 4.414495944976807, "learning_rate": 6.822813679264694e-05, "loss": 1.7956, "step": 5322 }, { "epoch": 0.4006849959540074, "grad_norm": 3.931663990020752, "learning_rate": 6.821678435119304e-05, "loss": 1.7816, "step": 5323 }, { "epoch": 0.4007602702346676, "grad_norm": 5.739896297454834, "learning_rate": 6.82054308268036e-05, "loss": 2.4555, "step": 5324 }, { "epoch": 0.4008355445153277, "grad_norm": 4.897427558898926, "learning_rate": 6.819407622015353e-05, "loss": 2.11, "step": 5325 }, { "epoch": 0.4009108187959879, "grad_norm": 4.48984432220459, "learning_rate": 6.818272053191781e-05, "loss": 1.7368, "step": 5326 }, { "epoch": 0.400986093076648, "grad_norm": 4.326822757720947, "learning_rate": 6.817136376277154e-05, "loss": 1.8739, "step": 5327 }, { "epoch": 0.4010613673573082, "grad_norm": 4.570828437805176, "learning_rate": 6.816000591338985e-05, "loss": 1.8174, "step": 5328 }, { "epoch": 0.40113664163796836, "grad_norm": 4.024326801300049, "learning_rate": 6.81486469844479e-05, "loss": 1.655, "step": 5329 }, { "epoch": 0.4012119159186285, "grad_norm": 5.480904579162598, "learning_rate": 6.813728697662096e-05, "loss": 1.9133, "step": 5330 }, { "epoch": 0.40128719019928866, "grad_norm": 4.703399181365967, "learning_rate": 6.812592589058435e-05, "loss": 1.7196, "step": 5331 }, { "epoch": 0.40136246447994883, "grad_norm": 5.78317928314209, "learning_rate": 6.811456372701345e-05, "loss": 2.415, "step": 5332 }, { "epoch": 0.40143773876060895, "grad_norm": 4.7734270095825195, "learning_rate": 6.810320048658372e-05, "loss": 2.0948, "step": 5333 }, { "epoch": 0.40151301304126913, "grad_norm": 5.654014587402344, "learning_rate": 6.809183616997068e-05, "loss": 2.3321, "step": 5334 }, { "epoch": 0.4015882873219293, "grad_norm": 7.924801349639893, "learning_rate": 6.808047077784988e-05, "loss": 1.8561, "step": 5335 }, { "epoch": 0.4016635616025894, "grad_norm": 5.06160306930542, "learning_rate": 6.806910431089696e-05, "loss": 1.6722, "step": 5336 }, { "epoch": 0.4017388358832496, "grad_norm": 4.080567359924316, "learning_rate": 6.805773676978764e-05, "loss": 1.8174, "step": 5337 }, { "epoch": 0.4018141101639097, "grad_norm": 4.801535129547119, "learning_rate": 6.804636815519768e-05, "loss": 1.9026, "step": 5338 }, { "epoch": 0.4018893844445699, "grad_norm": 4.634988784790039, "learning_rate": 6.803499846780292e-05, "loss": 2.2378, "step": 5339 }, { "epoch": 0.4019646587252301, "grad_norm": 4.98685359954834, "learning_rate": 6.802362770827926e-05, "loss": 2.1058, "step": 5340 }, { "epoch": 0.4020399330058902, "grad_norm": 4.1788153648376465, "learning_rate": 6.801225587730263e-05, "loss": 1.6515, "step": 5341 }, { "epoch": 0.40211520728655037, "grad_norm": 3.9120664596557617, "learning_rate": 6.800088297554908e-05, "loss": 1.9521, "step": 5342 }, { "epoch": 0.40219048156721054, "grad_norm": 5.457016944885254, "learning_rate": 6.798950900369469e-05, "loss": 1.9686, "step": 5343 }, { "epoch": 0.40226575584787067, "grad_norm": 3.8646645545959473, "learning_rate": 6.79781339624156e-05, "loss": 2.0856, "step": 5344 }, { "epoch": 0.40234103012853084, "grad_norm": 4.47550630569458, "learning_rate": 6.796675785238804e-05, "loss": 2.0627, "step": 5345 }, { "epoch": 0.402416304409191, "grad_norm": 3.247621536254883, "learning_rate": 6.795538067428827e-05, "loss": 1.7987, "step": 5346 }, { "epoch": 0.40249157868985114, "grad_norm": 4.706051826477051, "learning_rate": 6.794400242879264e-05, "loss": 1.7926, "step": 5347 }, { "epoch": 0.4025668529705113, "grad_norm": 4.113757133483887, "learning_rate": 6.793262311657755e-05, "loss": 1.6333, "step": 5348 }, { "epoch": 0.40264212725117143, "grad_norm": 4.506556987762451, "learning_rate": 6.792124273831948e-05, "loss": 2.2044, "step": 5349 }, { "epoch": 0.4027174015318316, "grad_norm": 4.144906520843506, "learning_rate": 6.790986129469494e-05, "loss": 2.0494, "step": 5350 }, { "epoch": 0.4027926758124918, "grad_norm": 5.415107250213623, "learning_rate": 6.789847878638054e-05, "loss": 2.0532, "step": 5351 }, { "epoch": 0.4028679500931519, "grad_norm": 4.5909342765808105, "learning_rate": 6.788709521405295e-05, "loss": 2.2343, "step": 5352 }, { "epoch": 0.4029432243738121, "grad_norm": 3.308436870574951, "learning_rate": 6.787571057838884e-05, "loss": 2.1883, "step": 5353 }, { "epoch": 0.40301849865447226, "grad_norm": 5.77985143661499, "learning_rate": 6.786432488006503e-05, "loss": 2.0106, "step": 5354 }, { "epoch": 0.4030937729351324, "grad_norm": 5.094431400299072, "learning_rate": 6.785293811975838e-05, "loss": 1.9518, "step": 5355 }, { "epoch": 0.40316904721579255, "grad_norm": 3.918471574783325, "learning_rate": 6.784155029814579e-05, "loss": 1.6179, "step": 5356 }, { "epoch": 0.4032443214964527, "grad_norm": 3.814473867416382, "learning_rate": 6.783016141590422e-05, "loss": 2.183, "step": 5357 }, { "epoch": 0.40331959577711285, "grad_norm": 3.6349940299987793, "learning_rate": 6.781877147371071e-05, "loss": 1.9747, "step": 5358 }, { "epoch": 0.403394870057773, "grad_norm": 3.963618516921997, "learning_rate": 6.780738047224237e-05, "loss": 2.094, "step": 5359 }, { "epoch": 0.40347014433843315, "grad_norm": 5.607470989227295, "learning_rate": 6.779598841217636e-05, "loss": 2.0079, "step": 5360 }, { "epoch": 0.4035454186190933, "grad_norm": 4.573211193084717, "learning_rate": 6.778459529418989e-05, "loss": 1.7159, "step": 5361 }, { "epoch": 0.4036206928997535, "grad_norm": 4.286177635192871, "learning_rate": 6.777320111896027e-05, "loss": 1.9202, "step": 5362 }, { "epoch": 0.4036959671804136, "grad_norm": 5.121272563934326, "learning_rate": 6.776180588716484e-05, "loss": 1.9964, "step": 5363 }, { "epoch": 0.4037712414610738, "grad_norm": 4.205358982086182, "learning_rate": 6.775040959948101e-05, "loss": 1.9531, "step": 5364 }, { "epoch": 0.40384651574173397, "grad_norm": 4.61204195022583, "learning_rate": 6.773901225658626e-05, "loss": 2.2701, "step": 5365 }, { "epoch": 0.4039217900223941, "grad_norm": 5.428807258605957, "learning_rate": 6.772761385915812e-05, "loss": 1.9785, "step": 5366 }, { "epoch": 0.40399706430305427, "grad_norm": 4.795749664306641, "learning_rate": 6.771621440787423e-05, "loss": 2.2442, "step": 5367 }, { "epoch": 0.4040723385837144, "grad_norm": 4.88709831237793, "learning_rate": 6.770481390341221e-05, "loss": 2.1068, "step": 5368 }, { "epoch": 0.40414761286437456, "grad_norm": 4.479779243469238, "learning_rate": 6.76934123464498e-05, "loss": 2.0085, "step": 5369 }, { "epoch": 0.40422288714503474, "grad_norm": 4.843325138092041, "learning_rate": 6.768200973766478e-05, "loss": 2.0468, "step": 5370 }, { "epoch": 0.40429816142569486, "grad_norm": 7.453130722045898, "learning_rate": 6.767060607773504e-05, "loss": 1.9619, "step": 5371 }, { "epoch": 0.40437343570635503, "grad_norm": 5.16350793838501, "learning_rate": 6.765920136733848e-05, "loss": 2.3995, "step": 5372 }, { "epoch": 0.4044487099870152, "grad_norm": 7.323816299438477, "learning_rate": 6.764779560715305e-05, "loss": 2.0284, "step": 5373 }, { "epoch": 0.40452398426767533, "grad_norm": 5.113553524017334, "learning_rate": 6.763638879785681e-05, "loss": 2.0464, "step": 5374 }, { "epoch": 0.4045992585483355, "grad_norm": 5.947402477264404, "learning_rate": 6.762498094012787e-05, "loss": 1.8261, "step": 5375 }, { "epoch": 0.4046745328289956, "grad_norm": 3.4446866512298584, "learning_rate": 6.761357203464436e-05, "loss": 1.7482, "step": 5376 }, { "epoch": 0.4047498071096558, "grad_norm": 4.794046878814697, "learning_rate": 6.760216208208456e-05, "loss": 1.9207, "step": 5377 }, { "epoch": 0.404825081390316, "grad_norm": 4.4714765548706055, "learning_rate": 6.759075108312672e-05, "loss": 1.7401, "step": 5378 }, { "epoch": 0.4049003556709761, "grad_norm": 4.635371208190918, "learning_rate": 6.757933903844921e-05, "loss": 1.9601, "step": 5379 }, { "epoch": 0.4049756299516363, "grad_norm": 5.2532877922058105, "learning_rate": 6.756792594873042e-05, "loss": 2.1482, "step": 5380 }, { "epoch": 0.40505090423229645, "grad_norm": 4.8074727058410645, "learning_rate": 6.755651181464885e-05, "loss": 2.0965, "step": 5381 }, { "epoch": 0.40512617851295657, "grad_norm": 6.030498027801514, "learning_rate": 6.754509663688303e-05, "loss": 2.0231, "step": 5382 }, { "epoch": 0.40520145279361675, "grad_norm": 4.379759788513184, "learning_rate": 6.753368041611156e-05, "loss": 1.7873, "step": 5383 }, { "epoch": 0.4052767270742769, "grad_norm": 5.034201622009277, "learning_rate": 6.752226315301309e-05, "loss": 1.7356, "step": 5384 }, { "epoch": 0.40535200135493704, "grad_norm": 5.684075355529785, "learning_rate": 6.751084484826635e-05, "loss": 2.012, "step": 5385 }, { "epoch": 0.4054272756355972, "grad_norm": 6.668386936187744, "learning_rate": 6.749942550255013e-05, "loss": 2.3243, "step": 5386 }, { "epoch": 0.40550254991625734, "grad_norm": 5.3215436935424805, "learning_rate": 6.748800511654327e-05, "loss": 2.0491, "step": 5387 }, { "epoch": 0.4055778241969175, "grad_norm": 6.198952674865723, "learning_rate": 6.74765836909247e-05, "loss": 1.9957, "step": 5388 }, { "epoch": 0.4056530984775777, "grad_norm": 4.485803127288818, "learning_rate": 6.746516122637336e-05, "loss": 1.8012, "step": 5389 }, { "epoch": 0.4057283727582378, "grad_norm": 4.713202476501465, "learning_rate": 6.745373772356832e-05, "loss": 1.761, "step": 5390 }, { "epoch": 0.405803647038898, "grad_norm": 4.559616565704346, "learning_rate": 6.744231318318863e-05, "loss": 1.78, "step": 5391 }, { "epoch": 0.40587892131955816, "grad_norm": 4.271080493927002, "learning_rate": 6.743088760591349e-05, "loss": 1.7002, "step": 5392 }, { "epoch": 0.4059541956002183, "grad_norm": 5.435100078582764, "learning_rate": 6.741946099242206e-05, "loss": 1.8004, "step": 5393 }, { "epoch": 0.40602946988087846, "grad_norm": 4.465577125549316, "learning_rate": 6.74080333433937e-05, "loss": 1.6774, "step": 5394 }, { "epoch": 0.40610474416153863, "grad_norm": 7.253365516662598, "learning_rate": 6.739660465950769e-05, "loss": 1.8281, "step": 5395 }, { "epoch": 0.40618001844219875, "grad_norm": 4.638010025024414, "learning_rate": 6.738517494144346e-05, "loss": 2.1896, "step": 5396 }, { "epoch": 0.40625529272285893, "grad_norm": 5.373666286468506, "learning_rate": 6.737374418988044e-05, "loss": 1.8781, "step": 5397 }, { "epoch": 0.40633056700351905, "grad_norm": 5.579836368560791, "learning_rate": 6.736231240549818e-05, "loss": 1.9463, "step": 5398 }, { "epoch": 0.4064058412841792, "grad_norm": 5.846822261810303, "learning_rate": 6.735087958897628e-05, "loss": 1.6214, "step": 5399 }, { "epoch": 0.4064811155648394, "grad_norm": 3.9557971954345703, "learning_rate": 6.733944574099437e-05, "loss": 1.7163, "step": 5400 }, { "epoch": 0.4065563898454995, "grad_norm": 3.311464309692383, "learning_rate": 6.732801086223216e-05, "loss": 2.0677, "step": 5401 }, { "epoch": 0.4066316641261597, "grad_norm": 7.025592803955078, "learning_rate": 6.731657495336942e-05, "loss": 1.6905, "step": 5402 }, { "epoch": 0.4067069384068199, "grad_norm": 5.563939094543457, "learning_rate": 6.730513801508601e-05, "loss": 1.8414, "step": 5403 }, { "epoch": 0.40678221268748, "grad_norm": 4.507243633270264, "learning_rate": 6.729370004806176e-05, "loss": 2.0683, "step": 5404 }, { "epoch": 0.40685748696814017, "grad_norm": 4.501284122467041, "learning_rate": 6.728226105297668e-05, "loss": 1.7875, "step": 5405 }, { "epoch": 0.4069327612488003, "grad_norm": 3.7367732524871826, "learning_rate": 6.727082103051077e-05, "loss": 2.0878, "step": 5406 }, { "epoch": 0.40700803552946047, "grad_norm": 3.810563802719116, "learning_rate": 6.725937998134412e-05, "loss": 1.8462, "step": 5407 }, { "epoch": 0.40708330981012064, "grad_norm": 4.526647567749023, "learning_rate": 6.724793790615685e-05, "loss": 2.098, "step": 5408 }, { "epoch": 0.40715858409078076, "grad_norm": 4.322384357452393, "learning_rate": 6.723649480562913e-05, "loss": 1.7134, "step": 5409 }, { "epoch": 0.40723385837144094, "grad_norm": 4.435588359832764, "learning_rate": 6.722505068044128e-05, "loss": 2.3888, "step": 5410 }, { "epoch": 0.4073091326521011, "grad_norm": 5.700343608856201, "learning_rate": 6.721360553127358e-05, "loss": 2.4076, "step": 5411 }, { "epoch": 0.40738440693276123, "grad_norm": 4.957685947418213, "learning_rate": 6.720215935880644e-05, "loss": 2.135, "step": 5412 }, { "epoch": 0.4074596812134214, "grad_norm": 5.051072597503662, "learning_rate": 6.719071216372027e-05, "loss": 1.8736, "step": 5413 }, { "epoch": 0.4075349554940816, "grad_norm": 4.596951961517334, "learning_rate": 6.717926394669558e-05, "loss": 1.9796, "step": 5414 }, { "epoch": 0.4076102297747417, "grad_norm": 7.163468837738037, "learning_rate": 6.716781470841296e-05, "loss": 1.901, "step": 5415 }, { "epoch": 0.4076855040554019, "grad_norm": 3.8344624042510986, "learning_rate": 6.7156364449553e-05, "loss": 1.7953, "step": 5416 }, { "epoch": 0.407760778336062, "grad_norm": 4.731624603271484, "learning_rate": 6.71449131707964e-05, "loss": 2.1101, "step": 5417 }, { "epoch": 0.4078360526167222, "grad_norm": 4.400200366973877, "learning_rate": 6.713346087282392e-05, "loss": 2.167, "step": 5418 }, { "epoch": 0.40791132689738235, "grad_norm": 4.1537017822265625, "learning_rate": 6.712200755631634e-05, "loss": 1.9945, "step": 5419 }, { "epoch": 0.4079866011780425, "grad_norm": 3.854161500930786, "learning_rate": 6.711055322195455e-05, "loss": 2.0093, "step": 5420 }, { "epoch": 0.40806187545870265, "grad_norm": 4.663625240325928, "learning_rate": 6.709909787041945e-05, "loss": 2.1156, "step": 5421 }, { "epoch": 0.4081371497393628, "grad_norm": 5.383629322052002, "learning_rate": 6.708764150239206e-05, "loss": 1.7476, "step": 5422 }, { "epoch": 0.40821242402002295, "grad_norm": 5.7428507804870605, "learning_rate": 6.70761841185534e-05, "loss": 1.704, "step": 5423 }, { "epoch": 0.4082876983006831, "grad_norm": 3.8466684818267822, "learning_rate": 6.706472571958461e-05, "loss": 2.1686, "step": 5424 }, { "epoch": 0.40836297258134324, "grad_norm": 5.016278266906738, "learning_rate": 6.705326630616684e-05, "loss": 2.2948, "step": 5425 }, { "epoch": 0.4084382468620034, "grad_norm": 7.761519432067871, "learning_rate": 6.70418058789813e-05, "loss": 2.3454, "step": 5426 }, { "epoch": 0.4085135211426636, "grad_norm": 4.111473560333252, "learning_rate": 6.70303444387093e-05, "loss": 1.964, "step": 5427 }, { "epoch": 0.4085887954233237, "grad_norm": 5.230279922485352, "learning_rate": 6.701888198603221e-05, "loss": 2.3346, "step": 5428 }, { "epoch": 0.4086640697039839, "grad_norm": 4.104366302490234, "learning_rate": 6.70074185216314e-05, "loss": 2.526, "step": 5429 }, { "epoch": 0.40873934398464407, "grad_norm": 7.066196918487549, "learning_rate": 6.699595404618837e-05, "loss": 1.907, "step": 5430 }, { "epoch": 0.4088146182653042, "grad_norm": 5.222086429595947, "learning_rate": 6.698448856038462e-05, "loss": 1.8991, "step": 5431 }, { "epoch": 0.40888989254596436, "grad_norm": 6.374770641326904, "learning_rate": 6.697302206490178e-05, "loss": 2.2013, "step": 5432 }, { "epoch": 0.40896516682662454, "grad_norm": 5.201332092285156, "learning_rate": 6.696155456042147e-05, "loss": 2.0184, "step": 5433 }, { "epoch": 0.40904044110728466, "grad_norm": 4.482089042663574, "learning_rate": 6.69500860476254e-05, "loss": 1.9787, "step": 5434 }, { "epoch": 0.40911571538794483, "grad_norm": 5.201145648956299, "learning_rate": 6.693861652719536e-05, "loss": 2.0073, "step": 5435 }, { "epoch": 0.40919098966860495, "grad_norm": 4.535206317901611, "learning_rate": 6.692714599981318e-05, "loss": 2.0493, "step": 5436 }, { "epoch": 0.40926626394926513, "grad_norm": 7.1508660316467285, "learning_rate": 6.691567446616072e-05, "loss": 1.7305, "step": 5437 }, { "epoch": 0.4093415382299253, "grad_norm": 3.420806646347046, "learning_rate": 6.690420192691996e-05, "loss": 2.0333, "step": 5438 }, { "epoch": 0.4094168125105854, "grad_norm": 3.878455638885498, "learning_rate": 6.68927283827729e-05, "loss": 2.2041, "step": 5439 }, { "epoch": 0.4094920867912456, "grad_norm": 4.764273166656494, "learning_rate": 6.68812538344016e-05, "loss": 1.9954, "step": 5440 }, { "epoch": 0.4095673610719058, "grad_norm": 3.9040868282318115, "learning_rate": 6.686977828248822e-05, "loss": 2.0542, "step": 5441 }, { "epoch": 0.4096426353525659, "grad_norm": 3.967054843902588, "learning_rate": 6.68583017277149e-05, "loss": 1.9223, "step": 5442 }, { "epoch": 0.4097179096332261, "grad_norm": 5.52067756652832, "learning_rate": 6.684682417076392e-05, "loss": 1.9067, "step": 5443 }, { "epoch": 0.40979318391388625, "grad_norm": 5.799335956573486, "learning_rate": 6.68353456123176e-05, "loss": 1.7956, "step": 5444 }, { "epoch": 0.40986845819454637, "grad_norm": 3.870598793029785, "learning_rate": 6.682386605305827e-05, "loss": 1.5659, "step": 5445 }, { "epoch": 0.40994373247520655, "grad_norm": 4.401954650878906, "learning_rate": 6.681238549366838e-05, "loss": 2.2076, "step": 5446 }, { "epoch": 0.41001900675586667, "grad_norm": 4.360254287719727, "learning_rate": 6.680090393483041e-05, "loss": 2.4369, "step": 5447 }, { "epoch": 0.41009428103652684, "grad_norm": 4.853362560272217, "learning_rate": 6.678942137722691e-05, "loss": 1.8962, "step": 5448 }, { "epoch": 0.410169555317187, "grad_norm": 5.478942394256592, "learning_rate": 6.677793782154049e-05, "loss": 2.0748, "step": 5449 }, { "epoch": 0.41024482959784714, "grad_norm": 5.751876354217529, "learning_rate": 6.67664532684538e-05, "loss": 2.1473, "step": 5450 }, { "epoch": 0.4103201038785073, "grad_norm": 4.607638359069824, "learning_rate": 6.675496771864958e-05, "loss": 1.713, "step": 5451 }, { "epoch": 0.4103953781591675, "grad_norm": 5.937441349029541, "learning_rate": 6.67434811728106e-05, "loss": 1.8276, "step": 5452 }, { "epoch": 0.4104706524398276, "grad_norm": 5.579580783843994, "learning_rate": 6.67319936316197e-05, "loss": 1.6076, "step": 5453 }, { "epoch": 0.4105459267204878, "grad_norm": 5.940950393676758, "learning_rate": 6.672050509575978e-05, "loss": 2.0264, "step": 5454 }, { "epoch": 0.4106212010011479, "grad_norm": 3.596885919570923, "learning_rate": 6.670901556591383e-05, "loss": 1.8774, "step": 5455 }, { "epoch": 0.4106964752818081, "grad_norm": 3.9531962871551514, "learning_rate": 6.669752504276486e-05, "loss": 1.9779, "step": 5456 }, { "epoch": 0.41077174956246826, "grad_norm": 3.6535210609436035, "learning_rate": 6.668603352699592e-05, "loss": 2.1141, "step": 5457 }, { "epoch": 0.4108470238431284, "grad_norm": 5.708169460296631, "learning_rate": 6.667454101929016e-05, "loss": 1.7303, "step": 5458 }, { "epoch": 0.41092229812378855, "grad_norm": 3.641714572906494, "learning_rate": 6.666304752033076e-05, "loss": 1.8841, "step": 5459 }, { "epoch": 0.41099757240444873, "grad_norm": 4.162882328033447, "learning_rate": 6.665155303080104e-05, "loss": 1.9474, "step": 5460 }, { "epoch": 0.41107284668510885, "grad_norm": 3.7056288719177246, "learning_rate": 6.664005755138424e-05, "loss": 1.9426, "step": 5461 }, { "epoch": 0.411148120965769, "grad_norm": 5.81548547744751, "learning_rate": 6.662856108276379e-05, "loss": 1.7837, "step": 5462 }, { "epoch": 0.4112233952464292, "grad_norm": 4.699812412261963, "learning_rate": 6.661706362562308e-05, "loss": 2.0458, "step": 5463 }, { "epoch": 0.4112986695270893, "grad_norm": 3.592576742172241, "learning_rate": 6.66055651806456e-05, "loss": 1.8828, "step": 5464 }, { "epoch": 0.4113739438077495, "grad_norm": 4.497145652770996, "learning_rate": 6.659406574851494e-05, "loss": 1.7356, "step": 5465 }, { "epoch": 0.4114492180884096, "grad_norm": 5.0229573249816895, "learning_rate": 6.658256532991468e-05, "loss": 1.785, "step": 5466 }, { "epoch": 0.4115244923690698, "grad_norm": 4.335805892944336, "learning_rate": 6.65710639255285e-05, "loss": 2.0807, "step": 5467 }, { "epoch": 0.41159976664972997, "grad_norm": 4.9765625, "learning_rate": 6.65595615360401e-05, "loss": 1.9448, "step": 5468 }, { "epoch": 0.4116750409303901, "grad_norm": 3.9427287578582764, "learning_rate": 6.65480581621333e-05, "loss": 1.8073, "step": 5469 }, { "epoch": 0.41175031521105027, "grad_norm": 4.376291275024414, "learning_rate": 6.653655380449192e-05, "loss": 2.293, "step": 5470 }, { "epoch": 0.41182558949171044, "grad_norm": 6.065493583679199, "learning_rate": 6.652504846379984e-05, "loss": 2.3089, "step": 5471 }, { "epoch": 0.41190086377237056, "grad_norm": 5.656959056854248, "learning_rate": 6.651354214074106e-05, "loss": 2.1937, "step": 5472 }, { "epoch": 0.41197613805303074, "grad_norm": 6.063931465148926, "learning_rate": 6.65020348359996e-05, "loss": 2.1979, "step": 5473 }, { "epoch": 0.41205141233369086, "grad_norm": 6.589503288269043, "learning_rate": 6.649052655025951e-05, "loss": 1.6999, "step": 5474 }, { "epoch": 0.41212668661435103, "grad_norm": 5.17265510559082, "learning_rate": 6.647901728420494e-05, "loss": 2.288, "step": 5475 }, { "epoch": 0.4122019608950112, "grad_norm": 4.724446773529053, "learning_rate": 6.646750703852006e-05, "loss": 1.8786, "step": 5476 }, { "epoch": 0.41227723517567133, "grad_norm": 3.911710023880005, "learning_rate": 6.645599581388913e-05, "loss": 1.9273, "step": 5477 }, { "epoch": 0.4123525094563315, "grad_norm": 3.653254270553589, "learning_rate": 6.64444836109965e-05, "loss": 2.2875, "step": 5478 }, { "epoch": 0.4124277837369917, "grad_norm": 4.853933811187744, "learning_rate": 6.643297043052647e-05, "loss": 1.8094, "step": 5479 }, { "epoch": 0.4125030580176518, "grad_norm": 4.0237932205200195, "learning_rate": 6.64214562731635e-05, "loss": 1.8404, "step": 5480 }, { "epoch": 0.412578332298312, "grad_norm": 4.912440299987793, "learning_rate": 6.640994113959209e-05, "loss": 2.2973, "step": 5481 }, { "epoch": 0.41265360657897215, "grad_norm": 5.190946578979492, "learning_rate": 6.639842503049674e-05, "loss": 1.8979, "step": 5482 }, { "epoch": 0.4127288808596323, "grad_norm": 3.769709587097168, "learning_rate": 6.63869079465621e-05, "loss": 1.7073, "step": 5483 }, { "epoch": 0.41280415514029245, "grad_norm": 4.815976142883301, "learning_rate": 6.63753898884728e-05, "loss": 2.104, "step": 5484 }, { "epoch": 0.41287942942095257, "grad_norm": 4.878068923950195, "learning_rate": 6.636387085691356e-05, "loss": 1.7059, "step": 5485 }, { "epoch": 0.41295470370161275, "grad_norm": 3.944702386856079, "learning_rate": 6.635235085256914e-05, "loss": 1.7494, "step": 5486 }, { "epoch": 0.4130299779822729, "grad_norm": 5.319370269775391, "learning_rate": 6.634082987612438e-05, "loss": 1.8966, "step": 5487 }, { "epoch": 0.41310525226293304, "grad_norm": 4.177558898925781, "learning_rate": 6.632930792826417e-05, "loss": 1.9216, "step": 5488 }, { "epoch": 0.4131805265435932, "grad_norm": 7.6635847091674805, "learning_rate": 6.631778500967347e-05, "loss": 2.0967, "step": 5489 }, { "epoch": 0.4132558008242534, "grad_norm": 4.461691856384277, "learning_rate": 6.630626112103727e-05, "loss": 2.1364, "step": 5490 }, { "epoch": 0.4133310751049135, "grad_norm": 5.968453407287598, "learning_rate": 6.629473626304063e-05, "loss": 2.1202, "step": 5491 }, { "epoch": 0.4134063493855737, "grad_norm": 3.196467638015747, "learning_rate": 6.628321043636868e-05, "loss": 1.8225, "step": 5492 }, { "epoch": 0.41348162366623387, "grad_norm": 5.531734943389893, "learning_rate": 6.627168364170659e-05, "loss": 1.9503, "step": 5493 }, { "epoch": 0.413556897946894, "grad_norm": 4.877641201019287, "learning_rate": 6.626015587973958e-05, "loss": 1.7222, "step": 5494 }, { "epoch": 0.41363217222755416, "grad_norm": 4.493724822998047, "learning_rate": 6.624862715115297e-05, "loss": 2.0498, "step": 5495 }, { "epoch": 0.4137074465082143, "grad_norm": 4.2190046310424805, "learning_rate": 6.623709745663212e-05, "loss": 1.7869, "step": 5496 }, { "epoch": 0.41378272078887446, "grad_norm": 3.5987708568573, "learning_rate": 6.622556679686238e-05, "loss": 2.1812, "step": 5497 }, { "epoch": 0.41385799506953463, "grad_norm": 5.731755256652832, "learning_rate": 6.621403517252926e-05, "loss": 1.7986, "step": 5498 }, { "epoch": 0.41393326935019475, "grad_norm": 6.7161641120910645, "learning_rate": 6.620250258431827e-05, "loss": 1.9385, "step": 5499 }, { "epoch": 0.41400854363085493, "grad_norm": 5.86796760559082, "learning_rate": 6.6190969032915e-05, "loss": 2.2876, "step": 5500 }, { "epoch": 0.4140838179115151, "grad_norm": 4.715075969696045, "learning_rate": 6.617943451900508e-05, "loss": 1.8518, "step": 5501 }, { "epoch": 0.4141590921921752, "grad_norm": 4.128543376922607, "learning_rate": 6.616789904327421e-05, "loss": 2.2018, "step": 5502 }, { "epoch": 0.4142343664728354, "grad_norm": 3.8076722621917725, "learning_rate": 6.615636260640812e-05, "loss": 1.7915, "step": 5503 }, { "epoch": 0.4143096407534955, "grad_norm": 4.099318981170654, "learning_rate": 6.614482520909262e-05, "loss": 2.0997, "step": 5504 }, { "epoch": 0.4143849150341557, "grad_norm": 3.803635835647583, "learning_rate": 6.61332868520136e-05, "loss": 1.7286, "step": 5505 }, { "epoch": 0.4144601893148159, "grad_norm": 4.256179332733154, "learning_rate": 6.612174753585697e-05, "loss": 1.7436, "step": 5506 }, { "epoch": 0.414535463595476, "grad_norm": 5.945194244384766, "learning_rate": 6.61102072613087e-05, "loss": 1.9926, "step": 5507 }, { "epoch": 0.41461073787613617, "grad_norm": 3.957068681716919, "learning_rate": 6.609866602905484e-05, "loss": 1.6294, "step": 5508 }, { "epoch": 0.41468601215679635, "grad_norm": 6.181549072265625, "learning_rate": 6.608712383978147e-05, "loss": 1.9012, "step": 5509 }, { "epoch": 0.41476128643745647, "grad_norm": 5.627074718475342, "learning_rate": 6.607558069417476e-05, "loss": 1.8964, "step": 5510 }, { "epoch": 0.41483656071811664, "grad_norm": 4.87725305557251, "learning_rate": 6.60640365929209e-05, "loss": 1.6232, "step": 5511 }, { "epoch": 0.4149118349987768, "grad_norm": 4.347396373748779, "learning_rate": 6.605249153670615e-05, "loss": 2.1033, "step": 5512 }, { "epoch": 0.41498710927943694, "grad_norm": 5.015224456787109, "learning_rate": 6.604094552621685e-05, "loss": 1.8466, "step": 5513 }, { "epoch": 0.4150623835600971, "grad_norm": 3.6444180011749268, "learning_rate": 6.602939856213935e-05, "loss": 1.7917, "step": 5514 }, { "epoch": 0.41513765784075723, "grad_norm": 5.13500452041626, "learning_rate": 6.601785064516011e-05, "loss": 1.7059, "step": 5515 }, { "epoch": 0.4152129321214174, "grad_norm": 5.212123394012451, "learning_rate": 6.600630177596559e-05, "loss": 1.681, "step": 5516 }, { "epoch": 0.4152882064020776, "grad_norm": 4.870180130004883, "learning_rate": 6.599475195524237e-05, "loss": 1.7537, "step": 5517 }, { "epoch": 0.4153634806827377, "grad_norm": 4.179619312286377, "learning_rate": 6.598320118367704e-05, "loss": 1.7974, "step": 5518 }, { "epoch": 0.4154387549633979, "grad_norm": 4.074046611785889, "learning_rate": 6.597164946195627e-05, "loss": 1.6882, "step": 5519 }, { "epoch": 0.41551402924405806, "grad_norm": 5.8371052742004395, "learning_rate": 6.596009679076675e-05, "loss": 2.0133, "step": 5520 }, { "epoch": 0.4155893035247182, "grad_norm": 4.035010814666748, "learning_rate": 6.594854317079529e-05, "loss": 1.8934, "step": 5521 }, { "epoch": 0.41566457780537835, "grad_norm": 4.570525646209717, "learning_rate": 6.593698860272868e-05, "loss": 1.711, "step": 5522 }, { "epoch": 0.41573985208603853, "grad_norm": 4.247208595275879, "learning_rate": 6.592543308725383e-05, "loss": 1.8048, "step": 5523 }, { "epoch": 0.41581512636669865, "grad_norm": 4.1996636390686035, "learning_rate": 6.591387662505769e-05, "loss": 1.9503, "step": 5524 }, { "epoch": 0.4158904006473588, "grad_norm": 4.474452972412109, "learning_rate": 6.590231921682724e-05, "loss": 1.7582, "step": 5525 }, { "epoch": 0.41596567492801895, "grad_norm": 4.767648696899414, "learning_rate": 6.589076086324954e-05, "loss": 2.1755, "step": 5526 }, { "epoch": 0.4160409492086791, "grad_norm": 5.134219646453857, "learning_rate": 6.587920156501171e-05, "loss": 1.7309, "step": 5527 }, { "epoch": 0.4161162234893393, "grad_norm": 5.276822566986084, "learning_rate": 6.58676413228009e-05, "loss": 1.8096, "step": 5528 }, { "epoch": 0.4161914977699994, "grad_norm": 7.404260158538818, "learning_rate": 6.585608013730436e-05, "loss": 2.0437, "step": 5529 }, { "epoch": 0.4162667720506596, "grad_norm": 5.470710754394531, "learning_rate": 6.584451800920933e-05, "loss": 2.108, "step": 5530 }, { "epoch": 0.41634204633131977, "grad_norm": 4.577542781829834, "learning_rate": 6.583295493920317e-05, "loss": 2.018, "step": 5531 }, { "epoch": 0.4164173206119799, "grad_norm": 4.170786380767822, "learning_rate": 6.582139092797327e-05, "loss": 1.9929, "step": 5532 }, { "epoch": 0.41649259489264007, "grad_norm": 5.146010398864746, "learning_rate": 6.580982597620709e-05, "loss": 1.8045, "step": 5533 }, { "epoch": 0.4165678691733002, "grad_norm": 5.699809551239014, "learning_rate": 6.57982600845921e-05, "loss": 2.1068, "step": 5534 }, { "epoch": 0.41664314345396036, "grad_norm": 4.2040815353393555, "learning_rate": 6.57866932538159e-05, "loss": 2.1131, "step": 5535 }, { "epoch": 0.41671841773462054, "grad_norm": 4.423560619354248, "learning_rate": 6.577512548456606e-05, "loss": 1.8453, "step": 5536 }, { "epoch": 0.41679369201528066, "grad_norm": 4.110985279083252, "learning_rate": 6.576355677753028e-05, "loss": 1.8922, "step": 5537 }, { "epoch": 0.41686896629594083, "grad_norm": 3.8957650661468506, "learning_rate": 6.575198713339629e-05, "loss": 1.8041, "step": 5538 }, { "epoch": 0.416944240576601, "grad_norm": 7.501094818115234, "learning_rate": 6.574041655285185e-05, "loss": 2.0819, "step": 5539 }, { "epoch": 0.41701951485726113, "grad_norm": 4.668057918548584, "learning_rate": 6.572884503658483e-05, "loss": 1.857, "step": 5540 }, { "epoch": 0.4170947891379213, "grad_norm": 4.338768005371094, "learning_rate": 6.57172725852831e-05, "loss": 1.9319, "step": 5541 }, { "epoch": 0.4171700634185815, "grad_norm": 4.211892604827881, "learning_rate": 6.57056991996346e-05, "loss": 1.9953, "step": 5542 }, { "epoch": 0.4172453376992416, "grad_norm": 4.700490474700928, "learning_rate": 6.569412488032735e-05, "loss": 2.1271, "step": 5543 }, { "epoch": 0.4173206119799018, "grad_norm": 4.889296531677246, "learning_rate": 6.568254962804941e-05, "loss": 1.8898, "step": 5544 }, { "epoch": 0.4173958862605619, "grad_norm": 4.8866400718688965, "learning_rate": 6.567097344348889e-05, "loss": 2.0086, "step": 5545 }, { "epoch": 0.4174711605412221, "grad_norm": 6.036957263946533, "learning_rate": 6.565939632733396e-05, "loss": 2.1814, "step": 5546 }, { "epoch": 0.41754643482188225, "grad_norm": 4.945574760437012, "learning_rate": 6.564781828027285e-05, "loss": 2.1964, "step": 5547 }, { "epoch": 0.41762170910254237, "grad_norm": 4.205514907836914, "learning_rate": 6.563623930299385e-05, "loss": 1.8114, "step": 5548 }, { "epoch": 0.41769698338320255, "grad_norm": 3.874279737472534, "learning_rate": 6.562465939618529e-05, "loss": 1.7542, "step": 5549 }, { "epoch": 0.4177722576638627, "grad_norm": 4.054240703582764, "learning_rate": 6.561307856053556e-05, "loss": 1.8724, "step": 5550 }, { "epoch": 0.41784753194452284, "grad_norm": 4.651938438415527, "learning_rate": 6.56014967967331e-05, "loss": 1.8783, "step": 5551 }, { "epoch": 0.417922806225183, "grad_norm": 4.218622207641602, "learning_rate": 6.558991410546644e-05, "loss": 2.1694, "step": 5552 }, { "epoch": 0.41799808050584314, "grad_norm": 6.124258041381836, "learning_rate": 6.55783304874241e-05, "loss": 2.0684, "step": 5553 }, { "epoch": 0.4180733547865033, "grad_norm": 5.30171012878418, "learning_rate": 6.556674594329471e-05, "loss": 1.9376, "step": 5554 }, { "epoch": 0.4181486290671635, "grad_norm": 4.841015815734863, "learning_rate": 6.555516047376696e-05, "loss": 2.0306, "step": 5555 }, { "epoch": 0.4182239033478236, "grad_norm": 4.988308906555176, "learning_rate": 6.554357407952954e-05, "loss": 1.9401, "step": 5556 }, { "epoch": 0.4182991776284838, "grad_norm": 5.21719217300415, "learning_rate": 6.553198676127124e-05, "loss": 2.0798, "step": 5557 }, { "epoch": 0.41837445190914396, "grad_norm": 3.97567081451416, "learning_rate": 6.552039851968089e-05, "loss": 1.6506, "step": 5558 }, { "epoch": 0.4184497261898041, "grad_norm": 4.648266315460205, "learning_rate": 6.550880935544738e-05, "loss": 1.9546, "step": 5559 }, { "epoch": 0.41852500047046426, "grad_norm": 4.081138610839844, "learning_rate": 6.549721926925966e-05, "loss": 1.5423, "step": 5560 }, { "epoch": 0.41860027475112443, "grad_norm": 4.243156433105469, "learning_rate": 6.548562826180672e-05, "loss": 1.6255, "step": 5561 }, { "epoch": 0.41867554903178456, "grad_norm": 4.178682804107666, "learning_rate": 6.547403633377762e-05, "loss": 1.7335, "step": 5562 }, { "epoch": 0.41875082331244473, "grad_norm": 4.467832088470459, "learning_rate": 6.546244348586144e-05, "loss": 1.656, "step": 5563 }, { "epoch": 0.41882609759310485, "grad_norm": 3.900017023086548, "learning_rate": 6.545084971874738e-05, "loss": 1.5719, "step": 5564 }, { "epoch": 0.418901371873765, "grad_norm": 6.569604873657227, "learning_rate": 6.543925503312462e-05, "loss": 2.226, "step": 5565 }, { "epoch": 0.4189766461544252, "grad_norm": 5.198246002197266, "learning_rate": 6.542765942968246e-05, "loss": 1.9406, "step": 5566 }, { "epoch": 0.4190519204350853, "grad_norm": 3.909407138824463, "learning_rate": 6.541606290911022e-05, "loss": 1.9182, "step": 5567 }, { "epoch": 0.4191271947157455, "grad_norm": 4.8404974937438965, "learning_rate": 6.540446547209727e-05, "loss": 1.9712, "step": 5568 }, { "epoch": 0.4192024689964057, "grad_norm": 4.304462909698486, "learning_rate": 6.539286711933304e-05, "loss": 2.1316, "step": 5569 }, { "epoch": 0.4192777432770658, "grad_norm": 6.251839637756348, "learning_rate": 6.538126785150705e-05, "loss": 1.7148, "step": 5570 }, { "epoch": 0.41935301755772597, "grad_norm": 4.521939754486084, "learning_rate": 6.536966766930879e-05, "loss": 2.0965, "step": 5571 }, { "epoch": 0.41942829183838615, "grad_norm": 5.470311164855957, "learning_rate": 6.53580665734279e-05, "loss": 2.0385, "step": 5572 }, { "epoch": 0.41950356611904627, "grad_norm": 4.349471092224121, "learning_rate": 6.534646456455402e-05, "loss": 2.0269, "step": 5573 }, { "epoch": 0.41957884039970644, "grad_norm": 4.1794233322143555, "learning_rate": 6.533486164337686e-05, "loss": 2.4095, "step": 5574 }, { "epoch": 0.41965411468036656, "grad_norm": 6.031955718994141, "learning_rate": 6.532325781058616e-05, "loss": 2.1703, "step": 5575 }, { "epoch": 0.41972938896102674, "grad_norm": 3.9421780109405518, "learning_rate": 6.531165306687177e-05, "loss": 1.9262, "step": 5576 }, { "epoch": 0.4198046632416869, "grad_norm": 4.936164855957031, "learning_rate": 6.530004741292352e-05, "loss": 2.2354, "step": 5577 }, { "epoch": 0.41987993752234704, "grad_norm": 4.212682247161865, "learning_rate": 6.528844084943137e-05, "loss": 1.7489, "step": 5578 }, { "epoch": 0.4199552118030072, "grad_norm": 7.0396647453308105, "learning_rate": 6.527683337708527e-05, "loss": 1.8702, "step": 5579 }, { "epoch": 0.4200304860836674, "grad_norm": 4.923828125, "learning_rate": 6.526522499657526e-05, "loss": 1.9955, "step": 5580 }, { "epoch": 0.4201057603643275, "grad_norm": 4.858852863311768, "learning_rate": 6.525361570859143e-05, "loss": 2.0155, "step": 5581 }, { "epoch": 0.4201810346449877, "grad_norm": 4.04617166519165, "learning_rate": 6.524200551382392e-05, "loss": 2.0444, "step": 5582 }, { "epoch": 0.4202563089256478, "grad_norm": 4.236696720123291, "learning_rate": 6.523039441296289e-05, "loss": 1.9147, "step": 5583 }, { "epoch": 0.420331583206308, "grad_norm": 5.749179840087891, "learning_rate": 6.521878240669863e-05, "loss": 2.1391, "step": 5584 }, { "epoch": 0.42040685748696816, "grad_norm": 4.734997749328613, "learning_rate": 6.520716949572143e-05, "loss": 1.8811, "step": 5585 }, { "epoch": 0.4204821317676283, "grad_norm": 3.8527164459228516, "learning_rate": 6.519555568072163e-05, "loss": 2.2059, "step": 5586 }, { "epoch": 0.42055740604828845, "grad_norm": 4.343130588531494, "learning_rate": 6.518394096238965e-05, "loss": 1.864, "step": 5587 }, { "epoch": 0.4206326803289486, "grad_norm": 4.452051639556885, "learning_rate": 6.517232534141594e-05, "loss": 1.9093, "step": 5588 }, { "epoch": 0.42070795460960875, "grad_norm": 4.398669242858887, "learning_rate": 6.516070881849104e-05, "loss": 1.9407, "step": 5589 }, { "epoch": 0.4207832288902689, "grad_norm": 4.123104095458984, "learning_rate": 6.514909139430549e-05, "loss": 1.7576, "step": 5590 }, { "epoch": 0.4208585031709291, "grad_norm": 4.239006996154785, "learning_rate": 6.513747306954992e-05, "loss": 2.2412, "step": 5591 }, { "epoch": 0.4209337774515892, "grad_norm": 4.470141410827637, "learning_rate": 6.5125853844915e-05, "loss": 2.0184, "step": 5592 }, { "epoch": 0.4210090517322494, "grad_norm": 5.281346321105957, "learning_rate": 6.511423372109149e-05, "loss": 1.9499, "step": 5593 }, { "epoch": 0.4210843260129095, "grad_norm": 4.19333028793335, "learning_rate": 6.510261269877015e-05, "loss": 2.1242, "step": 5594 }, { "epoch": 0.4211596002935697, "grad_norm": 4.250405788421631, "learning_rate": 6.50909907786418e-05, "loss": 2.035, "step": 5595 }, { "epoch": 0.42123487457422987, "grad_norm": 4.133971691131592, "learning_rate": 6.507936796139737e-05, "loss": 2.3415, "step": 5596 }, { "epoch": 0.42131014885489, "grad_norm": 3.4514198303222656, "learning_rate": 6.506774424772778e-05, "loss": 2.1725, "step": 5597 }, { "epoch": 0.42138542313555016, "grad_norm": 4.451467514038086, "learning_rate": 6.505611963832404e-05, "loss": 1.9895, "step": 5598 }, { "epoch": 0.42146069741621034, "grad_norm": 4.091064453125, "learning_rate": 6.504449413387715e-05, "loss": 1.903, "step": 5599 }, { "epoch": 0.42153597169687046, "grad_norm": 3.049105644226074, "learning_rate": 6.503286773507829e-05, "loss": 2.2702, "step": 5600 }, { "epoch": 0.42161124597753064, "grad_norm": 3.6067404747009277, "learning_rate": 6.502124044261855e-05, "loss": 1.7548, "step": 5601 }, { "epoch": 0.42168652025819076, "grad_norm": 4.892045974731445, "learning_rate": 6.500961225718918e-05, "loss": 2.3685, "step": 5602 }, { "epoch": 0.42176179453885093, "grad_norm": 3.874382972717285, "learning_rate": 6.499798317948143e-05, "loss": 2.0288, "step": 5603 }, { "epoch": 0.4218370688195111, "grad_norm": 5.166650772094727, "learning_rate": 6.498635321018661e-05, "loss": 2.0425, "step": 5604 }, { "epoch": 0.4219123431001712, "grad_norm": 3.9357619285583496, "learning_rate": 6.497472234999608e-05, "loss": 1.6051, "step": 5605 }, { "epoch": 0.4219876173808314, "grad_norm": 3.3423304557800293, "learning_rate": 6.49630905996013e-05, "loss": 1.8607, "step": 5606 }, { "epoch": 0.4220628916614916, "grad_norm": 8.150017738342285, "learning_rate": 6.495145795969371e-05, "loss": 2.034, "step": 5607 }, { "epoch": 0.4221381659421517, "grad_norm": 4.082434177398682, "learning_rate": 6.493982443096484e-05, "loss": 2.2919, "step": 5608 }, { "epoch": 0.4222134402228119, "grad_norm": 4.554947376251221, "learning_rate": 6.492819001410627e-05, "loss": 2.069, "step": 5609 }, { "epoch": 0.42228871450347205, "grad_norm": 6.27967643737793, "learning_rate": 6.491655470980963e-05, "loss": 2.251, "step": 5610 }, { "epoch": 0.42236398878413217, "grad_norm": 5.9056291580200195, "learning_rate": 6.490491851876663e-05, "loss": 1.6081, "step": 5611 }, { "epoch": 0.42243926306479235, "grad_norm": 4.23393440246582, "learning_rate": 6.489328144166899e-05, "loss": 2.3094, "step": 5612 }, { "epoch": 0.42251453734545247, "grad_norm": 4.29860782623291, "learning_rate": 6.48816434792085e-05, "loss": 2.0796, "step": 5613 }, { "epoch": 0.42258981162611264, "grad_norm": 5.503111362457275, "learning_rate": 6.4870004632077e-05, "loss": 2.029, "step": 5614 }, { "epoch": 0.4226650859067728, "grad_norm": 6.667332172393799, "learning_rate": 6.48583649009664e-05, "loss": 2.3769, "step": 5615 }, { "epoch": 0.42274036018743294, "grad_norm": 6.4203877449035645, "learning_rate": 6.484672428656862e-05, "loss": 1.9105, "step": 5616 }, { "epoch": 0.4228156344680931, "grad_norm": 4.276432514190674, "learning_rate": 6.48350827895757e-05, "loss": 1.8086, "step": 5617 }, { "epoch": 0.4228909087487533, "grad_norm": 5.440550327301025, "learning_rate": 6.482344041067969e-05, "loss": 1.9897, "step": 5618 }, { "epoch": 0.4229661830294134, "grad_norm": 4.325236797332764, "learning_rate": 6.481179715057266e-05, "loss": 1.9733, "step": 5619 }, { "epoch": 0.4230414573100736, "grad_norm": 3.871230363845825, "learning_rate": 6.480015300994678e-05, "loss": 2.2546, "step": 5620 }, { "epoch": 0.42311673159073376, "grad_norm": 6.921532154083252, "learning_rate": 6.478850798949428e-05, "loss": 2.3421, "step": 5621 }, { "epoch": 0.4231920058713939, "grad_norm": 4.719679355621338, "learning_rate": 6.477686208990741e-05, "loss": 2.1701, "step": 5622 }, { "epoch": 0.42326728015205406, "grad_norm": 6.527713298797607, "learning_rate": 6.47652153118785e-05, "loss": 1.9278, "step": 5623 }, { "epoch": 0.4233425544327142, "grad_norm": 3.849130868911743, "learning_rate": 6.47535676560999e-05, "loss": 2.3324, "step": 5624 }, { "epoch": 0.42341782871337436, "grad_norm": 4.888229846954346, "learning_rate": 6.474191912326404e-05, "loss": 2.3075, "step": 5625 }, { "epoch": 0.42349310299403453, "grad_norm": 3.5545620918273926, "learning_rate": 6.473026971406338e-05, "loss": 2.0089, "step": 5626 }, { "epoch": 0.42356837727469465, "grad_norm": 5.497159957885742, "learning_rate": 6.471861942919043e-05, "loss": 1.8244, "step": 5627 }, { "epoch": 0.4236436515553548, "grad_norm": 4.458886623382568, "learning_rate": 6.470696826933782e-05, "loss": 2.1825, "step": 5628 }, { "epoch": 0.423718925836015, "grad_norm": 5.496108055114746, "learning_rate": 6.469531623519813e-05, "loss": 2.0918, "step": 5629 }, { "epoch": 0.4237942001166751, "grad_norm": 3.3734006881713867, "learning_rate": 6.468366332746406e-05, "loss": 1.8253, "step": 5630 }, { "epoch": 0.4238694743973353, "grad_norm": 6.460005283355713, "learning_rate": 6.467200954682835e-05, "loss": 2.2932, "step": 5631 }, { "epoch": 0.4239447486779954, "grad_norm": 3.8254501819610596, "learning_rate": 6.466035489398374e-05, "loss": 2.2456, "step": 5632 }, { "epoch": 0.4240200229586556, "grad_norm": 5.881473064422607, "learning_rate": 6.464869936962312e-05, "loss": 1.6875, "step": 5633 }, { "epoch": 0.42409529723931577, "grad_norm": 4.08540153503418, "learning_rate": 6.463704297443935e-05, "loss": 1.9969, "step": 5634 }, { "epoch": 0.4241705715199759, "grad_norm": 7.7894287109375, "learning_rate": 6.462538570912539e-05, "loss": 2.0714, "step": 5635 }, { "epoch": 0.42424584580063607, "grad_norm": 5.379004001617432, "learning_rate": 6.461372757437419e-05, "loss": 1.8022, "step": 5636 }, { "epoch": 0.42432112008129624, "grad_norm": 4.519621849060059, "learning_rate": 6.460206857087882e-05, "loss": 1.95, "step": 5637 }, { "epoch": 0.42439639436195636, "grad_norm": 4.3377685546875, "learning_rate": 6.459040869933238e-05, "loss": 2.0346, "step": 5638 }, { "epoch": 0.42447166864261654, "grad_norm": 5.817694664001465, "learning_rate": 6.457874796042801e-05, "loss": 1.8139, "step": 5639 }, { "epoch": 0.4245469429232767, "grad_norm": 4.451927661895752, "learning_rate": 6.45670863548589e-05, "loss": 1.9106, "step": 5640 }, { "epoch": 0.42462221720393684, "grad_norm": 5.235522747039795, "learning_rate": 6.45554238833183e-05, "loss": 2.1191, "step": 5641 }, { "epoch": 0.424697491484597, "grad_norm": 6.057497978210449, "learning_rate": 6.454376054649951e-05, "loss": 1.8064, "step": 5642 }, { "epoch": 0.42477276576525713, "grad_norm": 8.908404350280762, "learning_rate": 6.45320963450959e-05, "loss": 2.2445, "step": 5643 }, { "epoch": 0.4248480400459173, "grad_norm": 5.425161361694336, "learning_rate": 6.452043127980084e-05, "loss": 1.7459, "step": 5644 }, { "epoch": 0.4249233143265775, "grad_norm": 4.097648620605469, "learning_rate": 6.450876535130782e-05, "loss": 2.039, "step": 5645 }, { "epoch": 0.4249985886072376, "grad_norm": 5.247454643249512, "learning_rate": 6.449709856031033e-05, "loss": 2.1966, "step": 5646 }, { "epoch": 0.4250738628878978, "grad_norm": 5.577682971954346, "learning_rate": 6.448543090750193e-05, "loss": 1.9621, "step": 5647 }, { "epoch": 0.42514913716855796, "grad_norm": 6.4181623458862305, "learning_rate": 6.447376239357623e-05, "loss": 2.3809, "step": 5648 }, { "epoch": 0.4252244114492181, "grad_norm": 4.736383438110352, "learning_rate": 6.446209301922686e-05, "loss": 2.3393, "step": 5649 }, { "epoch": 0.42529968572987825, "grad_norm": 4.835147380828857, "learning_rate": 6.445042278514758e-05, "loss": 1.722, "step": 5650 }, { "epoch": 0.42537496001053837, "grad_norm": 4.267189979553223, "learning_rate": 6.443875169203213e-05, "loss": 1.8631, "step": 5651 }, { "epoch": 0.42545023429119855, "grad_norm": 6.311431407928467, "learning_rate": 6.442707974057432e-05, "loss": 2.0236, "step": 5652 }, { "epoch": 0.4255255085718587, "grad_norm": 4.5755109786987305, "learning_rate": 6.441540693146799e-05, "loss": 2.3902, "step": 5653 }, { "epoch": 0.42560078285251884, "grad_norm": 5.2585129737854, "learning_rate": 6.440373326540709e-05, "loss": 1.6589, "step": 5654 }, { "epoch": 0.425676057133179, "grad_norm": 5.402616500854492, "learning_rate": 6.43920587430856e-05, "loss": 2.0014, "step": 5655 }, { "epoch": 0.4257513314138392, "grad_norm": 4.718621253967285, "learning_rate": 6.438038336519749e-05, "loss": 1.9912, "step": 5656 }, { "epoch": 0.4258266056944993, "grad_norm": 4.067650318145752, "learning_rate": 6.436870713243687e-05, "loss": 1.9912, "step": 5657 }, { "epoch": 0.4259018799751595, "grad_norm": 4.3337483406066895, "learning_rate": 6.435703004549782e-05, "loss": 2.0985, "step": 5658 }, { "epoch": 0.42597715425581967, "grad_norm": 5.808797836303711, "learning_rate": 6.434535210507453e-05, "loss": 1.8555, "step": 5659 }, { "epoch": 0.4260524285364798, "grad_norm": 5.587106704711914, "learning_rate": 6.433367331186122e-05, "loss": 1.887, "step": 5660 }, { "epoch": 0.42612770281713996, "grad_norm": 3.134948492050171, "learning_rate": 6.432199366655217e-05, "loss": 1.9426, "step": 5661 }, { "epoch": 0.4262029770978001, "grad_norm": 4.005614280700684, "learning_rate": 6.431031316984169e-05, "loss": 1.9145, "step": 5662 }, { "epoch": 0.42627825137846026, "grad_norm": 4.251115798950195, "learning_rate": 6.429863182242415e-05, "loss": 2.0053, "step": 5663 }, { "epoch": 0.42635352565912044, "grad_norm": 6.862904071807861, "learning_rate": 6.428694962499397e-05, "loss": 1.892, "step": 5664 }, { "epoch": 0.42642879993978056, "grad_norm": 4.527406692504883, "learning_rate": 6.427526657824563e-05, "loss": 1.8634, "step": 5665 }, { "epoch": 0.42650407422044073, "grad_norm": 5.64104700088501, "learning_rate": 6.426358268287366e-05, "loss": 2.2718, "step": 5666 }, { "epoch": 0.4265793485011009, "grad_norm": 5.171625137329102, "learning_rate": 6.425189793957262e-05, "loss": 2.0148, "step": 5667 }, { "epoch": 0.42665462278176103, "grad_norm": 4.813802242279053, "learning_rate": 6.424021234903714e-05, "loss": 1.7304, "step": 5668 }, { "epoch": 0.4267298970624212, "grad_norm": 4.335046291351318, "learning_rate": 6.422852591196191e-05, "loss": 2.1238, "step": 5669 }, { "epoch": 0.4268051713430814, "grad_norm": 4.876555442810059, "learning_rate": 6.421683862904163e-05, "loss": 1.9286, "step": 5670 }, { "epoch": 0.4268804456237415, "grad_norm": 3.706444025039673, "learning_rate": 6.420515050097107e-05, "loss": 1.7301, "step": 5671 }, { "epoch": 0.4269557199044017, "grad_norm": 3.9172468185424805, "learning_rate": 6.41934615284451e-05, "loss": 1.6058, "step": 5672 }, { "epoch": 0.4270309941850618, "grad_norm": 4.290459156036377, "learning_rate": 6.418177171215856e-05, "loss": 1.9359, "step": 5673 }, { "epoch": 0.42710626846572197, "grad_norm": 6.446939945220947, "learning_rate": 6.417008105280637e-05, "loss": 2.2751, "step": 5674 }, { "epoch": 0.42718154274638215, "grad_norm": 4.605138301849365, "learning_rate": 6.415838955108353e-05, "loss": 2.0881, "step": 5675 }, { "epoch": 0.42725681702704227, "grad_norm": 4.872567176818848, "learning_rate": 6.414669720768505e-05, "loss": 2.2949, "step": 5676 }, { "epoch": 0.42733209130770244, "grad_norm": 4.5091962814331055, "learning_rate": 6.413500402330602e-05, "loss": 1.7376, "step": 5677 }, { "epoch": 0.4274073655883626, "grad_norm": 6.263108730316162, "learning_rate": 6.412330999864155e-05, "loss": 1.6687, "step": 5678 }, { "epoch": 0.42748263986902274, "grad_norm": 5.759294033050537, "learning_rate": 6.411161513438684e-05, "loss": 2.134, "step": 5679 }, { "epoch": 0.4275579141496829, "grad_norm": 4.382905960083008, "learning_rate": 6.409991943123709e-05, "loss": 1.8283, "step": 5680 }, { "epoch": 0.42763318843034304, "grad_norm": 5.303624153137207, "learning_rate": 6.408822288988758e-05, "loss": 2.2724, "step": 5681 }, { "epoch": 0.4277084627110032, "grad_norm": 3.873399496078491, "learning_rate": 6.407652551103364e-05, "loss": 2.0191, "step": 5682 }, { "epoch": 0.4277837369916634, "grad_norm": 3.8777012825012207, "learning_rate": 6.406482729537068e-05, "loss": 1.7661, "step": 5683 }, { "epoch": 0.4278590112723235, "grad_norm": 4.47244119644165, "learning_rate": 6.405312824359408e-05, "loss": 1.9484, "step": 5684 }, { "epoch": 0.4279342855529837, "grad_norm": 5.212845802307129, "learning_rate": 6.404142835639931e-05, "loss": 1.8307, "step": 5685 }, { "epoch": 0.42800955983364386, "grad_norm": 5.502110481262207, "learning_rate": 6.402972763448194e-05, "loss": 1.9913, "step": 5686 }, { "epoch": 0.428084834114304, "grad_norm": 3.6294803619384766, "learning_rate": 6.401802607853749e-05, "loss": 1.8928, "step": 5687 }, { "epoch": 0.42816010839496416, "grad_norm": 5.221503734588623, "learning_rate": 6.400632368926163e-05, "loss": 1.8757, "step": 5688 }, { "epoch": 0.42823538267562433, "grad_norm": 4.934842109680176, "learning_rate": 6.399462046735001e-05, "loss": 1.9658, "step": 5689 }, { "epoch": 0.42831065695628445, "grad_norm": 4.679507255554199, "learning_rate": 6.398291641349836e-05, "loss": 1.9626, "step": 5690 }, { "epoch": 0.42838593123694463, "grad_norm": 5.309213638305664, "learning_rate": 6.397121152840245e-05, "loss": 2.0364, "step": 5691 }, { "epoch": 0.42846120551760475, "grad_norm": 3.6243607997894287, "learning_rate": 6.395950581275811e-05, "loss": 2.0319, "step": 5692 }, { "epoch": 0.4285364797982649, "grad_norm": 4.856205940246582, "learning_rate": 6.394779926726117e-05, "loss": 1.9477, "step": 5693 }, { "epoch": 0.4286117540789251, "grad_norm": 5.259153842926025, "learning_rate": 6.393609189260762e-05, "loss": 1.788, "step": 5694 }, { "epoch": 0.4286870283595852, "grad_norm": 4.2632365226745605, "learning_rate": 6.392438368949338e-05, "loss": 2.3005, "step": 5695 }, { "epoch": 0.4287623026402454, "grad_norm": 5.557430267333984, "learning_rate": 6.39126746586145e-05, "loss": 1.9711, "step": 5696 }, { "epoch": 0.42883757692090557, "grad_norm": 4.379390716552734, "learning_rate": 6.390096480066703e-05, "loss": 2.1913, "step": 5697 }, { "epoch": 0.4289128512015657, "grad_norm": 3.9160053730010986, "learning_rate": 6.388925411634708e-05, "loss": 1.5613, "step": 5698 }, { "epoch": 0.42898812548222587, "grad_norm": 4.172816276550293, "learning_rate": 6.387754260635081e-05, "loss": 2.2435, "step": 5699 }, { "epoch": 0.429063399762886, "grad_norm": 4.686192035675049, "learning_rate": 6.386583027137447e-05, "loss": 1.7772, "step": 5700 }, { "epoch": 0.42913867404354616, "grad_norm": 5.868608474731445, "learning_rate": 6.385411711211429e-05, "loss": 2.0232, "step": 5701 }, { "epoch": 0.42921394832420634, "grad_norm": 7.952432632446289, "learning_rate": 6.384240312926663e-05, "loss": 2.1553, "step": 5702 }, { "epoch": 0.42928922260486646, "grad_norm": 5.211426258087158, "learning_rate": 6.38306883235278e-05, "loss": 2.0059, "step": 5703 }, { "epoch": 0.42936449688552664, "grad_norm": 4.585365295410156, "learning_rate": 6.381897269559423e-05, "loss": 1.8291, "step": 5704 }, { "epoch": 0.4294397711661868, "grad_norm": 3.78871750831604, "learning_rate": 6.38072562461624e-05, "loss": 1.8944, "step": 5705 }, { "epoch": 0.42951504544684693, "grad_norm": 3.7727975845336914, "learning_rate": 6.37955389759288e-05, "loss": 1.7849, "step": 5706 }, { "epoch": 0.4295903197275071, "grad_norm": 4.853158950805664, "learning_rate": 6.378382088558998e-05, "loss": 1.9812, "step": 5707 }, { "epoch": 0.4296655940081673, "grad_norm": 6.993547439575195, "learning_rate": 6.377210197584256e-05, "loss": 2.1997, "step": 5708 }, { "epoch": 0.4297408682888274, "grad_norm": 4.194733619689941, "learning_rate": 6.37603822473832e-05, "loss": 1.7033, "step": 5709 }, { "epoch": 0.4298161425694876, "grad_norm": 5.313787460327148, "learning_rate": 6.374866170090858e-05, "loss": 2.2826, "step": 5710 }, { "epoch": 0.4298914168501477, "grad_norm": 3.630655527114868, "learning_rate": 6.373694033711551e-05, "loss": 1.9933, "step": 5711 }, { "epoch": 0.4299666911308079, "grad_norm": 4.226672172546387, "learning_rate": 6.372521815670072e-05, "loss": 2.0093, "step": 5712 }, { "epoch": 0.43004196541146805, "grad_norm": 5.1546630859375, "learning_rate": 6.371349516036111e-05, "loss": 2.1626, "step": 5713 }, { "epoch": 0.4301172396921282, "grad_norm": 4.447945594787598, "learning_rate": 6.370177134879356e-05, "loss": 1.7292, "step": 5714 }, { "epoch": 0.43019251397278835, "grad_norm": 4.924492359161377, "learning_rate": 6.3690046722695e-05, "loss": 1.8213, "step": 5715 }, { "epoch": 0.4302677882534485, "grad_norm": 5.612069606781006, "learning_rate": 6.367832128276245e-05, "loss": 1.8508, "step": 5716 }, { "epoch": 0.43034306253410864, "grad_norm": 3.8491220474243164, "learning_rate": 6.366659502969297e-05, "loss": 1.8841, "step": 5717 }, { "epoch": 0.4304183368147688, "grad_norm": 5.035004138946533, "learning_rate": 6.365486796418361e-05, "loss": 1.8389, "step": 5718 }, { "epoch": 0.430493611095429, "grad_norm": 5.173888206481934, "learning_rate": 6.364314008693154e-05, "loss": 1.9747, "step": 5719 }, { "epoch": 0.4305688853760891, "grad_norm": 4.266820907592773, "learning_rate": 6.363141139863394e-05, "loss": 1.5425, "step": 5720 }, { "epoch": 0.4306441596567493, "grad_norm": 4.174493312835693, "learning_rate": 6.361968189998802e-05, "loss": 1.9229, "step": 5721 }, { "epoch": 0.4307194339374094, "grad_norm": 4.937036991119385, "learning_rate": 6.360795159169111e-05, "loss": 2.0161, "step": 5722 }, { "epoch": 0.4307947082180696, "grad_norm": 4.678271293640137, "learning_rate": 6.359622047444052e-05, "loss": 2.3053, "step": 5723 }, { "epoch": 0.43086998249872976, "grad_norm": 8.525506019592285, "learning_rate": 6.358448854893363e-05, "loss": 1.8797, "step": 5724 }, { "epoch": 0.4309452567793899, "grad_norm": 4.631918430328369, "learning_rate": 6.357275581586788e-05, "loss": 2.0413, "step": 5725 }, { "epoch": 0.43102053106005006, "grad_norm": 5.002639293670654, "learning_rate": 6.356102227594075e-05, "loss": 1.7297, "step": 5726 }, { "epoch": 0.43109580534071024, "grad_norm": 4.278130054473877, "learning_rate": 6.354928792984973e-05, "loss": 1.7146, "step": 5727 }, { "epoch": 0.43117107962137036, "grad_norm": 4.839090347290039, "learning_rate": 6.353755277829244e-05, "loss": 1.907, "step": 5728 }, { "epoch": 0.43124635390203053, "grad_norm": 4.6313252449035645, "learning_rate": 6.352581682196648e-05, "loss": 1.7001, "step": 5729 }, { "epoch": 0.43132162818269065, "grad_norm": 4.1141557693481445, "learning_rate": 6.351408006156953e-05, "loss": 2.1519, "step": 5730 }, { "epoch": 0.43139690246335083, "grad_norm": 4.779444694519043, "learning_rate": 6.350234249779929e-05, "loss": 1.9672, "step": 5731 }, { "epoch": 0.431472176744011, "grad_norm": 5.1121907234191895, "learning_rate": 6.349060413135352e-05, "loss": 2.4931, "step": 5732 }, { "epoch": 0.4315474510246711, "grad_norm": 4.61134147644043, "learning_rate": 6.347886496293003e-05, "loss": 2.0464, "step": 5733 }, { "epoch": 0.4316227253053313, "grad_norm": 4.426197528839111, "learning_rate": 6.346712499322673e-05, "loss": 2.0627, "step": 5734 }, { "epoch": 0.4316979995859915, "grad_norm": 7.25959587097168, "learning_rate": 6.345538422294148e-05, "loss": 2.3694, "step": 5735 }, { "epoch": 0.4317732738666516, "grad_norm": 3.6505913734436035, "learning_rate": 6.344364265277223e-05, "loss": 1.8285, "step": 5736 }, { "epoch": 0.4318485481473118, "grad_norm": 5.894350051879883, "learning_rate": 6.3431900283417e-05, "loss": 1.9006, "step": 5737 }, { "epoch": 0.43192382242797195, "grad_norm": 4.518353462219238, "learning_rate": 6.342015711557385e-05, "loss": 2.4241, "step": 5738 }, { "epoch": 0.43199909670863207, "grad_norm": 6.546237945556641, "learning_rate": 6.340841314994085e-05, "loss": 1.8049, "step": 5739 }, { "epoch": 0.43207437098929224, "grad_norm": 4.4490065574646, "learning_rate": 6.339666838721618e-05, "loss": 1.6927, "step": 5740 }, { "epoch": 0.43214964526995236, "grad_norm": 4.030456066131592, "learning_rate": 6.3384922828098e-05, "loss": 2.1084, "step": 5741 }, { "epoch": 0.43222491955061254, "grad_norm": 4.70572566986084, "learning_rate": 6.337317647328458e-05, "loss": 1.8181, "step": 5742 }, { "epoch": 0.4323001938312727, "grad_norm": 3.6510467529296875, "learning_rate": 6.336142932347417e-05, "loss": 2.2258, "step": 5743 }, { "epoch": 0.43237546811193284, "grad_norm": 6.072367191314697, "learning_rate": 6.334968137936514e-05, "loss": 1.5756, "step": 5744 }, { "epoch": 0.432450742392593, "grad_norm": 4.301730155944824, "learning_rate": 6.333793264165586e-05, "loss": 1.8066, "step": 5745 }, { "epoch": 0.4325260166732532, "grad_norm": 3.0722105503082275, "learning_rate": 6.332618311104474e-05, "loss": 1.8871, "step": 5746 }, { "epoch": 0.4326012909539133, "grad_norm": 5.575454235076904, "learning_rate": 6.331443278823029e-05, "loss": 2.3544, "step": 5747 }, { "epoch": 0.4326765652345735, "grad_norm": 5.277907371520996, "learning_rate": 6.3302681673911e-05, "loss": 1.6439, "step": 5748 }, { "epoch": 0.4327518395152336, "grad_norm": 6.179056167602539, "learning_rate": 6.329092976878545e-05, "loss": 2.3607, "step": 5749 }, { "epoch": 0.4328271137958938, "grad_norm": 4.6801605224609375, "learning_rate": 6.327917707355227e-05, "loss": 1.8758, "step": 5750 }, { "epoch": 0.43290238807655396, "grad_norm": 6.048764705657959, "learning_rate": 6.326742358891011e-05, "loss": 1.9761, "step": 5751 }, { "epoch": 0.4329776623572141, "grad_norm": 4.253023624420166, "learning_rate": 6.32556693155577e-05, "loss": 1.9671, "step": 5752 }, { "epoch": 0.43305293663787425, "grad_norm": 7.505892276763916, "learning_rate": 6.324391425419379e-05, "loss": 2.0121, "step": 5753 }, { "epoch": 0.43312821091853443, "grad_norm": 6.723575592041016, "learning_rate": 6.323215840551714e-05, "loss": 2.3984, "step": 5754 }, { "epoch": 0.43320348519919455, "grad_norm": 5.20761251449585, "learning_rate": 6.322040177022666e-05, "loss": 1.7984, "step": 5755 }, { "epoch": 0.4332787594798547, "grad_norm": 5.013870716094971, "learning_rate": 6.320864434902122e-05, "loss": 2.2126, "step": 5756 }, { "epoch": 0.4333540337605149, "grad_norm": 4.628096103668213, "learning_rate": 6.319688614259979e-05, "loss": 2.038, "step": 5757 }, { "epoch": 0.433429308041175, "grad_norm": 7.383403778076172, "learning_rate": 6.318512715166135e-05, "loss": 2.0267, "step": 5758 }, { "epoch": 0.4335045823218352, "grad_norm": 4.653458595275879, "learning_rate": 6.317336737690493e-05, "loss": 1.9686, "step": 5759 }, { "epoch": 0.4335798566024953, "grad_norm": 3.618833303451538, "learning_rate": 6.31616068190296e-05, "loss": 2.2715, "step": 5760 }, { "epoch": 0.4336551308831555, "grad_norm": 5.47083044052124, "learning_rate": 6.314984547873451e-05, "loss": 1.7802, "step": 5761 }, { "epoch": 0.43373040516381567, "grad_norm": 5.446893692016602, "learning_rate": 6.313808335671886e-05, "loss": 1.773, "step": 5762 }, { "epoch": 0.4338056794444758, "grad_norm": 8.371232032775879, "learning_rate": 6.312632045368184e-05, "loss": 1.9115, "step": 5763 }, { "epoch": 0.43388095372513596, "grad_norm": 7.154722213745117, "learning_rate": 6.311455677032274e-05, "loss": 2.0646, "step": 5764 }, { "epoch": 0.43395622800579614, "grad_norm": 5.043438911437988, "learning_rate": 6.310279230734084e-05, "loss": 1.9507, "step": 5765 }, { "epoch": 0.43403150228645626, "grad_norm": 3.613232135772705, "learning_rate": 6.309102706543556e-05, "loss": 1.9675, "step": 5766 }, { "epoch": 0.43410677656711644, "grad_norm": 3.601789712905884, "learning_rate": 6.307926104530629e-05, "loss": 1.9699, "step": 5767 }, { "epoch": 0.4341820508477766, "grad_norm": 6.2960944175720215, "learning_rate": 6.306749424765246e-05, "loss": 2.1078, "step": 5768 }, { "epoch": 0.43425732512843673, "grad_norm": 4.588296413421631, "learning_rate": 6.30557266731736e-05, "loss": 1.8613, "step": 5769 }, { "epoch": 0.4343325994090969, "grad_norm": 5.749566555023193, "learning_rate": 6.304395832256925e-05, "loss": 1.8505, "step": 5770 }, { "epoch": 0.43440787368975703, "grad_norm": 3.626641273498535, "learning_rate": 6.3032189196539e-05, "loss": 2.1019, "step": 5771 }, { "epoch": 0.4344831479704172, "grad_norm": 4.830503940582275, "learning_rate": 6.30204192957825e-05, "loss": 2.1722, "step": 5772 }, { "epoch": 0.4345584222510774, "grad_norm": 5.286192893981934, "learning_rate": 6.300864862099943e-05, "loss": 2.0046, "step": 5773 }, { "epoch": 0.4346336965317375, "grad_norm": 4.790497779846191, "learning_rate": 6.299687717288953e-05, "loss": 1.8867, "step": 5774 }, { "epoch": 0.4347089708123977, "grad_norm": 3.465799331665039, "learning_rate": 6.298510495215258e-05, "loss": 2.2847, "step": 5775 }, { "epoch": 0.43478424509305785, "grad_norm": 4.555606365203857, "learning_rate": 6.29733319594884e-05, "loss": 1.8833, "step": 5776 }, { "epoch": 0.434859519373718, "grad_norm": 6.483745574951172, "learning_rate": 6.296155819559684e-05, "loss": 1.7383, "step": 5777 }, { "epoch": 0.43493479365437815, "grad_norm": 3.956658363342285, "learning_rate": 6.294978366117786e-05, "loss": 1.7581, "step": 5778 }, { "epoch": 0.43501006793503827, "grad_norm": 4.499672889709473, "learning_rate": 6.293800835693137e-05, "loss": 2.0297, "step": 5779 }, { "epoch": 0.43508534221569845, "grad_norm": 4.4785637855529785, "learning_rate": 6.292623228355743e-05, "loss": 1.8827, "step": 5780 }, { "epoch": 0.4351606164963586, "grad_norm": 5.152351379394531, "learning_rate": 6.291445544175607e-05, "loss": 2.0522, "step": 5781 }, { "epoch": 0.43523589077701874, "grad_norm": 5.675931453704834, "learning_rate": 6.290267783222737e-05, "loss": 1.8767, "step": 5782 }, { "epoch": 0.4353111650576789, "grad_norm": 4.8720855712890625, "learning_rate": 6.289089945567151e-05, "loss": 1.8225, "step": 5783 }, { "epoch": 0.4353864393383391, "grad_norm": 8.385793685913086, "learning_rate": 6.287912031278868e-05, "loss": 2.1011, "step": 5784 }, { "epoch": 0.4354617136189992, "grad_norm": 3.90242075920105, "learning_rate": 6.286734040427908e-05, "loss": 1.5936, "step": 5785 }, { "epoch": 0.4355369878996594, "grad_norm": 4.243035316467285, "learning_rate": 6.285555973084302e-05, "loss": 1.7424, "step": 5786 }, { "epoch": 0.43561226218031956, "grad_norm": 6.727688312530518, "learning_rate": 6.28437782931808e-05, "loss": 1.6863, "step": 5787 }, { "epoch": 0.4356875364609797, "grad_norm": 4.153524398803711, "learning_rate": 6.283199609199285e-05, "loss": 2.2002, "step": 5788 }, { "epoch": 0.43576281074163986, "grad_norm": 4.874256610870361, "learning_rate": 6.282021312797952e-05, "loss": 1.8772, "step": 5789 }, { "epoch": 0.4358380850223, "grad_norm": 4.528047561645508, "learning_rate": 6.280842940184133e-05, "loss": 1.8688, "step": 5790 }, { "epoch": 0.43591335930296016, "grad_norm": 4.553868293762207, "learning_rate": 6.279664491427875e-05, "loss": 2.0458, "step": 5791 }, { "epoch": 0.43598863358362033, "grad_norm": 6.397035121917725, "learning_rate": 6.278485966599235e-05, "loss": 1.7075, "step": 5792 }, { "epoch": 0.43606390786428045, "grad_norm": 4.026989936828613, "learning_rate": 6.277307365768273e-05, "loss": 1.9292, "step": 5793 }, { "epoch": 0.43613918214494063, "grad_norm": 6.154115200042725, "learning_rate": 6.276128689005053e-05, "loss": 2.0618, "step": 5794 }, { "epoch": 0.4362144564256008, "grad_norm": 4.665353775024414, "learning_rate": 6.274949936379644e-05, "loss": 2.0919, "step": 5795 }, { "epoch": 0.4362897307062609, "grad_norm": 5.1261115074157715, "learning_rate": 6.273771107962119e-05, "loss": 1.8395, "step": 5796 }, { "epoch": 0.4363650049869211, "grad_norm": 4.472558498382568, "learning_rate": 6.272592203822558e-05, "loss": 1.7939, "step": 5797 }, { "epoch": 0.4364402792675812, "grad_norm": 6.943745136260986, "learning_rate": 6.27141322403104e-05, "loss": 2.2446, "step": 5798 }, { "epoch": 0.4365155535482414, "grad_norm": 5.025240898132324, "learning_rate": 6.270234168657653e-05, "loss": 1.8791, "step": 5799 }, { "epoch": 0.4365908278289016, "grad_norm": 4.878539562225342, "learning_rate": 6.269055037772492e-05, "loss": 2.0237, "step": 5800 }, { "epoch": 0.4366661021095617, "grad_norm": 3.9647951126098633, "learning_rate": 6.26787583144565e-05, "loss": 1.9357, "step": 5801 }, { "epoch": 0.43674137639022187, "grad_norm": 3.8735458850860596, "learning_rate": 6.266696549747227e-05, "loss": 2.0257, "step": 5802 }, { "epoch": 0.43681665067088205, "grad_norm": 4.7664690017700195, "learning_rate": 6.26551719274733e-05, "loss": 2.404, "step": 5803 }, { "epoch": 0.43689192495154217, "grad_norm": 3.4478893280029297, "learning_rate": 6.264337760516064e-05, "loss": 2.0116, "step": 5804 }, { "epoch": 0.43696719923220234, "grad_norm": 4.413941383361816, "learning_rate": 6.263158253123548e-05, "loss": 1.8766, "step": 5805 }, { "epoch": 0.4370424735128625, "grad_norm": 3.876295804977417, "learning_rate": 6.261978670639899e-05, "loss": 1.8091, "step": 5806 }, { "epoch": 0.43711774779352264, "grad_norm": 4.756508827209473, "learning_rate": 6.260799013135237e-05, "loss": 1.7618, "step": 5807 }, { "epoch": 0.4371930220741828, "grad_norm": 4.375035762786865, "learning_rate": 6.259619280679694e-05, "loss": 1.8209, "step": 5808 }, { "epoch": 0.43726829635484293, "grad_norm": 4.6030497550964355, "learning_rate": 6.258439473343398e-05, "loss": 1.9201, "step": 5809 }, { "epoch": 0.4373435706355031, "grad_norm": 4.600318431854248, "learning_rate": 6.257259591196484e-05, "loss": 2.2298, "step": 5810 }, { "epoch": 0.4374188449161633, "grad_norm": 4.446491241455078, "learning_rate": 6.256079634309098e-05, "loss": 2.1536, "step": 5811 }, { "epoch": 0.4374941191968234, "grad_norm": 3.885686159133911, "learning_rate": 6.25489960275138e-05, "loss": 1.8866, "step": 5812 }, { "epoch": 0.4375693934774836, "grad_norm": 3.783769130706787, "learning_rate": 6.253719496593482e-05, "loss": 1.7782, "step": 5813 }, { "epoch": 0.43764466775814376, "grad_norm": 4.285191535949707, "learning_rate": 6.252539315905557e-05, "loss": 1.9723, "step": 5814 }, { "epoch": 0.4377199420388039, "grad_norm": 4.648037910461426, "learning_rate": 6.251359060757762e-05, "loss": 2.0891, "step": 5815 }, { "epoch": 0.43779521631946405, "grad_norm": 4.315683364868164, "learning_rate": 6.250178731220265e-05, "loss": 1.5708, "step": 5816 }, { "epoch": 0.43787049060012423, "grad_norm": 4.218448162078857, "learning_rate": 6.248998327363227e-05, "loss": 1.8053, "step": 5817 }, { "epoch": 0.43794576488078435, "grad_norm": 3.898834466934204, "learning_rate": 6.247817849256823e-05, "loss": 1.8033, "step": 5818 }, { "epoch": 0.4380210391614445, "grad_norm": 3.9393954277038574, "learning_rate": 6.246637296971228e-05, "loss": 1.6337, "step": 5819 }, { "epoch": 0.43809631344210465, "grad_norm": 5.663894176483154, "learning_rate": 6.245456670576621e-05, "loss": 1.7471, "step": 5820 }, { "epoch": 0.4381715877227648, "grad_norm": 3.4593889713287354, "learning_rate": 6.244275970143191e-05, "loss": 1.6809, "step": 5821 }, { "epoch": 0.438246862003425, "grad_norm": 4.155667304992676, "learning_rate": 6.243095195741124e-05, "loss": 2.1139, "step": 5822 }, { "epoch": 0.4383221362840851, "grad_norm": 4.147579669952393, "learning_rate": 6.241914347440614e-05, "loss": 1.862, "step": 5823 }, { "epoch": 0.4383974105647453, "grad_norm": 4.642208099365234, "learning_rate": 6.240733425311859e-05, "loss": 1.9903, "step": 5824 }, { "epoch": 0.43847268484540547, "grad_norm": 4.561496257781982, "learning_rate": 6.239552429425063e-05, "loss": 2.0583, "step": 5825 }, { "epoch": 0.4385479591260656, "grad_norm": 4.853338241577148, "learning_rate": 6.238371359850433e-05, "loss": 1.8325, "step": 5826 }, { "epoch": 0.43862323340672577, "grad_norm": 4.065107345581055, "learning_rate": 6.237190216658177e-05, "loss": 2.3571, "step": 5827 }, { "epoch": 0.4386985076873859, "grad_norm": 4.894888401031494, "learning_rate": 6.236008999918514e-05, "loss": 2.1457, "step": 5828 }, { "epoch": 0.43877378196804606, "grad_norm": 5.1090569496154785, "learning_rate": 6.234827709701662e-05, "loss": 1.7546, "step": 5829 }, { "epoch": 0.43884905624870624, "grad_norm": 4.411669731140137, "learning_rate": 6.233646346077844e-05, "loss": 1.8638, "step": 5830 }, { "epoch": 0.43892433052936636, "grad_norm": 6.119626998901367, "learning_rate": 6.232464909117292e-05, "loss": 2.1074, "step": 5831 }, { "epoch": 0.43899960481002653, "grad_norm": 5.595834255218506, "learning_rate": 6.231283398890237e-05, "loss": 1.8592, "step": 5832 }, { "epoch": 0.4390748790906867, "grad_norm": 4.224753379821777, "learning_rate": 6.230101815466917e-05, "loss": 1.7581, "step": 5833 }, { "epoch": 0.43915015337134683, "grad_norm": 6.337724685668945, "learning_rate": 6.228920158917574e-05, "loss": 1.6936, "step": 5834 }, { "epoch": 0.439225427652007, "grad_norm": 4.504908084869385, "learning_rate": 6.227738429312456e-05, "loss": 2.0062, "step": 5835 }, { "epoch": 0.4393007019326672, "grad_norm": 4.386195659637451, "learning_rate": 6.226556626721808e-05, "loss": 2.2207, "step": 5836 }, { "epoch": 0.4393759762133273, "grad_norm": 3.7572054862976074, "learning_rate": 6.225374751215888e-05, "loss": 2.2299, "step": 5837 }, { "epoch": 0.4394512504939875, "grad_norm": 5.244462490081787, "learning_rate": 6.224192802864955e-05, "loss": 2.0849, "step": 5838 }, { "epoch": 0.4395265247746476, "grad_norm": 5.741331577301025, "learning_rate": 6.223010781739273e-05, "loss": 2.0038, "step": 5839 }, { "epoch": 0.4396017990553078, "grad_norm": 3.723790168762207, "learning_rate": 6.221828687909109e-05, "loss": 1.7946, "step": 5840 }, { "epoch": 0.43967707333596795, "grad_norm": 7.204499244689941, "learning_rate": 6.220646521444736e-05, "loss": 2.2006, "step": 5841 }, { "epoch": 0.43975234761662807, "grad_norm": 3.997116804122925, "learning_rate": 6.21946428241643e-05, "loss": 2.2271, "step": 5842 }, { "epoch": 0.43982762189728825, "grad_norm": 4.753002643585205, "learning_rate": 6.21828197089447e-05, "loss": 1.6895, "step": 5843 }, { "epoch": 0.4399028961779484, "grad_norm": 5.885253429412842, "learning_rate": 6.217099586949143e-05, "loss": 2.0463, "step": 5844 }, { "epoch": 0.43997817045860854, "grad_norm": 3.3102731704711914, "learning_rate": 6.215917130650739e-05, "loss": 1.7357, "step": 5845 }, { "epoch": 0.4400534447392687, "grad_norm": 5.026159286499023, "learning_rate": 6.21473460206955e-05, "loss": 1.8191, "step": 5846 }, { "epoch": 0.4401287190199289, "grad_norm": 5.543838977813721, "learning_rate": 6.213552001275874e-05, "loss": 2.1985, "step": 5847 }, { "epoch": 0.440203993300589, "grad_norm": 5.000566005706787, "learning_rate": 6.212369328340014e-05, "loss": 1.8587, "step": 5848 }, { "epoch": 0.4402792675812492, "grad_norm": 5.314465522766113, "learning_rate": 6.211186583332275e-05, "loss": 2.0911, "step": 5849 }, { "epoch": 0.4403545418619093, "grad_norm": 6.697752475738525, "learning_rate": 6.210003766322969e-05, "loss": 2.1173, "step": 5850 }, { "epoch": 0.4404298161425695, "grad_norm": 5.667004108428955, "learning_rate": 6.208820877382414e-05, "loss": 1.9221, "step": 5851 }, { "epoch": 0.44050509042322966, "grad_norm": 4.934892177581787, "learning_rate": 6.207637916580923e-05, "loss": 1.5931, "step": 5852 }, { "epoch": 0.4405803647038898, "grad_norm": 3.7798006534576416, "learning_rate": 6.206454883988823e-05, "loss": 1.716, "step": 5853 }, { "epoch": 0.44065563898454996, "grad_norm": 6.391505241394043, "learning_rate": 6.205271779676443e-05, "loss": 1.6619, "step": 5854 }, { "epoch": 0.44073091326521013, "grad_norm": 5.354294776916504, "learning_rate": 6.204088603714113e-05, "loss": 1.9408, "step": 5855 }, { "epoch": 0.44080618754587025, "grad_norm": 6.182084083557129, "learning_rate": 6.202905356172171e-05, "loss": 2.0821, "step": 5856 }, { "epoch": 0.44088146182653043, "grad_norm": 4.672182083129883, "learning_rate": 6.201722037120957e-05, "loss": 1.8795, "step": 5857 }, { "epoch": 0.44095673610719055, "grad_norm": 5.455212593078613, "learning_rate": 6.200538646630817e-05, "loss": 2.393, "step": 5858 }, { "epoch": 0.4410320103878507, "grad_norm": 3.989269971847534, "learning_rate": 6.199355184772099e-05, "loss": 1.9019, "step": 5859 }, { "epoch": 0.4411072846685109, "grad_norm": 4.534832000732422, "learning_rate": 6.198171651615155e-05, "loss": 2.1006, "step": 5860 }, { "epoch": 0.441182558949171, "grad_norm": 5.176645755767822, "learning_rate": 6.196988047230345e-05, "loss": 1.8378, "step": 5861 }, { "epoch": 0.4412578332298312, "grad_norm": 3.412224769592285, "learning_rate": 6.195804371688031e-05, "loss": 1.6773, "step": 5862 }, { "epoch": 0.4413331075104914, "grad_norm": 5.141180038452148, "learning_rate": 6.194620625058578e-05, "loss": 1.7391, "step": 5863 }, { "epoch": 0.4414083817911515, "grad_norm": 3.93776798248291, "learning_rate": 6.193436807412356e-05, "loss": 1.9573, "step": 5864 }, { "epoch": 0.44148365607181167, "grad_norm": 6.58004903793335, "learning_rate": 6.19225291881974e-05, "loss": 1.9819, "step": 5865 }, { "epoch": 0.44155893035247185, "grad_norm": 4.9218525886535645, "learning_rate": 6.19106895935111e-05, "loss": 2.1121, "step": 5866 }, { "epoch": 0.44163420463313197, "grad_norm": 4.046970844268799, "learning_rate": 6.189884929076847e-05, "loss": 1.7778, "step": 5867 }, { "epoch": 0.44170947891379214, "grad_norm": 3.4083220958709717, "learning_rate": 6.18870082806734e-05, "loss": 1.7752, "step": 5868 }, { "epoch": 0.44178475319445226, "grad_norm": 6.671065807342529, "learning_rate": 6.187516656392978e-05, "loss": 2.0058, "step": 5869 }, { "epoch": 0.44186002747511244, "grad_norm": 6.113986015319824, "learning_rate": 6.18633241412416e-05, "loss": 1.6301, "step": 5870 }, { "epoch": 0.4419353017557726, "grad_norm": 5.218296051025391, "learning_rate": 6.185148101331281e-05, "loss": 1.9825, "step": 5871 }, { "epoch": 0.44201057603643273, "grad_norm": 4.4001688957214355, "learning_rate": 6.183963718084752e-05, "loss": 1.9731, "step": 5872 }, { "epoch": 0.4420858503170929, "grad_norm": 3.642742395401001, "learning_rate": 6.182779264454974e-05, "loss": 1.9946, "step": 5873 }, { "epoch": 0.4421611245977531, "grad_norm": 6.567331314086914, "learning_rate": 6.181594740512365e-05, "loss": 1.9168, "step": 5874 }, { "epoch": 0.4422363988784132, "grad_norm": 4.609991550445557, "learning_rate": 6.180410146327336e-05, "loss": 2.1172, "step": 5875 }, { "epoch": 0.4423116731590734, "grad_norm": 6.285871505737305, "learning_rate": 6.179225481970313e-05, "loss": 1.5599, "step": 5876 }, { "epoch": 0.4423869474397335, "grad_norm": 5.3340983390808105, "learning_rate": 6.178040747511717e-05, "loss": 2.3341, "step": 5877 }, { "epoch": 0.4424622217203937, "grad_norm": 4.116152763366699, "learning_rate": 6.176855943021981e-05, "loss": 1.8963, "step": 5878 }, { "epoch": 0.44253749600105385, "grad_norm": 3.549401044845581, "learning_rate": 6.175671068571535e-05, "loss": 2.0334, "step": 5879 }, { "epoch": 0.442612770281714, "grad_norm": 4.7267022132873535, "learning_rate": 6.174486124230817e-05, "loss": 1.877, "step": 5880 }, { "epoch": 0.44268804456237415, "grad_norm": 5.716840744018555, "learning_rate": 6.17330111007027e-05, "loss": 2.0898, "step": 5881 }, { "epoch": 0.4427633188430343, "grad_norm": 4.019646644592285, "learning_rate": 6.172116026160337e-05, "loss": 1.8261, "step": 5882 }, { "epoch": 0.44283859312369445, "grad_norm": 5.86329460144043, "learning_rate": 6.17093087257147e-05, "loss": 1.7064, "step": 5883 }, { "epoch": 0.4429138674043546, "grad_norm": 4.133573532104492, "learning_rate": 6.169745649374122e-05, "loss": 2.0934, "step": 5884 }, { "epoch": 0.4429891416850148, "grad_norm": 5.172225475311279, "learning_rate": 6.168560356638752e-05, "loss": 1.8559, "step": 5885 }, { "epoch": 0.4430644159656749, "grad_norm": 6.817885875701904, "learning_rate": 6.167374994435822e-05, "loss": 2.1575, "step": 5886 }, { "epoch": 0.4431396902463351, "grad_norm": 7.958605766296387, "learning_rate": 6.166189562835798e-05, "loss": 1.9088, "step": 5887 }, { "epoch": 0.4432149645269952, "grad_norm": 4.35015869140625, "learning_rate": 6.16500406190915e-05, "loss": 1.6406, "step": 5888 }, { "epoch": 0.4432902388076554, "grad_norm": 4.303564071655273, "learning_rate": 6.163818491726354e-05, "loss": 1.8652, "step": 5889 }, { "epoch": 0.44336551308831557, "grad_norm": 3.940829277038574, "learning_rate": 6.162632852357887e-05, "loss": 1.9498, "step": 5890 }, { "epoch": 0.4434407873689757, "grad_norm": 5.269733428955078, "learning_rate": 6.161447143874234e-05, "loss": 2.1676, "step": 5891 }, { "epoch": 0.44351606164963586, "grad_norm": 5.32184362411499, "learning_rate": 6.160261366345882e-05, "loss": 1.7529, "step": 5892 }, { "epoch": 0.44359133593029604, "grad_norm": 6.089979648590088, "learning_rate": 6.159075519843319e-05, "loss": 2.0035, "step": 5893 }, { "epoch": 0.44366661021095616, "grad_norm": 4.3718414306640625, "learning_rate": 6.157889604437043e-05, "loss": 1.7562, "step": 5894 }, { "epoch": 0.44374188449161633, "grad_norm": 4.9573516845703125, "learning_rate": 6.156703620197553e-05, "loss": 2.0056, "step": 5895 }, { "epoch": 0.4438171587722765, "grad_norm": 3.8163676261901855, "learning_rate": 6.155517567195351e-05, "loss": 1.7949, "step": 5896 }, { "epoch": 0.44389243305293663, "grad_norm": 4.765012264251709, "learning_rate": 6.154331445500945e-05, "loss": 1.9379, "step": 5897 }, { "epoch": 0.4439677073335968, "grad_norm": 5.006673336029053, "learning_rate": 6.153145255184846e-05, "loss": 1.6598, "step": 5898 }, { "epoch": 0.4440429816142569, "grad_norm": 4.2472734451293945, "learning_rate": 6.15195899631757e-05, "loss": 1.9413, "step": 5899 }, { "epoch": 0.4441182558949171, "grad_norm": 6.310937404632568, "learning_rate": 6.150772668969639e-05, "loss": 1.816, "step": 5900 }, { "epoch": 0.4441935301755773, "grad_norm": 5.364302158355713, "learning_rate": 6.149586273211573e-05, "loss": 1.8705, "step": 5901 }, { "epoch": 0.4442688044562374, "grad_norm": 3.4378249645233154, "learning_rate": 6.148399809113903e-05, "loss": 1.8854, "step": 5902 }, { "epoch": 0.4443440787368976, "grad_norm": 5.181258201599121, "learning_rate": 6.147213276747159e-05, "loss": 1.9383, "step": 5903 }, { "epoch": 0.44441935301755775, "grad_norm": 5.779449939727783, "learning_rate": 6.146026676181877e-05, "loss": 1.945, "step": 5904 }, { "epoch": 0.44449462729821787, "grad_norm": 4.231904983520508, "learning_rate": 6.144840007488598e-05, "loss": 1.9648, "step": 5905 }, { "epoch": 0.44456990157887805, "grad_norm": 4.831796646118164, "learning_rate": 6.143653270737866e-05, "loss": 2.0543, "step": 5906 }, { "epoch": 0.44464517585953817, "grad_norm": 5.520266532897949, "learning_rate": 6.142466466000228e-05, "loss": 1.5273, "step": 5907 }, { "epoch": 0.44472045014019834, "grad_norm": 4.027518272399902, "learning_rate": 6.141279593346237e-05, "loss": 1.9979, "step": 5908 }, { "epoch": 0.4447957244208585, "grad_norm": 3.4035582542419434, "learning_rate": 6.140092652846448e-05, "loss": 1.9474, "step": 5909 }, { "epoch": 0.44487099870151864, "grad_norm": 4.433760643005371, "learning_rate": 6.138905644571421e-05, "loss": 1.9754, "step": 5910 }, { "epoch": 0.4449462729821788, "grad_norm": 7.222991943359375, "learning_rate": 6.137718568591722e-05, "loss": 2.4565, "step": 5911 }, { "epoch": 0.445021547262839, "grad_norm": 5.692912578582764, "learning_rate": 6.13653142497792e-05, "loss": 2.0414, "step": 5912 }, { "epoch": 0.4450968215434991, "grad_norm": 4.777101993560791, "learning_rate": 6.135344213800586e-05, "loss": 2.2095, "step": 5913 }, { "epoch": 0.4451720958241593, "grad_norm": 4.028824806213379, "learning_rate": 6.134156935130296e-05, "loss": 2.0912, "step": 5914 }, { "epoch": 0.44524737010481946, "grad_norm": 3.3961150646209717, "learning_rate": 6.13296958903763e-05, "loss": 1.7184, "step": 5915 }, { "epoch": 0.4453226443854796, "grad_norm": 5.075456142425537, "learning_rate": 6.131782175593172e-05, "loss": 2.1433, "step": 5916 }, { "epoch": 0.44539791866613976, "grad_norm": 5.175343990325928, "learning_rate": 6.130594694867512e-05, "loss": 1.9034, "step": 5917 }, { "epoch": 0.4454731929467999, "grad_norm": 3.562257766723633, "learning_rate": 6.12940714693124e-05, "loss": 2.0215, "step": 5918 }, { "epoch": 0.44554846722746005, "grad_norm": 4.450735092163086, "learning_rate": 6.128219531854956e-05, "loss": 1.9351, "step": 5919 }, { "epoch": 0.44562374150812023, "grad_norm": 6.513960361480713, "learning_rate": 6.127031849709257e-05, "loss": 1.8493, "step": 5920 }, { "epoch": 0.44569901578878035, "grad_norm": 3.5960655212402344, "learning_rate": 6.125844100564746e-05, "loss": 1.9367, "step": 5921 }, { "epoch": 0.4457742900694405, "grad_norm": 5.0706095695495605, "learning_rate": 6.124656284492036e-05, "loss": 1.8621, "step": 5922 }, { "epoch": 0.4458495643501007, "grad_norm": 3.510718584060669, "learning_rate": 6.123468401561738e-05, "loss": 1.8233, "step": 5923 }, { "epoch": 0.4459248386307608, "grad_norm": 3.0832386016845703, "learning_rate": 6.122280451844467e-05, "loss": 1.9186, "step": 5924 }, { "epoch": 0.446000112911421, "grad_norm": 3.89540958404541, "learning_rate": 6.121092435410841e-05, "loss": 1.8799, "step": 5925 }, { "epoch": 0.4460753871920811, "grad_norm": 6.350008964538574, "learning_rate": 6.119904352331488e-05, "loss": 1.951, "step": 5926 }, { "epoch": 0.4461506614727413, "grad_norm": 3.2523128986358643, "learning_rate": 6.118716202677033e-05, "loss": 1.9655, "step": 5927 }, { "epoch": 0.44622593575340147, "grad_norm": 6.013514518737793, "learning_rate": 6.117527986518113e-05, "loss": 1.8137, "step": 5928 }, { "epoch": 0.4463012100340616, "grad_norm": 5.659713268280029, "learning_rate": 6.116339703925358e-05, "loss": 1.9177, "step": 5929 }, { "epoch": 0.44637648431472177, "grad_norm": 5.024892330169678, "learning_rate": 6.115151354969413e-05, "loss": 2.0692, "step": 5930 }, { "epoch": 0.44645175859538194, "grad_norm": 4.851288318634033, "learning_rate": 6.113962939720918e-05, "loss": 1.9722, "step": 5931 }, { "epoch": 0.44652703287604206, "grad_norm": 4.694975852966309, "learning_rate": 6.112774458250522e-05, "loss": 2.0585, "step": 5932 }, { "epoch": 0.44660230715670224, "grad_norm": 5.526699066162109, "learning_rate": 6.111585910628878e-05, "loss": 2.1207, "step": 5933 }, { "epoch": 0.4466775814373624, "grad_norm": 4.565197467803955, "learning_rate": 6.110397296926641e-05, "loss": 2.1849, "step": 5934 }, { "epoch": 0.44675285571802253, "grad_norm": 3.6555278301239014, "learning_rate": 6.10920861721447e-05, "loss": 1.9686, "step": 5935 }, { "epoch": 0.4468281299986827, "grad_norm": 5.101633548736572, "learning_rate": 6.108019871563031e-05, "loss": 1.7502, "step": 5936 }, { "epoch": 0.44690340427934283, "grad_norm": 4.757491111755371, "learning_rate": 6.10683106004299e-05, "loss": 1.8732, "step": 5937 }, { "epoch": 0.446978678560003, "grad_norm": 6.349032402038574, "learning_rate": 6.105642182725017e-05, "loss": 2.1068, "step": 5938 }, { "epoch": 0.4470539528406632, "grad_norm": 4.216952800750732, "learning_rate": 6.104453239679789e-05, "loss": 1.8794, "step": 5939 }, { "epoch": 0.4471292271213233, "grad_norm": 5.036924839019775, "learning_rate": 6.103264230977986e-05, "loss": 1.7914, "step": 5940 }, { "epoch": 0.4472045014019835, "grad_norm": 3.6164581775665283, "learning_rate": 6.10207515669029e-05, "loss": 2.0867, "step": 5941 }, { "epoch": 0.44727977568264365, "grad_norm": 5.532937526702881, "learning_rate": 6.100886016887387e-05, "loss": 1.8802, "step": 5942 }, { "epoch": 0.4473550499633038, "grad_norm": 6.9371562004089355, "learning_rate": 6.0996968116399686e-05, "loss": 1.9058, "step": 5943 }, { "epoch": 0.44743032424396395, "grad_norm": 5.944525241851807, "learning_rate": 6.098507541018732e-05, "loss": 2.2233, "step": 5944 }, { "epoch": 0.4475055985246241, "grad_norm": 4.571796894073486, "learning_rate": 6.097318205094373e-05, "loss": 1.6698, "step": 5945 }, { "epoch": 0.44758087280528425, "grad_norm": 5.640833377838135, "learning_rate": 6.096128803937596e-05, "loss": 1.9164, "step": 5946 }, { "epoch": 0.4476561470859444, "grad_norm": 4.008815765380859, "learning_rate": 6.0949393376191066e-05, "loss": 1.6408, "step": 5947 }, { "epoch": 0.44773142136660454, "grad_norm": 4.3384246826171875, "learning_rate": 6.0937498062096146e-05, "loss": 1.8556, "step": 5948 }, { "epoch": 0.4478066956472647, "grad_norm": 6.0752153396606445, "learning_rate": 6.092560209779835e-05, "loss": 2.1825, "step": 5949 }, { "epoch": 0.4478819699279249, "grad_norm": 4.470860481262207, "learning_rate": 6.091370548400486e-05, "loss": 2.0284, "step": 5950 }, { "epoch": 0.447957244208585, "grad_norm": 4.851217269897461, "learning_rate": 6.0901808221422885e-05, "loss": 2.0249, "step": 5951 }, { "epoch": 0.4480325184892452, "grad_norm": 4.3529438972473145, "learning_rate": 6.0889910310759714e-05, "loss": 2.2993, "step": 5952 }, { "epoch": 0.44810779276990537, "grad_norm": 5.031883716583252, "learning_rate": 6.087801175272261e-05, "loss": 1.8768, "step": 5953 }, { "epoch": 0.4481830670505655, "grad_norm": 4.142302989959717, "learning_rate": 6.0866112548018915e-05, "loss": 1.9138, "step": 5954 }, { "epoch": 0.44825834133122566, "grad_norm": 6.355103015899658, "learning_rate": 6.085421269735601e-05, "loss": 2.5333, "step": 5955 }, { "epoch": 0.4483336156118858, "grad_norm": 3.5790059566497803, "learning_rate": 6.084231220144131e-05, "loss": 1.5943, "step": 5956 }, { "epoch": 0.44840888989254596, "grad_norm": 4.150872230529785, "learning_rate": 6.083041106098225e-05, "loss": 2.2456, "step": 5957 }, { "epoch": 0.44848416417320613, "grad_norm": 4.3456010818481445, "learning_rate": 6.081850927668634e-05, "loss": 1.7906, "step": 5958 }, { "epoch": 0.44855943845386625, "grad_norm": 4.23631477355957, "learning_rate": 6.0806606849261096e-05, "loss": 1.6512, "step": 5959 }, { "epoch": 0.44863471273452643, "grad_norm": 5.442815780639648, "learning_rate": 6.079470377941407e-05, "loss": 1.9065, "step": 5960 }, { "epoch": 0.4487099870151866, "grad_norm": 6.5241498947143555, "learning_rate": 6.0782800067852886e-05, "loss": 1.8874, "step": 5961 }, { "epoch": 0.4487852612958467, "grad_norm": 4.726539611816406, "learning_rate": 6.077089571528517e-05, "loss": 2.1969, "step": 5962 }, { "epoch": 0.4488605355765069, "grad_norm": 5.546689033508301, "learning_rate": 6.075899072241862e-05, "loss": 1.8063, "step": 5963 }, { "epoch": 0.4489358098571671, "grad_norm": 4.864353656768799, "learning_rate": 6.074708508996094e-05, "loss": 1.8378, "step": 5964 }, { "epoch": 0.4490110841378272, "grad_norm": 7.372176170349121, "learning_rate": 6.073517881861988e-05, "loss": 1.8837, "step": 5965 }, { "epoch": 0.4490863584184874, "grad_norm": 4.812704563140869, "learning_rate": 6.072327190910323e-05, "loss": 1.9821, "step": 5966 }, { "epoch": 0.4491616326991475, "grad_norm": 4.812704563140869, "learning_rate": 6.072327190910323e-05, "loss": 2.0037, "step": 5967 }, { "epoch": 0.44923690697980767, "grad_norm": 5.514007568359375, "learning_rate": 6.071136436211886e-05, "loss": 1.7521, "step": 5968 }, { "epoch": 0.44931218126046785, "grad_norm": 5.489030838012695, "learning_rate": 6.0699456178374605e-05, "loss": 2.1959, "step": 5969 }, { "epoch": 0.44938745554112797, "grad_norm": 4.788032054901123, "learning_rate": 6.068754735857838e-05, "loss": 1.7582, "step": 5970 }, { "epoch": 0.44946272982178814, "grad_norm": 5.289285182952881, "learning_rate": 6.067563790343813e-05, "loss": 2.2174, "step": 5971 }, { "epoch": 0.4495380041024483, "grad_norm": 3.956015110015869, "learning_rate": 6.066372781366183e-05, "loss": 1.9942, "step": 5972 }, { "epoch": 0.44961327838310844, "grad_norm": 5.779730319976807, "learning_rate": 6.065181708995752e-05, "loss": 2.014, "step": 5973 }, { "epoch": 0.4496885526637686, "grad_norm": 4.122438430786133, "learning_rate": 6.0639905733033245e-05, "loss": 1.7382, "step": 5974 }, { "epoch": 0.44976382694442874, "grad_norm": 10.876434326171875, "learning_rate": 6.06279937435971e-05, "loss": 2.2528, "step": 5975 }, { "epoch": 0.4498391012250889, "grad_norm": 6.262092590332031, "learning_rate": 6.0616081122357235e-05, "loss": 2.422, "step": 5976 }, { "epoch": 0.4499143755057491, "grad_norm": 4.21637487411499, "learning_rate": 6.06041678700218e-05, "loss": 2.0436, "step": 5977 }, { "epoch": 0.4499896497864092, "grad_norm": 5.686376571655273, "learning_rate": 6.0592253987299e-05, "loss": 2.0256, "step": 5978 }, { "epoch": 0.4500649240670694, "grad_norm": 4.940842628479004, "learning_rate": 6.0580339474897106e-05, "loss": 1.7117, "step": 5979 }, { "epoch": 0.45014019834772956, "grad_norm": 4.913683891296387, "learning_rate": 6.0568424333524386e-05, "loss": 1.9737, "step": 5980 }, { "epoch": 0.4502154726283897, "grad_norm": 5.5284833908081055, "learning_rate": 6.055650856388917e-05, "loss": 1.7662, "step": 5981 }, { "epoch": 0.45029074690904985, "grad_norm": 3.949586868286133, "learning_rate": 6.05445921666998e-05, "loss": 1.8073, "step": 5982 }, { "epoch": 0.45036602118971003, "grad_norm": 4.705341339111328, "learning_rate": 6.053267514266468e-05, "loss": 1.7362, "step": 5983 }, { "epoch": 0.45044129547037015, "grad_norm": 3.7629899978637695, "learning_rate": 6.0520757492492244e-05, "loss": 1.7402, "step": 5984 }, { "epoch": 0.4505165697510303, "grad_norm": 6.499493598937988, "learning_rate": 6.0508839216890964e-05, "loss": 2.4095, "step": 5985 }, { "epoch": 0.45059184403169045, "grad_norm": 4.297918319702148, "learning_rate": 6.049692031656935e-05, "loss": 2.0019, "step": 5986 }, { "epoch": 0.4506671183123506, "grad_norm": 4.276834487915039, "learning_rate": 6.048500079223595e-05, "loss": 1.8343, "step": 5987 }, { "epoch": 0.4507423925930108, "grad_norm": 4.483490943908691, "learning_rate": 6.047308064459933e-05, "loss": 1.9587, "step": 5988 }, { "epoch": 0.4508176668736709, "grad_norm": 5.314219951629639, "learning_rate": 6.04611598743681e-05, "loss": 1.8696, "step": 5989 }, { "epoch": 0.4508929411543311, "grad_norm": 3.4307024478912354, "learning_rate": 6.044923848225096e-05, "loss": 1.5495, "step": 5990 }, { "epoch": 0.45096821543499127, "grad_norm": 4.445413112640381, "learning_rate": 6.043731646895656e-05, "loss": 2.0814, "step": 5991 }, { "epoch": 0.4510434897156514, "grad_norm": 4.226023197174072, "learning_rate": 6.042539383519364e-05, "loss": 1.8908, "step": 5992 }, { "epoch": 0.45111876399631157, "grad_norm": 4.241024494171143, "learning_rate": 6.041347058167098e-05, "loss": 2.0917, "step": 5993 }, { "epoch": 0.45119403827697174, "grad_norm": 6.026360034942627, "learning_rate": 6.0401546709097366e-05, "loss": 1.5562, "step": 5994 }, { "epoch": 0.45126931255763186, "grad_norm": 3.9674556255340576, "learning_rate": 6.038962221818165e-05, "loss": 1.9904, "step": 5995 }, { "epoch": 0.45134458683829204, "grad_norm": 4.2482733726501465, "learning_rate": 6.0377697109632716e-05, "loss": 1.7127, "step": 5996 }, { "epoch": 0.45141986111895216, "grad_norm": 4.999752521514893, "learning_rate": 6.036577138415945e-05, "loss": 1.921, "step": 5997 }, { "epoch": 0.45149513539961234, "grad_norm": 5.2310309410095215, "learning_rate": 6.035384504247082e-05, "loss": 2.2203, "step": 5998 }, { "epoch": 0.4515704096802725, "grad_norm": 6.787916660308838, "learning_rate": 6.03419180852758e-05, "loss": 1.9591, "step": 5999 }, { "epoch": 0.45164568396093263, "grad_norm": 4.101888179779053, "learning_rate": 6.032999051328345e-05, "loss": 2.7233, "step": 6000 }, { "epoch": 0.4517209582415928, "grad_norm": 5.638253211975098, "learning_rate": 6.031806232720277e-05, "loss": 2.1475, "step": 6001 }, { "epoch": 0.451796232522253, "grad_norm": 6.141194820404053, "learning_rate": 6.030613352774293e-05, "loss": 1.7782, "step": 6002 }, { "epoch": 0.4518715068029131, "grad_norm": 4.367072105407715, "learning_rate": 6.0294204115613e-05, "loss": 1.7448, "step": 6003 }, { "epoch": 0.4519467810835733, "grad_norm": 3.6829652786254883, "learning_rate": 6.0282274091522174e-05, "loss": 1.8882, "step": 6004 }, { "epoch": 0.4520220553642334, "grad_norm": 4.12482213973999, "learning_rate": 6.0270343456179666e-05, "loss": 2.2419, "step": 6005 }, { "epoch": 0.4520973296448936, "grad_norm": 4.992110252380371, "learning_rate": 6.025841221029469e-05, "loss": 2.1173, "step": 6006 }, { "epoch": 0.45217260392555375, "grad_norm": 3.1126821041107178, "learning_rate": 6.024648035457656e-05, "loss": 1.9173, "step": 6007 }, { "epoch": 0.45224787820621387, "grad_norm": 4.627983570098877, "learning_rate": 6.023454788973457e-05, "loss": 2.1659, "step": 6008 }, { "epoch": 0.45232315248687405, "grad_norm": 4.1611528396606445, "learning_rate": 6.022261481647808e-05, "loss": 2.0813, "step": 6009 }, { "epoch": 0.4523984267675342, "grad_norm": 5.859460830688477, "learning_rate": 6.021068113551645e-05, "loss": 1.8312, "step": 6010 }, { "epoch": 0.45247370104819434, "grad_norm": 3.900682210922241, "learning_rate": 6.019874684755914e-05, "loss": 1.9834, "step": 6011 }, { "epoch": 0.4525489753288545, "grad_norm": 4.332180023193359, "learning_rate": 6.0186811953315593e-05, "loss": 1.8246, "step": 6012 }, { "epoch": 0.4526242496095147, "grad_norm": 4.874317169189453, "learning_rate": 6.01748764534953e-05, "loss": 2.292, "step": 6013 }, { "epoch": 0.4526995238901748, "grad_norm": 4.688967227935791, "learning_rate": 6.0162940348807804e-05, "loss": 1.6333, "step": 6014 }, { "epoch": 0.452774798170835, "grad_norm": 6.165711402893066, "learning_rate": 6.0151003639962654e-05, "loss": 2.1033, "step": 6015 }, { "epoch": 0.4528500724514951, "grad_norm": 5.114849090576172, "learning_rate": 6.0139066327669466e-05, "loss": 1.937, "step": 6016 }, { "epoch": 0.4529253467321553, "grad_norm": 3.925725221633911, "learning_rate": 6.012712841263788e-05, "loss": 1.916, "step": 6017 }, { "epoch": 0.45300062101281546, "grad_norm": 3.900815725326538, "learning_rate": 6.011518989557757e-05, "loss": 1.6135, "step": 6018 }, { "epoch": 0.4530758952934756, "grad_norm": 6.237217903137207, "learning_rate": 6.010325077719825e-05, "loss": 1.9457, "step": 6019 }, { "epoch": 0.45315116957413576, "grad_norm": 5.853795051574707, "learning_rate": 6.009131105820965e-05, "loss": 1.953, "step": 6020 }, { "epoch": 0.45322644385479594, "grad_norm": 6.7400898933410645, "learning_rate": 6.007937073932157e-05, "loss": 2.1115, "step": 6021 }, { "epoch": 0.45330171813545606, "grad_norm": 5.347073554992676, "learning_rate": 6.0067429821243816e-05, "loss": 1.7168, "step": 6022 }, { "epoch": 0.45337699241611623, "grad_norm": 5.739337921142578, "learning_rate": 6.005548830468625e-05, "loss": 1.7385, "step": 6023 }, { "epoch": 0.45345226669677635, "grad_norm": 4.295375823974609, "learning_rate": 6.004354619035876e-05, "loss": 1.9291, "step": 6024 }, { "epoch": 0.4535275409774365, "grad_norm": 5.407988548278809, "learning_rate": 6.003160347897126e-05, "loss": 1.9602, "step": 6025 }, { "epoch": 0.4536028152580967, "grad_norm": 5.012875556945801, "learning_rate": 6.0019660171233715e-05, "loss": 1.658, "step": 6026 }, { "epoch": 0.4536780895387568, "grad_norm": 5.996007442474365, "learning_rate": 6.0007716267856126e-05, "loss": 1.988, "step": 6027 }, { "epoch": 0.453753363819417, "grad_norm": 5.223214149475098, "learning_rate": 5.9995771769548514e-05, "loss": 1.6316, "step": 6028 }, { "epoch": 0.4538286381000772, "grad_norm": 3.7236433029174805, "learning_rate": 5.998382667702096e-05, "loss": 1.8148, "step": 6029 }, { "epoch": 0.4539039123807373, "grad_norm": 6.124312877655029, "learning_rate": 5.997188099098357e-05, "loss": 2.2687, "step": 6030 }, { "epoch": 0.45397918666139747, "grad_norm": 5.337610721588135, "learning_rate": 5.995993471214645e-05, "loss": 1.7451, "step": 6031 }, { "epoch": 0.45405446094205765, "grad_norm": 6.2527337074279785, "learning_rate": 5.9947987841219774e-05, "loss": 1.7022, "step": 6032 }, { "epoch": 0.45412973522271777, "grad_norm": 4.850403308868408, "learning_rate": 5.9936040378913785e-05, "loss": 1.7393, "step": 6033 }, { "epoch": 0.45420500950337794, "grad_norm": 6.706861972808838, "learning_rate": 5.9924092325938694e-05, "loss": 1.7573, "step": 6034 }, { "epoch": 0.45428028378403806, "grad_norm": 4.820898532867432, "learning_rate": 5.991214368300481e-05, "loss": 2.0673, "step": 6035 }, { "epoch": 0.45435555806469824, "grad_norm": 4.394272327423096, "learning_rate": 5.9900194450822413e-05, "loss": 1.928, "step": 6036 }, { "epoch": 0.4544308323453584, "grad_norm": 4.091886043548584, "learning_rate": 5.988824463010187e-05, "loss": 1.8936, "step": 6037 }, { "epoch": 0.45450610662601854, "grad_norm": 5.050229072570801, "learning_rate": 5.987629422155354e-05, "loss": 1.6381, "step": 6038 }, { "epoch": 0.4545813809066787, "grad_norm": 5.246290683746338, "learning_rate": 5.9864343225887864e-05, "loss": 1.7704, "step": 6039 }, { "epoch": 0.4546566551873389, "grad_norm": 5.636992931365967, "learning_rate": 5.985239164381529e-05, "loss": 1.9712, "step": 6040 }, { "epoch": 0.454731929467999, "grad_norm": 4.305088043212891, "learning_rate": 5.98404394760463e-05, "loss": 1.6054, "step": 6041 }, { "epoch": 0.4548072037486592, "grad_norm": 4.476842403411865, "learning_rate": 5.9828486723291435e-05, "loss": 2.4206, "step": 6042 }, { "epoch": 0.45488247802931936, "grad_norm": 4.255067348480225, "learning_rate": 5.981653338626122e-05, "loss": 1.6144, "step": 6043 }, { "epoch": 0.4549577523099795, "grad_norm": 5.431327819824219, "learning_rate": 5.980457946566625e-05, "loss": 2.2985, "step": 6044 }, { "epoch": 0.45503302659063966, "grad_norm": 5.836861610412598, "learning_rate": 5.979262496221718e-05, "loss": 1.68, "step": 6045 }, { "epoch": 0.4551083008712998, "grad_norm": 4.298095226287842, "learning_rate": 5.978066987662465e-05, "loss": 1.7517, "step": 6046 }, { "epoch": 0.45518357515195995, "grad_norm": 4.132864475250244, "learning_rate": 5.976871420959936e-05, "loss": 1.9019, "step": 6047 }, { "epoch": 0.4552588494326201, "grad_norm": 5.600347518920898, "learning_rate": 5.975675796185204e-05, "loss": 1.9065, "step": 6048 }, { "epoch": 0.45533412371328025, "grad_norm": 3.901496648788452, "learning_rate": 5.9744801134093454e-05, "loss": 1.9271, "step": 6049 }, { "epoch": 0.4554093979939404, "grad_norm": 5.38663387298584, "learning_rate": 5.9732843727034395e-05, "loss": 1.8771, "step": 6050 }, { "epoch": 0.4554846722746006, "grad_norm": 5.455505847930908, "learning_rate": 5.972088574138571e-05, "loss": 1.9057, "step": 6051 }, { "epoch": 0.4555599465552607, "grad_norm": 5.193195343017578, "learning_rate": 5.9708927177858265e-05, "loss": 1.687, "step": 6052 }, { "epoch": 0.4556352208359209, "grad_norm": 5.373270034790039, "learning_rate": 5.969696803716295e-05, "loss": 1.9376, "step": 6053 }, { "epoch": 0.455710495116581, "grad_norm": 5.75775671005249, "learning_rate": 5.968500832001072e-05, "loss": 2.2669, "step": 6054 }, { "epoch": 0.4557857693972412, "grad_norm": 4.701210975646973, "learning_rate": 5.967304802711252e-05, "loss": 1.9175, "step": 6055 }, { "epoch": 0.45586104367790137, "grad_norm": 5.253629684448242, "learning_rate": 5.966108715917937e-05, "loss": 2.2749, "step": 6056 }, { "epoch": 0.4559363179585615, "grad_norm": 5.26934289932251, "learning_rate": 5.9649125716922316e-05, "loss": 2.4595, "step": 6057 }, { "epoch": 0.45601159223922166, "grad_norm": 6.326803684234619, "learning_rate": 5.9637163701052434e-05, "loss": 2.0817, "step": 6058 }, { "epoch": 0.45608686651988184, "grad_norm": 5.832178592681885, "learning_rate": 5.9625201112280806e-05, "loss": 2.15, "step": 6059 }, { "epoch": 0.45616214080054196, "grad_norm": 5.275348663330078, "learning_rate": 5.96132379513186e-05, "loss": 1.5737, "step": 6060 }, { "epoch": 0.45623741508120214, "grad_norm": 4.472059726715088, "learning_rate": 5.960127421887697e-05, "loss": 1.9204, "step": 6061 }, { "epoch": 0.4563126893618623, "grad_norm": 4.421325206756592, "learning_rate": 5.9589309915667146e-05, "loss": 1.7761, "step": 6062 }, { "epoch": 0.45638796364252243, "grad_norm": 6.40798807144165, "learning_rate": 5.9577345042400355e-05, "loss": 2.2179, "step": 6063 }, { "epoch": 0.4564632379231826, "grad_norm": 4.162315368652344, "learning_rate": 5.9565379599787885e-05, "loss": 1.9176, "step": 6064 }, { "epoch": 0.45653851220384273, "grad_norm": 5.723100185394287, "learning_rate": 5.955341358854105e-05, "loss": 2.1868, "step": 6065 }, { "epoch": 0.4566137864845029, "grad_norm": 4.859919548034668, "learning_rate": 5.954144700937119e-05, "loss": 1.9833, "step": 6066 }, { "epoch": 0.4566890607651631, "grad_norm": 5.13480281829834, "learning_rate": 5.952947986298967e-05, "loss": 1.7, "step": 6067 }, { "epoch": 0.4567643350458232, "grad_norm": 4.001134872436523, "learning_rate": 5.9517512150107926e-05, "loss": 2.0627, "step": 6068 }, { "epoch": 0.4568396093264834, "grad_norm": 3.800814151763916, "learning_rate": 5.950554387143739e-05, "loss": 1.6681, "step": 6069 }, { "epoch": 0.45691488360714355, "grad_norm": 8.66020393371582, "learning_rate": 5.949357502768954e-05, "loss": 2.0261, "step": 6070 }, { "epoch": 0.45699015788780367, "grad_norm": 4.2896623611450195, "learning_rate": 5.948160561957591e-05, "loss": 1.866, "step": 6071 }, { "epoch": 0.45706543216846385, "grad_norm": 3.8667845726013184, "learning_rate": 5.9469635647808006e-05, "loss": 1.7372, "step": 6072 }, { "epoch": 0.45714070644912397, "grad_norm": 3.50824236869812, "learning_rate": 5.945766511309745e-05, "loss": 1.9068, "step": 6073 }, { "epoch": 0.45721598072978414, "grad_norm": 4.218340873718262, "learning_rate": 5.944569401615585e-05, "loss": 1.573, "step": 6074 }, { "epoch": 0.4572912550104443, "grad_norm": 4.466118335723877, "learning_rate": 5.943372235769483e-05, "loss": 1.8682, "step": 6075 }, { "epoch": 0.45736652929110444, "grad_norm": 4.892060279846191, "learning_rate": 5.942175013842609e-05, "loss": 1.973, "step": 6076 }, { "epoch": 0.4574418035717646, "grad_norm": 5.726229190826416, "learning_rate": 5.9409777359061324e-05, "loss": 2.0796, "step": 6077 }, { "epoch": 0.4575170778524248, "grad_norm": 4.99564266204834, "learning_rate": 5.939780402031232e-05, "loss": 2.0617, "step": 6078 }, { "epoch": 0.4575923521330849, "grad_norm": 6.545931339263916, "learning_rate": 5.938583012289082e-05, "loss": 1.7876, "step": 6079 }, { "epoch": 0.4576676264137451, "grad_norm": 4.696487903594971, "learning_rate": 5.937385566750864e-05, "loss": 1.7632, "step": 6080 }, { "epoch": 0.45774290069440526, "grad_norm": 3.8622817993164062, "learning_rate": 5.9361880654877645e-05, "loss": 1.9871, "step": 6081 }, { "epoch": 0.4578181749750654, "grad_norm": 3.7468810081481934, "learning_rate": 5.934990508570971e-05, "loss": 2.0188, "step": 6082 }, { "epoch": 0.45789344925572556, "grad_norm": 4.950771331787109, "learning_rate": 5.9337928960716746e-05, "loss": 2.0899, "step": 6083 }, { "epoch": 0.4579687235363857, "grad_norm": 5.0970611572265625, "learning_rate": 5.932595228061069e-05, "loss": 1.9046, "step": 6084 }, { "epoch": 0.45804399781704586, "grad_norm": 4.2430291175842285, "learning_rate": 5.931397504610353e-05, "loss": 1.8966, "step": 6085 }, { "epoch": 0.45811927209770603, "grad_norm": 4.811651706695557, "learning_rate": 5.9301997257907296e-05, "loss": 1.7643, "step": 6086 }, { "epoch": 0.45819454637836615, "grad_norm": 5.101222991943359, "learning_rate": 5.9290018916734e-05, "loss": 1.9049, "step": 6087 }, { "epoch": 0.45826982065902633, "grad_norm": 6.524330139160156, "learning_rate": 5.9278040023295744e-05, "loss": 1.7444, "step": 6088 }, { "epoch": 0.4583450949396865, "grad_norm": 4.4134955406188965, "learning_rate": 5.926606057830462e-05, "loss": 1.9636, "step": 6089 }, { "epoch": 0.4584203692203466, "grad_norm": 4.550789833068848, "learning_rate": 5.925408058247278e-05, "loss": 2.3889, "step": 6090 }, { "epoch": 0.4584956435010068, "grad_norm": 4.4516072273254395, "learning_rate": 5.924210003651242e-05, "loss": 1.7583, "step": 6091 }, { "epoch": 0.458570917781667, "grad_norm": 4.464069366455078, "learning_rate": 5.923011894113573e-05, "loss": 2.019, "step": 6092 }, { "epoch": 0.4586461920623271, "grad_norm": 4.778139114379883, "learning_rate": 5.9218137297054945e-05, "loss": 1.8796, "step": 6093 }, { "epoch": 0.45872146634298727, "grad_norm": 5.728743553161621, "learning_rate": 5.9206155104982333e-05, "loss": 1.9561, "step": 6094 }, { "epoch": 0.4587967406236474, "grad_norm": 3.7842822074890137, "learning_rate": 5.919417236563023e-05, "loss": 1.648, "step": 6095 }, { "epoch": 0.45887201490430757, "grad_norm": 4.400025367736816, "learning_rate": 5.918218907971095e-05, "loss": 1.8278, "step": 6096 }, { "epoch": 0.45894728918496774, "grad_norm": 4.153076171875, "learning_rate": 5.9170205247936896e-05, "loss": 1.7074, "step": 6097 }, { "epoch": 0.45902256346562786, "grad_norm": 5.94003963470459, "learning_rate": 5.915822087102044e-05, "loss": 1.9478, "step": 6098 }, { "epoch": 0.45909783774628804, "grad_norm": 4.797269344329834, "learning_rate": 5.914623594967404e-05, "loss": 1.6932, "step": 6099 }, { "epoch": 0.4591731120269482, "grad_norm": 4.070412635803223, "learning_rate": 5.913425048461013e-05, "loss": 1.9724, "step": 6100 }, { "epoch": 0.45924838630760834, "grad_norm": 4.645787239074707, "learning_rate": 5.912226447654127e-05, "loss": 1.77, "step": 6101 }, { "epoch": 0.4593236605882685, "grad_norm": 3.9777133464813232, "learning_rate": 5.911027792617995e-05, "loss": 1.8891, "step": 6102 }, { "epoch": 0.45939893486892863, "grad_norm": 7.653037071228027, "learning_rate": 5.909829083423875e-05, "loss": 1.8425, "step": 6103 }, { "epoch": 0.4594742091495888, "grad_norm": 5.192277431488037, "learning_rate": 5.908630320143026e-05, "loss": 2.0377, "step": 6104 }, { "epoch": 0.459549483430249, "grad_norm": 4.545719623565674, "learning_rate": 5.907431502846712e-05, "loss": 1.8998, "step": 6105 }, { "epoch": 0.4596247577109091, "grad_norm": 5.5499653816223145, "learning_rate": 5.906232631606198e-05, "loss": 1.742, "step": 6106 }, { "epoch": 0.4597000319915693, "grad_norm": 4.814578056335449, "learning_rate": 5.905033706492755e-05, "loss": 2.0466, "step": 6107 }, { "epoch": 0.45977530627222946, "grad_norm": 4.70163631439209, "learning_rate": 5.903834727577656e-05, "loss": 2.0124, "step": 6108 }, { "epoch": 0.4598505805528896, "grad_norm": 5.952963352203369, "learning_rate": 5.902635694932175e-05, "loss": 2.2362, "step": 6109 }, { "epoch": 0.45992585483354975, "grad_norm": 6.062039375305176, "learning_rate": 5.901436608627592e-05, "loss": 1.8526, "step": 6110 }, { "epoch": 0.46000112911420993, "grad_norm": 6.062039375305176, "learning_rate": 5.901436608627592e-05, "loss": 1.855, "step": 6111 }, { "epoch": 0.46007640339487005, "grad_norm": 3.8001315593719482, "learning_rate": 5.900237468735188e-05, "loss": 1.5048, "step": 6112 }, { "epoch": 0.4601516776755302, "grad_norm": 4.128273963928223, "learning_rate": 5.89903827532625e-05, "loss": 2.0092, "step": 6113 }, { "epoch": 0.46022695195619034, "grad_norm": 8.076136589050293, "learning_rate": 5.897839028472066e-05, "loss": 2.4223, "step": 6114 }, { "epoch": 0.4603022262368505, "grad_norm": 4.980038166046143, "learning_rate": 5.896639728243929e-05, "loss": 1.9077, "step": 6115 }, { "epoch": 0.4603775005175107, "grad_norm": 4.359116554260254, "learning_rate": 5.895440374713133e-05, "loss": 2.0745, "step": 6116 }, { "epoch": 0.4604527747981708, "grad_norm": 4.29932165145874, "learning_rate": 5.894240967950976e-05, "loss": 1.9347, "step": 6117 }, { "epoch": 0.460528049078831, "grad_norm": 5.578684329986572, "learning_rate": 5.8930415080287585e-05, "loss": 2.1286, "step": 6118 }, { "epoch": 0.46060332335949117, "grad_norm": 5.237558364868164, "learning_rate": 5.8918419950177864e-05, "loss": 2.2142, "step": 6119 }, { "epoch": 0.4606785976401513, "grad_norm": 3.7731804847717285, "learning_rate": 5.890642428989366e-05, "loss": 2.0002, "step": 6120 }, { "epoch": 0.46075387192081146, "grad_norm": 6.02181339263916, "learning_rate": 5.889442810014809e-05, "loss": 1.8548, "step": 6121 }, { "epoch": 0.4608291462014716, "grad_norm": 5.883323669433594, "learning_rate": 5.888243138165429e-05, "loss": 1.9086, "step": 6122 }, { "epoch": 0.46090442048213176, "grad_norm": 5.164095401763916, "learning_rate": 5.887043413512543e-05, "loss": 1.7454, "step": 6123 }, { "epoch": 0.46097969476279194, "grad_norm": 4.250068664550781, "learning_rate": 5.8858436361274714e-05, "loss": 2.0833, "step": 6124 }, { "epoch": 0.46105496904345206, "grad_norm": 5.0383782386779785, "learning_rate": 5.884643806081538e-05, "loss": 1.8903, "step": 6125 }, { "epoch": 0.46113024332411223, "grad_norm": 4.567081451416016, "learning_rate": 5.883443923446068e-05, "loss": 1.9542, "step": 6126 }, { "epoch": 0.4612055176047724, "grad_norm": 5.049877643585205, "learning_rate": 5.882243988292393e-05, "loss": 1.8925, "step": 6127 }, { "epoch": 0.46128079188543253, "grad_norm": 4.685807228088379, "learning_rate": 5.881044000691842e-05, "loss": 1.9993, "step": 6128 }, { "epoch": 0.4613560661660927, "grad_norm": 6.258084774017334, "learning_rate": 5.879843960715754e-05, "loss": 2.3033, "step": 6129 }, { "epoch": 0.4614313404467529, "grad_norm": 3.606372356414795, "learning_rate": 5.878643868435467e-05, "loss": 2.0483, "step": 6130 }, { "epoch": 0.461506614727413, "grad_norm": 3.7735278606414795, "learning_rate": 5.877443723922323e-05, "loss": 1.8318, "step": 6131 }, { "epoch": 0.4615818890080732, "grad_norm": 4.213362216949463, "learning_rate": 5.876243527247669e-05, "loss": 1.9625, "step": 6132 }, { "epoch": 0.4616571632887333, "grad_norm": 4.581994533538818, "learning_rate": 5.87504327848285e-05, "loss": 1.988, "step": 6133 }, { "epoch": 0.4617324375693935, "grad_norm": 5.330654621124268, "learning_rate": 5.87384297769922e-05, "loss": 2.2455, "step": 6134 }, { "epoch": 0.46180771185005365, "grad_norm": 4.982420444488525, "learning_rate": 5.8726426249681296e-05, "loss": 2.0131, "step": 6135 }, { "epoch": 0.46188298613071377, "grad_norm": 5.3704633712768555, "learning_rate": 5.871442220360942e-05, "loss": 1.5961, "step": 6136 }, { "epoch": 0.46195826041137394, "grad_norm": 5.449668884277344, "learning_rate": 5.870241763949014e-05, "loss": 1.6291, "step": 6137 }, { "epoch": 0.4620335346920341, "grad_norm": 4.630037307739258, "learning_rate": 5.86904125580371e-05, "loss": 1.9306, "step": 6138 }, { "epoch": 0.46210880897269424, "grad_norm": 7.90316915512085, "learning_rate": 5.8678406959963974e-05, "loss": 1.6066, "step": 6139 }, { "epoch": 0.4621840832533544, "grad_norm": 3.5883285999298096, "learning_rate": 5.866640084598444e-05, "loss": 1.9163, "step": 6140 }, { "epoch": 0.4622593575340146, "grad_norm": 6.554765701293945, "learning_rate": 5.865439421681226e-05, "loss": 2.0131, "step": 6141 }, { "epoch": 0.4623346318146747, "grad_norm": 3.749727725982666, "learning_rate": 5.864238707316117e-05, "loss": 1.7178, "step": 6142 }, { "epoch": 0.4624099060953349, "grad_norm": 4.062201023101807, "learning_rate": 5.863037941574496e-05, "loss": 1.7238, "step": 6143 }, { "epoch": 0.462485180375995, "grad_norm": 5.0798540115356445, "learning_rate": 5.8618371245277474e-05, "loss": 1.9235, "step": 6144 }, { "epoch": 0.4625604546566552, "grad_norm": 4.518141746520996, "learning_rate": 5.860636256247252e-05, "loss": 1.9074, "step": 6145 }, { "epoch": 0.46263572893731536, "grad_norm": 4.606646537780762, "learning_rate": 5.859435336804401e-05, "loss": 2.0495, "step": 6146 }, { "epoch": 0.4627110032179755, "grad_norm": 4.6152262687683105, "learning_rate": 5.858234366270586e-05, "loss": 2.0432, "step": 6147 }, { "epoch": 0.46278627749863566, "grad_norm": 4.4273176193237305, "learning_rate": 5.8570333447172024e-05, "loss": 2.1614, "step": 6148 }, { "epoch": 0.46286155177929583, "grad_norm": 4.393697738647461, "learning_rate": 5.8558322722156437e-05, "loss": 2.3018, "step": 6149 }, { "epoch": 0.46293682605995595, "grad_norm": 4.865461349487305, "learning_rate": 5.854631148837312e-05, "loss": 1.99, "step": 6150 }, { "epoch": 0.46301210034061613, "grad_norm": 4.272097110748291, "learning_rate": 5.8534299746536103e-05, "loss": 1.833, "step": 6151 }, { "epoch": 0.46308737462127625, "grad_norm": 5.799352645874023, "learning_rate": 5.852228749735946e-05, "loss": 1.9095, "step": 6152 }, { "epoch": 0.4631626489019364, "grad_norm": 4.514222145080566, "learning_rate": 5.851027474155728e-05, "loss": 1.8374, "step": 6153 }, { "epoch": 0.4632379231825966, "grad_norm": 4.221158981323242, "learning_rate": 5.8498261479843685e-05, "loss": 1.8974, "step": 6154 }, { "epoch": 0.4633131974632567, "grad_norm": 4.292566776275635, "learning_rate": 5.848624771293284e-05, "loss": 1.9298, "step": 6155 }, { "epoch": 0.4633884717439169, "grad_norm": 5.188507556915283, "learning_rate": 5.847423344153891e-05, "loss": 2.0096, "step": 6156 }, { "epoch": 0.4634637460245771, "grad_norm": 6.87600040435791, "learning_rate": 5.84622186663761e-05, "loss": 1.9994, "step": 6157 }, { "epoch": 0.4635390203052372, "grad_norm": 5.687765121459961, "learning_rate": 5.845020338815869e-05, "loss": 1.8411, "step": 6158 }, { "epoch": 0.46361429458589737, "grad_norm": 5.334669589996338, "learning_rate": 5.8438187607600935e-05, "loss": 1.8098, "step": 6159 }, { "epoch": 0.46368956886655754, "grad_norm": 5.574984073638916, "learning_rate": 5.8426171325417136e-05, "loss": 1.9769, "step": 6160 }, { "epoch": 0.46376484314721766, "grad_norm": 5.236361980438232, "learning_rate": 5.841415454232162e-05, "loss": 1.8007, "step": 6161 }, { "epoch": 0.46384011742787784, "grad_norm": 5.731012344360352, "learning_rate": 5.840213725902877e-05, "loss": 1.9784, "step": 6162 }, { "epoch": 0.46391539170853796, "grad_norm": 5.692600727081299, "learning_rate": 5.839011947625295e-05, "loss": 2.1517, "step": 6163 }, { "epoch": 0.46399066598919814, "grad_norm": 6.384438514709473, "learning_rate": 5.8378101194708614e-05, "loss": 1.7702, "step": 6164 }, { "epoch": 0.4640659402698583, "grad_norm": 5.911719799041748, "learning_rate": 5.8366082415110215e-05, "loss": 1.9245, "step": 6165 }, { "epoch": 0.46414121455051843, "grad_norm": 6.263427734375, "learning_rate": 5.83540631381722e-05, "loss": 1.8174, "step": 6166 }, { "epoch": 0.4642164888311786, "grad_norm": 4.851933479309082, "learning_rate": 5.834204336460911e-05, "loss": 2.0516, "step": 6167 }, { "epoch": 0.4642917631118388, "grad_norm": 4.586934566497803, "learning_rate": 5.8330023095135476e-05, "loss": 1.8159, "step": 6168 }, { "epoch": 0.4643670373924989, "grad_norm": 6.346707344055176, "learning_rate": 5.8318002330465884e-05, "loss": 1.6566, "step": 6169 }, { "epoch": 0.4644423116731591, "grad_norm": 4.117097854614258, "learning_rate": 5.830598107131491e-05, "loss": 2.1882, "step": 6170 }, { "epoch": 0.46451758595381926, "grad_norm": 5.091454029083252, "learning_rate": 5.8293959318397194e-05, "loss": 1.5757, "step": 6171 }, { "epoch": 0.4645928602344794, "grad_norm": 6.693051815032959, "learning_rate": 5.82819370724274e-05, "loss": 2.215, "step": 6172 }, { "epoch": 0.46466813451513955, "grad_norm": 4.601351737976074, "learning_rate": 5.826991433412019e-05, "loss": 2.255, "step": 6173 }, { "epoch": 0.4647434087957997, "grad_norm": 4.238350868225098, "learning_rate": 5.825789110419032e-05, "loss": 1.7408, "step": 6174 }, { "epoch": 0.46481868307645985, "grad_norm": 3.776761054992676, "learning_rate": 5.8245867383352516e-05, "loss": 1.9469, "step": 6175 }, { "epoch": 0.46489395735712, "grad_norm": 5.935523986816406, "learning_rate": 5.8233843172321564e-05, "loss": 2.1649, "step": 6176 }, { "epoch": 0.46496923163778014, "grad_norm": 4.455471992492676, "learning_rate": 5.822181847181225e-05, "loss": 2.0635, "step": 6177 }, { "epoch": 0.4650445059184403, "grad_norm": 3.9656732082366943, "learning_rate": 5.8209793282539414e-05, "loss": 1.8731, "step": 6178 }, { "epoch": 0.4651197801991005, "grad_norm": 3.774287462234497, "learning_rate": 5.819776760521793e-05, "loss": 1.9017, "step": 6179 }, { "epoch": 0.4651950544797606, "grad_norm": 6.014800071716309, "learning_rate": 5.818574144056268e-05, "loss": 2.4556, "step": 6180 }, { "epoch": 0.4652703287604208, "grad_norm": 5.37847375869751, "learning_rate": 5.8173714789288604e-05, "loss": 2.0758, "step": 6181 }, { "epoch": 0.4653456030410809, "grad_norm": 5.380580902099609, "learning_rate": 5.816168765211063e-05, "loss": 1.7509, "step": 6182 }, { "epoch": 0.4654208773217411, "grad_norm": 3.3990745544433594, "learning_rate": 5.814966002974374e-05, "loss": 1.9663, "step": 6183 }, { "epoch": 0.46549615160240126, "grad_norm": 4.516787528991699, "learning_rate": 5.8137631922902944e-05, "loss": 2.1708, "step": 6184 }, { "epoch": 0.4655714258830614, "grad_norm": 3.905266761779785, "learning_rate": 5.812560333230328e-05, "loss": 1.8058, "step": 6185 }, { "epoch": 0.46564670016372156, "grad_norm": 4.200650691986084, "learning_rate": 5.811357425865981e-05, "loss": 2.3434, "step": 6186 }, { "epoch": 0.46572197444438174, "grad_norm": 4.848728179931641, "learning_rate": 5.8101544702687636e-05, "loss": 1.6202, "step": 6187 }, { "epoch": 0.46579724872504186, "grad_norm": 5.740512371063232, "learning_rate": 5.808951466510188e-05, "loss": 1.8352, "step": 6188 }, { "epoch": 0.46587252300570203, "grad_norm": 5.244520664215088, "learning_rate": 5.807748414661769e-05, "loss": 1.9215, "step": 6189 }, { "epoch": 0.4659477972863622, "grad_norm": 5.365092754364014, "learning_rate": 5.806545314795022e-05, "loss": 1.836, "step": 6190 }, { "epoch": 0.46602307156702233, "grad_norm": 3.605998992919922, "learning_rate": 5.805342166981472e-05, "loss": 2.051, "step": 6191 }, { "epoch": 0.4660983458476825, "grad_norm": 4.080414295196533, "learning_rate": 5.804138971292642e-05, "loss": 1.7786, "step": 6192 }, { "epoch": 0.4661736201283426, "grad_norm": 3.924818515777588, "learning_rate": 5.802935727800056e-05, "loss": 2.01, "step": 6193 }, { "epoch": 0.4662488944090028, "grad_norm": 4.2257466316223145, "learning_rate": 5.801732436575245e-05, "loss": 1.8988, "step": 6194 }, { "epoch": 0.466324168689663, "grad_norm": 4.413915157318115, "learning_rate": 5.800529097689742e-05, "loss": 1.8617, "step": 6195 }, { "epoch": 0.4663994429703231, "grad_norm": 3.800804376602173, "learning_rate": 5.799325711215079e-05, "loss": 2.1775, "step": 6196 }, { "epoch": 0.4664747172509833, "grad_norm": 4.679453372955322, "learning_rate": 5.7981222772227985e-05, "loss": 2.2822, "step": 6197 }, { "epoch": 0.46654999153164345, "grad_norm": 3.8881962299346924, "learning_rate": 5.796918795784437e-05, "loss": 1.424, "step": 6198 }, { "epoch": 0.46662526581230357, "grad_norm": 4.5422773361206055, "learning_rate": 5.7957152669715406e-05, "loss": 2.0155, "step": 6199 }, { "epoch": 0.46670054009296374, "grad_norm": 4.647782802581787, "learning_rate": 5.7945116908556554e-05, "loss": 1.7315, "step": 6200 }, { "epoch": 0.46677581437362387, "grad_norm": 5.930076599121094, "learning_rate": 5.793308067508328e-05, "loss": 1.4385, "step": 6201 }, { "epoch": 0.46685108865428404, "grad_norm": 4.503535747528076, "learning_rate": 5.7921043970011134e-05, "loss": 1.7247, "step": 6202 }, { "epoch": 0.4669263629349442, "grad_norm": 4.112096309661865, "learning_rate": 5.790900679405565e-05, "loss": 2.0921, "step": 6203 }, { "epoch": 0.46700163721560434, "grad_norm": 4.92310094833374, "learning_rate": 5.7896969147932414e-05, "loss": 1.7441, "step": 6204 }, { "epoch": 0.4670769114962645, "grad_norm": 5.805795192718506, "learning_rate": 5.7884931032357015e-05, "loss": 2.5231, "step": 6205 }, { "epoch": 0.4671521857769247, "grad_norm": 4.6085309982299805, "learning_rate": 5.787289244804509e-05, "loss": 2.0675, "step": 6206 }, { "epoch": 0.4672274600575848, "grad_norm": 5.6390061378479, "learning_rate": 5.786085339571229e-05, "loss": 2.2054, "step": 6207 }, { "epoch": 0.467302734338245, "grad_norm": 4.3372087478637695, "learning_rate": 5.784881387607433e-05, "loss": 1.7363, "step": 6208 }, { "epoch": 0.46737800861890516, "grad_norm": 4.134713172912598, "learning_rate": 5.783677388984689e-05, "loss": 1.6687, "step": 6209 }, { "epoch": 0.4674532828995653, "grad_norm": 5.727628707885742, "learning_rate": 5.7824733437745725e-05, "loss": 1.895, "step": 6210 }, { "epoch": 0.46752855718022546, "grad_norm": 5.829822063446045, "learning_rate": 5.781269252048662e-05, "loss": 1.9313, "step": 6211 }, { "epoch": 0.4676038314608856, "grad_norm": 4.493118762969971, "learning_rate": 5.780065113878537e-05, "loss": 2.1191, "step": 6212 }, { "epoch": 0.46767910574154575, "grad_norm": 5.89603328704834, "learning_rate": 5.778860929335777e-05, "loss": 1.8107, "step": 6213 }, { "epoch": 0.46775438002220593, "grad_norm": 4.582896709442139, "learning_rate": 5.77765669849197e-05, "loss": 2.0457, "step": 6214 }, { "epoch": 0.46782965430286605, "grad_norm": 4.682589054107666, "learning_rate": 5.7764524214187044e-05, "loss": 2.1012, "step": 6215 }, { "epoch": 0.4679049285835262, "grad_norm": 4.900998592376709, "learning_rate": 5.775248098187571e-05, "loss": 2.292, "step": 6216 }, { "epoch": 0.4679802028641864, "grad_norm": 5.489734649658203, "learning_rate": 5.774043728870162e-05, "loss": 1.7491, "step": 6217 }, { "epoch": 0.4680554771448465, "grad_norm": 5.939738750457764, "learning_rate": 5.772839313538073e-05, "loss": 1.9004, "step": 6218 }, { "epoch": 0.4681307514255067, "grad_norm": 6.664865970611572, "learning_rate": 5.771634852262906e-05, "loss": 2.3472, "step": 6219 }, { "epoch": 0.4682060257061669, "grad_norm": 4.616393566131592, "learning_rate": 5.77043034511626e-05, "loss": 2.1801, "step": 6220 }, { "epoch": 0.468281299986827, "grad_norm": 4.235937595367432, "learning_rate": 5.769225792169741e-05, "loss": 2.2261, "step": 6221 }, { "epoch": 0.46835657426748717, "grad_norm": 5.158927917480469, "learning_rate": 5.768021193494957e-05, "loss": 1.873, "step": 6222 }, { "epoch": 0.4684318485481473, "grad_norm": 5.125085353851318, "learning_rate": 5.766816549163514e-05, "loss": 1.9652, "step": 6223 }, { "epoch": 0.46850712282880747, "grad_norm": 4.54403829574585, "learning_rate": 5.76561185924703e-05, "loss": 1.8689, "step": 6224 }, { "epoch": 0.46858239710946764, "grad_norm": 3.592175006866455, "learning_rate": 5.764407123817116e-05, "loss": 1.8026, "step": 6225 }, { "epoch": 0.46865767139012776, "grad_norm": 4.588351249694824, "learning_rate": 5.763202342945392e-05, "loss": 1.8453, "step": 6226 }, { "epoch": 0.46873294567078794, "grad_norm": 6.254735469818115, "learning_rate": 5.761997516703479e-05, "loss": 2.043, "step": 6227 }, { "epoch": 0.4688082199514481, "grad_norm": 5.188050270080566, "learning_rate": 5.760792645163001e-05, "loss": 1.9984, "step": 6228 }, { "epoch": 0.46888349423210823, "grad_norm": 4.085492134094238, "learning_rate": 5.7595877283955814e-05, "loss": 1.8735, "step": 6229 }, { "epoch": 0.4689587685127684, "grad_norm": 4.205752372741699, "learning_rate": 5.7583827664728516e-05, "loss": 1.8845, "step": 6230 }, { "epoch": 0.46903404279342853, "grad_norm": 4.196042060852051, "learning_rate": 5.7571777594664434e-05, "loss": 1.8089, "step": 6231 }, { "epoch": 0.4691093170740887, "grad_norm": 4.628505229949951, "learning_rate": 5.7559727074479896e-05, "loss": 2.0956, "step": 6232 }, { "epoch": 0.4691845913547489, "grad_norm": 4.327082633972168, "learning_rate": 5.754767610489127e-05, "loss": 2.1062, "step": 6233 }, { "epoch": 0.469259865635409, "grad_norm": 4.564215183258057, "learning_rate": 5.753562468661497e-05, "loss": 1.8069, "step": 6234 }, { "epoch": 0.4693351399160692, "grad_norm": 4.412684440612793, "learning_rate": 5.752357282036738e-05, "loss": 1.8424, "step": 6235 }, { "epoch": 0.46941041419672935, "grad_norm": 4.835409164428711, "learning_rate": 5.751152050686498e-05, "loss": 1.6814, "step": 6236 }, { "epoch": 0.4694856884773895, "grad_norm": 3.582608938217163, "learning_rate": 5.749946774682425e-05, "loss": 1.7111, "step": 6237 }, { "epoch": 0.46956096275804965, "grad_norm": 4.621428966522217, "learning_rate": 5.7487414540961684e-05, "loss": 2.1499, "step": 6238 }, { "epoch": 0.4696362370387098, "grad_norm": 4.225468158721924, "learning_rate": 5.7475360889993793e-05, "loss": 1.5202, "step": 6239 }, { "epoch": 0.46971151131936995, "grad_norm": 4.9156413078308105, "learning_rate": 5.746330679463714e-05, "loss": 2.107, "step": 6240 }, { "epoch": 0.4697867856000301, "grad_norm": 4.5124192237854, "learning_rate": 5.745125225560833e-05, "loss": 1.8903, "step": 6241 }, { "epoch": 0.46986205988069024, "grad_norm": 4.60844087600708, "learning_rate": 5.743919727362395e-05, "loss": 1.9222, "step": 6242 }, { "epoch": 0.4699373341613504, "grad_norm": 6.4271979331970215, "learning_rate": 5.7427141849400625e-05, "loss": 1.9923, "step": 6243 }, { "epoch": 0.4700126084420106, "grad_norm": 3.941171407699585, "learning_rate": 5.7415085983655036e-05, "loss": 2.0671, "step": 6244 }, { "epoch": 0.4700878827226707, "grad_norm": 5.526283264160156, "learning_rate": 5.740302967710387e-05, "loss": 1.8658, "step": 6245 }, { "epoch": 0.4701631570033309, "grad_norm": 6.0859222412109375, "learning_rate": 5.739097293046382e-05, "loss": 2.2689, "step": 6246 }, { "epoch": 0.47023843128399107, "grad_norm": 4.590087890625, "learning_rate": 5.737891574445164e-05, "loss": 1.9471, "step": 6247 }, { "epoch": 0.4703137055646512, "grad_norm": 4.172879219055176, "learning_rate": 5.736685811978409e-05, "loss": 1.9712, "step": 6248 }, { "epoch": 0.47038897984531136, "grad_norm": 4.682403087615967, "learning_rate": 5.735480005717797e-05, "loss": 1.8274, "step": 6249 }, { "epoch": 0.4704642541259715, "grad_norm": 4.299895286560059, "learning_rate": 5.7342741557350086e-05, "loss": 1.5215, "step": 6250 }, { "epoch": 0.47053952840663166, "grad_norm": 6.324371337890625, "learning_rate": 5.733068262101728e-05, "loss": 2.0383, "step": 6251 }, { "epoch": 0.47061480268729183, "grad_norm": 5.781114101409912, "learning_rate": 5.7318623248896444e-05, "loss": 1.642, "step": 6252 }, { "epoch": 0.47069007696795195, "grad_norm": 5.2984938621521, "learning_rate": 5.730656344170444e-05, "loss": 1.6114, "step": 6253 }, { "epoch": 0.47076535124861213, "grad_norm": 4.689993858337402, "learning_rate": 5.729450320015821e-05, "loss": 1.8478, "step": 6254 }, { "epoch": 0.4708406255292723, "grad_norm": 10.7761812210083, "learning_rate": 5.728244252497469e-05, "loss": 2.1104, "step": 6255 }, { "epoch": 0.4709158998099324, "grad_norm": 4.138166427612305, "learning_rate": 5.7270381416870844e-05, "loss": 2.1262, "step": 6256 }, { "epoch": 0.4709911740905926, "grad_norm": 3.983053207397461, "learning_rate": 5.7258319876563706e-05, "loss": 1.7846, "step": 6257 }, { "epoch": 0.4710664483712528, "grad_norm": 9.24421501159668, "learning_rate": 5.724625790477026e-05, "loss": 1.6986, "step": 6258 }, { "epoch": 0.4711417226519129, "grad_norm": 7.4755425453186035, "learning_rate": 5.723419550220759e-05, "loss": 1.7405, "step": 6259 }, { "epoch": 0.4712169969325731, "grad_norm": 3.8950953483581543, "learning_rate": 5.7222132669592755e-05, "loss": 1.8926, "step": 6260 }, { "epoch": 0.4712922712132332, "grad_norm": 4.3589301109313965, "learning_rate": 5.7210069407642844e-05, "loss": 1.8608, "step": 6261 }, { "epoch": 0.47136754549389337, "grad_norm": 5.551499843597412, "learning_rate": 5.719800571707501e-05, "loss": 1.6912, "step": 6262 }, { "epoch": 0.47144281977455355, "grad_norm": 6.671307563781738, "learning_rate": 5.7185941598606366e-05, "loss": 2.0654, "step": 6263 }, { "epoch": 0.47151809405521367, "grad_norm": 4.551986217498779, "learning_rate": 5.717387705295413e-05, "loss": 1.7901, "step": 6264 }, { "epoch": 0.47159336833587384, "grad_norm": 3.8453047275543213, "learning_rate": 5.716181208083548e-05, "loss": 1.8323, "step": 6265 }, { "epoch": 0.471668642616534, "grad_norm": 3.31730580329895, "learning_rate": 5.7149746682967665e-05, "loss": 1.8142, "step": 6266 }, { "epoch": 0.47174391689719414, "grad_norm": 3.5658116340637207, "learning_rate": 5.713768086006791e-05, "loss": 1.736, "step": 6267 }, { "epoch": 0.4718191911778543, "grad_norm": 5.811598777770996, "learning_rate": 5.71256146128535e-05, "loss": 2.1401, "step": 6268 }, { "epoch": 0.4718944654585145, "grad_norm": 3.8438498973846436, "learning_rate": 5.711354794204177e-05, "loss": 1.9435, "step": 6269 }, { "epoch": 0.4719697397391746, "grad_norm": 3.2192771434783936, "learning_rate": 5.7101480848350016e-05, "loss": 1.8742, "step": 6270 }, { "epoch": 0.4720450140198348, "grad_norm": 4.943243503570557, "learning_rate": 5.70894133324956e-05, "loss": 1.9867, "step": 6271 }, { "epoch": 0.4721202883004949, "grad_norm": 4.282954692840576, "learning_rate": 5.707734539519591e-05, "loss": 1.8354, "step": 6272 }, { "epoch": 0.4721955625811551, "grad_norm": 3.7576797008514404, "learning_rate": 5.706527703716833e-05, "loss": 1.5095, "step": 6273 }, { "epoch": 0.47227083686181526, "grad_norm": 5.438213348388672, "learning_rate": 5.7053208259130296e-05, "loss": 1.9158, "step": 6274 }, { "epoch": 0.4723461111424754, "grad_norm": 4.024550914764404, "learning_rate": 5.7041139061799285e-05, "loss": 2.0162, "step": 6275 }, { "epoch": 0.47242138542313555, "grad_norm": 4.87020206451416, "learning_rate": 5.702906944589277e-05, "loss": 2.2639, "step": 6276 }, { "epoch": 0.47249665970379573, "grad_norm": 6.648222923278809, "learning_rate": 5.7016999412128235e-05, "loss": 1.4645, "step": 6277 }, { "epoch": 0.47257193398445585, "grad_norm": 6.833711624145508, "learning_rate": 5.7004928961223224e-05, "loss": 1.869, "step": 6278 }, { "epoch": 0.472647208265116, "grad_norm": 4.809195518493652, "learning_rate": 5.699285809389526e-05, "loss": 2.0548, "step": 6279 }, { "epoch": 0.47272248254577615, "grad_norm": 5.604750633239746, "learning_rate": 5.6980786810861974e-05, "loss": 1.9458, "step": 6280 }, { "epoch": 0.4727977568264363, "grad_norm": 6.335312366485596, "learning_rate": 5.696871511284094e-05, "loss": 1.7803, "step": 6281 }, { "epoch": 0.4728730311070965, "grad_norm": 3.6210274696350098, "learning_rate": 5.695664300054978e-05, "loss": 1.894, "step": 6282 }, { "epoch": 0.4729483053877566, "grad_norm": 3.923715114593506, "learning_rate": 5.694457047470616e-05, "loss": 1.7115, "step": 6283 }, { "epoch": 0.4730235796684168, "grad_norm": 3.4237923622131348, "learning_rate": 5.6932497536027754e-05, "loss": 1.7166, "step": 6284 }, { "epoch": 0.47309885394907697, "grad_norm": 5.746707916259766, "learning_rate": 5.6920424185232245e-05, "loss": 2.5159, "step": 6285 }, { "epoch": 0.4731741282297371, "grad_norm": 3.881298303604126, "learning_rate": 5.6908350423037394e-05, "loss": 2.151, "step": 6286 }, { "epoch": 0.47324940251039727, "grad_norm": 4.77166748046875, "learning_rate": 5.6896276250160915e-05, "loss": 1.7471, "step": 6287 }, { "epoch": 0.47332467679105744, "grad_norm": 4.470195770263672, "learning_rate": 5.6884201667320626e-05, "loss": 1.9611, "step": 6288 }, { "epoch": 0.47339995107171756, "grad_norm": 5.8305487632751465, "learning_rate": 5.687212667523428e-05, "loss": 1.7984, "step": 6289 }, { "epoch": 0.47347522535237774, "grad_norm": 3.7198565006256104, "learning_rate": 5.686005127461972e-05, "loss": 1.9287, "step": 6290 }, { "epoch": 0.47355049963303786, "grad_norm": 5.26173734664917, "learning_rate": 5.684797546619479e-05, "loss": 1.8081, "step": 6291 }, { "epoch": 0.47362577391369803, "grad_norm": 4.555299282073975, "learning_rate": 5.683589925067738e-05, "loss": 1.984, "step": 6292 }, { "epoch": 0.4737010481943582, "grad_norm": 3.355376720428467, "learning_rate": 5.6823822628785384e-05, "loss": 1.9806, "step": 6293 }, { "epoch": 0.47377632247501833, "grad_norm": 4.6258225440979, "learning_rate": 5.68117456012367e-05, "loss": 1.9366, "step": 6294 }, { "epoch": 0.4738515967556785, "grad_norm": 5.893681049346924, "learning_rate": 5.679966816874929e-05, "loss": 1.8333, "step": 6295 }, { "epoch": 0.4739268710363387, "grad_norm": 4.524265289306641, "learning_rate": 5.678759033204111e-05, "loss": 1.9951, "step": 6296 }, { "epoch": 0.4740021453169988, "grad_norm": 4.757098197937012, "learning_rate": 5.677551209183016e-05, "loss": 1.9888, "step": 6297 }, { "epoch": 0.474077419597659, "grad_norm": 3.6930558681488037, "learning_rate": 5.676343344883447e-05, "loss": 2.1562, "step": 6298 }, { "epoch": 0.4741526938783191, "grad_norm": 4.2077507972717285, "learning_rate": 5.675135440377206e-05, "loss": 1.8127, "step": 6299 }, { "epoch": 0.4742279681589793, "grad_norm": 4.4843573570251465, "learning_rate": 5.673927495736101e-05, "loss": 1.9962, "step": 6300 }, { "epoch": 0.47430324243963945, "grad_norm": 6.508464813232422, "learning_rate": 5.672719511031939e-05, "loss": 2.1499, "step": 6301 }, { "epoch": 0.47437851672029957, "grad_norm": 5.006059169769287, "learning_rate": 5.6715114863365315e-05, "loss": 1.9331, "step": 6302 }, { "epoch": 0.47445379100095975, "grad_norm": 4.76358699798584, "learning_rate": 5.6703034217216946e-05, "loss": 1.772, "step": 6303 }, { "epoch": 0.4745290652816199, "grad_norm": 4.853478908538818, "learning_rate": 5.669095317259243e-05, "loss": 1.7636, "step": 6304 }, { "epoch": 0.47460433956228004, "grad_norm": 5.0832390785217285, "learning_rate": 5.667887173020994e-05, "loss": 1.894, "step": 6305 }, { "epoch": 0.4746796138429402, "grad_norm": 4.127617359161377, "learning_rate": 5.6666789890787675e-05, "loss": 1.9766, "step": 6306 }, { "epoch": 0.4747548881236004, "grad_norm": 4.2291131019592285, "learning_rate": 5.6654707655043894e-05, "loss": 2.1901, "step": 6307 }, { "epoch": 0.4748301624042605, "grad_norm": 4.272363662719727, "learning_rate": 5.6642625023696825e-05, "loss": 1.8097, "step": 6308 }, { "epoch": 0.4749054366849207, "grad_norm": 4.337517738342285, "learning_rate": 5.663054199746477e-05, "loss": 1.9706, "step": 6309 }, { "epoch": 0.4749807109655808, "grad_norm": 3.9452738761901855, "learning_rate": 5.661845857706602e-05, "loss": 2.1088, "step": 6310 }, { "epoch": 0.475055985246241, "grad_norm": 4.407073974609375, "learning_rate": 5.6606374763218905e-05, "loss": 2.1809, "step": 6311 }, { "epoch": 0.47513125952690116, "grad_norm": 4.012044429779053, "learning_rate": 5.659429055664176e-05, "loss": 2.234, "step": 6312 }, { "epoch": 0.4752065338075613, "grad_norm": 3.9452366828918457, "learning_rate": 5.658220595805295e-05, "loss": 1.9897, "step": 6313 }, { "epoch": 0.47528180808822146, "grad_norm": 4.416503429412842, "learning_rate": 5.6570120968170894e-05, "loss": 2.2219, "step": 6314 }, { "epoch": 0.47535708236888163, "grad_norm": 4.730295658111572, "learning_rate": 5.6558035587714e-05, "loss": 1.7912, "step": 6315 }, { "epoch": 0.47543235664954175, "grad_norm": 4.5806193351745605, "learning_rate": 5.65459498174007e-05, "loss": 1.8763, "step": 6316 }, { "epoch": 0.47550763093020193, "grad_norm": 4.9890570640563965, "learning_rate": 5.6533863657949474e-05, "loss": 1.6085, "step": 6317 }, { "epoch": 0.4755829052108621, "grad_norm": 3.957623243331909, "learning_rate": 5.652177711007878e-05, "loss": 1.7229, "step": 6318 }, { "epoch": 0.4756581794915222, "grad_norm": 4.691608905792236, "learning_rate": 5.650969017450717e-05, "loss": 1.7539, "step": 6319 }, { "epoch": 0.4757334537721824, "grad_norm": 4.535962104797363, "learning_rate": 5.6497602851953156e-05, "loss": 1.7558, "step": 6320 }, { "epoch": 0.4758087280528425, "grad_norm": 4.202637195587158, "learning_rate": 5.6485515143135294e-05, "loss": 1.5503, "step": 6321 }, { "epoch": 0.4758840023335027, "grad_norm": 5.434566020965576, "learning_rate": 5.6473427048772165e-05, "loss": 1.9693, "step": 6322 }, { "epoch": 0.4759592766141629, "grad_norm": 3.815725326538086, "learning_rate": 5.646133856958237e-05, "loss": 2.7614, "step": 6323 }, { "epoch": 0.476034550894823, "grad_norm": 3.9051058292388916, "learning_rate": 5.6449249706284534e-05, "loss": 2.1292, "step": 6324 }, { "epoch": 0.47610982517548317, "grad_norm": 4.619932651519775, "learning_rate": 5.643716045959732e-05, "loss": 1.9181, "step": 6325 }, { "epoch": 0.47618509945614335, "grad_norm": 5.109991073608398, "learning_rate": 5.642507083023938e-05, "loss": 1.6782, "step": 6326 }, { "epoch": 0.47626037373680347, "grad_norm": 5.29757833480835, "learning_rate": 5.6412980818929406e-05, "loss": 2.1398, "step": 6327 }, { "epoch": 0.47633564801746364, "grad_norm": 4.539900302886963, "learning_rate": 5.640089042638614e-05, "loss": 1.9293, "step": 6328 }, { "epoch": 0.47641092229812376, "grad_norm": 5.332345008850098, "learning_rate": 5.6388799653328284e-05, "loss": 1.8492, "step": 6329 }, { "epoch": 0.47648619657878394, "grad_norm": 6.914534091949463, "learning_rate": 5.637670850047464e-05, "loss": 1.849, "step": 6330 }, { "epoch": 0.4765614708594441, "grad_norm": 4.831961154937744, "learning_rate": 5.6364616968543973e-05, "loss": 1.7322, "step": 6331 }, { "epoch": 0.47663674514010423, "grad_norm": 5.263299942016602, "learning_rate": 5.635252505825508e-05, "loss": 2.0602, "step": 6332 }, { "epoch": 0.4767120194207644, "grad_norm": 8.768918991088867, "learning_rate": 5.63404327703268e-05, "loss": 1.8796, "step": 6333 }, { "epoch": 0.4767872937014246, "grad_norm": 5.573263168334961, "learning_rate": 5.632834010547798e-05, "loss": 1.8019, "step": 6334 }, { "epoch": 0.4768625679820847, "grad_norm": 4.626332759857178, "learning_rate": 5.6316247064427504e-05, "loss": 2.0785, "step": 6335 }, { "epoch": 0.4769378422627449, "grad_norm": 13.972732543945312, "learning_rate": 5.630415364789426e-05, "loss": 1.8535, "step": 6336 }, { "epoch": 0.47701311654340506, "grad_norm": 4.4583563804626465, "learning_rate": 5.629205985659718e-05, "loss": 1.6426, "step": 6337 }, { "epoch": 0.4770883908240652, "grad_norm": 4.307314872741699, "learning_rate": 5.62799656912552e-05, "loss": 1.6567, "step": 6338 }, { "epoch": 0.47716366510472535, "grad_norm": 4.567723751068115, "learning_rate": 5.626787115258726e-05, "loss": 1.9421, "step": 6339 }, { "epoch": 0.4772389393853855, "grad_norm": 4.745924949645996, "learning_rate": 5.6255776241312374e-05, "loss": 2.2892, "step": 6340 }, { "epoch": 0.47731421366604565, "grad_norm": 4.9133100509643555, "learning_rate": 5.6243680958149525e-05, "loss": 1.8242, "step": 6341 }, { "epoch": 0.4773894879467058, "grad_norm": 5.110033988952637, "learning_rate": 5.623158530381778e-05, "loss": 1.6635, "step": 6342 }, { "epoch": 0.47746476222736595, "grad_norm": 4.4033613204956055, "learning_rate": 5.621948927903616e-05, "loss": 1.9696, "step": 6343 }, { "epoch": 0.4775400365080261, "grad_norm": 5.070498943328857, "learning_rate": 5.6207392884523755e-05, "loss": 2.2543, "step": 6344 }, { "epoch": 0.4776153107886863, "grad_norm": 7.706250190734863, "learning_rate": 5.619529612099965e-05, "loss": 1.7195, "step": 6345 }, { "epoch": 0.4776905850693464, "grad_norm": 3.8137874603271484, "learning_rate": 5.618319898918296e-05, "loss": 1.8165, "step": 6346 }, { "epoch": 0.4777658593500066, "grad_norm": 5.7412800788879395, "learning_rate": 5.6171101489792835e-05, "loss": 2.1338, "step": 6347 }, { "epoch": 0.4778411336306667, "grad_norm": 5.855752944946289, "learning_rate": 5.6159003623548444e-05, "loss": 1.9841, "step": 6348 }, { "epoch": 0.4779164079113269, "grad_norm": 4.267614841461182, "learning_rate": 5.6146905391168946e-05, "loss": 1.9286, "step": 6349 }, { "epoch": 0.47799168219198707, "grad_norm": 5.545926094055176, "learning_rate": 5.613480679337357e-05, "loss": 2.0125, "step": 6350 }, { "epoch": 0.4780669564726472, "grad_norm": 4.504691123962402, "learning_rate": 5.6122707830881516e-05, "loss": 1.7489, "step": 6351 }, { "epoch": 0.47814223075330736, "grad_norm": 4.732955455780029, "learning_rate": 5.6110608504412064e-05, "loss": 1.8037, "step": 6352 }, { "epoch": 0.47821750503396754, "grad_norm": 5.685470104217529, "learning_rate": 5.609850881468447e-05, "loss": 1.6545, "step": 6353 }, { "epoch": 0.47829277931462766, "grad_norm": 5.293968200683594, "learning_rate": 5.608640876241803e-05, "loss": 1.8031, "step": 6354 }, { "epoch": 0.47836805359528783, "grad_norm": 3.5953304767608643, "learning_rate": 5.607430834833204e-05, "loss": 1.7303, "step": 6355 }, { "epoch": 0.478443327875948, "grad_norm": 5.320721626281738, "learning_rate": 5.606220757314585e-05, "loss": 1.9182, "step": 6356 }, { "epoch": 0.47851860215660813, "grad_norm": 4.980002403259277, "learning_rate": 5.605010643757881e-05, "loss": 1.9659, "step": 6357 }, { "epoch": 0.4785938764372683, "grad_norm": 4.784584045410156, "learning_rate": 5.603800494235032e-05, "loss": 1.6457, "step": 6358 }, { "epoch": 0.4786691507179284, "grad_norm": 3.8784241676330566, "learning_rate": 5.6025903088179745e-05, "loss": 1.5963, "step": 6359 }, { "epoch": 0.4787444249985886, "grad_norm": 4.006401538848877, "learning_rate": 5.601380087578654e-05, "loss": 1.791, "step": 6360 }, { "epoch": 0.4788196992792488, "grad_norm": 5.516632080078125, "learning_rate": 5.600169830589012e-05, "loss": 1.7746, "step": 6361 }, { "epoch": 0.4788949735599089, "grad_norm": 3.885056495666504, "learning_rate": 5.5989595379209945e-05, "loss": 1.9317, "step": 6362 }, { "epoch": 0.4789702478405691, "grad_norm": 3.2825815677642822, "learning_rate": 5.597749209646551e-05, "loss": 1.8626, "step": 6363 }, { "epoch": 0.47904552212122925, "grad_norm": 4.296835422515869, "learning_rate": 5.5965388458376337e-05, "loss": 1.6603, "step": 6364 }, { "epoch": 0.47912079640188937, "grad_norm": 6.031205654144287, "learning_rate": 5.595328446566193e-05, "loss": 2.3506, "step": 6365 }, { "epoch": 0.47919607068254955, "grad_norm": 3.74759578704834, "learning_rate": 5.5941180119041845e-05, "loss": 1.8234, "step": 6366 }, { "epoch": 0.4792713449632097, "grad_norm": 3.4017677307128906, "learning_rate": 5.592907541923564e-05, "loss": 1.8133, "step": 6367 }, { "epoch": 0.47934661924386984, "grad_norm": 3.7446372509002686, "learning_rate": 5.5916970366962914e-05, "loss": 1.8927, "step": 6368 }, { "epoch": 0.47942189352453, "grad_norm": 4.254096508026123, "learning_rate": 5.5904864962943296e-05, "loss": 1.8294, "step": 6369 }, { "epoch": 0.47949716780519014, "grad_norm": 3.59273099899292, "learning_rate": 5.5892759207896386e-05, "loss": 1.8149, "step": 6370 }, { "epoch": 0.4795724420858503, "grad_norm": 4.840024471282959, "learning_rate": 5.588065310254185e-05, "loss": 1.8987, "step": 6371 }, { "epoch": 0.4796477163665105, "grad_norm": 6.468723297119141, "learning_rate": 5.586854664759935e-05, "loss": 1.8035, "step": 6372 }, { "epoch": 0.4797229906471706, "grad_norm": 6.230376720428467, "learning_rate": 5.585643984378861e-05, "loss": 1.7975, "step": 6373 }, { "epoch": 0.4797982649278308, "grad_norm": 4.697869300842285, "learning_rate": 5.58443326918293e-05, "loss": 2.0766, "step": 6374 }, { "epoch": 0.47987353920849096, "grad_norm": 4.413533687591553, "learning_rate": 5.58322251924412e-05, "loss": 1.5893, "step": 6375 }, { "epoch": 0.4799488134891511, "grad_norm": 4.307180404663086, "learning_rate": 5.5820117346344045e-05, "loss": 2.1065, "step": 6376 }, { "epoch": 0.48002408776981126, "grad_norm": 4.2669196128845215, "learning_rate": 5.5808009154257625e-05, "loss": 1.6462, "step": 6377 }, { "epoch": 0.4800993620504714, "grad_norm": 4.337838172912598, "learning_rate": 5.579590061690171e-05, "loss": 1.7609, "step": 6378 }, { "epoch": 0.48017463633113155, "grad_norm": 4.283106803894043, "learning_rate": 5.5783791734996124e-05, "loss": 1.79, "step": 6379 }, { "epoch": 0.48024991061179173, "grad_norm": 4.739707946777344, "learning_rate": 5.5771682509260735e-05, "loss": 2.1653, "step": 6380 }, { "epoch": 0.48032518489245185, "grad_norm": 5.706414699554443, "learning_rate": 5.5759572940415386e-05, "loss": 1.5917, "step": 6381 }, { "epoch": 0.480400459173112, "grad_norm": 6.01108980178833, "learning_rate": 5.574746302917994e-05, "loss": 1.9168, "step": 6382 }, { "epoch": 0.4804757334537722, "grad_norm": 5.496085166931152, "learning_rate": 5.5735352776274306e-05, "loss": 2.017, "step": 6383 }, { "epoch": 0.4805510077344323, "grad_norm": 4.263923168182373, "learning_rate": 5.57232421824184e-05, "loss": 1.9053, "step": 6384 }, { "epoch": 0.4806262820150925, "grad_norm": 3.3274738788604736, "learning_rate": 5.5711131248332185e-05, "loss": 1.925, "step": 6385 }, { "epoch": 0.4807015562957527, "grad_norm": 4.82871675491333, "learning_rate": 5.56990199747356e-05, "loss": 1.8865, "step": 6386 }, { "epoch": 0.4807768305764128, "grad_norm": 5.63754415512085, "learning_rate": 5.568690836234861e-05, "loss": 1.773, "step": 6387 }, { "epoch": 0.48085210485707297, "grad_norm": 4.828920364379883, "learning_rate": 5.567479641189126e-05, "loss": 1.6184, "step": 6388 }, { "epoch": 0.4809273791377331, "grad_norm": 4.03684663772583, "learning_rate": 5.5662684124083545e-05, "loss": 1.7707, "step": 6389 }, { "epoch": 0.48100265341839327, "grad_norm": 7.016988754272461, "learning_rate": 5.56505714996455e-05, "loss": 2.0252, "step": 6390 }, { "epoch": 0.48107792769905344, "grad_norm": 5.318288326263428, "learning_rate": 5.563845853929718e-05, "loss": 2.0716, "step": 6391 }, { "epoch": 0.48115320197971356, "grad_norm": 3.760000705718994, "learning_rate": 5.562634524375869e-05, "loss": 1.7625, "step": 6392 }, { "epoch": 0.48122847626037374, "grad_norm": 5.033058166503906, "learning_rate": 5.5614231613750124e-05, "loss": 1.6887, "step": 6393 }, { "epoch": 0.4813037505410339, "grad_norm": 3.9787211418151855, "learning_rate": 5.560211764999159e-05, "loss": 1.8863, "step": 6394 }, { "epoch": 0.48137902482169403, "grad_norm": 4.904836177825928, "learning_rate": 5.559000335320325e-05, "loss": 1.9658, "step": 6395 }, { "epoch": 0.4814542991023542, "grad_norm": 5.078438758850098, "learning_rate": 5.557788872410523e-05, "loss": 1.9511, "step": 6396 }, { "epoch": 0.48152957338301433, "grad_norm": 7.313134670257568, "learning_rate": 5.556577376341774e-05, "loss": 2.0546, "step": 6397 }, { "epoch": 0.4816048476636745, "grad_norm": 5.61083984375, "learning_rate": 5.555365847186097e-05, "loss": 1.9182, "step": 6398 }, { "epoch": 0.4816801219443347, "grad_norm": 6.040586948394775, "learning_rate": 5.5541542850155136e-05, "loss": 2.0561, "step": 6399 }, { "epoch": 0.4817553962249948, "grad_norm": 4.592431545257568, "learning_rate": 5.5529426899020484e-05, "loss": 2.0644, "step": 6400 }, { "epoch": 0.481830670505655, "grad_norm": 4.533272743225098, "learning_rate": 5.551731061917727e-05, "loss": 1.8183, "step": 6401 }, { "epoch": 0.48190594478631515, "grad_norm": 4.679579257965088, "learning_rate": 5.5505194011345764e-05, "loss": 2.0861, "step": 6402 }, { "epoch": 0.4819812190669753, "grad_norm": 4.739593505859375, "learning_rate": 5.549307707624627e-05, "loss": 1.6189, "step": 6403 }, { "epoch": 0.48205649334763545, "grad_norm": 3.4774935245513916, "learning_rate": 5.548095981459911e-05, "loss": 1.9006, "step": 6404 }, { "epoch": 0.4821317676282956, "grad_norm": 3.710263729095459, "learning_rate": 5.5468842227124615e-05, "loss": 1.6716, "step": 6405 }, { "epoch": 0.48220704190895575, "grad_norm": 5.572620868682861, "learning_rate": 5.545672431454314e-05, "loss": 1.686, "step": 6406 }, { "epoch": 0.4822823161896159, "grad_norm": 5.486653804779053, "learning_rate": 5.5444606077575056e-05, "loss": 2.2022, "step": 6407 }, { "epoch": 0.48235759047027604, "grad_norm": 3.2374887466430664, "learning_rate": 5.5432487516940765e-05, "loss": 1.7563, "step": 6408 }, { "epoch": 0.4824328647509362, "grad_norm": 5.346471786499023, "learning_rate": 5.5420368633360695e-05, "loss": 1.8299, "step": 6409 }, { "epoch": 0.4825081390315964, "grad_norm": 4.188678741455078, "learning_rate": 5.5408249427555245e-05, "loss": 1.9282, "step": 6410 }, { "epoch": 0.4825834133122565, "grad_norm": 4.597207546234131, "learning_rate": 5.5396129900244894e-05, "loss": 1.9242, "step": 6411 }, { "epoch": 0.4826586875929167, "grad_norm": 4.855897426605225, "learning_rate": 5.53840100521501e-05, "loss": 1.906, "step": 6412 }, { "epoch": 0.48273396187357687, "grad_norm": 5.525973796844482, "learning_rate": 5.5371889883991354e-05, "loss": 2.0479, "step": 6413 }, { "epoch": 0.482809236154237, "grad_norm": 7.5665283203125, "learning_rate": 5.535976939648917e-05, "loss": 1.9169, "step": 6414 }, { "epoch": 0.48288451043489716, "grad_norm": 3.9134891033172607, "learning_rate": 5.5347648590364085e-05, "loss": 1.938, "step": 6415 }, { "epoch": 0.48295978471555734, "grad_norm": 4.094476699829102, "learning_rate": 5.5335527466336625e-05, "loss": 1.6765, "step": 6416 }, { "epoch": 0.48303505899621746, "grad_norm": 4.275125503540039, "learning_rate": 5.5323406025127364e-05, "loss": 1.945, "step": 6417 }, { "epoch": 0.48311033327687763, "grad_norm": 4.317477703094482, "learning_rate": 5.5311284267456884e-05, "loss": 1.8148, "step": 6418 }, { "epoch": 0.48318560755753776, "grad_norm": 4.612224578857422, "learning_rate": 5.5299162194045806e-05, "loss": 2.2421, "step": 6419 }, { "epoch": 0.48326088183819793, "grad_norm": 5.999127388000488, "learning_rate": 5.528703980561474e-05, "loss": 1.9438, "step": 6420 }, { "epoch": 0.4833361561188581, "grad_norm": 4.804381847381592, "learning_rate": 5.527491710288433e-05, "loss": 2.0369, "step": 6421 }, { "epoch": 0.4834114303995182, "grad_norm": 16.27371597290039, "learning_rate": 5.526279408657522e-05, "loss": 2.0063, "step": 6422 }, { "epoch": 0.4834867046801784, "grad_norm": 4.779478073120117, "learning_rate": 5.525067075740812e-05, "loss": 1.4375, "step": 6423 }, { "epoch": 0.4835619789608386, "grad_norm": 6.568668842315674, "learning_rate": 5.523854711610369e-05, "loss": 2.0574, "step": 6424 }, { "epoch": 0.4836372532414987, "grad_norm": 5.429203510284424, "learning_rate": 5.522642316338268e-05, "loss": 1.8831, "step": 6425 }, { "epoch": 0.4837125275221589, "grad_norm": 5.603474140167236, "learning_rate": 5.52142988999658e-05, "loss": 1.6548, "step": 6426 }, { "epoch": 0.483787801802819, "grad_norm": 4.25176477432251, "learning_rate": 5.520217432657382e-05, "loss": 2.0371, "step": 6427 }, { "epoch": 0.48386307608347917, "grad_norm": 5.165562152862549, "learning_rate": 5.519004944392751e-05, "loss": 1.7478, "step": 6428 }, { "epoch": 0.48393835036413935, "grad_norm": 4.938843727111816, "learning_rate": 5.517792425274762e-05, "loss": 1.985, "step": 6429 }, { "epoch": 0.48401362464479947, "grad_norm": 4.424111366271973, "learning_rate": 5.516579875375502e-05, "loss": 2.0982, "step": 6430 }, { "epoch": 0.48408889892545964, "grad_norm": 4.951056957244873, "learning_rate": 5.51536729476705e-05, "loss": 1.7929, "step": 6431 }, { "epoch": 0.4841641732061198, "grad_norm": 5.1057281494140625, "learning_rate": 5.5141546835214906e-05, "loss": 2.2513, "step": 6432 }, { "epoch": 0.48423944748677994, "grad_norm": 3.885099411010742, "learning_rate": 5.5129420417109126e-05, "loss": 1.6526, "step": 6433 }, { "epoch": 0.4843147217674401, "grad_norm": 4.9807257652282715, "learning_rate": 5.511729369407399e-05, "loss": 2.0737, "step": 6434 }, { "epoch": 0.4843899960481003, "grad_norm": 4.173788547515869, "learning_rate": 5.5105166666830444e-05, "loss": 2.1203, "step": 6435 }, { "epoch": 0.4844652703287604, "grad_norm": 4.617697715759277, "learning_rate": 5.509303933609937e-05, "loss": 1.8019, "step": 6436 }, { "epoch": 0.4845405446094206, "grad_norm": 4.278045177459717, "learning_rate": 5.5080911702601734e-05, "loss": 2.2662, "step": 6437 }, { "epoch": 0.4846158188900807, "grad_norm": 4.496089935302734, "learning_rate": 5.5068783767058486e-05, "loss": 1.7241, "step": 6438 }, { "epoch": 0.4846910931707409, "grad_norm": 5.214039325714111, "learning_rate": 5.5056655530190584e-05, "loss": 1.8078, "step": 6439 }, { "epoch": 0.48476636745140106, "grad_norm": 4.256460666656494, "learning_rate": 5.504452699271901e-05, "loss": 2.028, "step": 6440 }, { "epoch": 0.4848416417320612, "grad_norm": 4.191312789916992, "learning_rate": 5.5032398155364775e-05, "loss": 1.737, "step": 6441 }, { "epoch": 0.48491691601272136, "grad_norm": 3.783006191253662, "learning_rate": 5.5020269018848923e-05, "loss": 2.128, "step": 6442 }, { "epoch": 0.48499219029338153, "grad_norm": 3.8177261352539062, "learning_rate": 5.5008139583892485e-05, "loss": 2.0486, "step": 6443 }, { "epoch": 0.48506746457404165, "grad_norm": 6.276378631591797, "learning_rate": 5.4996009851216515e-05, "loss": 1.689, "step": 6444 }, { "epoch": 0.4851427388547018, "grad_norm": 5.2423248291015625, "learning_rate": 5.49838798215421e-05, "loss": 1.7339, "step": 6445 }, { "epoch": 0.485218013135362, "grad_norm": 4.056105136871338, "learning_rate": 5.49717494955903e-05, "loss": 2.0997, "step": 6446 }, { "epoch": 0.4852932874160221, "grad_norm": 3.3604941368103027, "learning_rate": 5.4959618874082284e-05, "loss": 1.6656, "step": 6447 }, { "epoch": 0.4853685616966823, "grad_norm": 3.8618831634521484, "learning_rate": 5.494748795773915e-05, "loss": 1.9945, "step": 6448 }, { "epoch": 0.4854438359773424, "grad_norm": 4.705929279327393, "learning_rate": 5.493535674728206e-05, "loss": 1.7755, "step": 6449 }, { "epoch": 0.4855191102580026, "grad_norm": 3.9789557456970215, "learning_rate": 5.492322524343218e-05, "loss": 1.8262, "step": 6450 }, { "epoch": 0.48559438453866277, "grad_norm": 4.300180435180664, "learning_rate": 5.4911093446910654e-05, "loss": 1.9044, "step": 6451 }, { "epoch": 0.4856696588193229, "grad_norm": 5.275288105010986, "learning_rate": 5.4898961358438726e-05, "loss": 1.8408, "step": 6452 }, { "epoch": 0.48574493309998307, "grad_norm": 3.6018564701080322, "learning_rate": 5.488682897873761e-05, "loss": 1.8243, "step": 6453 }, { "epoch": 0.48582020738064324, "grad_norm": 4.977598667144775, "learning_rate": 5.487469630852854e-05, "loss": 1.8119, "step": 6454 }, { "epoch": 0.48589548166130336, "grad_norm": 5.883234977722168, "learning_rate": 5.486256334853276e-05, "loss": 1.9167, "step": 6455 }, { "epoch": 0.48597075594196354, "grad_norm": 6.700308322906494, "learning_rate": 5.4850430099471526e-05, "loss": 1.9208, "step": 6456 }, { "epoch": 0.48604603022262366, "grad_norm": 6.252125263214111, "learning_rate": 5.483829656206615e-05, "loss": 2.1003, "step": 6457 }, { "epoch": 0.48612130450328384, "grad_norm": 7.259039878845215, "learning_rate": 5.482616273703792e-05, "loss": 2.0475, "step": 6458 }, { "epoch": 0.486196578783944, "grad_norm": 4.768459320068359, "learning_rate": 5.481402862510817e-05, "loss": 2.1664, "step": 6459 }, { "epoch": 0.48627185306460413, "grad_norm": 4.7697529792785645, "learning_rate": 5.480189422699824e-05, "loss": 1.9889, "step": 6460 }, { "epoch": 0.4863471273452643, "grad_norm": 4.36065149307251, "learning_rate": 5.478975954342947e-05, "loss": 1.9103, "step": 6461 }, { "epoch": 0.4864224016259245, "grad_norm": 4.5343217849731445, "learning_rate": 5.477762457512323e-05, "loss": 2.1913, "step": 6462 }, { "epoch": 0.4864976759065846, "grad_norm": 4.51072359085083, "learning_rate": 5.476548932280092e-05, "loss": 1.9727, "step": 6463 }, { "epoch": 0.4865729501872448, "grad_norm": 4.391369819641113, "learning_rate": 5.4753353787183945e-05, "loss": 1.9416, "step": 6464 }, { "epoch": 0.48664822446790496, "grad_norm": 3.696805000305176, "learning_rate": 5.474121796899373e-05, "loss": 1.8544, "step": 6465 }, { "epoch": 0.4867234987485651, "grad_norm": 5.127026557922363, "learning_rate": 5.4729081868951706e-05, "loss": 1.6701, "step": 6466 }, { "epoch": 0.48679877302922525, "grad_norm": 3.75046443939209, "learning_rate": 5.471694548777934e-05, "loss": 2.1559, "step": 6467 }, { "epoch": 0.48687404730988537, "grad_norm": 6.920037746429443, "learning_rate": 5.4704808826198085e-05, "loss": 1.8228, "step": 6468 }, { "epoch": 0.48694932159054555, "grad_norm": 3.4855964183807373, "learning_rate": 5.4692671884929445e-05, "loss": 1.7654, "step": 6469 }, { "epoch": 0.4870245958712057, "grad_norm": 4.799417495727539, "learning_rate": 5.4680534664694935e-05, "loss": 1.6938, "step": 6470 }, { "epoch": 0.48709987015186584, "grad_norm": 4.094606399536133, "learning_rate": 5.466839716621608e-05, "loss": 1.899, "step": 6471 }, { "epoch": 0.487175144432526, "grad_norm": 4.776040554046631, "learning_rate": 5.4656259390214406e-05, "loss": 1.708, "step": 6472 }, { "epoch": 0.4872504187131862, "grad_norm": 3.5086989402770996, "learning_rate": 5.464412133741146e-05, "loss": 2.1431, "step": 6473 }, { "epoch": 0.4873256929938463, "grad_norm": 5.574459075927734, "learning_rate": 5.463198300852882e-05, "loss": 2.0567, "step": 6474 }, { "epoch": 0.4874009672745065, "grad_norm": 5.2659759521484375, "learning_rate": 5.46198444042881e-05, "loss": 1.8074, "step": 6475 }, { "epoch": 0.4874762415551666, "grad_norm": 4.93739652633667, "learning_rate": 5.460770552541088e-05, "loss": 2.391, "step": 6476 }, { "epoch": 0.4875515158358268, "grad_norm": 4.549590587615967, "learning_rate": 5.4595566372618775e-05, "loss": 1.9116, "step": 6477 }, { "epoch": 0.48762679011648696, "grad_norm": 5.146921634674072, "learning_rate": 5.458342694663345e-05, "loss": 2.0942, "step": 6478 }, { "epoch": 0.4877020643971471, "grad_norm": 6.532907009124756, "learning_rate": 5.457128724817654e-05, "loss": 2.1415, "step": 6479 }, { "epoch": 0.48777733867780726, "grad_norm": 4.148580074310303, "learning_rate": 5.455914727796971e-05, "loss": 1.9203, "step": 6480 }, { "epoch": 0.48785261295846744, "grad_norm": 4.636167049407959, "learning_rate": 5.454700703673468e-05, "loss": 1.7817, "step": 6481 }, { "epoch": 0.48792788723912756, "grad_norm": 4.693921089172363, "learning_rate": 5.453486652519311e-05, "loss": 1.9467, "step": 6482 }, { "epoch": 0.48800316151978773, "grad_norm": 5.099684715270996, "learning_rate": 5.4522725744066735e-05, "loss": 1.7751, "step": 6483 }, { "epoch": 0.4880784358004479, "grad_norm": 4.242069721221924, "learning_rate": 5.451058469407729e-05, "loss": 1.9235, "step": 6484 }, { "epoch": 0.488153710081108, "grad_norm": 4.0216064453125, "learning_rate": 5.449844337594654e-05, "loss": 1.7325, "step": 6485 }, { "epoch": 0.4882289843617682, "grad_norm": 5.807652473449707, "learning_rate": 5.448630179039622e-05, "loss": 1.9754, "step": 6486 }, { "epoch": 0.4883042586424283, "grad_norm": 4.336977005004883, "learning_rate": 5.447415993814815e-05, "loss": 1.3755, "step": 6487 }, { "epoch": 0.4883795329230885, "grad_norm": 6.303496837615967, "learning_rate": 5.4462017819924106e-05, "loss": 2.0581, "step": 6488 }, { "epoch": 0.4884548072037487, "grad_norm": 5.578193664550781, "learning_rate": 5.444987543644591e-05, "loss": 1.8661, "step": 6489 }, { "epoch": 0.4885300814844088, "grad_norm": 5.070801734924316, "learning_rate": 5.443773278843538e-05, "loss": 1.7357, "step": 6490 }, { "epoch": 0.48860535576506897, "grad_norm": 4.3117356300354, "learning_rate": 5.4425589876614366e-05, "loss": 2.3736, "step": 6491 }, { "epoch": 0.48868063004572915, "grad_norm": 4.435965061187744, "learning_rate": 5.441344670170474e-05, "loss": 1.7422, "step": 6492 }, { "epoch": 0.48875590432638927, "grad_norm": 3.736522674560547, "learning_rate": 5.4401303264428364e-05, "loss": 2.2251, "step": 6493 }, { "epoch": 0.48883117860704944, "grad_norm": 5.066519737243652, "learning_rate": 5.438915956550714e-05, "loss": 2.1216, "step": 6494 }, { "epoch": 0.4889064528877096, "grad_norm": 5.010735511779785, "learning_rate": 5.4377015605662986e-05, "loss": 1.8648, "step": 6495 }, { "epoch": 0.48898172716836974, "grad_norm": 4.502397537231445, "learning_rate": 5.4364871385617786e-05, "loss": 2.0692, "step": 6496 }, { "epoch": 0.4890570014490299, "grad_norm": 3.5062954425811768, "learning_rate": 5.435272690609353e-05, "loss": 2.0433, "step": 6497 }, { "epoch": 0.48913227572969004, "grad_norm": 3.7083046436309814, "learning_rate": 5.434058216781214e-05, "loss": 1.66, "step": 6498 }, { "epoch": 0.4892075500103502, "grad_norm": 9.759012222290039, "learning_rate": 5.4328437171495604e-05, "loss": 2.0309, "step": 6499 }, { "epoch": 0.4892828242910104, "grad_norm": 4.209195613861084, "learning_rate": 5.4316291917865883e-05, "loss": 2.0649, "step": 6500 }, { "epoch": 0.4893580985716705, "grad_norm": 5.8448333740234375, "learning_rate": 5.4304146407644994e-05, "loss": 1.5482, "step": 6501 }, { "epoch": 0.4894333728523307, "grad_norm": 4.206943511962891, "learning_rate": 5.429200064155495e-05, "loss": 2.0601, "step": 6502 }, { "epoch": 0.48950864713299086, "grad_norm": 8.297439575195312, "learning_rate": 5.427985462031779e-05, "loss": 2.5263, "step": 6503 }, { "epoch": 0.489583921413651, "grad_norm": 5.022284507751465, "learning_rate": 5.4267708344655556e-05, "loss": 1.8905, "step": 6504 }, { "epoch": 0.48965919569431116, "grad_norm": 6.0491228103637695, "learning_rate": 5.42555618152903e-05, "loss": 1.9399, "step": 6505 }, { "epoch": 0.4897344699749713, "grad_norm": 6.512802600860596, "learning_rate": 5.424341503294411e-05, "loss": 1.8161, "step": 6506 }, { "epoch": 0.48980974425563145, "grad_norm": 5.927082538604736, "learning_rate": 5.4231267998339066e-05, "loss": 2.0209, "step": 6507 }, { "epoch": 0.4898850185362916, "grad_norm": 5.356221675872803, "learning_rate": 5.421912071219728e-05, "loss": 1.9805, "step": 6508 }, { "epoch": 0.48996029281695175, "grad_norm": 4.81496000289917, "learning_rate": 5.420697317524088e-05, "loss": 1.8737, "step": 6509 }, { "epoch": 0.4900355670976119, "grad_norm": 5.58132266998291, "learning_rate": 5.4194825388191995e-05, "loss": 2.1025, "step": 6510 }, { "epoch": 0.4901108413782721, "grad_norm": 4.656373500823975, "learning_rate": 5.418267735177278e-05, "loss": 1.9189, "step": 6511 }, { "epoch": 0.4901861156589322, "grad_norm": 5.391656875610352, "learning_rate": 5.41705290667054e-05, "loss": 2.1958, "step": 6512 }, { "epoch": 0.4902613899395924, "grad_norm": 5.249373435974121, "learning_rate": 5.4158380533712016e-05, "loss": 1.7043, "step": 6513 }, { "epoch": 0.49033666422025257, "grad_norm": 4.356224536895752, "learning_rate": 5.414623175351486e-05, "loss": 2.0932, "step": 6514 }, { "epoch": 0.4904119385009127, "grad_norm": 7.173582077026367, "learning_rate": 5.413408272683611e-05, "loss": 1.5527, "step": 6515 }, { "epoch": 0.49048721278157287, "grad_norm": 6.840654373168945, "learning_rate": 5.4121933454398e-05, "loss": 1.938, "step": 6516 }, { "epoch": 0.490562487062233, "grad_norm": 4.42674446105957, "learning_rate": 5.410978393692279e-05, "loss": 2.1111, "step": 6517 }, { "epoch": 0.49063776134289316, "grad_norm": 4.461450576782227, "learning_rate": 5.40976341751327e-05, "loss": 2.0442, "step": 6518 }, { "epoch": 0.49071303562355334, "grad_norm": 5.244041442871094, "learning_rate": 5.408548416975002e-05, "loss": 1.4828, "step": 6519 }, { "epoch": 0.49078830990421346, "grad_norm": 4.033167839050293, "learning_rate": 5.4073333921497036e-05, "loss": 2.0941, "step": 6520 }, { "epoch": 0.49086358418487364, "grad_norm": 5.228991508483887, "learning_rate": 5.406118343109604e-05, "loss": 1.7965, "step": 6521 }, { "epoch": 0.4909388584655338, "grad_norm": 6.748352527618408, "learning_rate": 5.404903269926933e-05, "loss": 2.2177, "step": 6522 }, { "epoch": 0.49101413274619393, "grad_norm": 4.209940433502197, "learning_rate": 5.4036881726739265e-05, "loss": 1.7173, "step": 6523 }, { "epoch": 0.4910894070268541, "grad_norm": 4.503101348876953, "learning_rate": 5.402473051422814e-05, "loss": 1.9533, "step": 6524 }, { "epoch": 0.49116468130751423, "grad_norm": 5.839128494262695, "learning_rate": 5.401257906245835e-05, "loss": 2.0055, "step": 6525 }, { "epoch": 0.4912399555881744, "grad_norm": 4.642331600189209, "learning_rate": 5.400042737215224e-05, "loss": 2.099, "step": 6526 }, { "epoch": 0.4913152298688346, "grad_norm": 7.155869960784912, "learning_rate": 5.398827544403221e-05, "loss": 2.2093, "step": 6527 }, { "epoch": 0.4913905041494947, "grad_norm": 5.854203224182129, "learning_rate": 5.397612327882064e-05, "loss": 1.9402, "step": 6528 }, { "epoch": 0.4914657784301549, "grad_norm": 5.820188045501709, "learning_rate": 5.396397087723994e-05, "loss": 1.9695, "step": 6529 }, { "epoch": 0.49154105271081505, "grad_norm": 3.5693118572235107, "learning_rate": 5.395181824001256e-05, "loss": 1.8955, "step": 6530 }, { "epoch": 0.49161632699147517, "grad_norm": 7.379233360290527, "learning_rate": 5.3939665367860926e-05, "loss": 2.2634, "step": 6531 }, { "epoch": 0.49169160127213535, "grad_norm": 5.738043308258057, "learning_rate": 5.3927512261507485e-05, "loss": 2.2121, "step": 6532 }, { "epoch": 0.4917668755527955, "grad_norm": 4.206314563751221, "learning_rate": 5.391535892167471e-05, "loss": 1.8922, "step": 6533 }, { "epoch": 0.49184214983345564, "grad_norm": 3.834730863571167, "learning_rate": 5.390320534908508e-05, "loss": 1.8218, "step": 6534 }, { "epoch": 0.4919174241141158, "grad_norm": 4.031350135803223, "learning_rate": 5.389105154446109e-05, "loss": 2.0442, "step": 6535 }, { "epoch": 0.49199269839477594, "grad_norm": 5.927657604217529, "learning_rate": 5.3878897508525265e-05, "loss": 1.8528, "step": 6536 }, { "epoch": 0.4920679726754361, "grad_norm": 5.304276466369629, "learning_rate": 5.386674324200009e-05, "loss": 1.9195, "step": 6537 }, { "epoch": 0.4921432469560963, "grad_norm": 6.380029201507568, "learning_rate": 5.385458874560815e-05, "loss": 1.7714, "step": 6538 }, { "epoch": 0.4922185212367564, "grad_norm": 4.784676551818848, "learning_rate": 5.3842434020071954e-05, "loss": 2.2081, "step": 6539 }, { "epoch": 0.4922937955174166, "grad_norm": 4.669191837310791, "learning_rate": 5.383027906611409e-05, "loss": 1.7475, "step": 6540 }, { "epoch": 0.49236906979807676, "grad_norm": 4.375756740570068, "learning_rate": 5.381812388445711e-05, "loss": 1.765, "step": 6541 }, { "epoch": 0.4924443440787369, "grad_norm": 6.5041961669921875, "learning_rate": 5.380596847582363e-05, "loss": 1.7643, "step": 6542 }, { "epoch": 0.49251961835939706, "grad_norm": 4.717128276824951, "learning_rate": 5.379381284093624e-05, "loss": 1.8469, "step": 6543 }, { "epoch": 0.49259489264005724, "grad_norm": 5.612990856170654, "learning_rate": 5.378165698051757e-05, "loss": 2.2545, "step": 6544 }, { "epoch": 0.49267016692071736, "grad_norm": 4.011388301849365, "learning_rate": 5.3769500895290245e-05, "loss": 1.8134, "step": 6545 }, { "epoch": 0.49274544120137753, "grad_norm": 5.489065170288086, "learning_rate": 5.3757344585976886e-05, "loss": 1.8106, "step": 6546 }, { "epoch": 0.49282071548203765, "grad_norm": 4.551653861999512, "learning_rate": 5.37451880533002e-05, "loss": 1.659, "step": 6547 }, { "epoch": 0.49289598976269783, "grad_norm": 4.0693464279174805, "learning_rate": 5.3733031297982816e-05, "loss": 2.0894, "step": 6548 }, { "epoch": 0.492971264043358, "grad_norm": 6.810680866241455, "learning_rate": 5.372087432074743e-05, "loss": 1.9829, "step": 6549 }, { "epoch": 0.4930465383240181, "grad_norm": 4.803364276885986, "learning_rate": 5.3708717122316744e-05, "loss": 2.0914, "step": 6550 }, { "epoch": 0.4931218126046783, "grad_norm": 4.585589408874512, "learning_rate": 5.3696559703413474e-05, "loss": 1.63, "step": 6551 }, { "epoch": 0.4931970868853385, "grad_norm": 3.918064594268799, "learning_rate": 5.368440206476032e-05, "loss": 1.62, "step": 6552 }, { "epoch": 0.4932723611659986, "grad_norm": 4.058220863342285, "learning_rate": 5.3672244207080045e-05, "loss": 1.8729, "step": 6553 }, { "epoch": 0.49334763544665877, "grad_norm": 4.199698448181152, "learning_rate": 5.36600861310954e-05, "loss": 1.8036, "step": 6554 }, { "epoch": 0.4934229097273189, "grad_norm": 4.538209915161133, "learning_rate": 5.3647927837529135e-05, "loss": 2.0558, "step": 6555 }, { "epoch": 0.49349818400797907, "grad_norm": 4.436923503875732, "learning_rate": 5.363576932710401e-05, "loss": 1.8746, "step": 6556 }, { "epoch": 0.49357345828863924, "grad_norm": 4.124301433563232, "learning_rate": 5.362361060054284e-05, "loss": 2.0124, "step": 6557 }, { "epoch": 0.49364873256929936, "grad_norm": 4.70476770401001, "learning_rate": 5.3611451658568414e-05, "loss": 1.9247, "step": 6558 }, { "epoch": 0.49372400684995954, "grad_norm": 4.670519828796387, "learning_rate": 5.359929250190355e-05, "loss": 2.0932, "step": 6559 }, { "epoch": 0.4937992811306197, "grad_norm": 3.702254295349121, "learning_rate": 5.358713313127108e-05, "loss": 1.6535, "step": 6560 }, { "epoch": 0.49387455541127984, "grad_norm": 4.913912296295166, "learning_rate": 5.357497354739385e-05, "loss": 2.0881, "step": 6561 }, { "epoch": 0.49394982969194, "grad_norm": 4.65692663192749, "learning_rate": 5.356281375099467e-05, "loss": 2.0455, "step": 6562 }, { "epoch": 0.4940251039726002, "grad_norm": 5.847254276275635, "learning_rate": 5.355065374279645e-05, "loss": 1.8759, "step": 6563 }, { "epoch": 0.4941003782532603, "grad_norm": 4.444882869720459, "learning_rate": 5.353849352352206e-05, "loss": 1.791, "step": 6564 }, { "epoch": 0.4941756525339205, "grad_norm": 5.502654075622559, "learning_rate": 5.352633309389437e-05, "loss": 1.5627, "step": 6565 }, { "epoch": 0.4942509268145806, "grad_norm": 5.259897708892822, "learning_rate": 5.35141724546363e-05, "loss": 1.7208, "step": 6566 }, { "epoch": 0.4943262010952408, "grad_norm": 4.840779781341553, "learning_rate": 5.350201160647077e-05, "loss": 1.8647, "step": 6567 }, { "epoch": 0.49440147537590096, "grad_norm": 4.3065571784973145, "learning_rate": 5.348985055012069e-05, "loss": 2.0042, "step": 6568 }, { "epoch": 0.4944767496565611, "grad_norm": 3.9905309677124023, "learning_rate": 5.3477689286308996e-05, "loss": 1.9621, "step": 6569 }, { "epoch": 0.49455202393722125, "grad_norm": 5.944657325744629, "learning_rate": 5.3465527815758674e-05, "loss": 1.8088, "step": 6570 }, { "epoch": 0.49462729821788143, "grad_norm": 8.757949829101562, "learning_rate": 5.345336613919266e-05, "loss": 1.9353, "step": 6571 }, { "epoch": 0.49470257249854155, "grad_norm": 7.129432201385498, "learning_rate": 5.3441204257333946e-05, "loss": 1.7618, "step": 6572 }, { "epoch": 0.4947778467792017, "grad_norm": 4.390137195587158, "learning_rate": 5.342904217090549e-05, "loss": 1.9872, "step": 6573 }, { "epoch": 0.49485312105986184, "grad_norm": 6.598457336425781, "learning_rate": 5.3416879880630336e-05, "loss": 1.9089, "step": 6574 }, { "epoch": 0.494928395340522, "grad_norm": 8.359405517578125, "learning_rate": 5.340471738723147e-05, "loss": 2.3524, "step": 6575 }, { "epoch": 0.4950036696211822, "grad_norm": 4.667253494262695, "learning_rate": 5.3392554691431926e-05, "loss": 1.9262, "step": 6576 }, { "epoch": 0.4950789439018423, "grad_norm": 4.346924781799316, "learning_rate": 5.338039179395474e-05, "loss": 2.0853, "step": 6577 }, { "epoch": 0.4951542181825025, "grad_norm": 5.357377052307129, "learning_rate": 5.336822869552297e-05, "loss": 1.8427, "step": 6578 }, { "epoch": 0.49522949246316267, "grad_norm": 4.795270919799805, "learning_rate": 5.335606539685964e-05, "loss": 1.9301, "step": 6579 }, { "epoch": 0.4953047667438228, "grad_norm": 4.3588786125183105, "learning_rate": 5.3343901898687875e-05, "loss": 2.5415, "step": 6580 }, { "epoch": 0.49538004102448296, "grad_norm": 6.663821697235107, "learning_rate": 5.333173820173073e-05, "loss": 1.9014, "step": 6581 }, { "epoch": 0.49545531530514314, "grad_norm": 4.672849655151367, "learning_rate": 5.331957430671132e-05, "loss": 1.5636, "step": 6582 }, { "epoch": 0.49553058958580326, "grad_norm": 3.2184488773345947, "learning_rate": 5.3307410214352724e-05, "loss": 1.9791, "step": 6583 }, { "epoch": 0.49560586386646344, "grad_norm": 3.9048497676849365, "learning_rate": 5.32952459253781e-05, "loss": 2.0377, "step": 6584 }, { "epoch": 0.49568113814712356, "grad_norm": 6.407617568969727, "learning_rate": 5.328308144051054e-05, "loss": 1.8617, "step": 6585 }, { "epoch": 0.49575641242778373, "grad_norm": 4.093458652496338, "learning_rate": 5.327091676047322e-05, "loss": 1.7339, "step": 6586 }, { "epoch": 0.4958316867084439, "grad_norm": 6.431451797485352, "learning_rate": 5.325875188598929e-05, "loss": 2.0334, "step": 6587 }, { "epoch": 0.49590696098910403, "grad_norm": 4.165318012237549, "learning_rate": 5.3246586817781906e-05, "loss": 1.9062, "step": 6588 }, { "epoch": 0.4959822352697642, "grad_norm": 8.978361129760742, "learning_rate": 5.3234421556574254e-05, "loss": 2.1827, "step": 6589 }, { "epoch": 0.4960575095504244, "grad_norm": 4.872499465942383, "learning_rate": 5.322225610308952e-05, "loss": 2.0648, "step": 6590 }, { "epoch": 0.4961327838310845, "grad_norm": 5.128283500671387, "learning_rate": 5.32100904580509e-05, "loss": 1.6407, "step": 6591 }, { "epoch": 0.4962080581117447, "grad_norm": 3.9446115493774414, "learning_rate": 5.3197924622181614e-05, "loss": 1.5445, "step": 6592 }, { "epoch": 0.49628333239240485, "grad_norm": 5.773181915283203, "learning_rate": 5.3185758596204896e-05, "loss": 1.8557, "step": 6593 }, { "epoch": 0.496358606673065, "grad_norm": 4.181520938873291, "learning_rate": 5.3173592380843963e-05, "loss": 1.6195, "step": 6594 }, { "epoch": 0.49643388095372515, "grad_norm": 4.615962505340576, "learning_rate": 5.316142597682207e-05, "loss": 1.8266, "step": 6595 }, { "epoch": 0.49650915523438527, "grad_norm": 4.229737281799316, "learning_rate": 5.314925938486246e-05, "loss": 1.9448, "step": 6596 }, { "epoch": 0.49658442951504544, "grad_norm": 3.4112026691436768, "learning_rate": 5.313709260568842e-05, "loss": 1.8504, "step": 6597 }, { "epoch": 0.4966597037957056, "grad_norm": 4.6149444580078125, "learning_rate": 5.312492564002324e-05, "loss": 2.0293, "step": 6598 }, { "epoch": 0.49673497807636574, "grad_norm": 3.720318078994751, "learning_rate": 5.311275848859019e-05, "loss": 2.1347, "step": 6599 }, { "epoch": 0.4968102523570259, "grad_norm": 3.8474161624908447, "learning_rate": 5.310059115211259e-05, "loss": 1.914, "step": 6600 }, { "epoch": 0.4968855266376861, "grad_norm": 3.9680564403533936, "learning_rate": 5.3088423631313735e-05, "loss": 1.8852, "step": 6601 }, { "epoch": 0.4969608009183462, "grad_norm": 4.404036521911621, "learning_rate": 5.307625592691694e-05, "loss": 1.9378, "step": 6602 }, { "epoch": 0.4970360751990064, "grad_norm": 4.159010410308838, "learning_rate": 5.306408803964557e-05, "loss": 2.2767, "step": 6603 }, { "epoch": 0.4971113494796665, "grad_norm": 4.397821426391602, "learning_rate": 5.3051919970222964e-05, "loss": 2.1887, "step": 6604 }, { "epoch": 0.4971866237603267, "grad_norm": 5.168182849884033, "learning_rate": 5.303975171937248e-05, "loss": 1.8381, "step": 6605 }, { "epoch": 0.49726189804098686, "grad_norm": 3.5057852268218994, "learning_rate": 5.302758328781746e-05, "loss": 1.9219, "step": 6606 }, { "epoch": 0.497337172321647, "grad_norm": 6.528621196746826, "learning_rate": 5.30154146762813e-05, "loss": 2.0501, "step": 6607 }, { "epoch": 0.49741244660230716, "grad_norm": 4.499448776245117, "learning_rate": 5.3003245885487393e-05, "loss": 1.7813, "step": 6608 }, { "epoch": 0.49748772088296733, "grad_norm": 4.891198635101318, "learning_rate": 5.2991076916159146e-05, "loss": 1.8765, "step": 6609 }, { "epoch": 0.49756299516362745, "grad_norm": 4.469904899597168, "learning_rate": 5.297890776901996e-05, "loss": 2.5131, "step": 6610 }, { "epoch": 0.49763826944428763, "grad_norm": 4.918172836303711, "learning_rate": 5.296673844479325e-05, "loss": 1.5167, "step": 6611 }, { "epoch": 0.4977135437249478, "grad_norm": 6.013672828674316, "learning_rate": 5.2954568944202434e-05, "loss": 1.9267, "step": 6612 }, { "epoch": 0.4977888180056079, "grad_norm": 4.5740275382995605, "learning_rate": 5.294239926797099e-05, "loss": 1.6924, "step": 6613 }, { "epoch": 0.4978640922862681, "grad_norm": 5.910006046295166, "learning_rate": 5.293022941682234e-05, "loss": 1.9462, "step": 6614 }, { "epoch": 0.4979393665669282, "grad_norm": 4.6266984939575195, "learning_rate": 5.2918059391479965e-05, "loss": 1.9748, "step": 6615 }, { "epoch": 0.4980146408475884, "grad_norm": 4.044189453125, "learning_rate": 5.290588919266735e-05, "loss": 1.7018, "step": 6616 }, { "epoch": 0.4980899151282486, "grad_norm": 4.312623977661133, "learning_rate": 5.289371882110794e-05, "loss": 1.82, "step": 6617 }, { "epoch": 0.4981651894089087, "grad_norm": 4.063395977020264, "learning_rate": 5.288154827752526e-05, "loss": 1.9681, "step": 6618 }, { "epoch": 0.49824046368956887, "grad_norm": 3.1786062717437744, "learning_rate": 5.286937756264278e-05, "loss": 1.6618, "step": 6619 }, { "epoch": 0.49831573797022904, "grad_norm": 3.902327060699463, "learning_rate": 5.285720667718406e-05, "loss": 1.9643, "step": 6620 }, { "epoch": 0.49839101225088916, "grad_norm": 5.7253546714782715, "learning_rate": 5.28450356218726e-05, "loss": 1.9578, "step": 6621 }, { "epoch": 0.49846628653154934, "grad_norm": 8.799309730529785, "learning_rate": 5.283286439743193e-05, "loss": 2.0379, "step": 6622 }, { "epoch": 0.49854156081220946, "grad_norm": 4.470696926116943, "learning_rate": 5.282069300458561e-05, "loss": 2.1186, "step": 6623 }, { "epoch": 0.49861683509286964, "grad_norm": 4.90806245803833, "learning_rate": 5.280852144405717e-05, "loss": 1.9687, "step": 6624 }, { "epoch": 0.4986921093735298, "grad_norm": 4.017029285430908, "learning_rate": 5.279634971657019e-05, "loss": 1.6861, "step": 6625 }, { "epoch": 0.49876738365418993, "grad_norm": 5.66743278503418, "learning_rate": 5.2784177822848245e-05, "loss": 2.3473, "step": 6626 }, { "epoch": 0.4988426579348501, "grad_norm": 3.9530794620513916, "learning_rate": 5.277200576361493e-05, "loss": 1.8846, "step": 6627 }, { "epoch": 0.4989179322155103, "grad_norm": 4.128320693969727, "learning_rate": 5.2759833539593814e-05, "loss": 1.905, "step": 6628 }, { "epoch": 0.4989932064961704, "grad_norm": 4.087277412414551, "learning_rate": 5.274766115150851e-05, "loss": 1.7715, "step": 6629 }, { "epoch": 0.4990684807768306, "grad_norm": 4.750850200653076, "learning_rate": 5.273548860008263e-05, "loss": 2.3477, "step": 6630 }, { "epoch": 0.49914375505749076, "grad_norm": 4.339632987976074, "learning_rate": 5.27233158860398e-05, "loss": 1.842, "step": 6631 }, { "epoch": 0.4992190293381509, "grad_norm": 6.536107540130615, "learning_rate": 5.271114301010368e-05, "loss": 2.0784, "step": 6632 }, { "epoch": 0.49929430361881105, "grad_norm": 4.373968601226807, "learning_rate": 5.2698969972997866e-05, "loss": 1.7521, "step": 6633 }, { "epoch": 0.4993695778994712, "grad_norm": 3.7168049812316895, "learning_rate": 5.268679677544605e-05, "loss": 1.9182, "step": 6634 }, { "epoch": 0.49944485218013135, "grad_norm": 4.963070869445801, "learning_rate": 5.267462341817185e-05, "loss": 1.8125, "step": 6635 }, { "epoch": 0.4995201264607915, "grad_norm": 6.004863739013672, "learning_rate": 5.266244990189898e-05, "loss": 1.5558, "step": 6636 }, { "epoch": 0.49959540074145165, "grad_norm": 3.28002667427063, "learning_rate": 5.265027622735109e-05, "loss": 1.8543, "step": 6637 }, { "epoch": 0.4996706750221118, "grad_norm": 4.824798107147217, "learning_rate": 5.26381023952519e-05, "loss": 2.1974, "step": 6638 }, { "epoch": 0.499745949302772, "grad_norm": 5.915852069854736, "learning_rate": 5.262592840632509e-05, "loss": 1.9609, "step": 6639 }, { "epoch": 0.4998212235834321, "grad_norm": 5.207124710083008, "learning_rate": 5.261375426129436e-05, "loss": 1.6356, "step": 6640 }, { "epoch": 0.4998964978640923, "grad_norm": 4.946179389953613, "learning_rate": 5.260157996088343e-05, "loss": 1.7205, "step": 6641 }, { "epoch": 0.49997177214475247, "grad_norm": 4.143404960632324, "learning_rate": 5.258940550581606e-05, "loss": 1.8587, "step": 6642 }, { "epoch": 0.5000470464254126, "grad_norm": 4.522736072540283, "learning_rate": 5.2577230896815944e-05, "loss": 1.6212, "step": 6643 }, { "epoch": 0.5001223207060728, "grad_norm": 7.085289478302002, "learning_rate": 5.256505613460686e-05, "loss": 1.8814, "step": 6644 }, { "epoch": 0.5001975949867329, "grad_norm": 4.942041397094727, "learning_rate": 5.255288121991253e-05, "loss": 2.0122, "step": 6645 }, { "epoch": 0.5002728692673931, "grad_norm": 5.063052654266357, "learning_rate": 5.2540706153456755e-05, "loss": 1.7972, "step": 6646 }, { "epoch": 0.5003481435480532, "grad_norm": 6.212124347686768, "learning_rate": 5.2528530935963274e-05, "loss": 2.0467, "step": 6647 }, { "epoch": 0.5004234178287134, "grad_norm": 4.333958148956299, "learning_rate": 5.2516355568155895e-05, "loss": 1.9459, "step": 6648 }, { "epoch": 0.5004986921093735, "grad_norm": 4.473553657531738, "learning_rate": 5.250418005075839e-05, "loss": 1.6653, "step": 6649 }, { "epoch": 0.5005739663900337, "grad_norm": 5.196820259094238, "learning_rate": 5.2492004384494576e-05, "loss": 1.9713, "step": 6650 }, { "epoch": 0.5006492406706938, "grad_norm": 4.646674156188965, "learning_rate": 5.247982857008825e-05, "loss": 2.4776, "step": 6651 }, { "epoch": 0.500724514951354, "grad_norm": 4.333740234375, "learning_rate": 5.246765260826322e-05, "loss": 2.2692, "step": 6652 }, { "epoch": 0.5007997892320142, "grad_norm": 5.411248207092285, "learning_rate": 5.245547649974334e-05, "loss": 1.8884, "step": 6653 }, { "epoch": 0.5008750635126743, "grad_norm": 4.009570121765137, "learning_rate": 5.244330024525241e-05, "loss": 1.9895, "step": 6654 }, { "epoch": 0.5009503377933344, "grad_norm": 6.674574851989746, "learning_rate": 5.243112384551431e-05, "loss": 2.1379, "step": 6655 }, { "epoch": 0.5010256120739947, "grad_norm": 4.459656238555908, "learning_rate": 5.241894730125286e-05, "loss": 1.7061, "step": 6656 }, { "epoch": 0.5011008863546548, "grad_norm": 4.510525226593018, "learning_rate": 5.240677061319194e-05, "loss": 1.7712, "step": 6657 }, { "epoch": 0.5011761606353149, "grad_norm": 4.68900203704834, "learning_rate": 5.23945937820554e-05, "loss": 1.9551, "step": 6658 }, { "epoch": 0.5012514349159751, "grad_norm": 5.634761810302734, "learning_rate": 5.238241680856715e-05, "loss": 1.9508, "step": 6659 }, { "epoch": 0.5013267091966352, "grad_norm": 4.2384352684021, "learning_rate": 5.237023969345104e-05, "loss": 2.0671, "step": 6660 }, { "epoch": 0.5014019834772954, "grad_norm": 4.337565898895264, "learning_rate": 5.2358062437430974e-05, "loss": 1.6626, "step": 6661 }, { "epoch": 0.5014772577579556, "grad_norm": 6.642696380615234, "learning_rate": 5.234588504123089e-05, "loss": 2.0451, "step": 6662 }, { "epoch": 0.5015525320386157, "grad_norm": 5.136529922485352, "learning_rate": 5.2333707505574656e-05, "loss": 1.8311, "step": 6663 }, { "epoch": 0.5016278063192758, "grad_norm": 3.1710305213928223, "learning_rate": 5.23215298311862e-05, "loss": 2.0421, "step": 6664 }, { "epoch": 0.5017030805999361, "grad_norm": 4.964192867279053, "learning_rate": 5.230935201878947e-05, "loss": 1.6524, "step": 6665 }, { "epoch": 0.5017783548805962, "grad_norm": 4.646667003631592, "learning_rate": 5.229717406910838e-05, "loss": 1.9214, "step": 6666 }, { "epoch": 0.5018536291612563, "grad_norm": 5.1704421043396, "learning_rate": 5.228499598286689e-05, "loss": 1.9825, "step": 6667 }, { "epoch": 0.5019289034419164, "grad_norm": 3.866488218307495, "learning_rate": 5.227281776078895e-05, "loss": 1.9435, "step": 6668 }, { "epoch": 0.5020041777225767, "grad_norm": 4.703546524047852, "learning_rate": 5.22606394035985e-05, "loss": 1.8869, "step": 6669 }, { "epoch": 0.5020794520032368, "grad_norm": 5.812427997589111, "learning_rate": 5.224846091201953e-05, "loss": 2.0668, "step": 6670 }, { "epoch": 0.5021547262838969, "grad_norm": 4.198016166687012, "learning_rate": 5.223628228677602e-05, "loss": 2.3489, "step": 6671 }, { "epoch": 0.5022300005645571, "grad_norm": 5.551123142242432, "learning_rate": 5.222410352859193e-05, "loss": 1.9847, "step": 6672 }, { "epoch": 0.5023052748452173, "grad_norm": 4.253283977508545, "learning_rate": 5.221192463819128e-05, "loss": 1.7987, "step": 6673 }, { "epoch": 0.5023805491258774, "grad_norm": 3.8291642665863037, "learning_rate": 5.219974561629803e-05, "loss": 1.7841, "step": 6674 }, { "epoch": 0.5024558234065376, "grad_norm": 4.810837745666504, "learning_rate": 5.218756646363625e-05, "loss": 2.2023, "step": 6675 }, { "epoch": 0.5025310976871977, "grad_norm": 4.038191318511963, "learning_rate": 5.21753871809299e-05, "loss": 1.5822, "step": 6676 }, { "epoch": 0.5026063719678578, "grad_norm": 4.373762130737305, "learning_rate": 5.2163207768903036e-05, "loss": 1.8033, "step": 6677 }, { "epoch": 0.5026816462485181, "grad_norm": 4.48949670791626, "learning_rate": 5.215102822827966e-05, "loss": 1.9618, "step": 6678 }, { "epoch": 0.5027569205291782, "grad_norm": 3.321491003036499, "learning_rate": 5.2138848559783835e-05, "loss": 1.9513, "step": 6679 }, { "epoch": 0.5028321948098383, "grad_norm": 5.063469886779785, "learning_rate": 5.212666876413961e-05, "loss": 2.0714, "step": 6680 }, { "epoch": 0.5029074690904985, "grad_norm": 5.096776962280273, "learning_rate": 5.211448884207102e-05, "loss": 1.7062, "step": 6681 }, { "epoch": 0.5029827433711587, "grad_norm": 4.336129188537598, "learning_rate": 5.210230879430214e-05, "loss": 1.9647, "step": 6682 }, { "epoch": 0.5030580176518188, "grad_norm": 4.75596809387207, "learning_rate": 5.209012862155703e-05, "loss": 1.6804, "step": 6683 }, { "epoch": 0.503133291932479, "grad_norm": 4.212070465087891, "learning_rate": 5.207794832455978e-05, "loss": 1.9132, "step": 6684 }, { "epoch": 0.5032085662131391, "grad_norm": 4.937553882598877, "learning_rate": 5.2065767904034445e-05, "loss": 2.1129, "step": 6685 }, { "epoch": 0.5032838404937993, "grad_norm": 10.074333190917969, "learning_rate": 5.2053587360705156e-05, "loss": 2.1045, "step": 6686 }, { "epoch": 0.5033591147744594, "grad_norm": 4.156828880310059, "learning_rate": 5.204140669529598e-05, "loss": 1.9989, "step": 6687 }, { "epoch": 0.5034343890551196, "grad_norm": 5.474024772644043, "learning_rate": 5.2029225908531044e-05, "loss": 2.0191, "step": 6688 }, { "epoch": 0.5035096633357797, "grad_norm": 14.56948184967041, "learning_rate": 5.2017045001134445e-05, "loss": 2.0714, "step": 6689 }, { "epoch": 0.5035849376164399, "grad_norm": 4.782439231872559, "learning_rate": 5.200486397383031e-05, "loss": 2.0435, "step": 6690 }, { "epoch": 0.5036602118971001, "grad_norm": 4.0484747886657715, "learning_rate": 5.199268282734276e-05, "loss": 2.0064, "step": 6691 }, { "epoch": 0.5037354861777602, "grad_norm": 5.236764430999756, "learning_rate": 5.1980501562395925e-05, "loss": 1.5891, "step": 6692 }, { "epoch": 0.5038107604584203, "grad_norm": 3.6485483646392822, "learning_rate": 5.196832017971397e-05, "loss": 2.065, "step": 6693 }, { "epoch": 0.5038860347390806, "grad_norm": 4.8559041023254395, "learning_rate": 5.195613868002101e-05, "loss": 1.8319, "step": 6694 }, { "epoch": 0.5039613090197407, "grad_norm": 3.490993022918701, "learning_rate": 5.1943957064041225e-05, "loss": 1.6896, "step": 6695 }, { "epoch": 0.5040365833004008, "grad_norm": 4.025605201721191, "learning_rate": 5.193177533249879e-05, "loss": 1.9785, "step": 6696 }, { "epoch": 0.504111857581061, "grad_norm": 5.432565212249756, "learning_rate": 5.1919593486117824e-05, "loss": 2.1705, "step": 6697 }, { "epoch": 0.5041871318617211, "grad_norm": 4.28887414932251, "learning_rate": 5.190741152562255e-05, "loss": 1.8371, "step": 6698 }, { "epoch": 0.5042624061423813, "grad_norm": 3.529099941253662, "learning_rate": 5.1895229451737135e-05, "loss": 1.9488, "step": 6699 }, { "epoch": 0.5043376804230415, "grad_norm": 5.892711639404297, "learning_rate": 5.188304726518577e-05, "loss": 2.1959, "step": 6700 }, { "epoch": 0.5044129547037016, "grad_norm": 4.754748821258545, "learning_rate": 5.187086496669265e-05, "loss": 1.8788, "step": 6701 }, { "epoch": 0.5044882289843617, "grad_norm": 6.027359485626221, "learning_rate": 5.1858682556981954e-05, "loss": 1.9679, "step": 6702 }, { "epoch": 0.504563503265022, "grad_norm": 3.580472469329834, "learning_rate": 5.184650003677793e-05, "loss": 1.7225, "step": 6703 }, { "epoch": 0.5046387775456821, "grad_norm": 4.181191921234131, "learning_rate": 5.183431740680478e-05, "loss": 1.8376, "step": 6704 }, { "epoch": 0.5047140518263422, "grad_norm": 3.86775541305542, "learning_rate": 5.1822134667786714e-05, "loss": 1.6344, "step": 6705 }, { "epoch": 0.5047893261070024, "grad_norm": 5.569202423095703, "learning_rate": 5.180995182044798e-05, "loss": 1.9528, "step": 6706 }, { "epoch": 0.5048646003876626, "grad_norm": 5.448028087615967, "learning_rate": 5.1797768865512794e-05, "loss": 1.9753, "step": 6707 }, { "epoch": 0.5049398746683227, "grad_norm": 4.283004283905029, "learning_rate": 5.1785585803705416e-05, "loss": 1.9926, "step": 6708 }, { "epoch": 0.5050151489489828, "grad_norm": 6.169532299041748, "learning_rate": 5.177340263575008e-05, "loss": 1.7316, "step": 6709 }, { "epoch": 0.505090423229643, "grad_norm": 4.4532623291015625, "learning_rate": 5.1761219362371036e-05, "loss": 1.7977, "step": 6710 }, { "epoch": 0.5051656975103032, "grad_norm": 4.912318229675293, "learning_rate": 5.1749035984292574e-05, "loss": 1.8289, "step": 6711 }, { "epoch": 0.5052409717909633, "grad_norm": 5.287909030914307, "learning_rate": 5.173685250223893e-05, "loss": 2.0992, "step": 6712 }, { "epoch": 0.5053162460716235, "grad_norm": 4.788411617279053, "learning_rate": 5.1724668916934384e-05, "loss": 1.8494, "step": 6713 }, { "epoch": 0.5053915203522836, "grad_norm": 4.089344501495361, "learning_rate": 5.1712485229103224e-05, "loss": 2.2271, "step": 6714 }, { "epoch": 0.5054667946329438, "grad_norm": 3.6223223209381104, "learning_rate": 5.1700301439469736e-05, "loss": 2.0639, "step": 6715 }, { "epoch": 0.505542068913604, "grad_norm": 4.406446933746338, "learning_rate": 5.168811754875821e-05, "loss": 1.6676, "step": 6716 }, { "epoch": 0.5056173431942641, "grad_norm": 5.190280914306641, "learning_rate": 5.167593355769294e-05, "loss": 1.9448, "step": 6717 }, { "epoch": 0.5056926174749242, "grad_norm": 4.703592777252197, "learning_rate": 5.166374946699821e-05, "loss": 1.8969, "step": 6718 }, { "epoch": 0.5057678917555845, "grad_norm": 3.4521965980529785, "learning_rate": 5.165156527739836e-05, "loss": 1.6392, "step": 6719 }, { "epoch": 0.5058431660362446, "grad_norm": 6.324052333831787, "learning_rate": 5.1639380989617694e-05, "loss": 2.009, "step": 6720 }, { "epoch": 0.5059184403169047, "grad_norm": 4.220641613006592, "learning_rate": 5.1627196604380534e-05, "loss": 1.9461, "step": 6721 }, { "epoch": 0.5059937145975649, "grad_norm": 5.34996223449707, "learning_rate": 5.16150121224112e-05, "loss": 1.9681, "step": 6722 }, { "epoch": 0.506068988878225, "grad_norm": 3.2762959003448486, "learning_rate": 5.160282754443404e-05, "loss": 1.9002, "step": 6723 }, { "epoch": 0.5061442631588852, "grad_norm": 5.706763744354248, "learning_rate": 5.1590642871173354e-05, "loss": 1.8134, "step": 6724 }, { "epoch": 0.5062195374395454, "grad_norm": 5.4844136238098145, "learning_rate": 5.157845810335353e-05, "loss": 1.952, "step": 6725 }, { "epoch": 0.5062948117202055, "grad_norm": 5.797453880310059, "learning_rate": 5.15662732416989e-05, "loss": 1.9008, "step": 6726 }, { "epoch": 0.5063700860008656, "grad_norm": 3.9168620109558105, "learning_rate": 5.155408828693383e-05, "loss": 1.7457, "step": 6727 }, { "epoch": 0.5064453602815258, "grad_norm": 4.206057071685791, "learning_rate": 5.1541903239782664e-05, "loss": 1.5096, "step": 6728 }, { "epoch": 0.506520634562186, "grad_norm": 9.40599536895752, "learning_rate": 5.152971810096977e-05, "loss": 2.0214, "step": 6729 }, { "epoch": 0.5065959088428461, "grad_norm": 5.5322394371032715, "learning_rate": 5.151753287121952e-05, "loss": 2.4696, "step": 6730 }, { "epoch": 0.5066711831235062, "grad_norm": 5.431405067443848, "learning_rate": 5.150534755125631e-05, "loss": 1.8869, "step": 6731 }, { "epoch": 0.5067464574041665, "grad_norm": 7.7712321281433105, "learning_rate": 5.149316214180451e-05, "loss": 1.6708, "step": 6732 }, { "epoch": 0.5068217316848266, "grad_norm": 4.853963375091553, "learning_rate": 5.1480976643588486e-05, "loss": 2.1094, "step": 6733 }, { "epoch": 0.5068970059654867, "grad_norm": 4.799790382385254, "learning_rate": 5.1468791057332665e-05, "loss": 1.9773, "step": 6734 }, { "epoch": 0.5069722802461469, "grad_norm": 7.293521404266357, "learning_rate": 5.1456605383761414e-05, "loss": 2.3182, "step": 6735 }, { "epoch": 0.507047554526807, "grad_norm": 4.357425689697266, "learning_rate": 5.144441962359917e-05, "loss": 2.0324, "step": 6736 }, { "epoch": 0.5071228288074672, "grad_norm": 4.715337753295898, "learning_rate": 5.143223377757032e-05, "loss": 1.9506, "step": 6737 }, { "epoch": 0.5071981030881274, "grad_norm": 4.954517364501953, "learning_rate": 5.142004784639928e-05, "loss": 2.068, "step": 6738 }, { "epoch": 0.5072733773687875, "grad_norm": 8.095152854919434, "learning_rate": 5.140786183081046e-05, "loss": 1.8576, "step": 6739 }, { "epoch": 0.5073486516494476, "grad_norm": 3.657353401184082, "learning_rate": 5.13956757315283e-05, "loss": 1.7307, "step": 6740 }, { "epoch": 0.5074239259301079, "grad_norm": 3.8049044609069824, "learning_rate": 5.138348954927723e-05, "loss": 1.5787, "step": 6741 }, { "epoch": 0.507499200210768, "grad_norm": 6.047858715057373, "learning_rate": 5.137130328478166e-05, "loss": 1.9857, "step": 6742 }, { "epoch": 0.5075744744914281, "grad_norm": 5.1330461502075195, "learning_rate": 5.135911693876606e-05, "loss": 2.1349, "step": 6743 }, { "epoch": 0.5076497487720883, "grad_norm": 5.729600429534912, "learning_rate": 5.134693051195485e-05, "loss": 1.7973, "step": 6744 }, { "epoch": 0.5077250230527485, "grad_norm": 5.038491249084473, "learning_rate": 5.133474400507249e-05, "loss": 2.0774, "step": 6745 }, { "epoch": 0.5078002973334086, "grad_norm": 5.444791316986084, "learning_rate": 5.132255741884343e-05, "loss": 2.0905, "step": 6746 }, { "epoch": 0.5078755716140687, "grad_norm": 4.389624118804932, "learning_rate": 5.131037075399212e-05, "loss": 1.7255, "step": 6747 }, { "epoch": 0.5079508458947289, "grad_norm": 4.421477317810059, "learning_rate": 5.129818401124303e-05, "loss": 1.7822, "step": 6748 }, { "epoch": 0.5080261201753891, "grad_norm": 5.592228412628174, "learning_rate": 5.1285997191320635e-05, "loss": 2.1499, "step": 6749 }, { "epoch": 0.5081013944560492, "grad_norm": 5.381822109222412, "learning_rate": 5.1273810294949386e-05, "loss": 1.7784, "step": 6750 }, { "epoch": 0.5081766687367094, "grad_norm": 5.821904182434082, "learning_rate": 5.126162332285378e-05, "loss": 1.9367, "step": 6751 }, { "epoch": 0.5082519430173695, "grad_norm": 3.8950958251953125, "learning_rate": 5.124943627575827e-05, "loss": 1.7842, "step": 6752 }, { "epoch": 0.5083272172980297, "grad_norm": 5.104915142059326, "learning_rate": 5.1237249154387366e-05, "loss": 2.0075, "step": 6753 }, { "epoch": 0.5084024915786899, "grad_norm": 7.373931407928467, "learning_rate": 5.122506195946556e-05, "loss": 1.9125, "step": 6754 }, { "epoch": 0.50847776585935, "grad_norm": 5.867264270782471, "learning_rate": 5.121287469171733e-05, "loss": 2.1131, "step": 6755 }, { "epoch": 0.5085530401400101, "grad_norm": 4.396439552307129, "learning_rate": 5.120068735186718e-05, "loss": 1.9448, "step": 6756 }, { "epoch": 0.5086283144206704, "grad_norm": 5.604939937591553, "learning_rate": 5.11884999406396e-05, "loss": 1.9606, "step": 6757 }, { "epoch": 0.5087035887013305, "grad_norm": 4.8259358406066895, "learning_rate": 5.1176312458759115e-05, "loss": 1.9132, "step": 6758 }, { "epoch": 0.5087788629819906, "grad_norm": 5.529745101928711, "learning_rate": 5.116412490695023e-05, "loss": 1.8852, "step": 6759 }, { "epoch": 0.5088541372626508, "grad_norm": 5.993441581726074, "learning_rate": 5.115193728593747e-05, "loss": 1.9044, "step": 6760 }, { "epoch": 0.508929411543311, "grad_norm": 5.119747161865234, "learning_rate": 5.113974959644534e-05, "loss": 1.973, "step": 6761 }, { "epoch": 0.5090046858239711, "grad_norm": 8.281730651855469, "learning_rate": 5.112756183919837e-05, "loss": 1.9255, "step": 6762 }, { "epoch": 0.5090799601046313, "grad_norm": 4.355381965637207, "learning_rate": 5.111537401492106e-05, "loss": 2.0383, "step": 6763 }, { "epoch": 0.5091552343852914, "grad_norm": 6.217527866363525, "learning_rate": 5.110318612433799e-05, "loss": 1.9155, "step": 6764 }, { "epoch": 0.5092305086659515, "grad_norm": 5.128861904144287, "learning_rate": 5.1090998168173666e-05, "loss": 1.939, "step": 6765 }, { "epoch": 0.5093057829466117, "grad_norm": 4.530824661254883, "learning_rate": 5.107881014715263e-05, "loss": 1.8618, "step": 6766 }, { "epoch": 0.5093810572272719, "grad_norm": 3.5501315593719482, "learning_rate": 5.106662206199943e-05, "loss": 1.8874, "step": 6767 }, { "epoch": 0.509456331507932, "grad_norm": 3.821485757827759, "learning_rate": 5.10544339134386e-05, "loss": 1.6803, "step": 6768 }, { "epoch": 0.5095316057885921, "grad_norm": 4.466263294219971, "learning_rate": 5.104224570219469e-05, "loss": 1.7033, "step": 6769 }, { "epoch": 0.5096068800692524, "grad_norm": 6.206378936767578, "learning_rate": 5.1030057428992284e-05, "loss": 1.8901, "step": 6770 }, { "epoch": 0.5096821543499125, "grad_norm": 3.7993228435516357, "learning_rate": 5.1017869094555914e-05, "loss": 1.6731, "step": 6771 }, { "epoch": 0.5097574286305726, "grad_norm": 4.2592620849609375, "learning_rate": 5.1005680699610146e-05, "loss": 1.8052, "step": 6772 }, { "epoch": 0.5098327029112328, "grad_norm": 6.020939826965332, "learning_rate": 5.0993492244879546e-05, "loss": 1.9472, "step": 6773 }, { "epoch": 0.509907977191893, "grad_norm": 5.051983833312988, "learning_rate": 5.098130373108867e-05, "loss": 1.7637, "step": 6774 }, { "epoch": 0.5099832514725531, "grad_norm": 4.431304454803467, "learning_rate": 5.096911515896211e-05, "loss": 2.0273, "step": 6775 }, { "epoch": 0.5100585257532133, "grad_norm": 4.491030693054199, "learning_rate": 5.095692652922445e-05, "loss": 1.6398, "step": 6776 }, { "epoch": 0.5101338000338734, "grad_norm": 5.151197910308838, "learning_rate": 5.094473784260023e-05, "loss": 2.0918, "step": 6777 }, { "epoch": 0.5102090743145336, "grad_norm": 5.7026214599609375, "learning_rate": 5.093254909981407e-05, "loss": 2.0468, "step": 6778 }, { "epoch": 0.5102843485951938, "grad_norm": 5.698533535003662, "learning_rate": 5.092036030159055e-05, "loss": 1.7777, "step": 6779 }, { "epoch": 0.5103596228758539, "grad_norm": 5.243957042694092, "learning_rate": 5.090817144865423e-05, "loss": 2.1034, "step": 6780 }, { "epoch": 0.510434897156514, "grad_norm": 4.988044738769531, "learning_rate": 5.089598254172974e-05, "loss": 1.76, "step": 6781 }, { "epoch": 0.5105101714371743, "grad_norm": 4.061254024505615, "learning_rate": 5.088379358154165e-05, "loss": 1.7152, "step": 6782 }, { "epoch": 0.5105854457178344, "grad_norm": 5.8456807136535645, "learning_rate": 5.087160456881458e-05, "loss": 1.7742, "step": 6783 }, { "epoch": 0.5106607199984945, "grad_norm": 6.585233211517334, "learning_rate": 5.0859415504273114e-05, "loss": 1.895, "step": 6784 }, { "epoch": 0.5107359942791546, "grad_norm": 6.776304721832275, "learning_rate": 5.0847226388641845e-05, "loss": 1.8219, "step": 6785 }, { "epoch": 0.5108112685598148, "grad_norm": 4.197993755340576, "learning_rate": 5.083503722264541e-05, "loss": 1.6516, "step": 6786 }, { "epoch": 0.510886542840475, "grad_norm": 5.710675239562988, "learning_rate": 5.0822848007008414e-05, "loss": 1.8449, "step": 6787 }, { "epoch": 0.5109618171211351, "grad_norm": 5.319664001464844, "learning_rate": 5.0810658742455464e-05, "loss": 1.9438, "step": 6788 }, { "epoch": 0.5110370914017953, "grad_norm": 4.425714015960693, "learning_rate": 5.0798469429711185e-05, "loss": 1.7941, "step": 6789 }, { "epoch": 0.5111123656824554, "grad_norm": 5.851222038269043, "learning_rate": 5.0786280069500155e-05, "loss": 1.7404, "step": 6790 }, { "epoch": 0.5111876399631156, "grad_norm": 3.832235336303711, "learning_rate": 5.0774090662547055e-05, "loss": 1.991, "step": 6791 }, { "epoch": 0.5112629142437758, "grad_norm": 5.484068393707275, "learning_rate": 5.076190120957649e-05, "loss": 2.1917, "step": 6792 }, { "epoch": 0.5113381885244359, "grad_norm": 4.618852138519287, "learning_rate": 5.074971171131308e-05, "loss": 2.2317, "step": 6793 }, { "epoch": 0.511413462805096, "grad_norm": 10.821094512939453, "learning_rate": 5.0737522168481453e-05, "loss": 2.0404, "step": 6794 }, { "epoch": 0.5114887370857563, "grad_norm": 4.949869632720947, "learning_rate": 5.0725332581806265e-05, "loss": 1.6138, "step": 6795 }, { "epoch": 0.5115640113664164, "grad_norm": 7.573105335235596, "learning_rate": 5.0713142952012126e-05, "loss": 2.0455, "step": 6796 }, { "epoch": 0.5116392856470765, "grad_norm": 4.421236991882324, "learning_rate": 5.070095327982368e-05, "loss": 2.1107, "step": 6797 }, { "epoch": 0.5117145599277367, "grad_norm": 5.214579105377197, "learning_rate": 5.068876356596558e-05, "loss": 2.0186, "step": 6798 }, { "epoch": 0.5117898342083969, "grad_norm": 4.235634803771973, "learning_rate": 5.0676573811162465e-05, "loss": 2.0007, "step": 6799 }, { "epoch": 0.511865108489057, "grad_norm": 7.372382164001465, "learning_rate": 5.066438401613898e-05, "loss": 2.1085, "step": 6800 }, { "epoch": 0.5119403827697172, "grad_norm": 4.532595634460449, "learning_rate": 5.0652194181619774e-05, "loss": 1.754, "step": 6801 }, { "epoch": 0.5120156570503773, "grad_norm": 4.227382183074951, "learning_rate": 5.064000430832947e-05, "loss": 1.8598, "step": 6802 }, { "epoch": 0.5120909313310374, "grad_norm": 4.752003192901611, "learning_rate": 5.062781439699277e-05, "loss": 2.131, "step": 6803 }, { "epoch": 0.5121662056116977, "grad_norm": 4.954397201538086, "learning_rate": 5.06156244483343e-05, "loss": 2.0002, "step": 6804 }, { "epoch": 0.5122414798923578, "grad_norm": 5.293426036834717, "learning_rate": 5.060343446307872e-05, "loss": 1.963, "step": 6805 }, { "epoch": 0.5123167541730179, "grad_norm": 8.222238540649414, "learning_rate": 5.05912444419507e-05, "loss": 1.632, "step": 6806 }, { "epoch": 0.512392028453678, "grad_norm": 4.669669151306152, "learning_rate": 5.057905438567488e-05, "loss": 1.8256, "step": 6807 }, { "epoch": 0.5124673027343383, "grad_norm": 4.59066104888916, "learning_rate": 5.056686429497594e-05, "loss": 2.3087, "step": 6808 }, { "epoch": 0.5125425770149984, "grad_norm": 4.068904876708984, "learning_rate": 5.055467417057855e-05, "loss": 1.6905, "step": 6809 }, { "epoch": 0.5126178512956585, "grad_norm": 5.9013261795043945, "learning_rate": 5.054248401320738e-05, "loss": 2.3371, "step": 6810 }, { "epoch": 0.5126931255763187, "grad_norm": 8.629036903381348, "learning_rate": 5.053029382358708e-05, "loss": 1.807, "step": 6811 }, { "epoch": 0.5127683998569789, "grad_norm": 4.809014320373535, "learning_rate": 5.0518103602442346e-05, "loss": 1.7238, "step": 6812 }, { "epoch": 0.512843674137639, "grad_norm": 5.800009250640869, "learning_rate": 5.0505913350497825e-05, "loss": 2.4917, "step": 6813 }, { "epoch": 0.5129189484182992, "grad_norm": 4.991604328155518, "learning_rate": 5.049372306847821e-05, "loss": 2.08, "step": 6814 }, { "epoch": 0.5129942226989593, "grad_norm": 4.171029567718506, "learning_rate": 5.0481532757108184e-05, "loss": 1.9725, "step": 6815 }, { "epoch": 0.5130694969796195, "grad_norm": 9.149380683898926, "learning_rate": 5.046934241711242e-05, "loss": 2.2616, "step": 6816 }, { "epoch": 0.5131447712602797, "grad_norm": 4.736367225646973, "learning_rate": 5.045715204921559e-05, "loss": 1.7606, "step": 6817 }, { "epoch": 0.5132200455409398, "grad_norm": 4.694098949432373, "learning_rate": 5.0444961654142385e-05, "loss": 2.3341, "step": 6818 }, { "epoch": 0.5132953198215999, "grad_norm": 5.636812686920166, "learning_rate": 5.0432771232617484e-05, "loss": 2.4845, "step": 6819 }, { "epoch": 0.5133705941022602, "grad_norm": 4.193817138671875, "learning_rate": 5.042058078536557e-05, "loss": 1.7284, "step": 6820 }, { "epoch": 0.5134458683829203, "grad_norm": 6.854394912719727, "learning_rate": 5.040839031311134e-05, "loss": 2.0648, "step": 6821 }, { "epoch": 0.5135211426635804, "grad_norm": 4.402436256408691, "learning_rate": 5.0396199816579485e-05, "loss": 2.1523, "step": 6822 }, { "epoch": 0.5135964169442406, "grad_norm": 3.992069721221924, "learning_rate": 5.038400929649466e-05, "loss": 1.8521, "step": 6823 }, { "epoch": 0.5136716912249008, "grad_norm": 3.866669178009033, "learning_rate": 5.037181875358161e-05, "loss": 1.664, "step": 6824 }, { "epoch": 0.5137469655055609, "grad_norm": 3.647256374359131, "learning_rate": 5.035962818856499e-05, "loss": 2.0171, "step": 6825 }, { "epoch": 0.513822239786221, "grad_norm": 4.605841636657715, "learning_rate": 5.034743760216951e-05, "loss": 1.9501, "step": 6826 }, { "epoch": 0.5138975140668812, "grad_norm": 3.749753475189209, "learning_rate": 5.0335246995119864e-05, "loss": 1.6774, "step": 6827 }, { "epoch": 0.5139727883475413, "grad_norm": 4.9727678298950195, "learning_rate": 5.032305636814075e-05, "loss": 2.1272, "step": 6828 }, { "epoch": 0.5140480626282015, "grad_norm": 4.057631492614746, "learning_rate": 5.0310865721956854e-05, "loss": 2.6007, "step": 6829 }, { "epoch": 0.5141233369088617, "grad_norm": 3.980250120162964, "learning_rate": 5.029867505729289e-05, "loss": 2.0065, "step": 6830 }, { "epoch": 0.5141986111895218, "grad_norm": 5.497494220733643, "learning_rate": 5.028648437487354e-05, "loss": 2.1502, "step": 6831 }, { "epoch": 0.5142738854701819, "grad_norm": 3.7058589458465576, "learning_rate": 5.0274293675423525e-05, "loss": 1.8049, "step": 6832 }, { "epoch": 0.5143491597508422, "grad_norm": 7.040330410003662, "learning_rate": 5.0262102959667546e-05, "loss": 2.063, "step": 6833 }, { "epoch": 0.5144244340315023, "grad_norm": 5.201131343841553, "learning_rate": 5.024991222833029e-05, "loss": 2.1388, "step": 6834 }, { "epoch": 0.5144997083121624, "grad_norm": 5.2422356605529785, "learning_rate": 5.023772148213646e-05, "loss": 1.9717, "step": 6835 }, { "epoch": 0.5145749825928226, "grad_norm": 4.488335609436035, "learning_rate": 5.022553072181078e-05, "loss": 2.0539, "step": 6836 }, { "epoch": 0.5146502568734828, "grad_norm": 4.804295063018799, "learning_rate": 5.021333994807794e-05, "loss": 1.6963, "step": 6837 }, { "epoch": 0.5147255311541429, "grad_norm": 3.918212413787842, "learning_rate": 5.020114916166265e-05, "loss": 1.8088, "step": 6838 }, { "epoch": 0.5148008054348031, "grad_norm": 4.215106010437012, "learning_rate": 5.018895836328962e-05, "loss": 1.8461, "step": 6839 }, { "epoch": 0.5148760797154632, "grad_norm": 4.371639728546143, "learning_rate": 5.017676755368357e-05, "loss": 1.7672, "step": 6840 }, { "epoch": 0.5149513539961234, "grad_norm": 3.7421443462371826, "learning_rate": 5.016457673356917e-05, "loss": 2.0301, "step": 6841 }, { "epoch": 0.5150266282767836, "grad_norm": 5.293332099914551, "learning_rate": 5.015238590367117e-05, "loss": 2.3953, "step": 6842 }, { "epoch": 0.5151019025574437, "grad_norm": 5.041553020477295, "learning_rate": 5.014019506471428e-05, "loss": 1.9352, "step": 6843 }, { "epoch": 0.5151771768381038, "grad_norm": 4.102403163909912, "learning_rate": 5.012800421742318e-05, "loss": 1.9296, "step": 6844 }, { "epoch": 0.5152524511187639, "grad_norm": 4.40903377532959, "learning_rate": 5.0115813362522615e-05, "loss": 1.6571, "step": 6845 }, { "epoch": 0.5153277253994242, "grad_norm": 4.813185214996338, "learning_rate": 5.0103622500737255e-05, "loss": 2.4134, "step": 6846 }, { "epoch": 0.5154029996800843, "grad_norm": 3.8157384395599365, "learning_rate": 5.009143163279184e-05, "loss": 1.6864, "step": 6847 }, { "epoch": 0.5154782739607444, "grad_norm": 3.538339376449585, "learning_rate": 5.0079240759411085e-05, "loss": 2.0502, "step": 6848 }, { "epoch": 0.5155535482414046, "grad_norm": 5.106586933135986, "learning_rate": 5.00670498813197e-05, "loss": 1.7201, "step": 6849 }, { "epoch": 0.5156288225220648, "grad_norm": 4.649513244628906, "learning_rate": 5.005485899924237e-05, "loss": 2.0033, "step": 6850 }, { "epoch": 0.5157040968027249, "grad_norm": 5.803775787353516, "learning_rate": 5.004266811390385e-05, "loss": 1.8993, "step": 6851 }, { "epoch": 0.5157793710833851, "grad_norm": 4.632706165313721, "learning_rate": 5.0030477226028816e-05, "loss": 2.2742, "step": 6852 }, { "epoch": 0.5158546453640452, "grad_norm": 5.387406826019287, "learning_rate": 5.0018286336342e-05, "loss": 1.8267, "step": 6853 }, { "epoch": 0.5159299196447054, "grad_norm": 5.561295509338379, "learning_rate": 5.0006095445568124e-05, "loss": 2.2549, "step": 6854 }, { "epoch": 0.5160051939253656, "grad_norm": 7.257455348968506, "learning_rate": 4.9993904554431894e-05, "loss": 2.1567, "step": 6855 }, { "epoch": 0.5160804682060257, "grad_norm": 5.317276954650879, "learning_rate": 4.9981713663658004e-05, "loss": 1.6771, "step": 6856 }, { "epoch": 0.5161557424866858, "grad_norm": 4.524988651275635, "learning_rate": 4.99695227739712e-05, "loss": 2.0057, "step": 6857 }, { "epoch": 0.5162310167673461, "grad_norm": 5.059763431549072, "learning_rate": 4.9957331886096164e-05, "loss": 2.1157, "step": 6858 }, { "epoch": 0.5163062910480062, "grad_norm": 7.32643985748291, "learning_rate": 4.994514100075765e-05, "loss": 2.3038, "step": 6859 }, { "epoch": 0.5163815653286663, "grad_norm": 5.789794445037842, "learning_rate": 4.993295011868032e-05, "loss": 1.7921, "step": 6860 }, { "epoch": 0.5164568396093265, "grad_norm": 3.712477684020996, "learning_rate": 4.992075924058894e-05, "loss": 1.9185, "step": 6861 }, { "epoch": 0.5165321138899867, "grad_norm": 4.081736087799072, "learning_rate": 4.990856836720817e-05, "loss": 1.7324, "step": 6862 }, { "epoch": 0.5166073881706468, "grad_norm": 4.747015953063965, "learning_rate": 4.989637749926275e-05, "loss": 1.9919, "step": 6863 }, { "epoch": 0.5166826624513069, "grad_norm": 3.6823556423187256, "learning_rate": 4.9884186637477404e-05, "loss": 2.012, "step": 6864 }, { "epoch": 0.5167579367319671, "grad_norm": 4.525242328643799, "learning_rate": 4.9871995782576816e-05, "loss": 1.7546, "step": 6865 }, { "epoch": 0.5168332110126272, "grad_norm": 5.906013488769531, "learning_rate": 4.9859804935285734e-05, "loss": 2.0468, "step": 6866 }, { "epoch": 0.5169084852932874, "grad_norm": 4.098196983337402, "learning_rate": 4.984761409632882e-05, "loss": 1.8925, "step": 6867 }, { "epoch": 0.5169837595739476, "grad_norm": 6.528939723968506, "learning_rate": 4.9835423266430836e-05, "loss": 2.3215, "step": 6868 }, { "epoch": 0.5170590338546077, "grad_norm": 5.712769508361816, "learning_rate": 4.982323244631645e-05, "loss": 2.2422, "step": 6869 }, { "epoch": 0.5171343081352678, "grad_norm": 4.830761909484863, "learning_rate": 4.981104163671039e-05, "loss": 1.6947, "step": 6870 }, { "epoch": 0.5172095824159281, "grad_norm": 4.076691627502441, "learning_rate": 4.979885083833736e-05, "loss": 2.0192, "step": 6871 }, { "epoch": 0.5172848566965882, "grad_norm": 4.893392086029053, "learning_rate": 4.978666005192208e-05, "loss": 2.1991, "step": 6872 }, { "epoch": 0.5173601309772483, "grad_norm": 5.401125907897949, "learning_rate": 4.977446927818924e-05, "loss": 2.272, "step": 6873 }, { "epoch": 0.5174354052579085, "grad_norm": 7.253882884979248, "learning_rate": 4.976227851786356e-05, "loss": 2.2889, "step": 6874 }, { "epoch": 0.5175106795385687, "grad_norm": 5.098233699798584, "learning_rate": 4.975008777166973e-05, "loss": 1.8926, "step": 6875 }, { "epoch": 0.5175859538192288, "grad_norm": 5.662689685821533, "learning_rate": 4.973789704033248e-05, "loss": 1.8065, "step": 6876 }, { "epoch": 0.517661228099889, "grad_norm": 3.8207900524139404, "learning_rate": 4.972570632457648e-05, "loss": 1.9377, "step": 6877 }, { "epoch": 0.5177365023805491, "grad_norm": 6.19397497177124, "learning_rate": 4.9713515625126475e-05, "loss": 2.7659, "step": 6878 }, { "epoch": 0.5178117766612093, "grad_norm": 5.682821750640869, "learning_rate": 4.9701324942707125e-05, "loss": 2.0679, "step": 6879 }, { "epoch": 0.5178870509418695, "grad_norm": 5.77880859375, "learning_rate": 4.968913427804315e-05, "loss": 1.8812, "step": 6880 }, { "epoch": 0.5179623252225296, "grad_norm": 4.24069881439209, "learning_rate": 4.967694363185926e-05, "loss": 1.7689, "step": 6881 }, { "epoch": 0.5180375995031897, "grad_norm": 5.851943492889404, "learning_rate": 4.9664753004880134e-05, "loss": 1.8395, "step": 6882 }, { "epoch": 0.5181128737838498, "grad_norm": 5.177786350250244, "learning_rate": 4.96525623978305e-05, "loss": 1.813, "step": 6883 }, { "epoch": 0.5181881480645101, "grad_norm": 4.929501056671143, "learning_rate": 4.964037181143501e-05, "loss": 1.7638, "step": 6884 }, { "epoch": 0.5182634223451702, "grad_norm": 4.608745574951172, "learning_rate": 4.96281812464184e-05, "loss": 2.28, "step": 6885 }, { "epoch": 0.5183386966258303, "grad_norm": 5.799046993255615, "learning_rate": 4.9615990703505335e-05, "loss": 2.0126, "step": 6886 }, { "epoch": 0.5184139709064906, "grad_norm": 4.481860160827637, "learning_rate": 4.960380018342055e-05, "loss": 1.6807, "step": 6887 }, { "epoch": 0.5184892451871507, "grad_norm": 4.460608005523682, "learning_rate": 4.9591609686888676e-05, "loss": 1.9347, "step": 6888 }, { "epoch": 0.5185645194678108, "grad_norm": 4.363182067871094, "learning_rate": 4.9579419214634456e-05, "loss": 1.8492, "step": 6889 }, { "epoch": 0.518639793748471, "grad_norm": 4.726400375366211, "learning_rate": 4.9567228767382535e-05, "loss": 2.1296, "step": 6890 }, { "epoch": 0.5187150680291311, "grad_norm": 5.13202428817749, "learning_rate": 4.955503834585763e-05, "loss": 1.5846, "step": 6891 }, { "epoch": 0.5187903423097913, "grad_norm": 4.812490940093994, "learning_rate": 4.954284795078442e-05, "loss": 2.0728, "step": 6892 }, { "epoch": 0.5188656165904515, "grad_norm": 3.7938036918640137, "learning_rate": 4.95306575828876e-05, "loss": 1.7599, "step": 6893 }, { "epoch": 0.5189408908711116, "grad_norm": 3.8220901489257812, "learning_rate": 4.951846724289182e-05, "loss": 1.7535, "step": 6894 }, { "epoch": 0.5190161651517717, "grad_norm": 5.791460037231445, "learning_rate": 4.9506276931521785e-05, "loss": 2.2117, "step": 6895 }, { "epoch": 0.519091439432432, "grad_norm": 5.122953414916992, "learning_rate": 4.949408664950219e-05, "loss": 1.7235, "step": 6896 }, { "epoch": 0.5191667137130921, "grad_norm": 6.580044269561768, "learning_rate": 4.9481896397557665e-05, "loss": 1.9738, "step": 6897 }, { "epoch": 0.5192419879937522, "grad_norm": 3.792405843734741, "learning_rate": 4.946970617641292e-05, "loss": 1.9672, "step": 6898 }, { "epoch": 0.5193172622744124, "grad_norm": 4.758399963378906, "learning_rate": 4.9457515986792624e-05, "loss": 1.5851, "step": 6899 }, { "epoch": 0.5193925365550726, "grad_norm": 4.688673973083496, "learning_rate": 4.944532582942146e-05, "loss": 1.7311, "step": 6900 }, { "epoch": 0.5194678108357327, "grad_norm": 7.336785793304443, "learning_rate": 4.943313570502407e-05, "loss": 2.415, "step": 6901 }, { "epoch": 0.5195430851163929, "grad_norm": 5.997488975524902, "learning_rate": 4.942094561432513e-05, "loss": 2.4754, "step": 6902 }, { "epoch": 0.519618359397053, "grad_norm": 6.137256145477295, "learning_rate": 4.940875555804931e-05, "loss": 2.0807, "step": 6903 }, { "epoch": 0.5196936336777132, "grad_norm": 4.39750862121582, "learning_rate": 4.93965655369213e-05, "loss": 2.1995, "step": 6904 }, { "epoch": 0.5197689079583733, "grad_norm": 3.6037113666534424, "learning_rate": 4.9384375551665714e-05, "loss": 1.7112, "step": 6905 }, { "epoch": 0.5198441822390335, "grad_norm": 4.31144905090332, "learning_rate": 4.9372185603007255e-05, "loss": 1.9347, "step": 6906 }, { "epoch": 0.5199194565196936, "grad_norm": 4.638736248016357, "learning_rate": 4.935999569167054e-05, "loss": 1.838, "step": 6907 }, { "epoch": 0.5199947308003537, "grad_norm": 4.404640197753906, "learning_rate": 4.934780581838026e-05, "loss": 1.9759, "step": 6908 }, { "epoch": 0.520070005081014, "grad_norm": 4.184910774230957, "learning_rate": 4.9335615983861034e-05, "loss": 2.0044, "step": 6909 }, { "epoch": 0.5201452793616741, "grad_norm": 4.163177967071533, "learning_rate": 4.932342618883756e-05, "loss": 1.8945, "step": 6910 }, { "epoch": 0.5202205536423342, "grad_norm": 4.8875274658203125, "learning_rate": 4.931123643403443e-05, "loss": 1.8009, "step": 6911 }, { "epoch": 0.5202958279229944, "grad_norm": 4.165733337402344, "learning_rate": 4.9299046720176315e-05, "loss": 1.812, "step": 6912 }, { "epoch": 0.5203711022036546, "grad_norm": 4.415853023529053, "learning_rate": 4.9286857047987886e-05, "loss": 1.884, "step": 6913 }, { "epoch": 0.5204463764843147, "grad_norm": 5.037762641906738, "learning_rate": 4.927466741819373e-05, "loss": 1.9259, "step": 6914 }, { "epoch": 0.5205216507649749, "grad_norm": 4.049993515014648, "learning_rate": 4.926247783151855e-05, "loss": 1.8196, "step": 6915 }, { "epoch": 0.520596925045635, "grad_norm": 7.728363990783691, "learning_rate": 4.925028828868693e-05, "loss": 1.8621, "step": 6916 }, { "epoch": 0.5206721993262952, "grad_norm": 4.262650489807129, "learning_rate": 4.9238098790423526e-05, "loss": 1.9102, "step": 6917 }, { "epoch": 0.5207474736069554, "grad_norm": 5.607816219329834, "learning_rate": 4.922590933745295e-05, "loss": 2.0538, "step": 6918 }, { "epoch": 0.5208227478876155, "grad_norm": 4.166906356811523, "learning_rate": 4.921371993049985e-05, "loss": 1.9765, "step": 6919 }, { "epoch": 0.5208980221682756, "grad_norm": 4.375038146972656, "learning_rate": 4.9201530570288834e-05, "loss": 2.2206, "step": 6920 }, { "epoch": 0.5209732964489359, "grad_norm": 3.7139694690704346, "learning_rate": 4.918934125754456e-05, "loss": 1.5156, "step": 6921 }, { "epoch": 0.521048570729596, "grad_norm": 4.020748615264893, "learning_rate": 4.917715199299159e-05, "loss": 1.7546, "step": 6922 }, { "epoch": 0.5211238450102561, "grad_norm": 4.18602180480957, "learning_rate": 4.9164962777354605e-05, "loss": 1.8005, "step": 6923 }, { "epoch": 0.5211991192909162, "grad_norm": 3.9706244468688965, "learning_rate": 4.915277361135817e-05, "loss": 1.7741, "step": 6924 }, { "epoch": 0.5212743935715765, "grad_norm": 3.938563346862793, "learning_rate": 4.914058449572691e-05, "loss": 1.8682, "step": 6925 }, { "epoch": 0.5213496678522366, "grad_norm": 4.620153427124023, "learning_rate": 4.912839543118543e-05, "loss": 2.4686, "step": 6926 }, { "epoch": 0.5214249421328967, "grad_norm": 5.794540882110596, "learning_rate": 4.9116206418458364e-05, "loss": 1.8652, "step": 6927 }, { "epoch": 0.5215002164135569, "grad_norm": 4.430908679962158, "learning_rate": 4.910401745827027e-05, "loss": 1.9695, "step": 6928 }, { "epoch": 0.521575490694217, "grad_norm": 5.343656539916992, "learning_rate": 4.909182855134577e-05, "loss": 2.2022, "step": 6929 }, { "epoch": 0.5216507649748772, "grad_norm": 4.821630477905273, "learning_rate": 4.907963969840946e-05, "loss": 2.2725, "step": 6930 }, { "epoch": 0.5217260392555374, "grad_norm": 4.798400402069092, "learning_rate": 4.9067450900185926e-05, "loss": 1.7424, "step": 6931 }, { "epoch": 0.5218013135361975, "grad_norm": 5.2089667320251465, "learning_rate": 4.9055262157399776e-05, "loss": 1.7361, "step": 6932 }, { "epoch": 0.5218765878168576, "grad_norm": 5.804737567901611, "learning_rate": 4.9043073470775556e-05, "loss": 1.9865, "step": 6933 }, { "epoch": 0.5219518620975179, "grad_norm": 5.477221965789795, "learning_rate": 4.90308848410379e-05, "loss": 1.9161, "step": 6934 }, { "epoch": 0.522027136378178, "grad_norm": 4.658236026763916, "learning_rate": 4.901869626891133e-05, "loss": 1.8658, "step": 6935 }, { "epoch": 0.5221024106588381, "grad_norm": 4.555919647216797, "learning_rate": 4.900650775512047e-05, "loss": 2.1852, "step": 6936 }, { "epoch": 0.5221776849394983, "grad_norm": 4.156063556671143, "learning_rate": 4.8994319300389865e-05, "loss": 2.008, "step": 6937 }, { "epoch": 0.5222529592201585, "grad_norm": 3.579723358154297, "learning_rate": 4.8982130905444104e-05, "loss": 1.888, "step": 6938 }, { "epoch": 0.5223282335008186, "grad_norm": 5.1500420570373535, "learning_rate": 4.896994257100773e-05, "loss": 1.9456, "step": 6939 }, { "epoch": 0.5224035077814788, "grad_norm": 4.530655384063721, "learning_rate": 4.8957754297805314e-05, "loss": 2.0376, "step": 6940 }, { "epoch": 0.5224787820621389, "grad_norm": 5.194767951965332, "learning_rate": 4.894556608656141e-05, "loss": 2.1867, "step": 6941 }, { "epoch": 0.5225540563427991, "grad_norm": 4.082827091217041, "learning_rate": 4.89333779380006e-05, "loss": 2.2868, "step": 6942 }, { "epoch": 0.5226293306234592, "grad_norm": 5.288009166717529, "learning_rate": 4.892118985284738e-05, "loss": 1.65, "step": 6943 }, { "epoch": 0.5227046049041194, "grad_norm": 3.6602885723114014, "learning_rate": 4.890900183182633e-05, "loss": 1.9536, "step": 6944 }, { "epoch": 0.5227798791847795, "grad_norm": 5.31079626083374, "learning_rate": 4.889681387566202e-05, "loss": 1.7908, "step": 6945 }, { "epoch": 0.5228551534654396, "grad_norm": 5.516058921813965, "learning_rate": 4.888462598507893e-05, "loss": 1.7786, "step": 6946 }, { "epoch": 0.5229304277460999, "grad_norm": 5.667055130004883, "learning_rate": 4.887243816080165e-05, "loss": 2.0693, "step": 6947 }, { "epoch": 0.52300570202676, "grad_norm": 3.468761444091797, "learning_rate": 4.8860250403554665e-05, "loss": 1.7268, "step": 6948 }, { "epoch": 0.5230809763074201, "grad_norm": 3.6433684825897217, "learning_rate": 4.8848062714062544e-05, "loss": 1.8313, "step": 6949 }, { "epoch": 0.5231562505880804, "grad_norm": 4.384222030639648, "learning_rate": 4.8835875093049764e-05, "loss": 2.0591, "step": 6950 }, { "epoch": 0.5232315248687405, "grad_norm": 4.685346603393555, "learning_rate": 4.8823687541240896e-05, "loss": 1.9477, "step": 6951 }, { "epoch": 0.5233067991494006, "grad_norm": 3.928412437438965, "learning_rate": 4.881150005936041e-05, "loss": 1.8924, "step": 6952 }, { "epoch": 0.5233820734300608, "grad_norm": 4.876494884490967, "learning_rate": 4.879931264813284e-05, "loss": 1.82, "step": 6953 }, { "epoch": 0.5234573477107209, "grad_norm": 4.825035095214844, "learning_rate": 4.8787125308282684e-05, "loss": 1.8536, "step": 6954 }, { "epoch": 0.5235326219913811, "grad_norm": 4.440576076507568, "learning_rate": 4.877493804053446e-05, "loss": 1.8652, "step": 6955 }, { "epoch": 0.5236078962720413, "grad_norm": 3.434795618057251, "learning_rate": 4.8762750845612646e-05, "loss": 1.9216, "step": 6956 }, { "epoch": 0.5236831705527014, "grad_norm": 3.4492878913879395, "learning_rate": 4.8750563724241745e-05, "loss": 1.9036, "step": 6957 }, { "epoch": 0.5237584448333615, "grad_norm": 3.8548552989959717, "learning_rate": 4.8738376677146234e-05, "loss": 1.57, "step": 6958 }, { "epoch": 0.5238337191140218, "grad_norm": 3.8144750595092773, "learning_rate": 4.872618970505063e-05, "loss": 1.8975, "step": 6959 }, { "epoch": 0.5239089933946819, "grad_norm": 4.973082542419434, "learning_rate": 4.8714002808679384e-05, "loss": 1.7708, "step": 6960 }, { "epoch": 0.523984267675342, "grad_norm": 4.822205066680908, "learning_rate": 4.870181598875697e-05, "loss": 1.9087, "step": 6961 }, { "epoch": 0.5240595419560021, "grad_norm": 3.8845839500427246, "learning_rate": 4.8689629246007894e-05, "loss": 1.9338, "step": 6962 }, { "epoch": 0.5241348162366624, "grad_norm": 4.4362053871154785, "learning_rate": 4.867744258115658e-05, "loss": 1.9343, "step": 6963 }, { "epoch": 0.5242100905173225, "grad_norm": 4.53852653503418, "learning_rate": 4.8665255994927515e-05, "loss": 1.8991, "step": 6964 }, { "epoch": 0.5242853647979826, "grad_norm": 4.749608039855957, "learning_rate": 4.865306948804515e-05, "loss": 1.8591, "step": 6965 }, { "epoch": 0.5243606390786428, "grad_norm": 3.6270833015441895, "learning_rate": 4.8640883061233954e-05, "loss": 1.7171, "step": 6966 }, { "epoch": 0.524435913359303, "grad_norm": 5.376784801483154, "learning_rate": 4.8628696715218344e-05, "loss": 1.8227, "step": 6967 }, { "epoch": 0.5245111876399631, "grad_norm": 5.203309059143066, "learning_rate": 4.8616510450722784e-05, "loss": 1.6976, "step": 6968 }, { "epoch": 0.5245864619206233, "grad_norm": 4.017502307891846, "learning_rate": 4.860432426847171e-05, "loss": 2.0636, "step": 6969 }, { "epoch": 0.5246617362012834, "grad_norm": 4.973633766174316, "learning_rate": 4.8592138169189555e-05, "loss": 1.957, "step": 6970 }, { "epoch": 0.5247370104819435, "grad_norm": 3.9245617389678955, "learning_rate": 4.857995215360073e-05, "loss": 1.8552, "step": 6971 }, { "epoch": 0.5248122847626038, "grad_norm": 4.092756271362305, "learning_rate": 4.8567766222429706e-05, "loss": 1.7892, "step": 6972 }, { "epoch": 0.5248875590432639, "grad_norm": 5.6921892166137695, "learning_rate": 4.855558037640085e-05, "loss": 2.5466, "step": 6973 }, { "epoch": 0.524962833323924, "grad_norm": 4.092710018157959, "learning_rate": 4.85433946162386e-05, "loss": 2.1661, "step": 6974 }, { "epoch": 0.5250381076045842, "grad_norm": 4.116332530975342, "learning_rate": 4.853120894266735e-05, "loss": 1.848, "step": 6975 }, { "epoch": 0.5251133818852444, "grad_norm": 5.287973880767822, "learning_rate": 4.8519023356411505e-05, "loss": 1.8983, "step": 6976 }, { "epoch": 0.5251886561659045, "grad_norm": 3.953367233276367, "learning_rate": 4.850683785819551e-05, "loss": 1.7517, "step": 6977 }, { "epoch": 0.5252639304465647, "grad_norm": 3.8583693504333496, "learning_rate": 4.849465244874369e-05, "loss": 1.8783, "step": 6978 }, { "epoch": 0.5253392047272248, "grad_norm": 5.788880825042725, "learning_rate": 4.848246712878048e-05, "loss": 1.9744, "step": 6979 }, { "epoch": 0.525414479007885, "grad_norm": 5.482563018798828, "learning_rate": 4.847028189903024e-05, "loss": 2.1148, "step": 6980 }, { "epoch": 0.5254897532885452, "grad_norm": 7.347797393798828, "learning_rate": 4.845809676021735e-05, "loss": 2.3018, "step": 6981 }, { "epoch": 0.5255650275692053, "grad_norm": 4.234721660614014, "learning_rate": 4.8445911713066176e-05, "loss": 1.7296, "step": 6982 }, { "epoch": 0.5256403018498654, "grad_norm": 4.58505916595459, "learning_rate": 4.8433726758301104e-05, "loss": 1.9592, "step": 6983 }, { "epoch": 0.5257155761305256, "grad_norm": 4.317677974700928, "learning_rate": 4.8421541896646475e-05, "loss": 1.8073, "step": 6984 }, { "epoch": 0.5257908504111858, "grad_norm": 5.643073081970215, "learning_rate": 4.840935712882666e-05, "loss": 1.9062, "step": 6985 }, { "epoch": 0.5258661246918459, "grad_norm": 5.233477592468262, "learning_rate": 4.839717245556597e-05, "loss": 1.9583, "step": 6986 }, { "epoch": 0.525941398972506, "grad_norm": 4.624270439147949, "learning_rate": 4.8384987877588823e-05, "loss": 1.8511, "step": 6987 }, { "epoch": 0.5260166732531663, "grad_norm": 4.989600658416748, "learning_rate": 4.8372803395619484e-05, "loss": 1.9322, "step": 6988 }, { "epoch": 0.5260919475338264, "grad_norm": 6.288695812225342, "learning_rate": 4.836061901038233e-05, "loss": 2.4129, "step": 6989 }, { "epoch": 0.5261672218144865, "grad_norm": 4.117554187774658, "learning_rate": 4.834843472260165e-05, "loss": 1.9719, "step": 6990 }, { "epoch": 0.5262424960951467, "grad_norm": 4.179305553436279, "learning_rate": 4.83362505330018e-05, "loss": 2.1038, "step": 6991 }, { "epoch": 0.5263177703758068, "grad_norm": 4.1908793449401855, "learning_rate": 4.832406644230708e-05, "loss": 2.2383, "step": 6992 }, { "epoch": 0.526393044656467, "grad_norm": 4.188826560974121, "learning_rate": 4.831188245124179e-05, "loss": 1.8981, "step": 6993 }, { "epoch": 0.5264683189371272, "grad_norm": 4.018468379974365, "learning_rate": 4.829969856053027e-05, "loss": 2.1641, "step": 6994 }, { "epoch": 0.5265435932177873, "grad_norm": 3.9451262950897217, "learning_rate": 4.8287514770896774e-05, "loss": 1.8916, "step": 6995 }, { "epoch": 0.5266188674984474, "grad_norm": 4.334110260009766, "learning_rate": 4.827533108306563e-05, "loss": 1.8285, "step": 6996 }, { "epoch": 0.5266941417791077, "grad_norm": 3.5793375968933105, "learning_rate": 4.826314749776108e-05, "loss": 1.9938, "step": 6997 }, { "epoch": 0.5267694160597678, "grad_norm": 9.41827392578125, "learning_rate": 4.825096401570744e-05, "loss": 1.732, "step": 6998 }, { "epoch": 0.5268446903404279, "grad_norm": 4.204832077026367, "learning_rate": 4.823878063762896e-05, "loss": 2.0474, "step": 6999 }, { "epoch": 0.5269199646210881, "grad_norm": 4.062893390655518, "learning_rate": 4.822659736424994e-05, "loss": 1.8633, "step": 7000 }, { "epoch": 0.5269952389017483, "grad_norm": 4.513487815856934, "learning_rate": 4.82144141962946e-05, "loss": 1.5433, "step": 7001 }, { "epoch": 0.5270705131824084, "grad_norm": 3.875760555267334, "learning_rate": 4.820223113448722e-05, "loss": 1.7221, "step": 7002 }, { "epoch": 0.5271457874630685, "grad_norm": 3.7595367431640625, "learning_rate": 4.819004817955203e-05, "loss": 1.8335, "step": 7003 }, { "epoch": 0.5272210617437287, "grad_norm": 5.465179443359375, "learning_rate": 4.8177865332213304e-05, "loss": 1.7907, "step": 7004 }, { "epoch": 0.5272963360243889, "grad_norm": 6.533691883087158, "learning_rate": 4.8165682593195234e-05, "loss": 1.7674, "step": 7005 }, { "epoch": 0.527371610305049, "grad_norm": 5.1197381019592285, "learning_rate": 4.815349996322209e-05, "loss": 1.5915, "step": 7006 }, { "epoch": 0.5274468845857092, "grad_norm": 4.613940715789795, "learning_rate": 4.814131744301806e-05, "loss": 2.181, "step": 7007 }, { "epoch": 0.5275221588663693, "grad_norm": 3.5230231285095215, "learning_rate": 4.8129135033307364e-05, "loss": 1.5449, "step": 7008 }, { "epoch": 0.5275974331470294, "grad_norm": 3.9887583255767822, "learning_rate": 4.8116952734814244e-05, "loss": 2.1319, "step": 7009 }, { "epoch": 0.5276727074276897, "grad_norm": 5.512673377990723, "learning_rate": 4.810477054826286e-05, "loss": 2.1013, "step": 7010 }, { "epoch": 0.5277479817083498, "grad_norm": 5.702235221862793, "learning_rate": 4.8092588474377456e-05, "loss": 1.9598, "step": 7011 }, { "epoch": 0.5278232559890099, "grad_norm": 5.409027099609375, "learning_rate": 4.8080406513882175e-05, "loss": 2.2694, "step": 7012 }, { "epoch": 0.5278985302696702, "grad_norm": 5.838965892791748, "learning_rate": 4.8068224667501225e-05, "loss": 1.84, "step": 7013 }, { "epoch": 0.5279738045503303, "grad_norm": 3.913105010986328, "learning_rate": 4.8056042935958766e-05, "loss": 1.8403, "step": 7014 }, { "epoch": 0.5280490788309904, "grad_norm": 4.155385494232178, "learning_rate": 4.8043861319979e-05, "loss": 1.6537, "step": 7015 }, { "epoch": 0.5281243531116506, "grad_norm": 3.395749092102051, "learning_rate": 4.8031679820286044e-05, "loss": 1.6948, "step": 7016 }, { "epoch": 0.5281996273923107, "grad_norm": 5.627654552459717, "learning_rate": 4.80194984376041e-05, "loss": 1.7634, "step": 7017 }, { "epoch": 0.5282749016729709, "grad_norm": 4.569192886352539, "learning_rate": 4.800731717265726e-05, "loss": 1.7557, "step": 7018 }, { "epoch": 0.5283501759536311, "grad_norm": 6.035717010498047, "learning_rate": 4.7995136026169716e-05, "loss": 2.404, "step": 7019 }, { "epoch": 0.5284254502342912, "grad_norm": 5.31645393371582, "learning_rate": 4.798295499886557e-05, "loss": 1.7557, "step": 7020 }, { "epoch": 0.5285007245149513, "grad_norm": 4.1376142501831055, "learning_rate": 4.797077409146898e-05, "loss": 1.7917, "step": 7021 }, { "epoch": 0.5285759987956115, "grad_norm": 7.303235054016113, "learning_rate": 4.795859330470402e-05, "loss": 2.0473, "step": 7022 }, { "epoch": 0.5286512730762717, "grad_norm": 4.449717044830322, "learning_rate": 4.794641263929487e-05, "loss": 1.9801, "step": 7023 }, { "epoch": 0.5287265473569318, "grad_norm": 3.7318694591522217, "learning_rate": 4.793423209596557e-05, "loss": 1.8471, "step": 7024 }, { "epoch": 0.5288018216375919, "grad_norm": 4.434027194976807, "learning_rate": 4.792205167544023e-05, "loss": 1.9011, "step": 7025 }, { "epoch": 0.5288770959182522, "grad_norm": 4.1048431396484375, "learning_rate": 4.790987137844298e-05, "loss": 1.7617, "step": 7026 }, { "epoch": 0.5289523701989123, "grad_norm": 4.809249401092529, "learning_rate": 4.789769120569786e-05, "loss": 1.6215, "step": 7027 }, { "epoch": 0.5290276444795724, "grad_norm": 3.929060697555542, "learning_rate": 4.788551115792899e-05, "loss": 1.5432, "step": 7028 }, { "epoch": 0.5291029187602326, "grad_norm": 4.522894382476807, "learning_rate": 4.7873331235860396e-05, "loss": 1.9355, "step": 7029 }, { "epoch": 0.5291781930408928, "grad_norm": 3.6166481971740723, "learning_rate": 4.786115144021617e-05, "loss": 1.71, "step": 7030 }, { "epoch": 0.5292534673215529, "grad_norm": 3.611379623413086, "learning_rate": 4.784897177172034e-05, "loss": 1.973, "step": 7031 }, { "epoch": 0.5293287416022131, "grad_norm": 7.510868072509766, "learning_rate": 4.783679223109699e-05, "loss": 2.3842, "step": 7032 }, { "epoch": 0.5294040158828732, "grad_norm": 4.0979485511779785, "learning_rate": 4.782461281907011e-05, "loss": 1.7509, "step": 7033 }, { "epoch": 0.5294792901635333, "grad_norm": 4.53986120223999, "learning_rate": 4.781243353636378e-05, "loss": 1.5495, "step": 7034 }, { "epoch": 0.5295545644441936, "grad_norm": 3.8875980377197266, "learning_rate": 4.780025438370197e-05, "loss": 2.1569, "step": 7035 }, { "epoch": 0.5296298387248537, "grad_norm": 6.213791847229004, "learning_rate": 4.7788075361808743e-05, "loss": 2.0291, "step": 7036 }, { "epoch": 0.5297051130055138, "grad_norm": 4.641781806945801, "learning_rate": 4.7775896471408076e-05, "loss": 2.1041, "step": 7037 }, { "epoch": 0.529780387286174, "grad_norm": 3.900177478790283, "learning_rate": 4.776371771322401e-05, "loss": 1.7114, "step": 7038 }, { "epoch": 0.5298556615668342, "grad_norm": 4.5729146003723145, "learning_rate": 4.7751539087980484e-05, "loss": 1.9908, "step": 7039 }, { "epoch": 0.5299309358474943, "grad_norm": 7.400148391723633, "learning_rate": 4.7739360596401505e-05, "loss": 1.7709, "step": 7040 }, { "epoch": 0.5300062101281544, "grad_norm": 4.948382377624512, "learning_rate": 4.7727182239211066e-05, "loss": 2.2318, "step": 7041 }, { "epoch": 0.5300814844088146, "grad_norm": 4.408289432525635, "learning_rate": 4.771500401713311e-05, "loss": 1.8992, "step": 7042 }, { "epoch": 0.5301567586894748, "grad_norm": 5.357378959655762, "learning_rate": 4.770282593089162e-05, "loss": 1.9719, "step": 7043 }, { "epoch": 0.5302320329701349, "grad_norm": 5.367403030395508, "learning_rate": 4.769064798121054e-05, "loss": 1.9242, "step": 7044 }, { "epoch": 0.5303073072507951, "grad_norm": 5.554671287536621, "learning_rate": 4.767847016881381e-05, "loss": 1.9939, "step": 7045 }, { "epoch": 0.5303825815314552, "grad_norm": 6.019415855407715, "learning_rate": 4.7666292494425355e-05, "loss": 1.8112, "step": 7046 }, { "epoch": 0.5304578558121154, "grad_norm": 10.823569297790527, "learning_rate": 4.765411495876912e-05, "loss": 2.0026, "step": 7047 }, { "epoch": 0.5305331300927756, "grad_norm": 4.234469890594482, "learning_rate": 4.764193756256902e-05, "loss": 1.8089, "step": 7048 }, { "epoch": 0.5306084043734357, "grad_norm": 4.066115856170654, "learning_rate": 4.7629760306548975e-05, "loss": 1.7608, "step": 7049 }, { "epoch": 0.5306836786540958, "grad_norm": 3.949735403060913, "learning_rate": 4.761758319143287e-05, "loss": 1.8622, "step": 7050 }, { "epoch": 0.5307589529347561, "grad_norm": 2.902693033218384, "learning_rate": 4.760540621794462e-05, "loss": 1.6411, "step": 7051 }, { "epoch": 0.5308342272154162, "grad_norm": 4.166128635406494, "learning_rate": 4.759322938680808e-05, "loss": 1.8505, "step": 7052 }, { "epoch": 0.5309095014960763, "grad_norm": 4.4644927978515625, "learning_rate": 4.758105269874716e-05, "loss": 1.788, "step": 7053 }, { "epoch": 0.5309847757767365, "grad_norm": 3.843764305114746, "learning_rate": 4.75688761544857e-05, "loss": 1.7032, "step": 7054 }, { "epoch": 0.5310600500573966, "grad_norm": 3.5982511043548584, "learning_rate": 4.7556699754747605e-05, "loss": 1.9141, "step": 7055 }, { "epoch": 0.5311353243380568, "grad_norm": 4.304314613342285, "learning_rate": 4.754452350025668e-05, "loss": 1.9731, "step": 7056 }, { "epoch": 0.531210598618717, "grad_norm": 5.979513168334961, "learning_rate": 4.753234739173678e-05, "loss": 1.7079, "step": 7057 }, { "epoch": 0.5312858728993771, "grad_norm": 5.045577049255371, "learning_rate": 4.7520171429911755e-05, "loss": 2.1946, "step": 7058 }, { "epoch": 0.5313611471800372, "grad_norm": 3.9108498096466064, "learning_rate": 4.750799561550542e-05, "loss": 1.944, "step": 7059 }, { "epoch": 0.5314364214606974, "grad_norm": 5.862430095672607, "learning_rate": 4.749581994924161e-05, "loss": 2.0753, "step": 7060 }, { "epoch": 0.5315116957413576, "grad_norm": 4.302278518676758, "learning_rate": 4.748364443184411e-05, "loss": 2.3075, "step": 7061 }, { "epoch": 0.5315869700220177, "grad_norm": 5.241310119628906, "learning_rate": 4.747146906403674e-05, "loss": 1.8098, "step": 7062 }, { "epoch": 0.5316622443026778, "grad_norm": 4.788744926452637, "learning_rate": 4.7459293846543256e-05, "loss": 2.1982, "step": 7063 }, { "epoch": 0.5317375185833381, "grad_norm": 6.442987442016602, "learning_rate": 4.744711878008748e-05, "loss": 2.2314, "step": 7064 }, { "epoch": 0.5318127928639982, "grad_norm": 3.9667046070098877, "learning_rate": 4.743494386539316e-05, "loss": 1.7363, "step": 7065 }, { "epoch": 0.5318880671446583, "grad_norm": 3.9151365756988525, "learning_rate": 4.7422769103184074e-05, "loss": 2.1363, "step": 7066 }, { "epoch": 0.5319633414253185, "grad_norm": 5.04141092300415, "learning_rate": 4.741059449418396e-05, "loss": 2.2775, "step": 7067 }, { "epoch": 0.5320386157059787, "grad_norm": 4.230179786682129, "learning_rate": 4.7398420039116576e-05, "loss": 2.3637, "step": 7068 }, { "epoch": 0.5321138899866388, "grad_norm": 3.9334120750427246, "learning_rate": 4.738624573870565e-05, "loss": 1.5845, "step": 7069 }, { "epoch": 0.532189164267299, "grad_norm": 4.179913520812988, "learning_rate": 4.737407159367494e-05, "loss": 1.6291, "step": 7070 }, { "epoch": 0.5322644385479591, "grad_norm": 3.8906409740448, "learning_rate": 4.7361897604748114e-05, "loss": 1.8825, "step": 7071 }, { "epoch": 0.5323397128286192, "grad_norm": 4.138206958770752, "learning_rate": 4.734972377264892e-05, "loss": 2.0115, "step": 7072 }, { "epoch": 0.5324149871092795, "grad_norm": 4.013099670410156, "learning_rate": 4.7337550098101034e-05, "loss": 1.8908, "step": 7073 }, { "epoch": 0.5324902613899396, "grad_norm": 7.243107318878174, "learning_rate": 4.732537658182815e-05, "loss": 1.7049, "step": 7074 }, { "epoch": 0.5325655356705997, "grad_norm": 7.560256481170654, "learning_rate": 4.7313203224553965e-05, "loss": 2.1741, "step": 7075 }, { "epoch": 0.53264080995126, "grad_norm": 4.1712799072265625, "learning_rate": 4.730103002700213e-05, "loss": 1.7437, "step": 7076 }, { "epoch": 0.5327160842319201, "grad_norm": 4.552664756774902, "learning_rate": 4.7288856989896336e-05, "loss": 2.0069, "step": 7077 }, { "epoch": 0.5327913585125802, "grad_norm": 3.638277292251587, "learning_rate": 4.727668411396019e-05, "loss": 1.4366, "step": 7078 }, { "epoch": 0.5328666327932404, "grad_norm": 3.880326509475708, "learning_rate": 4.726451139991738e-05, "loss": 1.7208, "step": 7079 }, { "epoch": 0.5329419070739005, "grad_norm": 4.694003105163574, "learning_rate": 4.725233884849151e-05, "loss": 1.7411, "step": 7080 }, { "epoch": 0.5330171813545607, "grad_norm": 4.782301902770996, "learning_rate": 4.724016646040621e-05, "loss": 1.9971, "step": 7081 }, { "epoch": 0.5330924556352208, "grad_norm": 5.097995281219482, "learning_rate": 4.722799423638509e-05, "loss": 1.8428, "step": 7082 }, { "epoch": 0.533167729915881, "grad_norm": 4.44391393661499, "learning_rate": 4.721582217715177e-05, "loss": 1.8378, "step": 7083 }, { "epoch": 0.5332430041965411, "grad_norm": 4.077167510986328, "learning_rate": 4.720365028342982e-05, "loss": 1.937, "step": 7084 }, { "epoch": 0.5333182784772013, "grad_norm": 4.596120834350586, "learning_rate": 4.719147855594285e-05, "loss": 2.088, "step": 7085 }, { "epoch": 0.5333935527578615, "grad_norm": 10.432188987731934, "learning_rate": 4.7179306995414404e-05, "loss": 1.938, "step": 7086 }, { "epoch": 0.5334688270385216, "grad_norm": 7.165554046630859, "learning_rate": 4.716713560256809e-05, "loss": 1.8936, "step": 7087 }, { "epoch": 0.5335441013191817, "grad_norm": 5.582726001739502, "learning_rate": 4.715496437812741e-05, "loss": 1.9756, "step": 7088 }, { "epoch": 0.533619375599842, "grad_norm": 7.4014177322387695, "learning_rate": 4.714279332281594e-05, "loss": 1.8441, "step": 7089 }, { "epoch": 0.5336946498805021, "grad_norm": 4.264235496520996, "learning_rate": 4.713062243735722e-05, "loss": 2.0099, "step": 7090 }, { "epoch": 0.5337699241611622, "grad_norm": 5.126283168792725, "learning_rate": 4.711845172247475e-05, "loss": 1.7007, "step": 7091 }, { "epoch": 0.5338451984418224, "grad_norm": 5.324197769165039, "learning_rate": 4.710628117889207e-05, "loss": 1.8389, "step": 7092 }, { "epoch": 0.5339204727224826, "grad_norm": 5.139522552490234, "learning_rate": 4.7094110807332656e-05, "loss": 1.8308, "step": 7093 }, { "epoch": 0.5339957470031427, "grad_norm": 4.007652282714844, "learning_rate": 4.708194060852004e-05, "loss": 2.0468, "step": 7094 }, { "epoch": 0.5340710212838029, "grad_norm": 3.9862287044525146, "learning_rate": 4.706977058317766e-05, "loss": 1.9759, "step": 7095 }, { "epoch": 0.534146295564463, "grad_norm": 6.065804958343506, "learning_rate": 4.7057600732029016e-05, "loss": 1.84, "step": 7096 }, { "epoch": 0.5342215698451231, "grad_norm": 4.315358638763428, "learning_rate": 4.704543105579757e-05, "loss": 1.979, "step": 7097 }, { "epoch": 0.5342968441257834, "grad_norm": 7.23746919631958, "learning_rate": 4.7033261555206776e-05, "loss": 2.1668, "step": 7098 }, { "epoch": 0.5343721184064435, "grad_norm": 6.216946125030518, "learning_rate": 4.702109223098005e-05, "loss": 2.0449, "step": 7099 }, { "epoch": 0.5344473926871036, "grad_norm": 5.135339260101318, "learning_rate": 4.700892308384087e-05, "loss": 1.714, "step": 7100 }, { "epoch": 0.5345226669677637, "grad_norm": 5.694299221038818, "learning_rate": 4.699675411451261e-05, "loss": 2.1415, "step": 7101 }, { "epoch": 0.534597941248424, "grad_norm": 3.72849440574646, "learning_rate": 4.698458532371871e-05, "loss": 1.7641, "step": 7102 }, { "epoch": 0.5346732155290841, "grad_norm": 4.403263568878174, "learning_rate": 4.6972416712182546e-05, "loss": 1.9095, "step": 7103 }, { "epoch": 0.5347484898097442, "grad_norm": 6.242741584777832, "learning_rate": 4.696024828062755e-05, "loss": 2.0923, "step": 7104 }, { "epoch": 0.5348237640904044, "grad_norm": 5.412423610687256, "learning_rate": 4.694808002977704e-05, "loss": 2.0866, "step": 7105 }, { "epoch": 0.5348990383710646, "grad_norm": 4.419037342071533, "learning_rate": 4.6935911960354425e-05, "loss": 1.7658, "step": 7106 }, { "epoch": 0.5349743126517247, "grad_norm": 6.027182102203369, "learning_rate": 4.692374407308307e-05, "loss": 1.8826, "step": 7107 }, { "epoch": 0.5350495869323849, "grad_norm": 5.010058403015137, "learning_rate": 4.6911576368686284e-05, "loss": 1.4985, "step": 7108 }, { "epoch": 0.535124861213045, "grad_norm": 5.639893054962158, "learning_rate": 4.689940884788743e-05, "loss": 1.9841, "step": 7109 }, { "epoch": 0.5352001354937052, "grad_norm": 5.386854648590088, "learning_rate": 4.688724151140981e-05, "loss": 1.6238, "step": 7110 }, { "epoch": 0.5352754097743654, "grad_norm": 5.632213115692139, "learning_rate": 4.6875074359976775e-05, "loss": 2.0899, "step": 7111 }, { "epoch": 0.5353506840550255, "grad_norm": 3.9364993572235107, "learning_rate": 4.686290739431158e-05, "loss": 1.9543, "step": 7112 }, { "epoch": 0.5354259583356856, "grad_norm": 5.6435546875, "learning_rate": 4.685074061513755e-05, "loss": 2.5083, "step": 7113 }, { "epoch": 0.5355012326163459, "grad_norm": 5.331766128540039, "learning_rate": 4.6838574023177936e-05, "loss": 1.8142, "step": 7114 }, { "epoch": 0.535576506897006, "grad_norm": 5.100854873657227, "learning_rate": 4.682640761915606e-05, "loss": 2.1522, "step": 7115 }, { "epoch": 0.5356517811776661, "grad_norm": 6.256421089172363, "learning_rate": 4.6814241403795116e-05, "loss": 2.0506, "step": 7116 }, { "epoch": 0.5357270554583263, "grad_norm": 6.039792537689209, "learning_rate": 4.6802075377818404e-05, "loss": 2.0938, "step": 7117 }, { "epoch": 0.5358023297389864, "grad_norm": 3.491872549057007, "learning_rate": 4.678990954194911e-05, "loss": 1.7932, "step": 7118 }, { "epoch": 0.5358776040196466, "grad_norm": 4.274329662322998, "learning_rate": 4.67777438969105e-05, "loss": 1.9438, "step": 7119 }, { "epoch": 0.5359528783003067, "grad_norm": 5.088064670562744, "learning_rate": 4.676557844342576e-05, "loss": 2.0418, "step": 7120 }, { "epoch": 0.5360281525809669, "grad_norm": 5.919598579406738, "learning_rate": 4.675341318221809e-05, "loss": 1.9826, "step": 7121 }, { "epoch": 0.536103426861627, "grad_norm": 5.451521396636963, "learning_rate": 4.674124811401071e-05, "loss": 1.7512, "step": 7122 }, { "epoch": 0.5361787011422872, "grad_norm": 4.607489585876465, "learning_rate": 4.6729083239526775e-05, "loss": 1.8516, "step": 7123 }, { "epoch": 0.5362539754229474, "grad_norm": 5.585968494415283, "learning_rate": 4.671691855948947e-05, "loss": 1.6594, "step": 7124 }, { "epoch": 0.5363292497036075, "grad_norm": 3.7467923164367676, "learning_rate": 4.670475407462191e-05, "loss": 1.9381, "step": 7125 }, { "epoch": 0.5364045239842676, "grad_norm": 4.839333534240723, "learning_rate": 4.669258978564728e-05, "loss": 2.0935, "step": 7126 }, { "epoch": 0.5364797982649279, "grad_norm": 4.495025634765625, "learning_rate": 4.6680425693288684e-05, "loss": 2.2325, "step": 7127 }, { "epoch": 0.536555072545588, "grad_norm": 5.829689025878906, "learning_rate": 4.666826179826928e-05, "loss": 1.8106, "step": 7128 }, { "epoch": 0.5366303468262481, "grad_norm": 5.903088092803955, "learning_rate": 4.665609810131213e-05, "loss": 1.7813, "step": 7129 }, { "epoch": 0.5367056211069083, "grad_norm": 4.563745021820068, "learning_rate": 4.664393460314036e-05, "loss": 1.8227, "step": 7130 }, { "epoch": 0.5367808953875685, "grad_norm": 6.159015655517578, "learning_rate": 4.663177130447705e-05, "loss": 1.98, "step": 7131 }, { "epoch": 0.5368561696682286, "grad_norm": 5.188133716583252, "learning_rate": 4.6619608206045276e-05, "loss": 2.3185, "step": 7132 }, { "epoch": 0.5369314439488888, "grad_norm": 5.6095662117004395, "learning_rate": 4.6607445308568085e-05, "loss": 2.4495, "step": 7133 }, { "epoch": 0.5370067182295489, "grad_norm": 4.729706287384033, "learning_rate": 4.659528261276855e-05, "loss": 2.1361, "step": 7134 }, { "epoch": 0.537081992510209, "grad_norm": 3.586517810821533, "learning_rate": 4.658312011936968e-05, "loss": 1.9664, "step": 7135 }, { "epoch": 0.5371572667908693, "grad_norm": 4.189297199249268, "learning_rate": 4.6570957829094525e-05, "loss": 2.0263, "step": 7136 }, { "epoch": 0.5372325410715294, "grad_norm": 5.3675537109375, "learning_rate": 4.655879574266607e-05, "loss": 1.8416, "step": 7137 }, { "epoch": 0.5373078153521895, "grad_norm": 3.8867509365081787, "learning_rate": 4.6546633860807346e-05, "loss": 1.8297, "step": 7138 }, { "epoch": 0.5373830896328496, "grad_norm": 3.9739983081817627, "learning_rate": 4.653447218424134e-05, "loss": 1.9554, "step": 7139 }, { "epoch": 0.5374583639135099, "grad_norm": 3.485842704772949, "learning_rate": 4.6522310713690995e-05, "loss": 1.8739, "step": 7140 }, { "epoch": 0.53753363819417, "grad_norm": 4.629156112670898, "learning_rate": 4.6510149449879325e-05, "loss": 1.9451, "step": 7141 }, { "epoch": 0.5376089124748301, "grad_norm": 4.9842634201049805, "learning_rate": 4.649798839352923e-05, "loss": 2.164, "step": 7142 }, { "epoch": 0.5376841867554903, "grad_norm": 4.058955669403076, "learning_rate": 4.648582754536372e-05, "loss": 2.0392, "step": 7143 }, { "epoch": 0.5377594610361505, "grad_norm": 4.076719760894775, "learning_rate": 4.647366690610564e-05, "loss": 1.7612, "step": 7144 }, { "epoch": 0.5378347353168106, "grad_norm": 6.8890275955200195, "learning_rate": 4.6461506476477966e-05, "loss": 2.3213, "step": 7145 }, { "epoch": 0.5379100095974708, "grad_norm": 5.026981830596924, "learning_rate": 4.6449346257203555e-05, "loss": 1.5731, "step": 7146 }, { "epoch": 0.5379852838781309, "grad_norm": 4.281398296356201, "learning_rate": 4.643718624900534e-05, "loss": 1.8817, "step": 7147 }, { "epoch": 0.5380605581587911, "grad_norm": 5.409975528717041, "learning_rate": 4.642502645260617e-05, "loss": 1.8751, "step": 7148 }, { "epoch": 0.5381358324394513, "grad_norm": 4.666240692138672, "learning_rate": 4.6412866868728935e-05, "loss": 1.6206, "step": 7149 }, { "epoch": 0.5382111067201114, "grad_norm": 4.69838285446167, "learning_rate": 4.6400707498096454e-05, "loss": 2.2947, "step": 7150 }, { "epoch": 0.5382863810007715, "grad_norm": 4.841192245483398, "learning_rate": 4.6388548341431604e-05, "loss": 1.8125, "step": 7151 }, { "epoch": 0.5383616552814318, "grad_norm": 3.94899320602417, "learning_rate": 4.6376389399457176e-05, "loss": 1.7008, "step": 7152 }, { "epoch": 0.5384369295620919, "grad_norm": 6.47050666809082, "learning_rate": 4.6364230672896e-05, "loss": 2.2182, "step": 7153 }, { "epoch": 0.538512203842752, "grad_norm": 4.819343090057373, "learning_rate": 4.6352072162470883e-05, "loss": 1.7317, "step": 7154 }, { "epoch": 0.5385874781234122, "grad_norm": 5.318504810333252, "learning_rate": 4.63399138689046e-05, "loss": 1.8923, "step": 7155 }, { "epoch": 0.5386627524040724, "grad_norm": 6.286028861999512, "learning_rate": 4.632775579291996e-05, "loss": 1.8324, "step": 7156 }, { "epoch": 0.5387380266847325, "grad_norm": 4.735677242279053, "learning_rate": 4.631559793523968e-05, "loss": 2.0792, "step": 7157 }, { "epoch": 0.5388133009653926, "grad_norm": 4.113296985626221, "learning_rate": 4.630344029658654e-05, "loss": 2.0041, "step": 7158 }, { "epoch": 0.5388885752460528, "grad_norm": 5.619882583618164, "learning_rate": 4.6291282877683254e-05, "loss": 1.7686, "step": 7159 }, { "epoch": 0.538963849526713, "grad_norm": 3.4414563179016113, "learning_rate": 4.627912567925259e-05, "loss": 2.162, "step": 7160 }, { "epoch": 0.5390391238073731, "grad_norm": 5.1061110496521, "learning_rate": 4.6266968702017195e-05, "loss": 2.0211, "step": 7161 }, { "epoch": 0.5391143980880333, "grad_norm": 4.122971534729004, "learning_rate": 4.6254811946699824e-05, "loss": 2.3782, "step": 7162 }, { "epoch": 0.5391896723686934, "grad_norm": 5.279617786407471, "learning_rate": 4.6242655414023125e-05, "loss": 2.0438, "step": 7163 }, { "epoch": 0.5392649466493535, "grad_norm": 6.048137664794922, "learning_rate": 4.623049910470978e-05, "loss": 1.844, "step": 7164 }, { "epoch": 0.5393402209300138, "grad_norm": 3.968564987182617, "learning_rate": 4.621834301948244e-05, "loss": 1.7759, "step": 7165 }, { "epoch": 0.5394154952106739, "grad_norm": 6.077966690063477, "learning_rate": 4.620618715906378e-05, "loss": 1.7141, "step": 7166 }, { "epoch": 0.539490769491334, "grad_norm": 4.009443759918213, "learning_rate": 4.619403152417638e-05, "loss": 1.7804, "step": 7167 }, { "epoch": 0.5395660437719942, "grad_norm": 6.092879295349121, "learning_rate": 4.6181876115542904e-05, "loss": 1.8143, "step": 7168 }, { "epoch": 0.5396413180526544, "grad_norm": 5.466367721557617, "learning_rate": 4.6169720933885924e-05, "loss": 1.6723, "step": 7169 }, { "epoch": 0.5397165923333145, "grad_norm": 4.8065900802612305, "learning_rate": 4.6157565979928044e-05, "loss": 2.0034, "step": 7170 }, { "epoch": 0.5397918666139747, "grad_norm": 6.0215373039245605, "learning_rate": 4.614541125439186e-05, "loss": 1.9522, "step": 7171 }, { "epoch": 0.5398671408946348, "grad_norm": 5.7492475509643555, "learning_rate": 4.61332567579999e-05, "loss": 2.1315, "step": 7172 }, { "epoch": 0.539942415175295, "grad_norm": 5.747673511505127, "learning_rate": 4.6121102491474754e-05, "loss": 1.8017, "step": 7173 }, { "epoch": 0.5400176894559552, "grad_norm": 4.093325138092041, "learning_rate": 4.6108948455538906e-05, "loss": 2.1799, "step": 7174 }, { "epoch": 0.5400929637366153, "grad_norm": 6.3927531242370605, "learning_rate": 4.609679465091493e-05, "loss": 1.735, "step": 7175 }, { "epoch": 0.5401682380172754, "grad_norm": 3.98288631439209, "learning_rate": 4.608464107832529e-05, "loss": 2.0449, "step": 7176 }, { "epoch": 0.5402435122979357, "grad_norm": 5.237273693084717, "learning_rate": 4.607248773849253e-05, "loss": 2.0829, "step": 7177 }, { "epoch": 0.5403187865785958, "grad_norm": 4.548027038574219, "learning_rate": 4.606033463213908e-05, "loss": 1.8221, "step": 7178 }, { "epoch": 0.5403940608592559, "grad_norm": 6.00238561630249, "learning_rate": 4.6048181759987456e-05, "loss": 2.0311, "step": 7179 }, { "epoch": 0.540469335139916, "grad_norm": 4.499464988708496, "learning_rate": 4.603602912276007e-05, "loss": 1.7508, "step": 7180 }, { "epoch": 0.5405446094205762, "grad_norm": 4.544561386108398, "learning_rate": 4.602387672117938e-05, "loss": 1.911, "step": 7181 }, { "epoch": 0.5406198837012364, "grad_norm": 4.576714515686035, "learning_rate": 4.6011724555967806e-05, "loss": 2.0198, "step": 7182 }, { "epoch": 0.5406951579818965, "grad_norm": 5.893620014190674, "learning_rate": 4.599957262784778e-05, "loss": 1.744, "step": 7183 }, { "epoch": 0.5407704322625567, "grad_norm": 4.704677104949951, "learning_rate": 4.5987420937541664e-05, "loss": 1.9483, "step": 7184 }, { "epoch": 0.5408457065432168, "grad_norm": 6.924588680267334, "learning_rate": 4.597526948577188e-05, "loss": 1.8612, "step": 7185 }, { "epoch": 0.540920980823877, "grad_norm": 4.043465614318848, "learning_rate": 4.596311827326075e-05, "loss": 1.8183, "step": 7186 }, { "epoch": 0.5409962551045372, "grad_norm": 4.647791862487793, "learning_rate": 4.595096730073066e-05, "loss": 1.8423, "step": 7187 }, { "epoch": 0.5410715293851973, "grad_norm": 4.878299713134766, "learning_rate": 4.593881656890397e-05, "loss": 1.8761, "step": 7188 }, { "epoch": 0.5411468036658574, "grad_norm": 4.8372111320495605, "learning_rate": 4.592666607850297e-05, "loss": 1.9255, "step": 7189 }, { "epoch": 0.5412220779465177, "grad_norm": 4.870363235473633, "learning_rate": 4.5914515830249996e-05, "loss": 2.0947, "step": 7190 }, { "epoch": 0.5412973522271778, "grad_norm": 4.007397651672363, "learning_rate": 4.590236582486731e-05, "loss": 1.6737, "step": 7191 }, { "epoch": 0.5413726265078379, "grad_norm": 3.873143434524536, "learning_rate": 4.589021606307723e-05, "loss": 1.9809, "step": 7192 }, { "epoch": 0.5414479007884981, "grad_norm": 5.2229695320129395, "learning_rate": 4.5878066545602006e-05, "loss": 1.9063, "step": 7193 }, { "epoch": 0.5415231750691583, "grad_norm": 5.185843467712402, "learning_rate": 4.5865917273163915e-05, "loss": 1.7828, "step": 7194 }, { "epoch": 0.5415984493498184, "grad_norm": 7.470333576202393, "learning_rate": 4.585376824648516e-05, "loss": 1.927, "step": 7195 }, { "epoch": 0.5416737236304786, "grad_norm": 5.343120574951172, "learning_rate": 4.5841619466288e-05, "loss": 1.975, "step": 7196 }, { "epoch": 0.5417489979111387, "grad_norm": 3.1645259857177734, "learning_rate": 4.582947093329462e-05, "loss": 1.8065, "step": 7197 }, { "epoch": 0.5418242721917989, "grad_norm": 4.749367713928223, "learning_rate": 4.581732264822724e-05, "loss": 1.8178, "step": 7198 }, { "epoch": 0.541899546472459, "grad_norm": 4.746345043182373, "learning_rate": 4.580517461180801e-05, "loss": 2.0404, "step": 7199 }, { "epoch": 0.5419748207531192, "grad_norm": 5.490805625915527, "learning_rate": 4.5793026824759136e-05, "loss": 1.9331, "step": 7200 }, { "epoch": 0.5420500950337793, "grad_norm": 5.888421535491943, "learning_rate": 4.578087928780273e-05, "loss": 2.1156, "step": 7201 }, { "epoch": 0.5421253693144394, "grad_norm": 4.284915924072266, "learning_rate": 4.576873200166094e-05, "loss": 1.7174, "step": 7202 }, { "epoch": 0.5422006435950997, "grad_norm": 4.496925354003906, "learning_rate": 4.57565849670559e-05, "loss": 2.1483, "step": 7203 }, { "epoch": 0.5422759178757598, "grad_norm": 5.7125091552734375, "learning_rate": 4.5744438184709696e-05, "loss": 2.073, "step": 7204 }, { "epoch": 0.5423511921564199, "grad_norm": 5.5248003005981445, "learning_rate": 4.5732291655344456e-05, "loss": 1.9524, "step": 7205 }, { "epoch": 0.5424264664370801, "grad_norm": 4.666708469390869, "learning_rate": 4.572014537968221e-05, "loss": 1.7433, "step": 7206 }, { "epoch": 0.5425017407177403, "grad_norm": 4.9575347900390625, "learning_rate": 4.5707999358445066e-05, "loss": 1.757, "step": 7207 }, { "epoch": 0.5425770149984004, "grad_norm": 4.72661018371582, "learning_rate": 4.569585359235502e-05, "loss": 1.6633, "step": 7208 }, { "epoch": 0.5426522892790606, "grad_norm": 6.025607585906982, "learning_rate": 4.5683708082134135e-05, "loss": 2.2094, "step": 7209 }, { "epoch": 0.5427275635597207, "grad_norm": 5.6639227867126465, "learning_rate": 4.567156282850441e-05, "loss": 1.8059, "step": 7210 }, { "epoch": 0.5428028378403809, "grad_norm": 5.5550537109375, "learning_rate": 4.565941783218788e-05, "loss": 2.0967, "step": 7211 }, { "epoch": 0.5428781121210411, "grad_norm": 5.448683261871338, "learning_rate": 4.564727309390648e-05, "loss": 2.1925, "step": 7212 }, { "epoch": 0.5429533864017012, "grad_norm": 4.301255702972412, "learning_rate": 4.563512861438222e-05, "loss": 2.1201, "step": 7213 }, { "epoch": 0.5430286606823613, "grad_norm": 5.220934867858887, "learning_rate": 4.562298439433703e-05, "loss": 1.4957, "step": 7214 }, { "epoch": 0.5431039349630216, "grad_norm": 5.2877116203308105, "learning_rate": 4.561084043449287e-05, "loss": 1.5886, "step": 7215 }, { "epoch": 0.5431792092436817, "grad_norm": 4.303807735443115, "learning_rate": 4.559869673557164e-05, "loss": 1.8627, "step": 7216 }, { "epoch": 0.5432544835243418, "grad_norm": 4.707512378692627, "learning_rate": 4.558655329829529e-05, "loss": 1.7436, "step": 7217 }, { "epoch": 0.5433297578050019, "grad_norm": 4.219814777374268, "learning_rate": 4.5574410123385645e-05, "loss": 1.9253, "step": 7218 }, { "epoch": 0.5434050320856622, "grad_norm": 4.085941791534424, "learning_rate": 4.556226721156463e-05, "loss": 2.1195, "step": 7219 }, { "epoch": 0.5434803063663223, "grad_norm": 5.37359619140625, "learning_rate": 4.55501245635541e-05, "loss": 2.395, "step": 7220 }, { "epoch": 0.5435555806469824, "grad_norm": 5.066382884979248, "learning_rate": 4.553798218007589e-05, "loss": 1.8986, "step": 7221 }, { "epoch": 0.5436308549276426, "grad_norm": 6.823185920715332, "learning_rate": 4.552584006185186e-05, "loss": 1.9875, "step": 7222 }, { "epoch": 0.5437061292083027, "grad_norm": 3.806490898132324, "learning_rate": 4.551369820960377e-05, "loss": 1.9114, "step": 7223 }, { "epoch": 0.5437814034889629, "grad_norm": 5.039447784423828, "learning_rate": 4.550155662405347e-05, "loss": 1.8261, "step": 7224 }, { "epoch": 0.5438566777696231, "grad_norm": 4.0304436683654785, "learning_rate": 4.548941530592272e-05, "loss": 2.0245, "step": 7225 }, { "epoch": 0.5439319520502832, "grad_norm": 5.348670959472656, "learning_rate": 4.5477274255933276e-05, "loss": 2.1405, "step": 7226 }, { "epoch": 0.5440072263309433, "grad_norm": 6.283318996429443, "learning_rate": 4.5465133474806903e-05, "loss": 1.9118, "step": 7227 }, { "epoch": 0.5440825006116036, "grad_norm": 4.4190545082092285, "learning_rate": 4.545299296326535e-05, "loss": 2.1445, "step": 7228 }, { "epoch": 0.5441577748922637, "grad_norm": 6.2103166580200195, "learning_rate": 4.5440852722030294e-05, "loss": 1.8545, "step": 7229 }, { "epoch": 0.5442330491729238, "grad_norm": 5.552028656005859, "learning_rate": 4.5428712751823476e-05, "loss": 1.8581, "step": 7230 }, { "epoch": 0.544308323453584, "grad_norm": 4.799374103546143, "learning_rate": 4.541657305336656e-05, "loss": 2.0975, "step": 7231 }, { "epoch": 0.5443835977342442, "grad_norm": 3.859109878540039, "learning_rate": 4.5404433627381236e-05, "loss": 2.1311, "step": 7232 }, { "epoch": 0.5444588720149043, "grad_norm": 4.820240497589111, "learning_rate": 4.539229447458914e-05, "loss": 1.7116, "step": 7233 }, { "epoch": 0.5445341462955645, "grad_norm": 3.7648513317108154, "learning_rate": 4.53801555957119e-05, "loss": 1.7167, "step": 7234 }, { "epoch": 0.5446094205762246, "grad_norm": 4.766754150390625, "learning_rate": 4.536801699147119e-05, "loss": 1.7716, "step": 7235 }, { "epoch": 0.5446846948568848, "grad_norm": 4.234167575836182, "learning_rate": 4.535587866258855e-05, "loss": 2.2069, "step": 7236 }, { "epoch": 0.5447599691375449, "grad_norm": 4.321406364440918, "learning_rate": 4.534374060978561e-05, "loss": 1.8936, "step": 7237 }, { "epoch": 0.5448352434182051, "grad_norm": 5.81303596496582, "learning_rate": 4.533160283378392e-05, "loss": 1.728, "step": 7238 }, { "epoch": 0.5449105176988652, "grad_norm": 4.741263389587402, "learning_rate": 4.531946533530507e-05, "loss": 2.1517, "step": 7239 }, { "epoch": 0.5449857919795253, "grad_norm": 3.782055616378784, "learning_rate": 4.530732811507055e-05, "loss": 1.9274, "step": 7240 }, { "epoch": 0.5450610662601856, "grad_norm": 4.311224937438965, "learning_rate": 4.529519117380192e-05, "loss": 1.7882, "step": 7241 }, { "epoch": 0.5451363405408457, "grad_norm": 4.395197868347168, "learning_rate": 4.5283054512220665e-05, "loss": 1.899, "step": 7242 }, { "epoch": 0.5452116148215058, "grad_norm": 5.094570636749268, "learning_rate": 4.527091813104831e-05, "loss": 2.1774, "step": 7243 }, { "epoch": 0.545286889102166, "grad_norm": 5.369093894958496, "learning_rate": 4.525878203100628e-05, "loss": 2.0271, "step": 7244 }, { "epoch": 0.5453621633828262, "grad_norm": 3.9408247470855713, "learning_rate": 4.524664621281608e-05, "loss": 1.9604, "step": 7245 }, { "epoch": 0.5454374376634863, "grad_norm": 4.695934295654297, "learning_rate": 4.52345106771991e-05, "loss": 1.849, "step": 7246 }, { "epoch": 0.5455127119441465, "grad_norm": 4.815086841583252, "learning_rate": 4.522237542487679e-05, "loss": 1.8387, "step": 7247 }, { "epoch": 0.5455879862248066, "grad_norm": 4.516716480255127, "learning_rate": 4.5210240456570544e-05, "loss": 1.7391, "step": 7248 }, { "epoch": 0.5456632605054668, "grad_norm": 5.474329471588135, "learning_rate": 4.5198105773001784e-05, "loss": 1.9555, "step": 7249 }, { "epoch": 0.545738534786127, "grad_norm": 5.571545600891113, "learning_rate": 4.5185971374891834e-05, "loss": 1.7115, "step": 7250 }, { "epoch": 0.5458138090667871, "grad_norm": 5.538477420806885, "learning_rate": 4.5173837262962075e-05, "loss": 1.8419, "step": 7251 }, { "epoch": 0.5458890833474472, "grad_norm": 5.463476181030273, "learning_rate": 4.516170343793386e-05, "loss": 1.845, "step": 7252 }, { "epoch": 0.5459643576281075, "grad_norm": 3.232405424118042, "learning_rate": 4.514956990052848e-05, "loss": 1.7819, "step": 7253 }, { "epoch": 0.5460396319087676, "grad_norm": 4.944972515106201, "learning_rate": 4.513743665146726e-05, "loss": 1.9253, "step": 7254 }, { "epoch": 0.5461149061894277, "grad_norm": 3.788090229034424, "learning_rate": 4.512530369147146e-05, "loss": 1.6777, "step": 7255 }, { "epoch": 0.5461901804700879, "grad_norm": 5.750962734222412, "learning_rate": 4.51131710212624e-05, "loss": 2.0733, "step": 7256 }, { "epoch": 0.5462654547507481, "grad_norm": 4.356586456298828, "learning_rate": 4.510103864156127e-05, "loss": 1.9877, "step": 7257 }, { "epoch": 0.5463407290314082, "grad_norm": 7.100419521331787, "learning_rate": 4.508890655308936e-05, "loss": 2.0544, "step": 7258 }, { "epoch": 0.5464160033120683, "grad_norm": 3.6300253868103027, "learning_rate": 4.5076774756567834e-05, "loss": 1.5025, "step": 7259 }, { "epoch": 0.5464912775927285, "grad_norm": 5.504705905914307, "learning_rate": 4.506464325271796e-05, "loss": 1.8322, "step": 7260 }, { "epoch": 0.5465665518733887, "grad_norm": 3.5102968215942383, "learning_rate": 4.5052512042260854e-05, "loss": 1.6375, "step": 7261 }, { "epoch": 0.5466418261540488, "grad_norm": 4.714266777038574, "learning_rate": 4.5040381125917734e-05, "loss": 1.7604, "step": 7262 }, { "epoch": 0.546717100434709, "grad_norm": 5.035887718200684, "learning_rate": 4.5028250504409706e-05, "loss": 1.467, "step": 7263 }, { "epoch": 0.5467923747153691, "grad_norm": 4.72441291809082, "learning_rate": 4.5016120178457935e-05, "loss": 1.7012, "step": 7264 }, { "epoch": 0.5468676489960292, "grad_norm": 6.256455898284912, "learning_rate": 4.50039901487835e-05, "loss": 2.05, "step": 7265 }, { "epoch": 0.5469429232766895, "grad_norm": 4.261584281921387, "learning_rate": 4.499186041610752e-05, "loss": 2.2197, "step": 7266 }, { "epoch": 0.5470181975573496, "grad_norm": 4.091939926147461, "learning_rate": 4.497973098115109e-05, "loss": 1.7024, "step": 7267 }, { "epoch": 0.5470934718380097, "grad_norm": 4.211394786834717, "learning_rate": 4.496760184463522e-05, "loss": 2.1085, "step": 7268 }, { "epoch": 0.54716874611867, "grad_norm": 4.209076881408691, "learning_rate": 4.4955473007281e-05, "loss": 1.7858, "step": 7269 }, { "epoch": 0.5472440203993301, "grad_norm": 5.287043571472168, "learning_rate": 4.494334446980942e-05, "loss": 1.9093, "step": 7270 }, { "epoch": 0.5473192946799902, "grad_norm": 4.956048011779785, "learning_rate": 4.4931216232941526e-05, "loss": 2.3107, "step": 7271 }, { "epoch": 0.5473945689606504, "grad_norm": 4.21409273147583, "learning_rate": 4.491908829739826e-05, "loss": 1.6429, "step": 7272 }, { "epoch": 0.5474698432413105, "grad_norm": 4.417195796966553, "learning_rate": 4.4906960663900636e-05, "loss": 2.132, "step": 7273 }, { "epoch": 0.5475451175219707, "grad_norm": 4.306947708129883, "learning_rate": 4.4894833333169574e-05, "loss": 1.8505, "step": 7274 }, { "epoch": 0.5476203918026309, "grad_norm": 4.216700077056885, "learning_rate": 4.4882706305926026e-05, "loss": 1.4864, "step": 7275 }, { "epoch": 0.547695666083291, "grad_norm": 4.046915054321289, "learning_rate": 4.487057958289089e-05, "loss": 1.9534, "step": 7276 }, { "epoch": 0.5477709403639511, "grad_norm": 5.374661922454834, "learning_rate": 4.4858453164785105e-05, "loss": 2.2955, "step": 7277 }, { "epoch": 0.5478462146446113, "grad_norm": 5.344202995300293, "learning_rate": 4.4846327052329504e-05, "loss": 2.2217, "step": 7278 }, { "epoch": 0.5479214889252715, "grad_norm": 4.85532283782959, "learning_rate": 4.4834201246245e-05, "loss": 2.167, "step": 7279 }, { "epoch": 0.5479967632059316, "grad_norm": 3.7367851734161377, "learning_rate": 4.482207574725238e-05, "loss": 2.1791, "step": 7280 }, { "epoch": 0.5480720374865917, "grad_norm": 5.266561985015869, "learning_rate": 4.4809950556072525e-05, "loss": 2.2391, "step": 7281 }, { "epoch": 0.548147311767252, "grad_norm": 5.5713725090026855, "learning_rate": 4.4797825673426194e-05, "loss": 1.8552, "step": 7282 }, { "epoch": 0.5482225860479121, "grad_norm": 5.478013038635254, "learning_rate": 4.47857011000342e-05, "loss": 1.9048, "step": 7283 }, { "epoch": 0.5482978603285722, "grad_norm": 4.999684810638428, "learning_rate": 4.477357683661734e-05, "loss": 1.9032, "step": 7284 }, { "epoch": 0.5483731346092324, "grad_norm": 4.312249183654785, "learning_rate": 4.476145288389631e-05, "loss": 1.7808, "step": 7285 }, { "epoch": 0.5484484088898925, "grad_norm": 3.2888519763946533, "learning_rate": 4.47493292425919e-05, "loss": 1.8086, "step": 7286 }, { "epoch": 0.5485236831705527, "grad_norm": 4.756214141845703, "learning_rate": 4.473720591342478e-05, "loss": 2.1057, "step": 7287 }, { "epoch": 0.5485989574512129, "grad_norm": 4.740874290466309, "learning_rate": 4.472508289711569e-05, "loss": 1.6812, "step": 7288 }, { "epoch": 0.548674231731873, "grad_norm": 3.677210569381714, "learning_rate": 4.471296019438527e-05, "loss": 2.0035, "step": 7289 }, { "epoch": 0.5487495060125331, "grad_norm": 3.867335081100464, "learning_rate": 4.470083780595421e-05, "loss": 1.8038, "step": 7290 }, { "epoch": 0.5488247802931934, "grad_norm": 4.989095687866211, "learning_rate": 4.468871573254312e-05, "loss": 1.5651, "step": 7291 }, { "epoch": 0.5489000545738535, "grad_norm": 4.610777378082275, "learning_rate": 4.467659397487265e-05, "loss": 2.0209, "step": 7292 }, { "epoch": 0.5489753288545136, "grad_norm": 2.9480783939361572, "learning_rate": 4.466447253366338e-05, "loss": 1.7167, "step": 7293 }, { "epoch": 0.5490506031351738, "grad_norm": 4.002351760864258, "learning_rate": 4.465235140963594e-05, "loss": 1.6173, "step": 7294 }, { "epoch": 0.549125877415834, "grad_norm": 3.871690273284912, "learning_rate": 4.4640230603510835e-05, "loss": 1.6102, "step": 7295 }, { "epoch": 0.5492011516964941, "grad_norm": 5.054386138916016, "learning_rate": 4.462811011600865e-05, "loss": 1.8483, "step": 7296 }, { "epoch": 0.5492764259771542, "grad_norm": 5.16343879699707, "learning_rate": 4.46159899478499e-05, "loss": 1.612, "step": 7297 }, { "epoch": 0.5493517002578144, "grad_norm": 4.424676418304443, "learning_rate": 4.4603870099755104e-05, "loss": 1.8288, "step": 7298 }, { "epoch": 0.5494269745384746, "grad_norm": 5.338099956512451, "learning_rate": 4.459175057244476e-05, "loss": 1.7232, "step": 7299 }, { "epoch": 0.5495022488191347, "grad_norm": 3.822824001312256, "learning_rate": 4.457963136663931e-05, "loss": 1.8117, "step": 7300 }, { "epoch": 0.5495775230997949, "grad_norm": 4.4067769050598145, "learning_rate": 4.456751248305924e-05, "loss": 1.8385, "step": 7301 }, { "epoch": 0.549652797380455, "grad_norm": 5.020514965057373, "learning_rate": 4.455539392242494e-05, "loss": 1.9673, "step": 7302 }, { "epoch": 0.5497280716611151, "grad_norm": 6.6952619552612305, "learning_rate": 4.454327568545687e-05, "loss": 1.5423, "step": 7303 }, { "epoch": 0.5498033459417754, "grad_norm": 4.522897720336914, "learning_rate": 4.453115777287539e-05, "loss": 1.7981, "step": 7304 }, { "epoch": 0.5498786202224355, "grad_norm": 6.122636795043945, "learning_rate": 4.4519040185400904e-05, "loss": 2.1514, "step": 7305 }, { "epoch": 0.5499538945030956, "grad_norm": 4.2663469314575195, "learning_rate": 4.4506922923753735e-05, "loss": 1.7761, "step": 7306 }, { "epoch": 0.5500291687837559, "grad_norm": 4.534322738647461, "learning_rate": 4.449480598865426e-05, "loss": 1.704, "step": 7307 }, { "epoch": 0.550104443064416, "grad_norm": 3.895892381668091, "learning_rate": 4.4482689380822744e-05, "loss": 1.9618, "step": 7308 }, { "epoch": 0.5501797173450761, "grad_norm": 3.3728456497192383, "learning_rate": 4.447057310097953e-05, "loss": 1.662, "step": 7309 }, { "epoch": 0.5502549916257363, "grad_norm": 3.4772403240203857, "learning_rate": 4.445845714984487e-05, "loss": 2.2187, "step": 7310 }, { "epoch": 0.5503302659063964, "grad_norm": 3.8210296630859375, "learning_rate": 4.444634152813905e-05, "loss": 1.7237, "step": 7311 }, { "epoch": 0.5504055401870566, "grad_norm": 4.806382656097412, "learning_rate": 4.443422623658227e-05, "loss": 2.2813, "step": 7312 }, { "epoch": 0.5504808144677168, "grad_norm": 5.3934855461120605, "learning_rate": 4.4422111275894785e-05, "loss": 1.9531, "step": 7313 }, { "epoch": 0.5505560887483769, "grad_norm": 6.14397668838501, "learning_rate": 4.440999664679676e-05, "loss": 1.9077, "step": 7314 }, { "epoch": 0.550631363029037, "grad_norm": 3.7839744091033936, "learning_rate": 4.439788235000841e-05, "loss": 1.7434, "step": 7315 }, { "epoch": 0.5507066373096972, "grad_norm": 5.898906707763672, "learning_rate": 4.438576838624989e-05, "loss": 1.7614, "step": 7316 }, { "epoch": 0.5507819115903574, "grad_norm": 4.722009181976318, "learning_rate": 4.4373654756241314e-05, "loss": 1.928, "step": 7317 }, { "epoch": 0.5508571858710175, "grad_norm": 7.242898941040039, "learning_rate": 4.436154146070283e-05, "loss": 1.7158, "step": 7318 }, { "epoch": 0.5509324601516776, "grad_norm": 8.452837944030762, "learning_rate": 4.4349428500354514e-05, "loss": 2.184, "step": 7319 }, { "epoch": 0.5510077344323379, "grad_norm": 4.863820552825928, "learning_rate": 4.4337315875916473e-05, "loss": 1.5358, "step": 7320 }, { "epoch": 0.551083008712998, "grad_norm": 4.039882659912109, "learning_rate": 4.432520358810875e-05, "loss": 1.673, "step": 7321 }, { "epoch": 0.5511582829936581, "grad_norm": 5.298859596252441, "learning_rate": 4.43130916376514e-05, "loss": 1.9258, "step": 7322 }, { "epoch": 0.5512335572743183, "grad_norm": 4.4065937995910645, "learning_rate": 4.430098002526442e-05, "loss": 1.9056, "step": 7323 }, { "epoch": 0.5513088315549785, "grad_norm": 4.024372100830078, "learning_rate": 4.4288868751667826e-05, "loss": 2.0418, "step": 7324 }, { "epoch": 0.5513841058356386, "grad_norm": 4.446809768676758, "learning_rate": 4.427675781758161e-05, "loss": 2.0881, "step": 7325 }, { "epoch": 0.5514593801162988, "grad_norm": 4.457399845123291, "learning_rate": 4.426464722372571e-05, "loss": 1.6843, "step": 7326 }, { "epoch": 0.5515346543969589, "grad_norm": 4.253214359283447, "learning_rate": 4.425253697082007e-05, "loss": 1.8723, "step": 7327 }, { "epoch": 0.551609928677619, "grad_norm": 4.1736226081848145, "learning_rate": 4.4240427059584646e-05, "loss": 1.5328, "step": 7328 }, { "epoch": 0.5516852029582793, "grad_norm": 5.437619686126709, "learning_rate": 4.4228317490739276e-05, "loss": 2.0168, "step": 7329 }, { "epoch": 0.5517604772389394, "grad_norm": 4.763359546661377, "learning_rate": 4.421620826500388e-05, "loss": 1.8036, "step": 7330 }, { "epoch": 0.5518357515195995, "grad_norm": 4.337121963500977, "learning_rate": 4.4204099383098294e-05, "loss": 1.5906, "step": 7331 }, { "epoch": 0.5519110258002597, "grad_norm": 5.2828779220581055, "learning_rate": 4.419199084574237e-05, "loss": 2.0052, "step": 7332 }, { "epoch": 0.5519863000809199, "grad_norm": 5.544689178466797, "learning_rate": 4.417988265365596e-05, "loss": 1.824, "step": 7333 }, { "epoch": 0.55206157436158, "grad_norm": 5.500155925750732, "learning_rate": 4.4167774807558796e-05, "loss": 1.9976, "step": 7334 }, { "epoch": 0.5521368486422401, "grad_norm": 4.5070390701293945, "learning_rate": 4.415566730817071e-05, "loss": 2.0174, "step": 7335 }, { "epoch": 0.5522121229229003, "grad_norm": 4.999845504760742, "learning_rate": 4.414356015621141e-05, "loss": 2.0856, "step": 7336 }, { "epoch": 0.5522873972035605, "grad_norm": 6.309022903442383, "learning_rate": 4.413145335240066e-05, "loss": 2.0811, "step": 7337 }, { "epoch": 0.5523626714842206, "grad_norm": 3.884275197982788, "learning_rate": 4.4119346897458156e-05, "loss": 1.5132, "step": 7338 }, { "epoch": 0.5524379457648808, "grad_norm": 5.557037830352783, "learning_rate": 4.410724079210364e-05, "loss": 1.7826, "step": 7339 }, { "epoch": 0.5525132200455409, "grad_norm": 5.318655014038086, "learning_rate": 4.4095135037056715e-05, "loss": 2.13, "step": 7340 }, { "epoch": 0.552588494326201, "grad_norm": 3.735858678817749, "learning_rate": 4.408302963303709e-05, "loss": 1.7768, "step": 7341 }, { "epoch": 0.5526637686068613, "grad_norm": 4.463040351867676, "learning_rate": 4.407092458076436e-05, "loss": 2.0833, "step": 7342 }, { "epoch": 0.5527390428875214, "grad_norm": 3.703172445297241, "learning_rate": 4.405881988095818e-05, "loss": 1.7127, "step": 7343 }, { "epoch": 0.5528143171681815, "grad_norm": 4.204286098480225, "learning_rate": 4.4046715534338084e-05, "loss": 1.9628, "step": 7344 }, { "epoch": 0.5528895914488418, "grad_norm": 3.9676642417907715, "learning_rate": 4.403461154162369e-05, "loss": 1.8234, "step": 7345 }, { "epoch": 0.5529648657295019, "grad_norm": 4.691980838775635, "learning_rate": 4.40225079035345e-05, "loss": 2.6842, "step": 7346 }, { "epoch": 0.553040140010162, "grad_norm": 4.52195405960083, "learning_rate": 4.4010404620790066e-05, "loss": 1.9568, "step": 7347 }, { "epoch": 0.5531154142908222, "grad_norm": 4.678031921386719, "learning_rate": 4.39983016941099e-05, "loss": 2.0218, "step": 7348 }, { "epoch": 0.5531906885714823, "grad_norm": 3.9991066455841064, "learning_rate": 4.3986199124213465e-05, "loss": 1.6209, "step": 7349 }, { "epoch": 0.5532659628521425, "grad_norm": 4.050024509429932, "learning_rate": 4.397409691182026e-05, "loss": 2.0356, "step": 7350 }, { "epoch": 0.5533412371328027, "grad_norm": 5.740078449249268, "learning_rate": 4.396199505764968e-05, "loss": 2.0958, "step": 7351 }, { "epoch": 0.5534165114134628, "grad_norm": 6.486820220947266, "learning_rate": 4.3949893562421195e-05, "loss": 2.1881, "step": 7352 }, { "epoch": 0.5534917856941229, "grad_norm": 5.230270862579346, "learning_rate": 4.393779242685416e-05, "loss": 2.105, "step": 7353 }, { "epoch": 0.5535670599747832, "grad_norm": 3.867591381072998, "learning_rate": 4.392569165166798e-05, "loss": 1.9645, "step": 7354 }, { "epoch": 0.5536423342554433, "grad_norm": 4.531659126281738, "learning_rate": 4.391359123758198e-05, "loss": 1.7865, "step": 7355 }, { "epoch": 0.5537176085361034, "grad_norm": 3.9760096073150635, "learning_rate": 4.3901491185315544e-05, "loss": 1.9326, "step": 7356 }, { "epoch": 0.5537928828167635, "grad_norm": 5.3282999992370605, "learning_rate": 4.388939149558795e-05, "loss": 1.9297, "step": 7357 }, { "epoch": 0.5538681570974238, "grad_norm": 5.008666515350342, "learning_rate": 4.387729216911849e-05, "loss": 1.952, "step": 7358 }, { "epoch": 0.5539434313780839, "grad_norm": 5.533703327178955, "learning_rate": 4.3865193206626446e-05, "loss": 1.8613, "step": 7359 }, { "epoch": 0.554018705658744, "grad_norm": 5.5756707191467285, "learning_rate": 4.385309460883107e-05, "loss": 2.0622, "step": 7360 }, { "epoch": 0.5540939799394042, "grad_norm": 7.701865196228027, "learning_rate": 4.3840996376451574e-05, "loss": 2.3025, "step": 7361 }, { "epoch": 0.5541692542200644, "grad_norm": 5.8811774253845215, "learning_rate": 4.382889851020718e-05, "loss": 2.211, "step": 7362 }, { "epoch": 0.5542445285007245, "grad_norm": 4.434207916259766, "learning_rate": 4.381680101081706e-05, "loss": 2.0364, "step": 7363 }, { "epoch": 0.5543198027813847, "grad_norm": 4.133795738220215, "learning_rate": 4.380470387900036e-05, "loss": 1.7435, "step": 7364 }, { "epoch": 0.5543950770620448, "grad_norm": 6.588438034057617, "learning_rate": 4.3792607115476256e-05, "loss": 1.7612, "step": 7365 }, { "epoch": 0.554470351342705, "grad_norm": 5.552674770355225, "learning_rate": 4.378051072096384e-05, "loss": 2.0138, "step": 7366 }, { "epoch": 0.5545456256233652, "grad_norm": 4.626672744750977, "learning_rate": 4.376841469618224e-05, "loss": 1.7241, "step": 7367 }, { "epoch": 0.5546208999040253, "grad_norm": 3.9006006717681885, "learning_rate": 4.375631904185047e-05, "loss": 1.8154, "step": 7368 }, { "epoch": 0.5546961741846854, "grad_norm": 4.490985870361328, "learning_rate": 4.3744223758687645e-05, "loss": 1.8341, "step": 7369 }, { "epoch": 0.5547714484653457, "grad_norm": 3.63482403755188, "learning_rate": 4.373212884741275e-05, "loss": 1.8988, "step": 7370 }, { "epoch": 0.5548467227460058, "grad_norm": 5.313016891479492, "learning_rate": 4.3720034308744825e-05, "loss": 2.177, "step": 7371 }, { "epoch": 0.5549219970266659, "grad_norm": 5.0663652420043945, "learning_rate": 4.3707940143402825e-05, "loss": 1.8069, "step": 7372 }, { "epoch": 0.5549972713073261, "grad_norm": 3.6593358516693115, "learning_rate": 4.369584635210575e-05, "loss": 2.1928, "step": 7373 }, { "epoch": 0.5550725455879862, "grad_norm": 3.938316583633423, "learning_rate": 4.368375293557251e-05, "loss": 2.1227, "step": 7374 }, { "epoch": 0.5551478198686464, "grad_norm": 5.642133712768555, "learning_rate": 4.367165989452203e-05, "loss": 1.9792, "step": 7375 }, { "epoch": 0.5552230941493065, "grad_norm": 7.100983619689941, "learning_rate": 4.365956722967321e-05, "loss": 1.845, "step": 7376 }, { "epoch": 0.5552983684299667, "grad_norm": 5.176304340362549, "learning_rate": 4.3647474941744947e-05, "loss": 1.9888, "step": 7377 }, { "epoch": 0.5553736427106268, "grad_norm": 4.718135356903076, "learning_rate": 4.3635383031456045e-05, "loss": 1.5989, "step": 7378 }, { "epoch": 0.555448916991287, "grad_norm": 4.617133617401123, "learning_rate": 4.3623291499525356e-05, "loss": 2.3071, "step": 7379 }, { "epoch": 0.5555241912719472, "grad_norm": 4.759497165679932, "learning_rate": 4.361120034667172e-05, "loss": 2.1348, "step": 7380 }, { "epoch": 0.5555994655526073, "grad_norm": 4.604881763458252, "learning_rate": 4.3599109573613875e-05, "loss": 1.8121, "step": 7381 }, { "epoch": 0.5556747398332674, "grad_norm": 5.332067966461182, "learning_rate": 4.35870191810706e-05, "loss": 2.3038, "step": 7382 }, { "epoch": 0.5557500141139277, "grad_norm": 4.925795078277588, "learning_rate": 4.357492916976062e-05, "loss": 2.0304, "step": 7383 }, { "epoch": 0.5558252883945878, "grad_norm": 3.9847230911254883, "learning_rate": 4.35628395404027e-05, "loss": 1.8119, "step": 7384 }, { "epoch": 0.5559005626752479, "grad_norm": 3.7008583545684814, "learning_rate": 4.355075029371547e-05, "loss": 1.7616, "step": 7385 }, { "epoch": 0.5559758369559081, "grad_norm": 3.713547945022583, "learning_rate": 4.353866143041764e-05, "loss": 2.1348, "step": 7386 }, { "epoch": 0.5560511112365683, "grad_norm": 6.205094814300537, "learning_rate": 4.352657295122784e-05, "loss": 1.9995, "step": 7387 }, { "epoch": 0.5561263855172284, "grad_norm": 4.85087251663208, "learning_rate": 4.3514484856864724e-05, "loss": 1.6254, "step": 7388 }, { "epoch": 0.5562016597978886, "grad_norm": 4.379775524139404, "learning_rate": 4.3502397148046855e-05, "loss": 1.6096, "step": 7389 }, { "epoch": 0.5562769340785487, "grad_norm": 5.867446422576904, "learning_rate": 4.349030982549285e-05, "loss": 1.6307, "step": 7390 }, { "epoch": 0.5563522083592088, "grad_norm": 4.021681308746338, "learning_rate": 4.347822288992123e-05, "loss": 1.6925, "step": 7391 }, { "epoch": 0.5564274826398691, "grad_norm": 4.031123638153076, "learning_rate": 4.346613634205055e-05, "loss": 1.6419, "step": 7392 }, { "epoch": 0.5565027569205292, "grad_norm": 5.8570146560668945, "learning_rate": 4.3454050182599315e-05, "loss": 1.7624, "step": 7393 }, { "epoch": 0.5565780312011893, "grad_norm": 4.970510959625244, "learning_rate": 4.3441964412286026e-05, "loss": 1.8939, "step": 7394 }, { "epoch": 0.5566533054818494, "grad_norm": 5.431758403778076, "learning_rate": 4.3429879031829125e-05, "loss": 1.6955, "step": 7395 }, { "epoch": 0.5567285797625097, "grad_norm": 4.710243225097656, "learning_rate": 4.341779404194706e-05, "loss": 1.7647, "step": 7396 }, { "epoch": 0.5568038540431698, "grad_norm": 3.986436128616333, "learning_rate": 4.3405709443358256e-05, "loss": 1.5752, "step": 7397 }, { "epoch": 0.5568791283238299, "grad_norm": 6.038854122161865, "learning_rate": 4.33936252367811e-05, "loss": 1.776, "step": 7398 }, { "epoch": 0.5569544026044901, "grad_norm": 6.0635600090026855, "learning_rate": 4.338154142293398e-05, "loss": 1.9351, "step": 7399 }, { "epoch": 0.5570296768851503, "grad_norm": 5.238595962524414, "learning_rate": 4.336945800253522e-05, "loss": 1.8521, "step": 7400 }, { "epoch": 0.5571049511658104, "grad_norm": 5.034926414489746, "learning_rate": 4.335737497630318e-05, "loss": 1.9239, "step": 7401 }, { "epoch": 0.5571802254464706, "grad_norm": 6.994501113891602, "learning_rate": 4.334529234495612e-05, "loss": 2.0602, "step": 7402 }, { "epoch": 0.5572554997271307, "grad_norm": 4.744661808013916, "learning_rate": 4.3333210109212336e-05, "loss": 2.0983, "step": 7403 }, { "epoch": 0.5573307740077909, "grad_norm": 4.382345199584961, "learning_rate": 4.3321128269790074e-05, "loss": 1.9319, "step": 7404 }, { "epoch": 0.5574060482884511, "grad_norm": 6.412303924560547, "learning_rate": 4.3309046827407594e-05, "loss": 1.7208, "step": 7405 }, { "epoch": 0.5574813225691112, "grad_norm": 13.301950454711914, "learning_rate": 4.329696578278306e-05, "loss": 2.0987, "step": 7406 }, { "epoch": 0.5575565968497713, "grad_norm": 5.290268898010254, "learning_rate": 4.3284885136634704e-05, "loss": 1.5533, "step": 7407 }, { "epoch": 0.5576318711304316, "grad_norm": 6.362166881561279, "learning_rate": 4.327280488968063e-05, "loss": 1.948, "step": 7408 }, { "epoch": 0.5577071454110917, "grad_norm": 3.867560863494873, "learning_rate": 4.3260725042639014e-05, "loss": 1.7492, "step": 7409 }, { "epoch": 0.5577824196917518, "grad_norm": 4.308571815490723, "learning_rate": 4.324864559622795e-05, "loss": 2.0932, "step": 7410 }, { "epoch": 0.557857693972412, "grad_norm": 4.236301898956299, "learning_rate": 4.323656655116553e-05, "loss": 1.8278, "step": 7411 }, { "epoch": 0.5579329682530721, "grad_norm": 5.600011825561523, "learning_rate": 4.3224487908169844e-05, "loss": 1.7568, "step": 7412 }, { "epoch": 0.5580082425337323, "grad_norm": 5.102320671081543, "learning_rate": 4.32124096679589e-05, "loss": 2.0575, "step": 7413 }, { "epoch": 0.5580835168143924, "grad_norm": 5.7027130126953125, "learning_rate": 4.320033183125072e-05, "loss": 2.0576, "step": 7414 }, { "epoch": 0.5581587910950526, "grad_norm": 4.68280029296875, "learning_rate": 4.31882543987633e-05, "loss": 1.926, "step": 7415 }, { "epoch": 0.5582340653757127, "grad_norm": 4.541062355041504, "learning_rate": 4.3176177371214634e-05, "loss": 1.9934, "step": 7416 }, { "epoch": 0.5583093396563729, "grad_norm": 5.059689998626709, "learning_rate": 4.316410074932262e-05, "loss": 1.7551, "step": 7417 }, { "epoch": 0.5583846139370331, "grad_norm": 4.464911460876465, "learning_rate": 4.315202453380522e-05, "loss": 1.9189, "step": 7418 }, { "epoch": 0.5584598882176932, "grad_norm": 3.9713943004608154, "learning_rate": 4.3139948725380295e-05, "loss": 1.8338, "step": 7419 }, { "epoch": 0.5585351624983533, "grad_norm": 5.557081699371338, "learning_rate": 4.312787332476574e-05, "loss": 1.8421, "step": 7420 }, { "epoch": 0.5586104367790136, "grad_norm": 6.325413703918457, "learning_rate": 4.311579833267939e-05, "loss": 2.0226, "step": 7421 }, { "epoch": 0.5586857110596737, "grad_norm": 4.482569217681885, "learning_rate": 4.3103723749839096e-05, "loss": 1.982, "step": 7422 }, { "epoch": 0.5587609853403338, "grad_norm": 4.55171537399292, "learning_rate": 4.3091649576962624e-05, "loss": 1.794, "step": 7423 }, { "epoch": 0.558836259620994, "grad_norm": 5.786789894104004, "learning_rate": 4.3079575814767766e-05, "loss": 1.7255, "step": 7424 }, { "epoch": 0.5589115339016542, "grad_norm": 4.895748615264893, "learning_rate": 4.306750246397225e-05, "loss": 1.6539, "step": 7425 }, { "epoch": 0.5589868081823143, "grad_norm": 6.730261325836182, "learning_rate": 4.305542952529386e-05, "loss": 1.9069, "step": 7426 }, { "epoch": 0.5590620824629745, "grad_norm": 4.3582000732421875, "learning_rate": 4.304335699945023e-05, "loss": 1.8138, "step": 7427 }, { "epoch": 0.5591373567436346, "grad_norm": 5.143578052520752, "learning_rate": 4.3031284887159065e-05, "loss": 1.7364, "step": 7428 }, { "epoch": 0.5592126310242947, "grad_norm": 6.419661521911621, "learning_rate": 4.301921318913804e-05, "loss": 1.8883, "step": 7429 }, { "epoch": 0.559287905304955, "grad_norm": 4.226903438568115, "learning_rate": 4.300714190610473e-05, "loss": 1.6471, "step": 7430 }, { "epoch": 0.5593631795856151, "grad_norm": 3.828778028488159, "learning_rate": 4.2995071038776794e-05, "loss": 1.8076, "step": 7431 }, { "epoch": 0.5594384538662752, "grad_norm": 3.457158327102661, "learning_rate": 4.2983000587871763e-05, "loss": 1.7967, "step": 7432 }, { "epoch": 0.5595137281469353, "grad_norm": 4.551523685455322, "learning_rate": 4.2970930554107244e-05, "loss": 1.9437, "step": 7433 }, { "epoch": 0.5595890024275956, "grad_norm": 7.061385631561279, "learning_rate": 4.2958860938200706e-05, "loss": 2.1281, "step": 7434 }, { "epoch": 0.5596642767082557, "grad_norm": 5.252857685089111, "learning_rate": 4.294679174086971e-05, "loss": 1.768, "step": 7435 }, { "epoch": 0.5597395509889158, "grad_norm": 5.978367805480957, "learning_rate": 4.293472296283168e-05, "loss": 2.1951, "step": 7436 }, { "epoch": 0.559814825269576, "grad_norm": 5.210687637329102, "learning_rate": 4.292265460480411e-05, "loss": 1.8667, "step": 7437 }, { "epoch": 0.5598900995502362, "grad_norm": 4.041530132293701, "learning_rate": 4.2910586667504406e-05, "loss": 1.7806, "step": 7438 }, { "epoch": 0.5599653738308963, "grad_norm": 3.877411365509033, "learning_rate": 4.289851915165001e-05, "loss": 1.7278, "step": 7439 }, { "epoch": 0.5600406481115565, "grad_norm": 3.7501790523529053, "learning_rate": 4.288645205795824e-05, "loss": 1.7401, "step": 7440 }, { "epoch": 0.5601159223922166, "grad_norm": 3.820499897003174, "learning_rate": 4.28743853871465e-05, "loss": 1.5775, "step": 7441 }, { "epoch": 0.5601911966728768, "grad_norm": 4.196033000946045, "learning_rate": 4.28623191399321e-05, "loss": 1.8763, "step": 7442 }, { "epoch": 0.560266470953537, "grad_norm": 4.171761512756348, "learning_rate": 4.285025331703236e-05, "loss": 1.8668, "step": 7443 }, { "epoch": 0.5603417452341971, "grad_norm": 4.1810503005981445, "learning_rate": 4.283818791916453e-05, "loss": 1.7125, "step": 7444 }, { "epoch": 0.5604170195148572, "grad_norm": 5.239692687988281, "learning_rate": 4.2826122947045875e-05, "loss": 1.8159, "step": 7445 }, { "epoch": 0.5604922937955175, "grad_norm": 5.308150768280029, "learning_rate": 4.281405840139364e-05, "loss": 2.1461, "step": 7446 }, { "epoch": 0.5605675680761776, "grad_norm": 4.282695293426514, "learning_rate": 4.280199428292501e-05, "loss": 1.757, "step": 7447 }, { "epoch": 0.5606428423568377, "grad_norm": 3.744215726852417, "learning_rate": 4.278993059235716e-05, "loss": 2.2698, "step": 7448 }, { "epoch": 0.5607181166374979, "grad_norm": 4.231766223907471, "learning_rate": 4.277786733040725e-05, "loss": 1.8222, "step": 7449 }, { "epoch": 0.560793390918158, "grad_norm": 4.536243915557861, "learning_rate": 4.276580449779242e-05, "loss": 1.5572, "step": 7450 }, { "epoch": 0.5608686651988182, "grad_norm": 3.9466090202331543, "learning_rate": 4.275374209522974e-05, "loss": 1.9276, "step": 7451 }, { "epoch": 0.5609439394794784, "grad_norm": 4.0187458992004395, "learning_rate": 4.27416801234363e-05, "loss": 1.6828, "step": 7452 }, { "epoch": 0.5610192137601385, "grad_norm": 4.821317195892334, "learning_rate": 4.272961858312916e-05, "loss": 1.7848, "step": 7453 }, { "epoch": 0.5610944880407986, "grad_norm": 4.9564900398254395, "learning_rate": 4.2717557475025335e-05, "loss": 2.0061, "step": 7454 }, { "epoch": 0.5611697623214588, "grad_norm": 5.768749237060547, "learning_rate": 4.27054967998418e-05, "loss": 2.3863, "step": 7455 }, { "epoch": 0.561245036602119, "grad_norm": 5.145970344543457, "learning_rate": 4.269343655829558e-05, "loss": 1.9027, "step": 7456 }, { "epoch": 0.5613203108827791, "grad_norm": 4.501526355743408, "learning_rate": 4.2681376751103575e-05, "loss": 1.9796, "step": 7457 }, { "epoch": 0.5613955851634392, "grad_norm": 4.454838752746582, "learning_rate": 4.266931737898273e-05, "loss": 2.2908, "step": 7458 }, { "epoch": 0.5614708594440995, "grad_norm": 5.044410705566406, "learning_rate": 4.265725844264992e-05, "loss": 1.5678, "step": 7459 }, { "epoch": 0.5615461337247596, "grad_norm": 6.122454643249512, "learning_rate": 4.264519994282202e-05, "loss": 1.6593, "step": 7460 }, { "epoch": 0.5616214080054197, "grad_norm": 4.768460750579834, "learning_rate": 4.263314188021591e-05, "loss": 1.905, "step": 7461 }, { "epoch": 0.5616966822860799, "grad_norm": 4.453472137451172, "learning_rate": 4.2621084255548363e-05, "loss": 1.9826, "step": 7462 }, { "epoch": 0.5617719565667401, "grad_norm": 5.3138108253479, "learning_rate": 4.2609027069536186e-05, "loss": 2.1403, "step": 7463 }, { "epoch": 0.5618472308474002, "grad_norm": 4.939953327178955, "learning_rate": 4.259697032289613e-05, "loss": 2.0381, "step": 7464 }, { "epoch": 0.5619225051280604, "grad_norm": 4.13845157623291, "learning_rate": 4.258491401634497e-05, "loss": 1.751, "step": 7465 }, { "epoch": 0.5619977794087205, "grad_norm": 4.92451810836792, "learning_rate": 4.2572858150599374e-05, "loss": 1.6275, "step": 7466 }, { "epoch": 0.5620730536893807, "grad_norm": 5.270460605621338, "learning_rate": 4.256080272637607e-05, "loss": 2.5168, "step": 7467 }, { "epoch": 0.5621483279700409, "grad_norm": 4.0166096687316895, "learning_rate": 4.254874774439168e-05, "loss": 1.4184, "step": 7468 }, { "epoch": 0.562223602250701, "grad_norm": 6.811859130859375, "learning_rate": 4.253669320536286e-05, "loss": 1.862, "step": 7469 }, { "epoch": 0.5622988765313611, "grad_norm": 4.710254669189453, "learning_rate": 4.252463911000621e-05, "loss": 1.8795, "step": 7470 }, { "epoch": 0.5623741508120214, "grad_norm": 4.976755619049072, "learning_rate": 4.251258545903835e-05, "loss": 1.8689, "step": 7471 }, { "epoch": 0.5624494250926815, "grad_norm": 4.888972759246826, "learning_rate": 4.250053225317576e-05, "loss": 1.9504, "step": 7472 }, { "epoch": 0.5625246993733416, "grad_norm": 4.428262233734131, "learning_rate": 4.2488479493135034e-05, "loss": 2.0106, "step": 7473 }, { "epoch": 0.5625999736540017, "grad_norm": 4.419419765472412, "learning_rate": 4.2476427179632635e-05, "loss": 2.2447, "step": 7474 }, { "epoch": 0.562675247934662, "grad_norm": 3.3958523273468018, "learning_rate": 4.246437531338506e-05, "loss": 1.9714, "step": 7475 }, { "epoch": 0.5627505222153221, "grad_norm": 4.874242782592773, "learning_rate": 4.245232389510874e-05, "loss": 2.0492, "step": 7476 }, { "epoch": 0.5628257964959822, "grad_norm": 4.655029296875, "learning_rate": 4.24402729255201e-05, "loss": 2.0589, "step": 7477 }, { "epoch": 0.5629010707766424, "grad_norm": 4.235930442810059, "learning_rate": 4.242822240533558e-05, "loss": 1.9571, "step": 7478 }, { "epoch": 0.5629763450573025, "grad_norm": 5.331794738769531, "learning_rate": 4.2416172335271476e-05, "loss": 2.2019, "step": 7479 }, { "epoch": 0.5630516193379627, "grad_norm": 6.205273151397705, "learning_rate": 4.240412271604419e-05, "loss": 2.4965, "step": 7480 }, { "epoch": 0.5631268936186229, "grad_norm": 3.5802690982818604, "learning_rate": 4.239207354837e-05, "loss": 1.4024, "step": 7481 }, { "epoch": 0.563202167899283, "grad_norm": 5.780887603759766, "learning_rate": 4.238002483296522e-05, "loss": 1.9583, "step": 7482 }, { "epoch": 0.5632774421799431, "grad_norm": 4.809574127197266, "learning_rate": 4.236797657054608e-05, "loss": 1.9486, "step": 7483 }, { "epoch": 0.5633527164606034, "grad_norm": 5.021664619445801, "learning_rate": 4.2355928761828855e-05, "loss": 1.9926, "step": 7484 }, { "epoch": 0.5634279907412635, "grad_norm": 3.7073211669921875, "learning_rate": 4.234388140752972e-05, "loss": 1.6559, "step": 7485 }, { "epoch": 0.5635032650219236, "grad_norm": 4.195054531097412, "learning_rate": 4.233183450836487e-05, "loss": 1.7289, "step": 7486 }, { "epoch": 0.5635785393025838, "grad_norm": 3.6992850303649902, "learning_rate": 4.2319788065050445e-05, "loss": 1.7606, "step": 7487 }, { "epoch": 0.563653813583244, "grad_norm": 4.9785661697387695, "learning_rate": 4.230774207830261e-05, "loss": 1.8724, "step": 7488 }, { "epoch": 0.5637290878639041, "grad_norm": 5.745755672454834, "learning_rate": 4.229569654883741e-05, "loss": 1.9408, "step": 7489 }, { "epoch": 0.5638043621445643, "grad_norm": 4.568248271942139, "learning_rate": 4.2283651477370966e-05, "loss": 2.0735, "step": 7490 }, { "epoch": 0.5638796364252244, "grad_norm": 7.15548038482666, "learning_rate": 4.227160686461928e-05, "loss": 1.901, "step": 7491 }, { "epoch": 0.5639549107058845, "grad_norm": 4.740743637084961, "learning_rate": 4.22595627112984e-05, "loss": 1.8834, "step": 7492 }, { "epoch": 0.5640301849865447, "grad_norm": 5.510941505432129, "learning_rate": 4.2247519018124306e-05, "loss": 1.689, "step": 7493 }, { "epoch": 0.5641054592672049, "grad_norm": 3.954754590988159, "learning_rate": 4.223547578581295e-05, "loss": 1.9628, "step": 7494 }, { "epoch": 0.564180733547865, "grad_norm": 5.239189147949219, "learning_rate": 4.2223433015080314e-05, "loss": 1.9016, "step": 7495 }, { "epoch": 0.5642560078285251, "grad_norm": 3.6925413608551025, "learning_rate": 4.221139070664224e-05, "loss": 1.5892, "step": 7496 }, { "epoch": 0.5643312821091854, "grad_norm": 3.8457372188568115, "learning_rate": 4.219934886121465e-05, "loss": 1.8128, "step": 7497 }, { "epoch": 0.5644065563898455, "grad_norm": 4.166738510131836, "learning_rate": 4.218730747951338e-05, "loss": 1.8704, "step": 7498 }, { "epoch": 0.5644818306705056, "grad_norm": 4.019085884094238, "learning_rate": 4.2175266562254287e-05, "loss": 1.9747, "step": 7499 }, { "epoch": 0.5645571049511658, "grad_norm": 4.934507369995117, "learning_rate": 4.216322611015312e-05, "loss": 2.0902, "step": 7500 }, { "epoch": 0.564632379231826, "grad_norm": 5.739882469177246, "learning_rate": 4.21511861239257e-05, "loss": 1.8918, "step": 7501 }, { "epoch": 0.5647076535124861, "grad_norm": 6.980597496032715, "learning_rate": 4.2139146604287715e-05, "loss": 1.8077, "step": 7502 }, { "epoch": 0.5647829277931463, "grad_norm": 5.41663932800293, "learning_rate": 4.212710755195493e-05, "loss": 1.6561, "step": 7503 }, { "epoch": 0.5648582020738064, "grad_norm": 4.110705852508545, "learning_rate": 4.2115068967643e-05, "loss": 1.781, "step": 7504 }, { "epoch": 0.5649334763544666, "grad_norm": 4.848496437072754, "learning_rate": 4.2103030852067605e-05, "loss": 1.812, "step": 7505 }, { "epoch": 0.5650087506351268, "grad_norm": 5.051675796508789, "learning_rate": 4.209099320594436e-05, "loss": 1.8409, "step": 7506 }, { "epoch": 0.5650840249157869, "grad_norm": 4.491971015930176, "learning_rate": 4.2078956029988884e-05, "loss": 1.6414, "step": 7507 }, { "epoch": 0.565159299196447, "grad_norm": 3.921613931655884, "learning_rate": 4.206691932491673e-05, "loss": 2.1208, "step": 7508 }, { "epoch": 0.5652345734771073, "grad_norm": 4.366812705993652, "learning_rate": 4.205488309144346e-05, "loss": 1.9683, "step": 7509 }, { "epoch": 0.5653098477577674, "grad_norm": 4.005619049072266, "learning_rate": 4.20428473302846e-05, "loss": 1.5771, "step": 7510 }, { "epoch": 0.5653851220384275, "grad_norm": 5.860362529754639, "learning_rate": 4.203081204215563e-05, "loss": 1.9189, "step": 7511 }, { "epoch": 0.5654603963190876, "grad_norm": 3.9213802814483643, "learning_rate": 4.2018777227772034e-05, "loss": 2.0196, "step": 7512 }, { "epoch": 0.5655356705997479, "grad_norm": 4.284903049468994, "learning_rate": 4.20067428878492e-05, "loss": 1.6371, "step": 7513 }, { "epoch": 0.565610944880408, "grad_norm": 7.116332530975342, "learning_rate": 4.19947090231026e-05, "loss": 2.1986, "step": 7514 }, { "epoch": 0.5656862191610681, "grad_norm": 6.714890956878662, "learning_rate": 4.198267563424755e-05, "loss": 1.9509, "step": 7515 }, { "epoch": 0.5657614934417283, "grad_norm": 5.1159257888793945, "learning_rate": 4.1970642721999455e-05, "loss": 2.0176, "step": 7516 }, { "epoch": 0.5658367677223884, "grad_norm": 5.18733549118042, "learning_rate": 4.195861028707359e-05, "loss": 2.2061, "step": 7517 }, { "epoch": 0.5659120420030486, "grad_norm": 6.285945892333984, "learning_rate": 4.194657833018529e-05, "loss": 1.9538, "step": 7518 }, { "epoch": 0.5659873162837088, "grad_norm": 5.772747039794922, "learning_rate": 4.193454685204979e-05, "loss": 1.8703, "step": 7519 }, { "epoch": 0.5660625905643689, "grad_norm": 4.274621486663818, "learning_rate": 4.192251585338234e-05, "loss": 1.9077, "step": 7520 }, { "epoch": 0.566137864845029, "grad_norm": 6.213925361633301, "learning_rate": 4.191048533489813e-05, "loss": 2.0577, "step": 7521 }, { "epoch": 0.5662131391256893, "grad_norm": 6.930326461791992, "learning_rate": 4.189845529731238e-05, "loss": 1.9268, "step": 7522 }, { "epoch": 0.5662884134063494, "grad_norm": 4.048588752746582, "learning_rate": 4.18864257413402e-05, "loss": 2.0413, "step": 7523 }, { "epoch": 0.5663636876870095, "grad_norm": 4.316848278045654, "learning_rate": 4.187439666769672e-05, "loss": 1.7213, "step": 7524 }, { "epoch": 0.5664389619676697, "grad_norm": 5.2397589683532715, "learning_rate": 4.186236807709706e-05, "loss": 2.2517, "step": 7525 }, { "epoch": 0.5665142362483299, "grad_norm": 5.127415657043457, "learning_rate": 4.1850339970256266e-05, "loss": 1.9301, "step": 7526 }, { "epoch": 0.56658951052899, "grad_norm": 5.182537078857422, "learning_rate": 4.183831234788938e-05, "loss": 2.2056, "step": 7527 }, { "epoch": 0.5666647848096502, "grad_norm": 7.227933883666992, "learning_rate": 4.18262852107114e-05, "loss": 1.5408, "step": 7528 }, { "epoch": 0.5667400590903103, "grad_norm": 4.186732769012451, "learning_rate": 4.1814258559437325e-05, "loss": 1.7282, "step": 7529 }, { "epoch": 0.5668153333709705, "grad_norm": 4.639697551727295, "learning_rate": 4.180223239478208e-05, "loss": 1.9161, "step": 7530 }, { "epoch": 0.5668906076516306, "grad_norm": 5.565513610839844, "learning_rate": 4.17902067174606e-05, "loss": 1.8947, "step": 7531 }, { "epoch": 0.5669658819322908, "grad_norm": 4.691824913024902, "learning_rate": 4.177818152818776e-05, "loss": 2.1495, "step": 7532 }, { "epoch": 0.5670411562129509, "grad_norm": 6.342762470245361, "learning_rate": 4.176615682767846e-05, "loss": 2.3951, "step": 7533 }, { "epoch": 0.567116430493611, "grad_norm": 4.102329730987549, "learning_rate": 4.175413261664749e-05, "loss": 1.8957, "step": 7534 }, { "epoch": 0.5671917047742713, "grad_norm": 3.8884243965148926, "learning_rate": 4.17421088958097e-05, "loss": 1.8524, "step": 7535 }, { "epoch": 0.5672669790549314, "grad_norm": 5.006742477416992, "learning_rate": 4.1730085665879814e-05, "loss": 1.8079, "step": 7536 }, { "epoch": 0.5673422533355915, "grad_norm": 3.7115249633789062, "learning_rate": 4.171806292757263e-05, "loss": 1.8584, "step": 7537 }, { "epoch": 0.5674175276162517, "grad_norm": 4.174636363983154, "learning_rate": 4.170604068160282e-05, "loss": 1.8813, "step": 7538 }, { "epoch": 0.5674928018969119, "grad_norm": 5.886826515197754, "learning_rate": 4.1694018928685114e-05, "loss": 2.1428, "step": 7539 }, { "epoch": 0.567568076177572, "grad_norm": 4.3213605880737305, "learning_rate": 4.1681997669534134e-05, "loss": 1.9808, "step": 7540 }, { "epoch": 0.5676433504582322, "grad_norm": 3.2923669815063477, "learning_rate": 4.166997690486452e-05, "loss": 2.3043, "step": 7541 }, { "epoch": 0.5677186247388923, "grad_norm": 4.019612789154053, "learning_rate": 4.1657956635390896e-05, "loss": 1.938, "step": 7542 }, { "epoch": 0.5677938990195525, "grad_norm": 6.246671676635742, "learning_rate": 4.164593686182779e-05, "loss": 2.0247, "step": 7543 }, { "epoch": 0.5678691733002127, "grad_norm": 4.0431623458862305, "learning_rate": 4.1633917584889804e-05, "loss": 1.9226, "step": 7544 }, { "epoch": 0.5679444475808728, "grad_norm": 4.333072662353516, "learning_rate": 4.1621898805291384e-05, "loss": 1.9595, "step": 7545 }, { "epoch": 0.5680197218615329, "grad_norm": 4.088685035705566, "learning_rate": 4.160988052374706e-05, "loss": 1.9298, "step": 7546 }, { "epoch": 0.5680949961421932, "grad_norm": 4.085568904876709, "learning_rate": 4.159786274097125e-05, "loss": 2.0882, "step": 7547 }, { "epoch": 0.5681702704228533, "grad_norm": 3.7184553146362305, "learning_rate": 4.15858454576784e-05, "loss": 1.8381, "step": 7548 }, { "epoch": 0.5682455447035134, "grad_norm": 8.849818229675293, "learning_rate": 4.157382867458288e-05, "loss": 1.8773, "step": 7549 }, { "epoch": 0.5683208189841736, "grad_norm": 4.118654251098633, "learning_rate": 4.156181239239909e-05, "loss": 1.776, "step": 7550 }, { "epoch": 0.5683960932648338, "grad_norm": 4.431726932525635, "learning_rate": 4.154979661184132e-05, "loss": 1.9969, "step": 7551 }, { "epoch": 0.5684713675454939, "grad_norm": 4.57051944732666, "learning_rate": 4.153778133362391e-05, "loss": 1.8497, "step": 7552 }, { "epoch": 0.568546641826154, "grad_norm": 5.797927379608154, "learning_rate": 4.15257665584611e-05, "loss": 1.9165, "step": 7553 }, { "epoch": 0.5686219161068142, "grad_norm": 4.613893985748291, "learning_rate": 4.151375228706719e-05, "loss": 2.1549, "step": 7554 }, { "epoch": 0.5686971903874743, "grad_norm": 4.822774887084961, "learning_rate": 4.150173852015632e-05, "loss": 1.9217, "step": 7555 }, { "epoch": 0.5687724646681345, "grad_norm": 3.97965145111084, "learning_rate": 4.148972525844272e-05, "loss": 2.1573, "step": 7556 }, { "epoch": 0.5688477389487947, "grad_norm": 4.471109390258789, "learning_rate": 4.147771250264055e-05, "loss": 2.0096, "step": 7557 }, { "epoch": 0.5689230132294548, "grad_norm": 3.559664726257324, "learning_rate": 4.1465700253463895e-05, "loss": 1.8905, "step": 7558 }, { "epoch": 0.5689982875101149, "grad_norm": 5.800527095794678, "learning_rate": 4.145368851162689e-05, "loss": 2.2562, "step": 7559 }, { "epoch": 0.5690735617907752, "grad_norm": 5.9950714111328125, "learning_rate": 4.144167727784357e-05, "loss": 2.1329, "step": 7560 }, { "epoch": 0.5691488360714353, "grad_norm": 4.242916584014893, "learning_rate": 4.1429666552827994e-05, "loss": 2.0566, "step": 7561 }, { "epoch": 0.5692241103520954, "grad_norm": 6.299023628234863, "learning_rate": 4.141765633729413e-05, "loss": 2.287, "step": 7562 }, { "epoch": 0.5692993846327556, "grad_norm": 5.601983547210693, "learning_rate": 4.1405646631955996e-05, "loss": 1.8472, "step": 7563 }, { "epoch": 0.5693746589134158, "grad_norm": 5.557967662811279, "learning_rate": 4.139363743752749e-05, "loss": 2.0713, "step": 7564 }, { "epoch": 0.5694499331940759, "grad_norm": 3.4045512676239014, "learning_rate": 4.138162875472256e-05, "loss": 1.8601, "step": 7565 }, { "epoch": 0.5695252074747361, "grad_norm": 5.910848140716553, "learning_rate": 4.136962058425505e-05, "loss": 1.712, "step": 7566 }, { "epoch": 0.5696004817553962, "grad_norm": 5.968437671661377, "learning_rate": 4.135761292683886e-05, "loss": 2.1861, "step": 7567 }, { "epoch": 0.5696757560360564, "grad_norm": 5.811207294464111, "learning_rate": 4.134560578318776e-05, "loss": 1.5196, "step": 7568 }, { "epoch": 0.5697510303167166, "grad_norm": 4.529589653015137, "learning_rate": 4.1333599154015564e-05, "loss": 1.8235, "step": 7569 }, { "epoch": 0.5698263045973767, "grad_norm": 3.7079923152923584, "learning_rate": 4.132159304003604e-05, "loss": 1.8984, "step": 7570 }, { "epoch": 0.5699015788780368, "grad_norm": 4.084473133087158, "learning_rate": 4.130958744196291e-05, "loss": 2.0156, "step": 7571 }, { "epoch": 0.569976853158697, "grad_norm": 4.172169208526611, "learning_rate": 4.129758236050987e-05, "loss": 1.9223, "step": 7572 }, { "epoch": 0.5700521274393572, "grad_norm": 3.107506036758423, "learning_rate": 4.1285577796390576e-05, "loss": 1.4488, "step": 7573 }, { "epoch": 0.5701274017200173, "grad_norm": 4.356510162353516, "learning_rate": 4.127357375031871e-05, "loss": 1.9654, "step": 7574 }, { "epoch": 0.5702026760006774, "grad_norm": 5.4899187088012695, "learning_rate": 4.126157022300781e-05, "loss": 2.0589, "step": 7575 }, { "epoch": 0.5702779502813377, "grad_norm": 4.5292792320251465, "learning_rate": 4.124956721517151e-05, "loss": 2.0968, "step": 7576 }, { "epoch": 0.5703532245619978, "grad_norm": 4.7366042137146, "learning_rate": 4.1237564727523315e-05, "loss": 1.8485, "step": 7577 }, { "epoch": 0.5704284988426579, "grad_norm": 5.013432025909424, "learning_rate": 4.122556276077677e-05, "loss": 1.416, "step": 7578 }, { "epoch": 0.5705037731233181, "grad_norm": 5.654892921447754, "learning_rate": 4.121356131564533e-05, "loss": 1.777, "step": 7579 }, { "epoch": 0.5705790474039782, "grad_norm": 6.319755554199219, "learning_rate": 4.120156039284246e-05, "loss": 1.8112, "step": 7580 }, { "epoch": 0.5706543216846384, "grad_norm": 4.170255661010742, "learning_rate": 4.11895599930816e-05, "loss": 1.8495, "step": 7581 }, { "epoch": 0.5707295959652986, "grad_norm": 4.765605926513672, "learning_rate": 4.1177560117076095e-05, "loss": 1.5139, "step": 7582 }, { "epoch": 0.5708048702459587, "grad_norm": 4.976629257202148, "learning_rate": 4.1165560765539326e-05, "loss": 2.1208, "step": 7583 }, { "epoch": 0.5708801445266188, "grad_norm": 3.914757013320923, "learning_rate": 4.115356193918464e-05, "loss": 1.5957, "step": 7584 }, { "epoch": 0.5709554188072791, "grad_norm": 4.362957000732422, "learning_rate": 4.11415636387253e-05, "loss": 2.5513, "step": 7585 }, { "epoch": 0.5710306930879392, "grad_norm": 7.167521953582764, "learning_rate": 4.112956586487458e-05, "loss": 2.1259, "step": 7586 }, { "epoch": 0.5711059673685993, "grad_norm": 6.460814476013184, "learning_rate": 4.1117568618345713e-05, "loss": 2.0566, "step": 7587 }, { "epoch": 0.5711812416492595, "grad_norm": 5.285572052001953, "learning_rate": 4.110557189985193e-05, "loss": 1.562, "step": 7588 }, { "epoch": 0.5712565159299197, "grad_norm": 4.9157867431640625, "learning_rate": 4.109357571010635e-05, "loss": 1.9378, "step": 7589 }, { "epoch": 0.5713317902105798, "grad_norm": 5.586403846740723, "learning_rate": 4.108158004982214e-05, "loss": 1.9796, "step": 7590 }, { "epoch": 0.5714070644912399, "grad_norm": 4.578375816345215, "learning_rate": 4.106958491971243e-05, "loss": 1.7201, "step": 7591 }, { "epoch": 0.5714823387719001, "grad_norm": 5.858940601348877, "learning_rate": 4.1057590320490254e-05, "loss": 2.0072, "step": 7592 }, { "epoch": 0.5715576130525603, "grad_norm": 4.025996685028076, "learning_rate": 4.104559625286868e-05, "loss": 1.927, "step": 7593 }, { "epoch": 0.5716328873332204, "grad_norm": 5.558964252471924, "learning_rate": 4.103360271756071e-05, "loss": 1.7311, "step": 7594 }, { "epoch": 0.5717081616138806, "grad_norm": 4.400188446044922, "learning_rate": 4.102160971527935e-05, "loss": 2.0643, "step": 7595 }, { "epoch": 0.5717834358945407, "grad_norm": 4.661321640014648, "learning_rate": 4.10096172467375e-05, "loss": 1.6388, "step": 7596 }, { "epoch": 0.5718587101752008, "grad_norm": 5.56693172454834, "learning_rate": 4.099762531264813e-05, "loss": 1.9227, "step": 7597 }, { "epoch": 0.5719339844558611, "grad_norm": 5.320420265197754, "learning_rate": 4.098563391372409e-05, "loss": 2.1256, "step": 7598 }, { "epoch": 0.5720092587365212, "grad_norm": 4.389521598815918, "learning_rate": 4.0973643050678274e-05, "loss": 2.0456, "step": 7599 }, { "epoch": 0.5720845330171813, "grad_norm": 5.7201828956604, "learning_rate": 4.0961652724223456e-05, "loss": 1.8459, "step": 7600 }, { "epoch": 0.5721598072978415, "grad_norm": 6.614492416381836, "learning_rate": 4.094966293507246e-05, "loss": 1.701, "step": 7601 }, { "epoch": 0.5722350815785017, "grad_norm": 5.0776190757751465, "learning_rate": 4.0937673683938026e-05, "loss": 1.8044, "step": 7602 }, { "epoch": 0.5723103558591618, "grad_norm": 4.892368793487549, "learning_rate": 4.0925684971532896e-05, "loss": 1.9371, "step": 7603 }, { "epoch": 0.572385630139822, "grad_norm": 4.289630889892578, "learning_rate": 4.091369679856975e-05, "loss": 1.8379, "step": 7604 }, { "epoch": 0.5724609044204821, "grad_norm": 5.6774773597717285, "learning_rate": 4.090170916576125e-05, "loss": 1.6623, "step": 7605 }, { "epoch": 0.5725361787011423, "grad_norm": 6.10878849029541, "learning_rate": 4.088972207382006e-05, "loss": 2.0404, "step": 7606 }, { "epoch": 0.5726114529818025, "grad_norm": 3.3511202335357666, "learning_rate": 4.087773552345873e-05, "loss": 1.8754, "step": 7607 }, { "epoch": 0.5726867272624626, "grad_norm": 4.990893840789795, "learning_rate": 4.086574951538987e-05, "loss": 1.7503, "step": 7608 }, { "epoch": 0.5727620015431227, "grad_norm": 4.608138561248779, "learning_rate": 4.0853764050325975e-05, "loss": 1.956, "step": 7609 }, { "epoch": 0.5728372758237829, "grad_norm": 5.314117431640625, "learning_rate": 4.084177912897957e-05, "loss": 2.2445, "step": 7610 }, { "epoch": 0.5729125501044431, "grad_norm": 3.822608709335327, "learning_rate": 4.082979475206311e-05, "loss": 1.7419, "step": 7611 }, { "epoch": 0.5729878243851032, "grad_norm": 4.511881351470947, "learning_rate": 4.0817810920289054e-05, "loss": 1.7492, "step": 7612 }, { "epoch": 0.5730630986657633, "grad_norm": 12.96013355255127, "learning_rate": 4.080582763436978e-05, "loss": 2.2388, "step": 7613 }, { "epoch": 0.5731383729464236, "grad_norm": 6.898222923278809, "learning_rate": 4.079384489501768e-05, "loss": 2.2605, "step": 7614 }, { "epoch": 0.5732136472270837, "grad_norm": 6.851225852966309, "learning_rate": 4.078186270294507e-05, "loss": 1.922, "step": 7615 }, { "epoch": 0.5732889215077438, "grad_norm": 4.838808536529541, "learning_rate": 4.07698810588643e-05, "loss": 1.9222, "step": 7616 }, { "epoch": 0.573364195788404, "grad_norm": 3.6850783824920654, "learning_rate": 4.075789996348759e-05, "loss": 1.9528, "step": 7617 }, { "epoch": 0.5734394700690641, "grad_norm": 4.440414905548096, "learning_rate": 4.074591941752724e-05, "loss": 1.6703, "step": 7618 }, { "epoch": 0.5735147443497243, "grad_norm": 5.036422252655029, "learning_rate": 4.073393942169539e-05, "loss": 1.9079, "step": 7619 }, { "epoch": 0.5735900186303845, "grad_norm": 4.969206809997559, "learning_rate": 4.0721959976704274e-05, "loss": 1.9456, "step": 7620 }, { "epoch": 0.5736652929110446, "grad_norm": 4.848805904388428, "learning_rate": 4.0709981083266005e-05, "loss": 2.345, "step": 7621 }, { "epoch": 0.5737405671917047, "grad_norm": 3.982607126235962, "learning_rate": 4.069800274209271e-05, "loss": 1.835, "step": 7622 }, { "epoch": 0.573815841472365, "grad_norm": 5.371452808380127, "learning_rate": 4.0686024953896476e-05, "loss": 1.6242, "step": 7623 }, { "epoch": 0.5738911157530251, "grad_norm": 3.659156322479248, "learning_rate": 4.0674047719389316e-05, "loss": 2.0293, "step": 7624 }, { "epoch": 0.5739663900336852, "grad_norm": 4.546854496002197, "learning_rate": 4.0662071039283266e-05, "loss": 1.6729, "step": 7625 }, { "epoch": 0.5740416643143454, "grad_norm": 5.95877742767334, "learning_rate": 4.065009491429029e-05, "loss": 2.0728, "step": 7626 }, { "epoch": 0.5741169385950056, "grad_norm": 5.082108020782471, "learning_rate": 4.0638119345122367e-05, "loss": 1.9028, "step": 7627 }, { "epoch": 0.5741922128756657, "grad_norm": 4.440121173858643, "learning_rate": 4.0626144332491366e-05, "loss": 1.6467, "step": 7628 }, { "epoch": 0.5742674871563259, "grad_norm": 3.6257505416870117, "learning_rate": 4.061416987710921e-05, "loss": 1.6215, "step": 7629 }, { "epoch": 0.574342761436986, "grad_norm": 4.495459079742432, "learning_rate": 4.0602195979687694e-05, "loss": 1.7879, "step": 7630 }, { "epoch": 0.5744180357176462, "grad_norm": 4.656881332397461, "learning_rate": 4.059022264093868e-05, "loss": 1.9973, "step": 7631 }, { "epoch": 0.5744933099983063, "grad_norm": 6.694538593292236, "learning_rate": 4.057824986157392e-05, "loss": 2.1799, "step": 7632 }, { "epoch": 0.5745685842789665, "grad_norm": 4.634024620056152, "learning_rate": 4.056627764230519e-05, "loss": 1.6009, "step": 7633 }, { "epoch": 0.5746438585596266, "grad_norm": 3.561347484588623, "learning_rate": 4.055430598384416e-05, "loss": 1.7629, "step": 7634 }, { "epoch": 0.5747191328402868, "grad_norm": 5.816922664642334, "learning_rate": 4.0542334886902564e-05, "loss": 2.0131, "step": 7635 }, { "epoch": 0.574794407120947, "grad_norm": 5.384393692016602, "learning_rate": 4.0530364352192e-05, "loss": 1.8412, "step": 7636 }, { "epoch": 0.5748696814016071, "grad_norm": 5.188614368438721, "learning_rate": 4.0518394380424105e-05, "loss": 1.9094, "step": 7637 }, { "epoch": 0.5749449556822672, "grad_norm": 5.6943182945251465, "learning_rate": 4.050642497231047e-05, "loss": 1.7961, "step": 7638 }, { "epoch": 0.5750202299629275, "grad_norm": 4.254000663757324, "learning_rate": 4.0494456128562616e-05, "loss": 1.8426, "step": 7639 }, { "epoch": 0.5750955042435876, "grad_norm": 3.4602999687194824, "learning_rate": 4.048248784989209e-05, "loss": 1.9444, "step": 7640 }, { "epoch": 0.5751707785242477, "grad_norm": 3.9344232082366943, "learning_rate": 4.047052013701034e-05, "loss": 1.6866, "step": 7641 }, { "epoch": 0.5752460528049079, "grad_norm": 4.342310905456543, "learning_rate": 4.045855299062883e-05, "loss": 1.7588, "step": 7642 }, { "epoch": 0.575321327085568, "grad_norm": 4.8720622062683105, "learning_rate": 4.0446586411458955e-05, "loss": 1.6449, "step": 7643 }, { "epoch": 0.5753966013662282, "grad_norm": 5.865235805511475, "learning_rate": 4.0434620400212126e-05, "loss": 1.6086, "step": 7644 }, { "epoch": 0.5754718756468884, "grad_norm": 4.478507995605469, "learning_rate": 4.042265495759965e-05, "loss": 1.4689, "step": 7645 }, { "epoch": 0.5755471499275485, "grad_norm": 5.448455810546875, "learning_rate": 4.041069008433287e-05, "loss": 1.9208, "step": 7646 }, { "epoch": 0.5756224242082086, "grad_norm": 5.161365985870361, "learning_rate": 4.039872578112304e-05, "loss": 1.9861, "step": 7647 }, { "epoch": 0.5756976984888689, "grad_norm": 5.40657901763916, "learning_rate": 4.038676204868142e-05, "loss": 1.7774, "step": 7648 }, { "epoch": 0.575772972769529, "grad_norm": 4.4866557121276855, "learning_rate": 4.03747988877192e-05, "loss": 1.7399, "step": 7649 }, { "epoch": 0.5758482470501891, "grad_norm": 4.824374198913574, "learning_rate": 4.036283629894759e-05, "loss": 1.9157, "step": 7650 }, { "epoch": 0.5759235213308492, "grad_norm": 5.27728271484375, "learning_rate": 4.035087428307769e-05, "loss": 1.5463, "step": 7651 }, { "epoch": 0.5759987956115095, "grad_norm": 6.984309673309326, "learning_rate": 4.033891284082064e-05, "loss": 1.8198, "step": 7652 }, { "epoch": 0.5760740698921696, "grad_norm": 4.725070953369141, "learning_rate": 4.032695197288748e-05, "loss": 1.8195, "step": 7653 }, { "epoch": 0.5761493441728297, "grad_norm": 4.154239177703857, "learning_rate": 4.0314991679989286e-05, "loss": 1.8958, "step": 7654 }, { "epoch": 0.5762246184534899, "grad_norm": 4.55750846862793, "learning_rate": 4.030303196283706e-05, "loss": 1.9958, "step": 7655 }, { "epoch": 0.57629989273415, "grad_norm": 5.894731044769287, "learning_rate": 4.029107282214174e-05, "loss": 2.026, "step": 7656 }, { "epoch": 0.5763751670148102, "grad_norm": 5.585499286651611, "learning_rate": 4.02791142586143e-05, "loss": 1.973, "step": 7657 }, { "epoch": 0.5764504412954704, "grad_norm": 4.196101188659668, "learning_rate": 4.026715627296561e-05, "loss": 1.8333, "step": 7658 }, { "epoch": 0.5765257155761305, "grad_norm": 5.871015548706055, "learning_rate": 4.025519886590656e-05, "loss": 2.0052, "step": 7659 }, { "epoch": 0.5766009898567906, "grad_norm": 5.82728910446167, "learning_rate": 4.024324203814797e-05, "loss": 1.9099, "step": 7660 }, { "epoch": 0.5766762641374509, "grad_norm": 4.0447821617126465, "learning_rate": 4.023128579040066e-05, "loss": 2.0021, "step": 7661 }, { "epoch": 0.576751538418111, "grad_norm": 3.868428945541382, "learning_rate": 4.021933012337536e-05, "loss": 1.8261, "step": 7662 }, { "epoch": 0.5768268126987711, "grad_norm": 5.3470354080200195, "learning_rate": 4.020737503778284e-05, "loss": 2.1472, "step": 7663 }, { "epoch": 0.5769020869794313, "grad_norm": 4.169758319854736, "learning_rate": 4.019542053433376e-05, "loss": 1.6341, "step": 7664 }, { "epoch": 0.5769773612600915, "grad_norm": 5.550473690032959, "learning_rate": 4.018346661373881e-05, "loss": 2.0297, "step": 7665 }, { "epoch": 0.5770526355407516, "grad_norm": 4.343144416809082, "learning_rate": 4.017151327670858e-05, "loss": 1.7207, "step": 7666 }, { "epoch": 0.5771279098214118, "grad_norm": 3.7713727951049805, "learning_rate": 4.0159560523953715e-05, "loss": 1.7291, "step": 7667 }, { "epoch": 0.5772031841020719, "grad_norm": 5.782696723937988, "learning_rate": 4.014760835618472e-05, "loss": 1.635, "step": 7668 }, { "epoch": 0.5772784583827321, "grad_norm": 5.870705604553223, "learning_rate": 4.013565677411214e-05, "loss": 1.7758, "step": 7669 }, { "epoch": 0.5773537326633922, "grad_norm": 4.923253536224365, "learning_rate": 4.0123705778446464e-05, "loss": 1.8138, "step": 7670 }, { "epoch": 0.5774290069440524, "grad_norm": 5.282963752746582, "learning_rate": 4.011175536989814e-05, "loss": 1.9698, "step": 7671 }, { "epoch": 0.5775042812247125, "grad_norm": 5.4413933753967285, "learning_rate": 4.00998055491776e-05, "loss": 1.7254, "step": 7672 }, { "epoch": 0.5775795555053727, "grad_norm": 5.781665802001953, "learning_rate": 4.0087856316995195e-05, "loss": 2.0015, "step": 7673 }, { "epoch": 0.5776548297860329, "grad_norm": 4.89331579208374, "learning_rate": 4.007590767406131e-05, "loss": 2.1218, "step": 7674 }, { "epoch": 0.577730104066693, "grad_norm": 4.19381856918335, "learning_rate": 4.006395962108622e-05, "loss": 1.7952, "step": 7675 }, { "epoch": 0.5778053783473531, "grad_norm": 4.383309841156006, "learning_rate": 4.005201215878023e-05, "loss": 1.5965, "step": 7676 }, { "epoch": 0.5778806526280134, "grad_norm": 4.509468078613281, "learning_rate": 4.004006528785357e-05, "loss": 1.8957, "step": 7677 }, { "epoch": 0.5779559269086735, "grad_norm": 5.223258972167969, "learning_rate": 4.0028119009016465e-05, "loss": 1.6935, "step": 7678 }, { "epoch": 0.5780312011893336, "grad_norm": 4.945888996124268, "learning_rate": 4.001617332297905e-05, "loss": 1.6292, "step": 7679 }, { "epoch": 0.5781064754699938, "grad_norm": 5.944561958312988, "learning_rate": 4.00042282304515e-05, "loss": 2.3109, "step": 7680 }, { "epoch": 0.578181749750654, "grad_norm": 5.593074321746826, "learning_rate": 3.999228373214388e-05, "loss": 1.9073, "step": 7681 }, { "epoch": 0.5782570240313141, "grad_norm": 6.466760635375977, "learning_rate": 3.99803398287663e-05, "loss": 1.9025, "step": 7682 }, { "epoch": 0.5783322983119743, "grad_norm": 3.901648998260498, "learning_rate": 3.9968396521028754e-05, "loss": 1.5699, "step": 7683 }, { "epoch": 0.5784075725926344, "grad_norm": 4.755966663360596, "learning_rate": 3.995645380964127e-05, "loss": 1.9468, "step": 7684 }, { "epoch": 0.5784828468732945, "grad_norm": 4.47395658493042, "learning_rate": 3.994451169531376e-05, "loss": 1.7155, "step": 7685 }, { "epoch": 0.5785581211539548, "grad_norm": 5.608683109283447, "learning_rate": 3.993257017875619e-05, "loss": 1.8306, "step": 7686 }, { "epoch": 0.5786333954346149, "grad_norm": 4.782827854156494, "learning_rate": 3.992062926067844e-05, "loss": 2.1272, "step": 7687 }, { "epoch": 0.578708669715275, "grad_norm": 6.436601161956787, "learning_rate": 3.990868894179035e-05, "loss": 2.0505, "step": 7688 }, { "epoch": 0.5787839439959351, "grad_norm": 4.658920764923096, "learning_rate": 3.9896749222801765e-05, "loss": 2.0737, "step": 7689 }, { "epoch": 0.5788592182765954, "grad_norm": 3.5421383380889893, "learning_rate": 3.9884810104422434e-05, "loss": 1.906, "step": 7690 }, { "epoch": 0.5789344925572555, "grad_norm": 4.739721775054932, "learning_rate": 3.987287158736213e-05, "loss": 1.9066, "step": 7691 }, { "epoch": 0.5790097668379156, "grad_norm": 4.567431449890137, "learning_rate": 3.986093367233054e-05, "loss": 2.7803, "step": 7692 }, { "epoch": 0.5790850411185758, "grad_norm": 6.311440944671631, "learning_rate": 3.984899636003736e-05, "loss": 2.198, "step": 7693 }, { "epoch": 0.579160315399236, "grad_norm": 3.944124698638916, "learning_rate": 3.983705965119221e-05, "loss": 2.3358, "step": 7694 }, { "epoch": 0.5792355896798961, "grad_norm": 5.065308570861816, "learning_rate": 3.982512354650472e-05, "loss": 1.7071, "step": 7695 }, { "epoch": 0.5793108639605563, "grad_norm": 8.038674354553223, "learning_rate": 3.981318804668442e-05, "loss": 2.0069, "step": 7696 }, { "epoch": 0.5793861382412164, "grad_norm": 4.614602088928223, "learning_rate": 3.980125315244088e-05, "loss": 1.814, "step": 7697 }, { "epoch": 0.5794614125218766, "grad_norm": 6.746983528137207, "learning_rate": 3.978931886448355e-05, "loss": 1.82, "step": 7698 }, { "epoch": 0.5795366868025368, "grad_norm": 3.966038227081299, "learning_rate": 3.977738518352195e-05, "loss": 1.4564, "step": 7699 }, { "epoch": 0.5796119610831969, "grad_norm": 5.068717002868652, "learning_rate": 3.976545211026545e-05, "loss": 1.5265, "step": 7700 }, { "epoch": 0.579687235363857, "grad_norm": 5.492524147033691, "learning_rate": 3.975351964542346e-05, "loss": 2.0166, "step": 7701 }, { "epoch": 0.5797625096445173, "grad_norm": 4.053000450134277, "learning_rate": 3.974158778970532e-05, "loss": 2.267, "step": 7702 }, { "epoch": 0.5798377839251774, "grad_norm": 4.663575649261475, "learning_rate": 3.9729656543820346e-05, "loss": 1.7, "step": 7703 }, { "epoch": 0.5799130582058375, "grad_norm": 5.478631019592285, "learning_rate": 3.971772590847783e-05, "loss": 1.8409, "step": 7704 }, { "epoch": 0.5799883324864977, "grad_norm": 3.9644811153411865, "learning_rate": 3.970579588438701e-05, "loss": 1.8415, "step": 7705 }, { "epoch": 0.5800636067671578, "grad_norm": 3.0490872859954834, "learning_rate": 3.969386647225709e-05, "loss": 2.3869, "step": 7706 }, { "epoch": 0.580138881047818, "grad_norm": 3.567423105239868, "learning_rate": 3.968193767279722e-05, "loss": 2.0545, "step": 7707 }, { "epoch": 0.5802141553284781, "grad_norm": 6.265186309814453, "learning_rate": 3.9670009486716566e-05, "loss": 1.9483, "step": 7708 }, { "epoch": 0.5802894296091383, "grad_norm": 4.558940410614014, "learning_rate": 3.96580819147242e-05, "loss": 1.8082, "step": 7709 }, { "epoch": 0.5803647038897984, "grad_norm": 4.768979549407959, "learning_rate": 3.9646154957529196e-05, "loss": 2.0926, "step": 7710 }, { "epoch": 0.5804399781704586, "grad_norm": 5.609062671661377, "learning_rate": 3.963422861584056e-05, "loss": 1.5971, "step": 7711 }, { "epoch": 0.5805152524511188, "grad_norm": 6.203240394592285, "learning_rate": 3.9622302890367316e-05, "loss": 1.811, "step": 7712 }, { "epoch": 0.5805905267317789, "grad_norm": 5.027291774749756, "learning_rate": 3.9610377781818356e-05, "loss": 2.0047, "step": 7713 }, { "epoch": 0.580665801012439, "grad_norm": 5.699218273162842, "learning_rate": 3.9598453290902646e-05, "loss": 1.6443, "step": 7714 }, { "epoch": 0.5807410752930993, "grad_norm": 4.884763717651367, "learning_rate": 3.9586529418329024e-05, "loss": 1.8848, "step": 7715 }, { "epoch": 0.5808163495737594, "grad_norm": 4.573393821716309, "learning_rate": 3.957460616480637e-05, "loss": 1.9472, "step": 7716 }, { "epoch": 0.5808916238544195, "grad_norm": 4.217409133911133, "learning_rate": 3.956268353104346e-05, "loss": 1.9957, "step": 7717 }, { "epoch": 0.5809668981350797, "grad_norm": 4.545864582061768, "learning_rate": 3.9550761517749046e-05, "loss": 1.8175, "step": 7718 }, { "epoch": 0.5810421724157399, "grad_norm": 4.692615032196045, "learning_rate": 3.9538840125631904e-05, "loss": 1.9325, "step": 7719 }, { "epoch": 0.5811174466964, "grad_norm": 7.264098167419434, "learning_rate": 3.952691935540068e-05, "loss": 2.0231, "step": 7720 }, { "epoch": 0.5811927209770602, "grad_norm": 5.382420063018799, "learning_rate": 3.9514999207764064e-05, "loss": 2.099, "step": 7721 }, { "epoch": 0.5812679952577203, "grad_norm": 8.596435546875, "learning_rate": 3.950307968343065e-05, "loss": 2.2097, "step": 7722 }, { "epoch": 0.5813432695383804, "grad_norm": 6.394593238830566, "learning_rate": 3.949116078310905e-05, "loss": 2.124, "step": 7723 }, { "epoch": 0.5814185438190407, "grad_norm": 5.426838397979736, "learning_rate": 3.947924250750776e-05, "loss": 2.133, "step": 7724 }, { "epoch": 0.5814938180997008, "grad_norm": 5.8588151931762695, "learning_rate": 3.946732485733533e-05, "loss": 1.772, "step": 7725 }, { "epoch": 0.5815690923803609, "grad_norm": 4.996241092681885, "learning_rate": 3.9455407833300206e-05, "loss": 1.612, "step": 7726 }, { "epoch": 0.5816443666610212, "grad_norm": 4.909712791442871, "learning_rate": 3.9443491436110856e-05, "loss": 2.071, "step": 7727 }, { "epoch": 0.5817196409416813, "grad_norm": 4.151638507843018, "learning_rate": 3.9431575666475625e-05, "loss": 1.8044, "step": 7728 }, { "epoch": 0.5817949152223414, "grad_norm": 4.3523406982421875, "learning_rate": 3.941966052510292e-05, "loss": 2.0515, "step": 7729 }, { "epoch": 0.5818701895030015, "grad_norm": 6.174666404724121, "learning_rate": 3.940774601270101e-05, "loss": 1.9962, "step": 7730 }, { "epoch": 0.5819454637836617, "grad_norm": 5.057032108306885, "learning_rate": 3.939583212997823e-05, "loss": 1.85, "step": 7731 }, { "epoch": 0.5820207380643219, "grad_norm": 4.086638450622559, "learning_rate": 3.938391887764278e-05, "loss": 1.7365, "step": 7732 }, { "epoch": 0.582096012344982, "grad_norm": 4.138251781463623, "learning_rate": 3.9372006256402916e-05, "loss": 1.6448, "step": 7733 }, { "epoch": 0.5821712866256422, "grad_norm": 4.54074239730835, "learning_rate": 3.9360094266966767e-05, "loss": 1.9168, "step": 7734 }, { "epoch": 0.5822465609063023, "grad_norm": 4.207004070281982, "learning_rate": 3.934818291004248e-05, "loss": 1.7292, "step": 7735 }, { "epoch": 0.5823218351869625, "grad_norm": 4.732818603515625, "learning_rate": 3.933627218633818e-05, "loss": 2.0884, "step": 7736 }, { "epoch": 0.5823971094676227, "grad_norm": 5.5070085525512695, "learning_rate": 3.9324362096561887e-05, "loss": 1.8961, "step": 7737 }, { "epoch": 0.5824723837482828, "grad_norm": 4.441492557525635, "learning_rate": 3.931245264142163e-05, "loss": 2.054, "step": 7738 }, { "epoch": 0.5825476580289429, "grad_norm": 5.317041397094727, "learning_rate": 3.93005438216254e-05, "loss": 1.9607, "step": 7739 }, { "epoch": 0.5826229323096032, "grad_norm": 4.862093925476074, "learning_rate": 3.928863563788116e-05, "loss": 1.7507, "step": 7740 }, { "epoch": 0.5826982065902633, "grad_norm": 4.7248029708862305, "learning_rate": 3.9276728090896765e-05, "loss": 1.7736, "step": 7741 }, { "epoch": 0.5827734808709234, "grad_norm": 4.251784324645996, "learning_rate": 3.926482118138013e-05, "loss": 1.8955, "step": 7742 }, { "epoch": 0.5828487551515836, "grad_norm": 6.493978977203369, "learning_rate": 3.9252914910039076e-05, "loss": 1.9545, "step": 7743 }, { "epoch": 0.5829240294322438, "grad_norm": 4.953982830047607, "learning_rate": 3.92410092775814e-05, "loss": 1.5539, "step": 7744 }, { "epoch": 0.5829993037129039, "grad_norm": 3.9922866821289062, "learning_rate": 3.9229104284714836e-05, "loss": 2.0885, "step": 7745 }, { "epoch": 0.5830745779935641, "grad_norm": 4.021296501159668, "learning_rate": 3.921719993214714e-05, "loss": 1.78, "step": 7746 }, { "epoch": 0.5831498522742242, "grad_norm": 3.900331735610962, "learning_rate": 3.9205296220585944e-05, "loss": 1.907, "step": 7747 }, { "epoch": 0.5832251265548843, "grad_norm": 6.634807586669922, "learning_rate": 3.919339315073893e-05, "loss": 1.8327, "step": 7748 }, { "epoch": 0.5833004008355445, "grad_norm": 9.16468620300293, "learning_rate": 3.918149072331367e-05, "loss": 2.2514, "step": 7749 }, { "epoch": 0.5833756751162047, "grad_norm": 6.211683750152588, "learning_rate": 3.916958893901774e-05, "loss": 1.7381, "step": 7750 }, { "epoch": 0.5834509493968648, "grad_norm": 4.504829406738281, "learning_rate": 3.9157687798558706e-05, "loss": 1.8975, "step": 7751 }, { "epoch": 0.5835262236775249, "grad_norm": 4.639247894287109, "learning_rate": 3.9145787302643996e-05, "loss": 1.814, "step": 7752 }, { "epoch": 0.5836014979581852, "grad_norm": 3.7421114444732666, "learning_rate": 3.9133887451981096e-05, "loss": 1.8407, "step": 7753 }, { "epoch": 0.5836767722388453, "grad_norm": 4.946784496307373, "learning_rate": 3.9121988247277394e-05, "loss": 1.8168, "step": 7754 }, { "epoch": 0.5837520465195054, "grad_norm": 5.382870197296143, "learning_rate": 3.9110089689240304e-05, "loss": 1.776, "step": 7755 }, { "epoch": 0.5838273208001656, "grad_norm": 5.082200050354004, "learning_rate": 3.9098191778577106e-05, "loss": 1.6497, "step": 7756 }, { "epoch": 0.5839025950808258, "grad_norm": 3.401374340057373, "learning_rate": 3.908629451599516e-05, "loss": 2.0001, "step": 7757 }, { "epoch": 0.5839778693614859, "grad_norm": 4.333251953125, "learning_rate": 3.907439790220166e-05, "loss": 1.8119, "step": 7758 }, { "epoch": 0.5840531436421461, "grad_norm": 4.623253345489502, "learning_rate": 3.906250193790387e-05, "loss": 1.9465, "step": 7759 }, { "epoch": 0.5841284179228062, "grad_norm": 5.91024112701416, "learning_rate": 3.9050606623808945e-05, "loss": 1.8, "step": 7760 }, { "epoch": 0.5842036922034664, "grad_norm": 5.61050271987915, "learning_rate": 3.903871196062406e-05, "loss": 1.6253, "step": 7761 }, { "epoch": 0.5842789664841266, "grad_norm": 5.143840312957764, "learning_rate": 3.9026817949056274e-05, "loss": 1.6732, "step": 7762 }, { "epoch": 0.5843542407647867, "grad_norm": 4.222826957702637, "learning_rate": 3.90149245898127e-05, "loss": 2.1959, "step": 7763 }, { "epoch": 0.5844295150454468, "grad_norm": 3.9595866203308105, "learning_rate": 3.900303188360032e-05, "loss": 1.7966, "step": 7764 }, { "epoch": 0.584504789326107, "grad_norm": 5.239078521728516, "learning_rate": 3.899113983112614e-05, "loss": 2.0547, "step": 7765 }, { "epoch": 0.5845800636067672, "grad_norm": 5.827862739562988, "learning_rate": 3.897924843309711e-05, "loss": 2.3592, "step": 7766 }, { "epoch": 0.5846553378874273, "grad_norm": 6.119013786315918, "learning_rate": 3.896735769022014e-05, "loss": 1.9863, "step": 7767 }, { "epoch": 0.5847306121680874, "grad_norm": 5.658279895782471, "learning_rate": 3.895546760320212e-05, "loss": 1.6501, "step": 7768 }, { "epoch": 0.5848058864487476, "grad_norm": 5.072042465209961, "learning_rate": 3.8943578172749826e-05, "loss": 1.6072, "step": 7769 }, { "epoch": 0.5848811607294078, "grad_norm": 6.512777328491211, "learning_rate": 3.893168939957011e-05, "loss": 1.8617, "step": 7770 }, { "epoch": 0.5849564350100679, "grad_norm": 4.064201354980469, "learning_rate": 3.8919801284369685e-05, "loss": 1.631, "step": 7771 }, { "epoch": 0.5850317092907281, "grad_norm": 4.377372741699219, "learning_rate": 3.8907913827855304e-05, "loss": 2.0552, "step": 7772 }, { "epoch": 0.5851069835713882, "grad_norm": 7.75787353515625, "learning_rate": 3.8896027030733597e-05, "loss": 1.8891, "step": 7773 }, { "epoch": 0.5851822578520484, "grad_norm": 6.155920505523682, "learning_rate": 3.888414089371124e-05, "loss": 1.9693, "step": 7774 }, { "epoch": 0.5852575321327086, "grad_norm": 4.713784217834473, "learning_rate": 3.8872255417494795e-05, "loss": 1.3946, "step": 7775 }, { "epoch": 0.5853328064133687, "grad_norm": 6.909249782562256, "learning_rate": 3.8860370602790844e-05, "loss": 1.8722, "step": 7776 }, { "epoch": 0.5854080806940288, "grad_norm": 6.590963840484619, "learning_rate": 3.884848645030589e-05, "loss": 1.9261, "step": 7777 }, { "epoch": 0.5854833549746891, "grad_norm": 6.702365875244141, "learning_rate": 3.8836602960746435e-05, "loss": 1.9424, "step": 7778 }, { "epoch": 0.5855586292553492, "grad_norm": 4.4740729331970215, "learning_rate": 3.882472013481889e-05, "loss": 1.7071, "step": 7779 }, { "epoch": 0.5856339035360093, "grad_norm": 3.27465558052063, "learning_rate": 3.881283797322967e-05, "loss": 1.7021, "step": 7780 }, { "epoch": 0.5857091778166695, "grad_norm": 4.187403202056885, "learning_rate": 3.880095647668512e-05, "loss": 1.8652, "step": 7781 }, { "epoch": 0.5857844520973297, "grad_norm": 4.300467014312744, "learning_rate": 3.878907564589158e-05, "loss": 1.8324, "step": 7782 }, { "epoch": 0.5858597263779898, "grad_norm": 5.975734710693359, "learning_rate": 3.877719548155535e-05, "loss": 2.1406, "step": 7783 }, { "epoch": 0.58593500065865, "grad_norm": 5.079812526702881, "learning_rate": 3.876531598438262e-05, "loss": 1.6022, "step": 7784 }, { "epoch": 0.5860102749393101, "grad_norm": 5.078145980834961, "learning_rate": 3.875343715507964e-05, "loss": 2.1089, "step": 7785 }, { "epoch": 0.5860855492199702, "grad_norm": 4.834609508514404, "learning_rate": 3.8741558994352536e-05, "loss": 1.9356, "step": 7786 }, { "epoch": 0.5861608235006304, "grad_norm": 4.924367904663086, "learning_rate": 3.872968150290745e-05, "loss": 2.1639, "step": 7787 }, { "epoch": 0.5862360977812906, "grad_norm": 5.167743682861328, "learning_rate": 3.8717804681450456e-05, "loss": 1.7953, "step": 7788 }, { "epoch": 0.5863113720619507, "grad_norm": 6.30468225479126, "learning_rate": 3.870592853068761e-05, "loss": 2.0109, "step": 7789 }, { "epoch": 0.5863866463426108, "grad_norm": 5.480498790740967, "learning_rate": 3.8694053051324895e-05, "loss": 1.9012, "step": 7790 }, { "epoch": 0.5864619206232711, "grad_norm": 5.345117568969727, "learning_rate": 3.868217824406831e-05, "loss": 1.9743, "step": 7791 }, { "epoch": 0.5865371949039312, "grad_norm": 4.603163719177246, "learning_rate": 3.867030410962372e-05, "loss": 1.5296, "step": 7792 }, { "epoch": 0.5866124691845913, "grad_norm": 5.377043724060059, "learning_rate": 3.865843064869706e-05, "loss": 1.8706, "step": 7793 }, { "epoch": 0.5866877434652515, "grad_norm": 4.131460189819336, "learning_rate": 3.864655786199415e-05, "loss": 1.9028, "step": 7794 }, { "epoch": 0.5867630177459117, "grad_norm": 4.421370506286621, "learning_rate": 3.863468575022081e-05, "loss": 1.5959, "step": 7795 }, { "epoch": 0.5868382920265718, "grad_norm": 4.667974948883057, "learning_rate": 3.8622814314082775e-05, "loss": 1.8764, "step": 7796 }, { "epoch": 0.586913566307232, "grad_norm": 4.5249457359313965, "learning_rate": 3.86109435542858e-05, "loss": 1.6356, "step": 7797 }, { "epoch": 0.5869888405878921, "grad_norm": 6.392907619476318, "learning_rate": 3.859907347153554e-05, "loss": 2.0216, "step": 7798 }, { "epoch": 0.5870641148685523, "grad_norm": 4.534290313720703, "learning_rate": 3.858720406653764e-05, "loss": 1.9153, "step": 7799 }, { "epoch": 0.5871393891492125, "grad_norm": 4.514787197113037, "learning_rate": 3.8575335339997736e-05, "loss": 1.891, "step": 7800 }, { "epoch": 0.5872146634298726, "grad_norm": 5.686667442321777, "learning_rate": 3.856346729262135e-05, "loss": 1.8746, "step": 7801 }, { "epoch": 0.5872899377105327, "grad_norm": 6.384697437286377, "learning_rate": 3.855159992511403e-05, "loss": 1.6886, "step": 7802 }, { "epoch": 0.587365211991193, "grad_norm": 4.876718044281006, "learning_rate": 3.853973323818123e-05, "loss": 2.2076, "step": 7803 }, { "epoch": 0.5874404862718531, "grad_norm": 6.946642875671387, "learning_rate": 3.852786723252842e-05, "loss": 2.4625, "step": 7804 }, { "epoch": 0.5875157605525132, "grad_norm": 3.707252264022827, "learning_rate": 3.851600190886096e-05, "loss": 1.5427, "step": 7805 }, { "epoch": 0.5875910348331733, "grad_norm": 4.963434219360352, "learning_rate": 3.850413726788428e-05, "loss": 1.717, "step": 7806 }, { "epoch": 0.5876663091138336, "grad_norm": 4.309384822845459, "learning_rate": 3.849227331030362e-05, "loss": 1.7033, "step": 7807 }, { "epoch": 0.5877415833944937, "grad_norm": 3.8997962474823, "learning_rate": 3.84804100368243e-05, "loss": 2.0988, "step": 7808 }, { "epoch": 0.5878168576751538, "grad_norm": 4.916024684906006, "learning_rate": 3.8468547448151554e-05, "loss": 1.7303, "step": 7809 }, { "epoch": 0.587892131955814, "grad_norm": 4.8505988121032715, "learning_rate": 3.845668554499057e-05, "loss": 2.0689, "step": 7810 }, { "epoch": 0.5879674062364741, "grad_norm": 4.44301700592041, "learning_rate": 3.84448243280465e-05, "loss": 1.4431, "step": 7811 }, { "epoch": 0.5880426805171343, "grad_norm": 4.837834358215332, "learning_rate": 3.84329637980245e-05, "loss": 1.896, "step": 7812 }, { "epoch": 0.5881179547977945, "grad_norm": 4.032279968261719, "learning_rate": 3.8421103955629576e-05, "loss": 1.9865, "step": 7813 }, { "epoch": 0.5881932290784546, "grad_norm": 5.82047176361084, "learning_rate": 3.840924480156682e-05, "loss": 2.1286, "step": 7814 }, { "epoch": 0.5882685033591147, "grad_norm": 5.075139045715332, "learning_rate": 3.839738633654119e-05, "loss": 1.9871, "step": 7815 }, { "epoch": 0.588343777639775, "grad_norm": 4.9096856117248535, "learning_rate": 3.838552856125764e-05, "loss": 1.7693, "step": 7816 }, { "epoch": 0.5884190519204351, "grad_norm": 4.260968208312988, "learning_rate": 3.8373671476421124e-05, "loss": 1.7235, "step": 7817 }, { "epoch": 0.5884943262010952, "grad_norm": 7.050853729248047, "learning_rate": 3.836181508273646e-05, "loss": 2.1274, "step": 7818 }, { "epoch": 0.5885696004817554, "grad_norm": 11.854440689086914, "learning_rate": 3.834995938090851e-05, "loss": 2.3924, "step": 7819 }, { "epoch": 0.5886448747624156, "grad_norm": 5.542457580566406, "learning_rate": 3.8338104371642023e-05, "loss": 1.9113, "step": 7820 }, { "epoch": 0.5887201490430757, "grad_norm": 4.369715213775635, "learning_rate": 3.8326250055641787e-05, "loss": 1.7374, "step": 7821 }, { "epoch": 0.5887954233237359, "grad_norm": 3.8842902183532715, "learning_rate": 3.831439643361248e-05, "loss": 1.6356, "step": 7822 }, { "epoch": 0.588870697604396, "grad_norm": 4.105565071105957, "learning_rate": 3.830254350625879e-05, "loss": 1.7974, "step": 7823 }, { "epoch": 0.5889459718850562, "grad_norm": 4.933945655822754, "learning_rate": 3.829069127428531e-05, "loss": 1.9895, "step": 7824 }, { "epoch": 0.5890212461657164, "grad_norm": 5.1834001541137695, "learning_rate": 3.8278839738396645e-05, "loss": 2.1483, "step": 7825 }, { "epoch": 0.5890965204463765, "grad_norm": 3.823296308517456, "learning_rate": 3.826698889929731e-05, "loss": 1.8366, "step": 7826 }, { "epoch": 0.5891717947270366, "grad_norm": 4.330671310424805, "learning_rate": 3.8255138757691845e-05, "loss": 1.955, "step": 7827 }, { "epoch": 0.5892470690076967, "grad_norm": 5.462123870849609, "learning_rate": 3.824328931428466e-05, "loss": 2.1855, "step": 7828 }, { "epoch": 0.589322343288357, "grad_norm": 5.613857269287109, "learning_rate": 3.823144056978021e-05, "loss": 1.9632, "step": 7829 }, { "epoch": 0.5893976175690171, "grad_norm": 6.444812297821045, "learning_rate": 3.821959252488283e-05, "loss": 1.8918, "step": 7830 }, { "epoch": 0.5894728918496772, "grad_norm": 5.26508092880249, "learning_rate": 3.820774518029687e-05, "loss": 1.9225, "step": 7831 }, { "epoch": 0.5895481661303374, "grad_norm": 7.023900508880615, "learning_rate": 3.819589853672664e-05, "loss": 2.4601, "step": 7832 }, { "epoch": 0.5896234404109976, "grad_norm": 5.7562432289123535, "learning_rate": 3.818405259487636e-05, "loss": 1.7443, "step": 7833 }, { "epoch": 0.5896987146916577, "grad_norm": 6.4446821212768555, "learning_rate": 3.817220735545027e-05, "loss": 2.5144, "step": 7834 }, { "epoch": 0.5897739889723179, "grad_norm": 13.31273078918457, "learning_rate": 3.8160362819152496e-05, "loss": 2.0518, "step": 7835 }, { "epoch": 0.589849263252978, "grad_norm": 4.9524455070495605, "learning_rate": 3.81485189866872e-05, "loss": 1.9837, "step": 7836 }, { "epoch": 0.5899245375336382, "grad_norm": 5.590907096862793, "learning_rate": 3.813667585875842e-05, "loss": 1.7155, "step": 7837 }, { "epoch": 0.5899998118142984, "grad_norm": 5.180372714996338, "learning_rate": 3.812483343607023e-05, "loss": 1.8015, "step": 7838 }, { "epoch": 0.5900750860949585, "grad_norm": 6.319912433624268, "learning_rate": 3.811299171932661e-05, "loss": 2.0784, "step": 7839 }, { "epoch": 0.5901503603756186, "grad_norm": 4.384718418121338, "learning_rate": 3.8101150709231546e-05, "loss": 1.7121, "step": 7840 }, { "epoch": 0.5902256346562789, "grad_norm": 4.356826305389404, "learning_rate": 3.808931040648892e-05, "loss": 1.9065, "step": 7841 }, { "epoch": 0.590300908936939, "grad_norm": 6.731471538543701, "learning_rate": 3.807747081180261e-05, "loss": 1.6769, "step": 7842 }, { "epoch": 0.5903761832175991, "grad_norm": 6.328938007354736, "learning_rate": 3.806563192587645e-05, "loss": 2.0173, "step": 7843 }, { "epoch": 0.5904514574982593, "grad_norm": 4.392867088317871, "learning_rate": 3.805379374941425e-05, "loss": 1.7314, "step": 7844 }, { "epoch": 0.5905267317789195, "grad_norm": 3.661583662033081, "learning_rate": 3.80419562831197e-05, "loss": 1.7148, "step": 7845 }, { "epoch": 0.5906020060595796, "grad_norm": 5.592376232147217, "learning_rate": 3.803011952769657e-05, "loss": 2.2591, "step": 7846 }, { "epoch": 0.5906772803402397, "grad_norm": 6.500184059143066, "learning_rate": 3.8018283483848464e-05, "loss": 1.897, "step": 7847 }, { "epoch": 0.5907525546208999, "grad_norm": 5.537911415100098, "learning_rate": 3.800644815227903e-05, "loss": 1.8472, "step": 7848 }, { "epoch": 0.59082782890156, "grad_norm": 4.041790962219238, "learning_rate": 3.7994613533691844e-05, "loss": 1.925, "step": 7849 }, { "epoch": 0.5909031031822202, "grad_norm": 4.398980617523193, "learning_rate": 3.798277962879043e-05, "loss": 1.9459, "step": 7850 }, { "epoch": 0.5909783774628804, "grad_norm": 4.110594272613525, "learning_rate": 3.7970946438278305e-05, "loss": 1.8032, "step": 7851 }, { "epoch": 0.5910536517435405, "grad_norm": 6.38279390335083, "learning_rate": 3.795911396285888e-05, "loss": 1.9273, "step": 7852 }, { "epoch": 0.5911289260242006, "grad_norm": 4.685363292694092, "learning_rate": 3.794728220323559e-05, "loss": 1.6711, "step": 7853 }, { "epoch": 0.5912042003048609, "grad_norm": 4.361425399780273, "learning_rate": 3.7935451160111774e-05, "loss": 1.9887, "step": 7854 }, { "epoch": 0.591279474585521, "grad_norm": 3.8905081748962402, "learning_rate": 3.79236208341908e-05, "loss": 1.9184, "step": 7855 }, { "epoch": 0.5913547488661811, "grad_norm": 5.164790630340576, "learning_rate": 3.791179122617588e-05, "loss": 2.1106, "step": 7856 }, { "epoch": 0.5914300231468413, "grad_norm": 5.259014129638672, "learning_rate": 3.789996233677032e-05, "loss": 2.2265, "step": 7857 }, { "epoch": 0.5915052974275015, "grad_norm": 4.083639621734619, "learning_rate": 3.788813416667727e-05, "loss": 1.9156, "step": 7858 }, { "epoch": 0.5915805717081616, "grad_norm": 5.092536449432373, "learning_rate": 3.787630671659989e-05, "loss": 1.9323, "step": 7859 }, { "epoch": 0.5916558459888218, "grad_norm": 5.620119094848633, "learning_rate": 3.7864479987241266e-05, "loss": 1.6787, "step": 7860 }, { "epoch": 0.5917311202694819, "grad_norm": 5.034956932067871, "learning_rate": 3.785265397930452e-05, "loss": 1.7319, "step": 7861 }, { "epoch": 0.5918063945501421, "grad_norm": 4.173698425292969, "learning_rate": 3.784082869349262e-05, "loss": 1.845, "step": 7862 }, { "epoch": 0.5918816688308023, "grad_norm": 5.740963459014893, "learning_rate": 3.7829004130508564e-05, "loss": 2.0568, "step": 7863 }, { "epoch": 0.5919569431114624, "grad_norm": 8.077258110046387, "learning_rate": 3.781718029105531e-05, "loss": 2.085, "step": 7864 }, { "epoch": 0.5920322173921225, "grad_norm": 4.321019649505615, "learning_rate": 3.780535717583571e-05, "loss": 2.1147, "step": 7865 }, { "epoch": 0.5921074916727826, "grad_norm": 4.551716327667236, "learning_rate": 3.779353478555265e-05, "loss": 1.7932, "step": 7866 }, { "epoch": 0.5921827659534429, "grad_norm": 5.301575660705566, "learning_rate": 3.7781713120908905e-05, "loss": 1.842, "step": 7867 }, { "epoch": 0.592258040234103, "grad_norm": 4.060632228851318, "learning_rate": 3.776989218260728e-05, "loss": 1.7008, "step": 7868 }, { "epoch": 0.5923333145147631, "grad_norm": 4.6447014808654785, "learning_rate": 3.7758071971350456e-05, "loss": 1.7512, "step": 7869 }, { "epoch": 0.5924085887954234, "grad_norm": 4.044189929962158, "learning_rate": 3.7746252487841135e-05, "loss": 1.7419, "step": 7870 }, { "epoch": 0.5924838630760835, "grad_norm": 5.584356784820557, "learning_rate": 3.773443373278193e-05, "loss": 1.987, "step": 7871 }, { "epoch": 0.5925591373567436, "grad_norm": 6.960014343261719, "learning_rate": 3.7722615706875474e-05, "loss": 1.8299, "step": 7872 }, { "epoch": 0.5926344116374038, "grad_norm": 4.694604396820068, "learning_rate": 3.771079841082426e-05, "loss": 1.5138, "step": 7873 }, { "epoch": 0.5927096859180639, "grad_norm": 5.120667457580566, "learning_rate": 3.769898184533084e-05, "loss": 2.014, "step": 7874 }, { "epoch": 0.5927849601987241, "grad_norm": 3.970510244369507, "learning_rate": 3.768716601109764e-05, "loss": 1.9238, "step": 7875 }, { "epoch": 0.5928602344793843, "grad_norm": 4.8945488929748535, "learning_rate": 3.767535090882709e-05, "loss": 1.9816, "step": 7876 }, { "epoch": 0.5929355087600444, "grad_norm": 4.261218070983887, "learning_rate": 3.766353653922156e-05, "loss": 1.647, "step": 7877 }, { "epoch": 0.5930107830407045, "grad_norm": 5.660770416259766, "learning_rate": 3.765172290298341e-05, "loss": 2.1478, "step": 7878 }, { "epoch": 0.5930860573213648, "grad_norm": 8.129709243774414, "learning_rate": 3.763991000081488e-05, "loss": 1.9031, "step": 7879 }, { "epoch": 0.5931613316020249, "grad_norm": 5.439948558807373, "learning_rate": 3.762809783341823e-05, "loss": 1.8595, "step": 7880 }, { "epoch": 0.593236605882685, "grad_norm": 4.5891432762146, "learning_rate": 3.761628640149567e-05, "loss": 1.3423, "step": 7881 }, { "epoch": 0.5933118801633452, "grad_norm": 5.952754497528076, "learning_rate": 3.7604475705749356e-05, "loss": 1.9639, "step": 7882 }, { "epoch": 0.5933871544440054, "grad_norm": 6.931262493133545, "learning_rate": 3.7592665746881406e-05, "loss": 1.8112, "step": 7883 }, { "epoch": 0.5934624287246655, "grad_norm": 4.159379005432129, "learning_rate": 3.758085652559386e-05, "loss": 1.7857, "step": 7884 }, { "epoch": 0.5935377030053256, "grad_norm": 4.909072399139404, "learning_rate": 3.756904804258878e-05, "loss": 1.8284, "step": 7885 }, { "epoch": 0.5936129772859858, "grad_norm": 4.211834907531738, "learning_rate": 3.75572402985681e-05, "loss": 1.9987, "step": 7886 }, { "epoch": 0.593688251566646, "grad_norm": 7.541345596313477, "learning_rate": 3.754543329423379e-05, "loss": 2.0844, "step": 7887 }, { "epoch": 0.5937635258473061, "grad_norm": 7.541345596313477, "learning_rate": 3.754543329423379e-05, "loss": 1.6423, "step": 7888 }, { "epoch": 0.5938388001279663, "grad_norm": 4.836280822753906, "learning_rate": 3.753362703028773e-05, "loss": 1.9787, "step": 7889 }, { "epoch": 0.5939140744086264, "grad_norm": 4.463475704193115, "learning_rate": 3.75218215074318e-05, "loss": 2.1009, "step": 7890 }, { "epoch": 0.5939893486892865, "grad_norm": 6.547650337219238, "learning_rate": 3.7510016726367746e-05, "loss": 1.7507, "step": 7891 }, { "epoch": 0.5940646229699468, "grad_norm": 4.9173431396484375, "learning_rate": 3.7498212687797386e-05, "loss": 1.693, "step": 7892 }, { "epoch": 0.5941398972506069, "grad_norm": 4.701329708099365, "learning_rate": 3.748640939242238e-05, "loss": 2.0079, "step": 7893 }, { "epoch": 0.594215171531267, "grad_norm": 5.148616790771484, "learning_rate": 3.7474606840944446e-05, "loss": 1.9929, "step": 7894 }, { "epoch": 0.5942904458119272, "grad_norm": 4.35709285736084, "learning_rate": 3.7462805034065195e-05, "loss": 1.7157, "step": 7895 }, { "epoch": 0.5943657200925874, "grad_norm": 3.8924877643585205, "learning_rate": 3.74510039724862e-05, "loss": 1.7595, "step": 7896 }, { "epoch": 0.5944409943732475, "grad_norm": 7.674504280090332, "learning_rate": 3.743920365690904e-05, "loss": 2.0583, "step": 7897 }, { "epoch": 0.5945162686539077, "grad_norm": 4.4485015869140625, "learning_rate": 3.7427404088035154e-05, "loss": 2.1203, "step": 7898 }, { "epoch": 0.5945915429345678, "grad_norm": 4.843740463256836, "learning_rate": 3.7415605266566034e-05, "loss": 1.8284, "step": 7899 }, { "epoch": 0.594666817215228, "grad_norm": 4.741055011749268, "learning_rate": 3.740380719320307e-05, "loss": 1.988, "step": 7900 }, { "epoch": 0.5947420914958882, "grad_norm": 5.653155326843262, "learning_rate": 3.739200986864763e-05, "loss": 1.902, "step": 7901 }, { "epoch": 0.5948173657765483, "grad_norm": 4.838129043579102, "learning_rate": 3.738021329360102e-05, "loss": 2.1271, "step": 7902 }, { "epoch": 0.5948926400572084, "grad_norm": 6.923396587371826, "learning_rate": 3.7368417468764526e-05, "loss": 2.2673, "step": 7903 }, { "epoch": 0.5949679143378687, "grad_norm": 3.68367075920105, "learning_rate": 3.735662239483936e-05, "loss": 1.8763, "step": 7904 }, { "epoch": 0.5950431886185288, "grad_norm": 4.275118827819824, "learning_rate": 3.734482807252673e-05, "loss": 2.0478, "step": 7905 }, { "epoch": 0.5951184628991889, "grad_norm": 7.5992231369018555, "learning_rate": 3.733303450252773e-05, "loss": 1.819, "step": 7906 }, { "epoch": 0.595193737179849, "grad_norm": 4.1472320556640625, "learning_rate": 3.732124168554352e-05, "loss": 1.7684, "step": 7907 }, { "epoch": 0.5952690114605093, "grad_norm": 5.666646480560303, "learning_rate": 3.7309449622275085e-05, "loss": 1.9225, "step": 7908 }, { "epoch": 0.5953442857411694, "grad_norm": 5.599004745483398, "learning_rate": 3.729765831342347e-05, "loss": 2.6208, "step": 7909 }, { "epoch": 0.5954195600218295, "grad_norm": 6.332678318023682, "learning_rate": 3.7285867759689605e-05, "loss": 2.0193, "step": 7910 }, { "epoch": 0.5954948343024897, "grad_norm": 5.501683235168457, "learning_rate": 3.727407796177445e-05, "loss": 1.8501, "step": 7911 }, { "epoch": 0.5955701085831498, "grad_norm": 4.173152923583984, "learning_rate": 3.7262288920378815e-05, "loss": 1.8828, "step": 7912 }, { "epoch": 0.59564538286381, "grad_norm": 5.3801751136779785, "learning_rate": 3.7250500636203565e-05, "loss": 1.6013, "step": 7913 }, { "epoch": 0.5957206571444702, "grad_norm": 4.022792816162109, "learning_rate": 3.7238713109949486e-05, "loss": 1.6574, "step": 7914 }, { "epoch": 0.5957959314251303, "grad_norm": 3.3938214778900146, "learning_rate": 3.722692634231728e-05, "loss": 1.6375, "step": 7915 }, { "epoch": 0.5958712057057904, "grad_norm": 5.620737552642822, "learning_rate": 3.721514033400766e-05, "loss": 2.1646, "step": 7916 }, { "epoch": 0.5959464799864507, "grad_norm": 5.351798057556152, "learning_rate": 3.7203355085721246e-05, "loss": 1.8322, "step": 7917 }, { "epoch": 0.5960217542671108, "grad_norm": 3.5777359008789062, "learning_rate": 3.7191570598158685e-05, "loss": 1.8492, "step": 7918 }, { "epoch": 0.5960970285477709, "grad_norm": 4.743188858032227, "learning_rate": 3.7179786872020474e-05, "loss": 1.5006, "step": 7919 }, { "epoch": 0.5961723028284311, "grad_norm": 4.990192890167236, "learning_rate": 3.716800390800718e-05, "loss": 1.8846, "step": 7920 }, { "epoch": 0.5962475771090913, "grad_norm": 3.918168306350708, "learning_rate": 3.71562217068192e-05, "loss": 1.6861, "step": 7921 }, { "epoch": 0.5963228513897514, "grad_norm": 3.9214861392974854, "learning_rate": 3.7144440269157e-05, "loss": 1.7838, "step": 7922 }, { "epoch": 0.5963981256704116, "grad_norm": 5.140080451965332, "learning_rate": 3.7132659595720934e-05, "loss": 2.297, "step": 7923 }, { "epoch": 0.5964733999510717, "grad_norm": 5.152413368225098, "learning_rate": 3.712087968721135e-05, "loss": 2.1238, "step": 7924 }, { "epoch": 0.5965486742317319, "grad_norm": 5.307943820953369, "learning_rate": 3.71091005443285e-05, "loss": 1.9978, "step": 7925 }, { "epoch": 0.596623948512392, "grad_norm": 6.113337993621826, "learning_rate": 3.709732216777264e-05, "loss": 1.4336, "step": 7926 }, { "epoch": 0.5966992227930522, "grad_norm": 3.746710777282715, "learning_rate": 3.708554455824394e-05, "loss": 1.8175, "step": 7927 }, { "epoch": 0.5967744970737123, "grad_norm": 5.175866603851318, "learning_rate": 3.7073767716442565e-05, "loss": 2.1938, "step": 7928 }, { "epoch": 0.5968497713543724, "grad_norm": 5.046782493591309, "learning_rate": 3.706199164306863e-05, "loss": 1.6358, "step": 7929 }, { "epoch": 0.5969250456350327, "grad_norm": 4.727492332458496, "learning_rate": 3.7050216338822154e-05, "loss": 1.9445, "step": 7930 }, { "epoch": 0.5970003199156928, "grad_norm": 4.22304105758667, "learning_rate": 3.703844180440317e-05, "loss": 2.1292, "step": 7931 }, { "epoch": 0.5970755941963529, "grad_norm": 5.28605842590332, "learning_rate": 3.7026668040511616e-05, "loss": 1.686, "step": 7932 }, { "epoch": 0.5971508684770132, "grad_norm": 7.246971130371094, "learning_rate": 3.701489504784743e-05, "loss": 1.8671, "step": 7933 }, { "epoch": 0.5972261427576733, "grad_norm": 4.841611385345459, "learning_rate": 3.7003122827110466e-05, "loss": 1.8963, "step": 7934 }, { "epoch": 0.5973014170383334, "grad_norm": 5.4831461906433105, "learning_rate": 3.6991351379000575e-05, "loss": 1.7834, "step": 7935 }, { "epoch": 0.5973766913189936, "grad_norm": 4.378210544586182, "learning_rate": 3.69795807042175e-05, "loss": 1.7506, "step": 7936 }, { "epoch": 0.5974519655996537, "grad_norm": 3.409184217453003, "learning_rate": 3.6967810803461006e-05, "loss": 1.5844, "step": 7937 }, { "epoch": 0.5975272398803139, "grad_norm": 4.411425590515137, "learning_rate": 3.695604167743076e-05, "loss": 2.0305, "step": 7938 }, { "epoch": 0.5976025141609741, "grad_norm": 4.402707099914551, "learning_rate": 3.694427332682642e-05, "loss": 2.0007, "step": 7939 }, { "epoch": 0.5976777884416342, "grad_norm": 4.217192649841309, "learning_rate": 3.6932505752347545e-05, "loss": 1.871, "step": 7940 }, { "epoch": 0.5977530627222943, "grad_norm": 4.3588409423828125, "learning_rate": 3.692073895469373e-05, "loss": 2.0733, "step": 7941 }, { "epoch": 0.5978283370029546, "grad_norm": 4.314188480377197, "learning_rate": 3.690897293456444e-05, "loss": 1.8234, "step": 7942 }, { "epoch": 0.5979036112836147, "grad_norm": 3.796677350997925, "learning_rate": 3.689720769265916e-05, "loss": 1.9309, "step": 7943 }, { "epoch": 0.5979788855642748, "grad_norm": 4.907580852508545, "learning_rate": 3.688544322967728e-05, "loss": 1.8871, "step": 7944 }, { "epoch": 0.5980541598449349, "grad_norm": 4.831220626831055, "learning_rate": 3.687367954631816e-05, "loss": 1.7927, "step": 7945 }, { "epoch": 0.5981294341255952, "grad_norm": 6.768815517425537, "learning_rate": 3.686191664328116e-05, "loss": 2.2726, "step": 7946 }, { "epoch": 0.5982047084062553, "grad_norm": 5.013249397277832, "learning_rate": 3.685015452126548e-05, "loss": 1.8583, "step": 7947 }, { "epoch": 0.5982799826869154, "grad_norm": 3.6031606197357178, "learning_rate": 3.6838393180970416e-05, "loss": 1.5103, "step": 7948 }, { "epoch": 0.5983552569675756, "grad_norm": 5.221858024597168, "learning_rate": 3.682663262309509e-05, "loss": 1.6994, "step": 7949 }, { "epoch": 0.5984305312482358, "grad_norm": 4.897763729095459, "learning_rate": 3.681487284833867e-05, "loss": 1.9034, "step": 7950 }, { "epoch": 0.5985058055288959, "grad_norm": 6.664005756378174, "learning_rate": 3.6803113857400206e-05, "loss": 1.4828, "step": 7951 }, { "epoch": 0.5985810798095561, "grad_norm": 5.071001052856445, "learning_rate": 3.679135565097878e-05, "loss": 1.7262, "step": 7952 }, { "epoch": 0.5986563540902162, "grad_norm": 4.3958659172058105, "learning_rate": 3.6779598229773344e-05, "loss": 1.7595, "step": 7953 }, { "epoch": 0.5987316283708763, "grad_norm": 4.544886112213135, "learning_rate": 3.676784159448287e-05, "loss": 2.2463, "step": 7954 }, { "epoch": 0.5988069026515366, "grad_norm": 3.928467273712158, "learning_rate": 3.675608574580623e-05, "loss": 1.7742, "step": 7955 }, { "epoch": 0.5988821769321967, "grad_norm": 4.448353290557861, "learning_rate": 3.674433068444232e-05, "loss": 2.0488, "step": 7956 }, { "epoch": 0.5989574512128568, "grad_norm": 4.744180202484131, "learning_rate": 3.673257641108989e-05, "loss": 1.8836, "step": 7957 }, { "epoch": 0.599032725493517, "grad_norm": 3.891849994659424, "learning_rate": 3.672082292644775e-05, "loss": 1.6183, "step": 7958 }, { "epoch": 0.5991079997741772, "grad_norm": 4.785205364227295, "learning_rate": 3.670907023121456e-05, "loss": 1.6399, "step": 7959 }, { "epoch": 0.5991832740548373, "grad_norm": 5.018258094787598, "learning_rate": 3.669731832608902e-05, "loss": 1.9479, "step": 7960 }, { "epoch": 0.5992585483354975, "grad_norm": 3.85152006149292, "learning_rate": 3.6685567211769726e-05, "loss": 1.7777, "step": 7961 }, { "epoch": 0.5993338226161576, "grad_norm": 4.894052505493164, "learning_rate": 3.6673816888955256e-05, "loss": 2.3965, "step": 7962 }, { "epoch": 0.5994090968968178, "grad_norm": 5.2292585372924805, "learning_rate": 3.666206735834415e-05, "loss": 1.9201, "step": 7963 }, { "epoch": 0.5994843711774779, "grad_norm": 5.393561840057373, "learning_rate": 3.6650318620634864e-05, "loss": 2.1616, "step": 7964 }, { "epoch": 0.5995596454581381, "grad_norm": 3.465280055999756, "learning_rate": 3.663857067652584e-05, "loss": 1.8788, "step": 7965 }, { "epoch": 0.5996349197387982, "grad_norm": 4.5058465003967285, "learning_rate": 3.662682352671543e-05, "loss": 1.9492, "step": 7966 }, { "epoch": 0.5997101940194584, "grad_norm": 4.201843738555908, "learning_rate": 3.6615077171902e-05, "loss": 1.5962, "step": 7967 }, { "epoch": 0.5997854683001186, "grad_norm": 3.7791907787323, "learning_rate": 3.660333161278382e-05, "loss": 1.9455, "step": 7968 }, { "epoch": 0.5998607425807787, "grad_norm": 4.722830772399902, "learning_rate": 3.659158685005916e-05, "loss": 1.8205, "step": 7969 }, { "epoch": 0.5999360168614388, "grad_norm": 5.043122291564941, "learning_rate": 3.6579842884426165e-05, "loss": 1.6884, "step": 7970 }, { "epoch": 0.6000112911420991, "grad_norm": 5.420186996459961, "learning_rate": 3.656809971658301e-05, "loss": 2.2662, "step": 7971 }, { "epoch": 0.6000865654227592, "grad_norm": 4.284111499786377, "learning_rate": 3.655635734722778e-05, "loss": 1.9558, "step": 7972 }, { "epoch": 0.6001618397034193, "grad_norm": 3.5491456985473633, "learning_rate": 3.654461577705855e-05, "loss": 1.5448, "step": 7973 }, { "epoch": 0.6002371139840795, "grad_norm": 3.5895063877105713, "learning_rate": 3.6532875006773284e-05, "loss": 1.8839, "step": 7974 }, { "epoch": 0.6003123882647396, "grad_norm": 3.2783491611480713, "learning_rate": 3.652113503706998e-05, "loss": 1.7279, "step": 7975 }, { "epoch": 0.6003876625453998, "grad_norm": 5.6505961418151855, "learning_rate": 3.6509395868646503e-05, "loss": 2.1068, "step": 7976 }, { "epoch": 0.60046293682606, "grad_norm": 5.302483081817627, "learning_rate": 3.649765750220073e-05, "loss": 1.8227, "step": 7977 }, { "epoch": 0.6005382111067201, "grad_norm": 4.162251949310303, "learning_rate": 3.648591993843049e-05, "loss": 2.0226, "step": 7978 }, { "epoch": 0.6006134853873802, "grad_norm": 5.978146076202393, "learning_rate": 3.6474183178033516e-05, "loss": 1.9416, "step": 7979 }, { "epoch": 0.6006887596680405, "grad_norm": 4.252506256103516, "learning_rate": 3.646244722170756e-05, "loss": 1.8379, "step": 7980 }, { "epoch": 0.6007640339487006, "grad_norm": 4.377779006958008, "learning_rate": 3.645071207015026e-05, "loss": 1.6979, "step": 7981 }, { "epoch": 0.6008393082293607, "grad_norm": 5.107065677642822, "learning_rate": 3.6438977724059266e-05, "loss": 2.3744, "step": 7982 }, { "epoch": 0.6009145825100208, "grad_norm": 7.6258649826049805, "learning_rate": 3.642724418413211e-05, "loss": 1.8971, "step": 7983 }, { "epoch": 0.6009898567906811, "grad_norm": 4.194841384887695, "learning_rate": 3.641551145106638e-05, "loss": 1.4903, "step": 7984 }, { "epoch": 0.6010651310713412, "grad_norm": 4.250508785247803, "learning_rate": 3.640377952555949e-05, "loss": 1.6269, "step": 7985 }, { "epoch": 0.6011404053520013, "grad_norm": 4.383350849151611, "learning_rate": 3.639204840830891e-05, "loss": 2.1613, "step": 7986 }, { "epoch": 0.6012156796326615, "grad_norm": 5.240368366241455, "learning_rate": 3.638031810001199e-05, "loss": 1.8303, "step": 7987 }, { "epoch": 0.6012909539133217, "grad_norm": 3.93595814704895, "learning_rate": 3.636858860136609e-05, "loss": 1.9607, "step": 7988 }, { "epoch": 0.6013662281939818, "grad_norm": 3.9893717765808105, "learning_rate": 3.6356859913068475e-05, "loss": 1.8748, "step": 7989 }, { "epoch": 0.601441502474642, "grad_norm": 5.265326976776123, "learning_rate": 3.634513203581641e-05, "loss": 1.8292, "step": 7990 }, { "epoch": 0.6015167767553021, "grad_norm": 3.9016449451446533, "learning_rate": 3.6333404970307044e-05, "loss": 1.5806, "step": 7991 }, { "epoch": 0.6015920510359622, "grad_norm": 5.3055315017700195, "learning_rate": 3.6321678717237564e-05, "loss": 1.8323, "step": 7992 }, { "epoch": 0.6016673253166225, "grad_norm": 4.487079620361328, "learning_rate": 3.630995327730501e-05, "loss": 1.9372, "step": 7993 }, { "epoch": 0.6017425995972826, "grad_norm": 8.976163864135742, "learning_rate": 3.629822865120646e-05, "loss": 1.9777, "step": 7994 }, { "epoch": 0.6018178738779427, "grad_norm": 4.627541542053223, "learning_rate": 3.62865048396389e-05, "loss": 1.8289, "step": 7995 }, { "epoch": 0.601893148158603, "grad_norm": 5.566967010498047, "learning_rate": 3.6274781843299275e-05, "loss": 1.8739, "step": 7996 }, { "epoch": 0.6019684224392631, "grad_norm": 5.938688278198242, "learning_rate": 3.626305966288451e-05, "loss": 2.0693, "step": 7997 }, { "epoch": 0.6020436967199232, "grad_norm": 4.522730827331543, "learning_rate": 3.625133829909141e-05, "loss": 2.0087, "step": 7998 }, { "epoch": 0.6021189710005834, "grad_norm": 5.317759037017822, "learning_rate": 3.623961775261682e-05, "loss": 2.1023, "step": 7999 }, { "epoch": 0.6021942452812435, "grad_norm": 5.180668354034424, "learning_rate": 3.622789802415745e-05, "loss": 1.5706, "step": 8000 }, { "epoch": 0.6022695195619037, "grad_norm": 4.770196437835693, "learning_rate": 3.6216179114410044e-05, "loss": 1.8813, "step": 8001 }, { "epoch": 0.6023447938425639, "grad_norm": 4.593331336975098, "learning_rate": 3.620446102407122e-05, "loss": 1.5814, "step": 8002 }, { "epoch": 0.602420068123224, "grad_norm": 6.350017547607422, "learning_rate": 3.619274375383763e-05, "loss": 1.7553, "step": 8003 }, { "epoch": 0.6024953424038841, "grad_norm": 4.056393146514893, "learning_rate": 3.618102730440578e-05, "loss": 1.7419, "step": 8004 }, { "epoch": 0.6025706166845443, "grad_norm": 4.596986293792725, "learning_rate": 3.616931167647222e-05, "loss": 1.7508, "step": 8005 }, { "epoch": 0.6026458909652045, "grad_norm": 5.526656150817871, "learning_rate": 3.615759687073339e-05, "loss": 1.7621, "step": 8006 }, { "epoch": 0.6027211652458646, "grad_norm": 4.0575761795043945, "learning_rate": 3.614588288788572e-05, "loss": 1.9033, "step": 8007 }, { "epoch": 0.6027964395265247, "grad_norm": 3.8082070350646973, "learning_rate": 3.6134169728625546e-05, "loss": 1.5919, "step": 8008 }, { "epoch": 0.602871713807185, "grad_norm": 4.748533725738525, "learning_rate": 3.612245739364919e-05, "loss": 1.3692, "step": 8009 }, { "epoch": 0.6029469880878451, "grad_norm": 6.354117393493652, "learning_rate": 3.611074588365294e-05, "loss": 2.0135, "step": 8010 }, { "epoch": 0.6030222623685052, "grad_norm": 6.241334438323975, "learning_rate": 3.609903519933298e-05, "loss": 2.3166, "step": 8011 }, { "epoch": 0.6030975366491654, "grad_norm": 6.611080646514893, "learning_rate": 3.6087325341385514e-05, "loss": 1.6199, "step": 8012 }, { "epoch": 0.6031728109298256, "grad_norm": 6.188549995422363, "learning_rate": 3.607561631050661e-05, "loss": 1.7267, "step": 8013 }, { "epoch": 0.6032480852104857, "grad_norm": 4.7459211349487305, "learning_rate": 3.606390810739239e-05, "loss": 1.546, "step": 8014 }, { "epoch": 0.6033233594911459, "grad_norm": 4.252176761627197, "learning_rate": 3.605220073273882e-05, "loss": 2.0117, "step": 8015 }, { "epoch": 0.603398633771806, "grad_norm": 4.85923957824707, "learning_rate": 3.604049418724191e-05, "loss": 1.7327, "step": 8016 }, { "epoch": 0.6034739080524661, "grad_norm": 6.795665740966797, "learning_rate": 3.602878847159755e-05, "loss": 1.8034, "step": 8017 }, { "epoch": 0.6035491823331264, "grad_norm": 5.557191848754883, "learning_rate": 3.601708358650166e-05, "loss": 1.8023, "step": 8018 }, { "epoch": 0.6036244566137865, "grad_norm": 4.3204731941223145, "learning_rate": 3.6005379532649996e-05, "loss": 1.6702, "step": 8019 }, { "epoch": 0.6036997308944466, "grad_norm": 4.874846458435059, "learning_rate": 3.5993676310738396e-05, "loss": 2.0416, "step": 8020 }, { "epoch": 0.6037750051751068, "grad_norm": 4.5694169998168945, "learning_rate": 3.598197392146252e-05, "loss": 1.7856, "step": 8021 }, { "epoch": 0.603850279455767, "grad_norm": 5.44784688949585, "learning_rate": 3.597027236551809e-05, "loss": 1.9995, "step": 8022 }, { "epoch": 0.6039255537364271, "grad_norm": 4.517114639282227, "learning_rate": 3.59585716436007e-05, "loss": 1.9514, "step": 8023 }, { "epoch": 0.6040008280170872, "grad_norm": 5.466026782989502, "learning_rate": 3.594687175640595e-05, "loss": 2.0542, "step": 8024 }, { "epoch": 0.6040761022977474, "grad_norm": 4.703134536743164, "learning_rate": 3.593517270462934e-05, "loss": 1.6897, "step": 8025 }, { "epoch": 0.6041513765784076, "grad_norm": 4.974737644195557, "learning_rate": 3.592347448896635e-05, "loss": 1.9729, "step": 8026 }, { "epoch": 0.6042266508590677, "grad_norm": 5.321857929229736, "learning_rate": 3.591177711011242e-05, "loss": 1.9015, "step": 8027 }, { "epoch": 0.6043019251397279, "grad_norm": 4.586242198944092, "learning_rate": 3.5900080568762916e-05, "loss": 2.0655, "step": 8028 }, { "epoch": 0.604377199420388, "grad_norm": 4.832167625427246, "learning_rate": 3.588838486561318e-05, "loss": 2.0172, "step": 8029 }, { "epoch": 0.6044524737010482, "grad_norm": 6.086312770843506, "learning_rate": 3.5876690001358445e-05, "loss": 1.8601, "step": 8030 }, { "epoch": 0.6045277479817084, "grad_norm": 4.7074408531188965, "learning_rate": 3.5864995976694e-05, "loss": 1.553, "step": 8031 }, { "epoch": 0.6046030222623685, "grad_norm": 5.799994468688965, "learning_rate": 3.585330279231496e-05, "loss": 1.6822, "step": 8032 }, { "epoch": 0.6046782965430286, "grad_norm": 4.663863658905029, "learning_rate": 3.5841610448916484e-05, "loss": 1.7558, "step": 8033 }, { "epoch": 0.6047535708236889, "grad_norm": 5.484861373901367, "learning_rate": 3.5829918947193634e-05, "loss": 2.0045, "step": 8034 }, { "epoch": 0.604828845104349, "grad_norm": 5.244642734527588, "learning_rate": 3.5818228287841466e-05, "loss": 1.656, "step": 8035 }, { "epoch": 0.6049041193850091, "grad_norm": 8.319445610046387, "learning_rate": 3.580653847155491e-05, "loss": 2.0709, "step": 8036 }, { "epoch": 0.6049793936656693, "grad_norm": 4.302628517150879, "learning_rate": 3.579484949902893e-05, "loss": 1.8349, "step": 8037 }, { "epoch": 0.6050546679463294, "grad_norm": 5.783748149871826, "learning_rate": 3.578316137095838e-05, "loss": 2.0228, "step": 8038 }, { "epoch": 0.6051299422269896, "grad_norm": 7.605402946472168, "learning_rate": 3.5771474088038114e-05, "loss": 1.821, "step": 8039 }, { "epoch": 0.6052052165076498, "grad_norm": 4.048700332641602, "learning_rate": 3.575978765096286e-05, "loss": 1.8033, "step": 8040 }, { "epoch": 0.6052804907883099, "grad_norm": 5.959883213043213, "learning_rate": 3.5748102060427374e-05, "loss": 1.7772, "step": 8041 }, { "epoch": 0.60535576506897, "grad_norm": 5.1179375648498535, "learning_rate": 3.573641731712635e-05, "loss": 1.8498, "step": 8042 }, { "epoch": 0.6054310393496302, "grad_norm": 4.0493574142456055, "learning_rate": 3.5724733421754365e-05, "loss": 1.7278, "step": 8043 }, { "epoch": 0.6055063136302904, "grad_norm": 4.4777350425720215, "learning_rate": 3.571305037500603e-05, "loss": 1.8631, "step": 8044 }, { "epoch": 0.6055815879109505, "grad_norm": 5.109423637390137, "learning_rate": 3.570136817757585e-05, "loss": 1.8148, "step": 8045 }, { "epoch": 0.6056568621916106, "grad_norm": 13.446748733520508, "learning_rate": 3.568968683015832e-05, "loss": 2.0569, "step": 8046 }, { "epoch": 0.6057321364722709, "grad_norm": 4.003000736236572, "learning_rate": 3.567800633344783e-05, "loss": 1.7951, "step": 8047 }, { "epoch": 0.605807410752931, "grad_norm": 3.602207660675049, "learning_rate": 3.566632668813879e-05, "loss": 1.8865, "step": 8048 }, { "epoch": 0.6058826850335911, "grad_norm": 4.107050895690918, "learning_rate": 3.5654647894925475e-05, "loss": 1.6907, "step": 8049 }, { "epoch": 0.6059579593142513, "grad_norm": 4.525403022766113, "learning_rate": 3.564296995450219e-05, "loss": 1.6926, "step": 8050 }, { "epoch": 0.6060332335949115, "grad_norm": 5.758842468261719, "learning_rate": 3.5631292867563146e-05, "loss": 2.5907, "step": 8051 }, { "epoch": 0.6061085078755716, "grad_norm": 5.020792484283447, "learning_rate": 3.5619616634802524e-05, "loss": 1.9564, "step": 8052 }, { "epoch": 0.6061837821562318, "grad_norm": 4.926012992858887, "learning_rate": 3.560794125691441e-05, "loss": 2.07, "step": 8053 }, { "epoch": 0.6062590564368919, "grad_norm": 6.003063678741455, "learning_rate": 3.559626673459291e-05, "loss": 2.3415, "step": 8054 }, { "epoch": 0.606334330717552, "grad_norm": 4.733049392700195, "learning_rate": 3.558459306853201e-05, "loss": 1.7742, "step": 8055 }, { "epoch": 0.6064096049982123, "grad_norm": 4.311572551727295, "learning_rate": 3.5572920259425715e-05, "loss": 1.8239, "step": 8056 }, { "epoch": 0.6064848792788724, "grad_norm": 4.7451300621032715, "learning_rate": 3.556124830796789e-05, "loss": 1.6207, "step": 8057 }, { "epoch": 0.6065601535595325, "grad_norm": 4.346027374267578, "learning_rate": 3.554957721485242e-05, "loss": 1.8382, "step": 8058 }, { "epoch": 0.6066354278401928, "grad_norm": 3.985694646835327, "learning_rate": 3.553790698077315e-05, "loss": 1.5765, "step": 8059 }, { "epoch": 0.6067107021208529, "grad_norm": 5.615527153015137, "learning_rate": 3.5526237606423786e-05, "loss": 1.6242, "step": 8060 }, { "epoch": 0.606785976401513, "grad_norm": 4.927435398101807, "learning_rate": 3.551456909249808e-05, "loss": 1.8059, "step": 8061 }, { "epoch": 0.6068612506821731, "grad_norm": 9.183197021484375, "learning_rate": 3.5502901439689664e-05, "loss": 2.0579, "step": 8062 }, { "epoch": 0.6069365249628333, "grad_norm": 5.377397537231445, "learning_rate": 3.549123464869218e-05, "loss": 1.6157, "step": 8063 }, { "epoch": 0.6070117992434935, "grad_norm": 5.270592212677002, "learning_rate": 3.547956872019915e-05, "loss": 1.7501, "step": 8064 }, { "epoch": 0.6070870735241536, "grad_norm": 5.024423122406006, "learning_rate": 3.546790365490411e-05, "loss": 1.6681, "step": 8065 }, { "epoch": 0.6071623478048138, "grad_norm": 5.829306125640869, "learning_rate": 3.545623945350049e-05, "loss": 1.9247, "step": 8066 }, { "epoch": 0.6072376220854739, "grad_norm": 4.136631488800049, "learning_rate": 3.544457611668171e-05, "loss": 1.9709, "step": 8067 }, { "epoch": 0.6073128963661341, "grad_norm": 6.158609867095947, "learning_rate": 3.5432913645141106e-05, "loss": 2.1045, "step": 8068 }, { "epoch": 0.6073881706467943, "grad_norm": 4.047645092010498, "learning_rate": 3.542125203957202e-05, "loss": 1.6893, "step": 8069 }, { "epoch": 0.6074634449274544, "grad_norm": 4.429532051086426, "learning_rate": 3.5409591300667636e-05, "loss": 1.5283, "step": 8070 }, { "epoch": 0.6075387192081145, "grad_norm": 4.327272415161133, "learning_rate": 3.539793142912119e-05, "loss": 1.8778, "step": 8071 }, { "epoch": 0.6076139934887748, "grad_norm": 4.6361494064331055, "learning_rate": 3.5386272425625816e-05, "loss": 1.78, "step": 8072 }, { "epoch": 0.6076892677694349, "grad_norm": 4.448143482208252, "learning_rate": 3.537461429087464e-05, "loss": 1.7166, "step": 8073 }, { "epoch": 0.607764542050095, "grad_norm": 5.978185653686523, "learning_rate": 3.536295702556065e-05, "loss": 1.6491, "step": 8074 }, { "epoch": 0.6078398163307552, "grad_norm": 4.677586555480957, "learning_rate": 3.5351300630376876e-05, "loss": 1.8593, "step": 8075 }, { "epoch": 0.6079150906114154, "grad_norm": 5.9051690101623535, "learning_rate": 3.533964510601626e-05, "loss": 1.773, "step": 8076 }, { "epoch": 0.6079903648920755, "grad_norm": 4.932521820068359, "learning_rate": 3.5327990453171663e-05, "loss": 1.8313, "step": 8077 }, { "epoch": 0.6080656391727357, "grad_norm": 5.28539514541626, "learning_rate": 3.5316336672535945e-05, "loss": 2.1272, "step": 8078 }, { "epoch": 0.6081409134533958, "grad_norm": 4.727498531341553, "learning_rate": 3.530468376480187e-05, "loss": 1.4553, "step": 8079 }, { "epoch": 0.608216187734056, "grad_norm": 3.7702107429504395, "learning_rate": 3.5293031730662195e-05, "loss": 1.7154, "step": 8080 }, { "epoch": 0.6082914620147161, "grad_norm": 4.206916809082031, "learning_rate": 3.528138057080957e-05, "loss": 2.0161, "step": 8081 }, { "epoch": 0.6083667362953763, "grad_norm": 3.46120548248291, "learning_rate": 3.526973028593664e-05, "loss": 1.824, "step": 8082 }, { "epoch": 0.6084420105760364, "grad_norm": 5.708986759185791, "learning_rate": 3.525808087673598e-05, "loss": 1.9866, "step": 8083 }, { "epoch": 0.6085172848566965, "grad_norm": 4.997643947601318, "learning_rate": 3.524643234390012e-05, "loss": 1.8727, "step": 8084 }, { "epoch": 0.6085925591373568, "grad_norm": 5.269734859466553, "learning_rate": 3.5234784688121516e-05, "loss": 1.9493, "step": 8085 }, { "epoch": 0.6086678334180169, "grad_norm": 4.212966442108154, "learning_rate": 3.5223137910092605e-05, "loss": 1.7936, "step": 8086 }, { "epoch": 0.608743107698677, "grad_norm": 7.552111625671387, "learning_rate": 3.5211492010505734e-05, "loss": 2.2614, "step": 8087 }, { "epoch": 0.6088183819793372, "grad_norm": 4.56868314743042, "learning_rate": 3.5199846990053234e-05, "loss": 2.0106, "step": 8088 }, { "epoch": 0.6088936562599974, "grad_norm": 4.035215377807617, "learning_rate": 3.5188202849427355e-05, "loss": 1.9778, "step": 8089 }, { "epoch": 0.6089689305406575, "grad_norm": 7.589376449584961, "learning_rate": 3.5176559589320326e-05, "loss": 2.3722, "step": 8090 }, { "epoch": 0.6090442048213177, "grad_norm": 5.87318754196167, "learning_rate": 3.516491721042431e-05, "loss": 1.9171, "step": 8091 }, { "epoch": 0.6091194791019778, "grad_norm": 4.617512226104736, "learning_rate": 3.5153275713431375e-05, "loss": 2.631, "step": 8092 }, { "epoch": 0.609194753382638, "grad_norm": 5.0993757247924805, "learning_rate": 3.514163509903362e-05, "loss": 2.4103, "step": 8093 }, { "epoch": 0.6092700276632982, "grad_norm": 7.570215225219727, "learning_rate": 3.512999536792301e-05, "loss": 1.8675, "step": 8094 }, { "epoch": 0.6093453019439583, "grad_norm": 4.643409729003906, "learning_rate": 3.511835652079151e-05, "loss": 2.036, "step": 8095 }, { "epoch": 0.6094205762246184, "grad_norm": 4.436516761779785, "learning_rate": 3.5106718558331006e-05, "loss": 1.7069, "step": 8096 }, { "epoch": 0.6094958505052787, "grad_norm": 4.737794876098633, "learning_rate": 3.5095081481233384e-05, "loss": 1.8507, "step": 8097 }, { "epoch": 0.6095711247859388, "grad_norm": 4.942888259887695, "learning_rate": 3.5083445290190364e-05, "loss": 2.2302, "step": 8098 }, { "epoch": 0.6096463990665989, "grad_norm": 5.764395713806152, "learning_rate": 3.507180998589374e-05, "loss": 1.9121, "step": 8099 }, { "epoch": 0.6097216733472591, "grad_norm": 4.899541854858398, "learning_rate": 3.506017556903517e-05, "loss": 1.7828, "step": 8100 }, { "epoch": 0.6097969476279193, "grad_norm": 5.933323860168457, "learning_rate": 3.504854204030631e-05, "loss": 1.9129, "step": 8101 }, { "epoch": 0.6098722219085794, "grad_norm": 5.2042107582092285, "learning_rate": 3.503690940039871e-05, "loss": 1.7762, "step": 8102 }, { "epoch": 0.6099474961892395, "grad_norm": 3.7044179439544678, "learning_rate": 3.502527765000393e-05, "loss": 1.7365, "step": 8103 }, { "epoch": 0.6100227704698997, "grad_norm": 6.34365701675415, "learning_rate": 3.5013646789813404e-05, "loss": 1.6649, "step": 8104 }, { "epoch": 0.6100980447505598, "grad_norm": 4.74472188949585, "learning_rate": 3.500201682051859e-05, "loss": 2.0668, "step": 8105 }, { "epoch": 0.61017331903122, "grad_norm": 5.129726409912109, "learning_rate": 3.499038774281083e-05, "loss": 2.1201, "step": 8106 }, { "epoch": 0.6102485933118802, "grad_norm": 4.57461404800415, "learning_rate": 3.4978759557381444e-05, "loss": 1.5843, "step": 8107 }, { "epoch": 0.6103238675925403, "grad_norm": 5.204603672027588, "learning_rate": 3.496713226492173e-05, "loss": 1.7654, "step": 8108 }, { "epoch": 0.6103991418732004, "grad_norm": 7.3619704246521, "learning_rate": 3.495550586612285e-05, "loss": 2.0269, "step": 8109 }, { "epoch": 0.6104744161538607, "grad_norm": 6.065268516540527, "learning_rate": 3.494388036167598e-05, "loss": 1.9622, "step": 8110 }, { "epoch": 0.6105496904345208, "grad_norm": 4.100382328033447, "learning_rate": 3.493225575227222e-05, "loss": 2.0366, "step": 8111 }, { "epoch": 0.6106249647151809, "grad_norm": 4.180423259735107, "learning_rate": 3.492063203860264e-05, "loss": 1.6262, "step": 8112 }, { "epoch": 0.6107002389958411, "grad_norm": 5.717609405517578, "learning_rate": 3.49090092213582e-05, "loss": 1.9743, "step": 8113 }, { "epoch": 0.6107755132765013, "grad_norm": 9.112792015075684, "learning_rate": 3.489738730122988e-05, "loss": 1.8602, "step": 8114 }, { "epoch": 0.6108507875571614, "grad_norm": 5.108053684234619, "learning_rate": 3.4885766278908526e-05, "loss": 1.7395, "step": 8115 }, { "epoch": 0.6109260618378216, "grad_norm": 5.141839981079102, "learning_rate": 3.487414615508501e-05, "loss": 1.9182, "step": 8116 }, { "epoch": 0.6110013361184817, "grad_norm": 5.8351263999938965, "learning_rate": 3.48625269304501e-05, "loss": 1.2865, "step": 8117 }, { "epoch": 0.6110766103991419, "grad_norm": 5.516819477081299, "learning_rate": 3.485090860569454e-05, "loss": 2.2089, "step": 8118 }, { "epoch": 0.6111518846798021, "grad_norm": 4.534638404846191, "learning_rate": 3.483929118150898e-05, "loss": 1.7027, "step": 8119 }, { "epoch": 0.6112271589604622, "grad_norm": 9.957633018493652, "learning_rate": 3.482767465858408e-05, "loss": 1.9719, "step": 8120 }, { "epoch": 0.6113024332411223, "grad_norm": 5.063474178314209, "learning_rate": 3.4816059037610365e-05, "loss": 1.8365, "step": 8121 }, { "epoch": 0.6113777075217824, "grad_norm": 4.992016315460205, "learning_rate": 3.480444431927837e-05, "loss": 1.6186, "step": 8122 }, { "epoch": 0.6114529818024427, "grad_norm": 5.252773761749268, "learning_rate": 3.479283050427858e-05, "loss": 1.9097, "step": 8123 }, { "epoch": 0.6115282560831028, "grad_norm": 4.58005428314209, "learning_rate": 3.478121759330136e-05, "loss": 1.7574, "step": 8124 }, { "epoch": 0.6116035303637629, "grad_norm": 5.458553314208984, "learning_rate": 3.476960558703711e-05, "loss": 1.7179, "step": 8125 }, { "epoch": 0.6116788046444231, "grad_norm": 4.62752103805542, "learning_rate": 3.4757994486176094e-05, "loss": 2.1633, "step": 8126 }, { "epoch": 0.6117540789250833, "grad_norm": 4.543347358703613, "learning_rate": 3.474638429140858e-05, "loss": 1.5635, "step": 8127 }, { "epoch": 0.6118293532057434, "grad_norm": 4.643796920776367, "learning_rate": 3.4734775003424734e-05, "loss": 1.624, "step": 8128 }, { "epoch": 0.6119046274864036, "grad_norm": 3.7508130073547363, "learning_rate": 3.472316662291474e-05, "loss": 1.927, "step": 8129 }, { "epoch": 0.6119799017670637, "grad_norm": 5.244353771209717, "learning_rate": 3.4711559150568634e-05, "loss": 1.9094, "step": 8130 }, { "epoch": 0.6120551760477239, "grad_norm": 4.895063877105713, "learning_rate": 3.469995258707649e-05, "loss": 1.7094, "step": 8131 }, { "epoch": 0.6121304503283841, "grad_norm": 4.976377010345459, "learning_rate": 3.4688346933128246e-05, "loss": 1.7931, "step": 8132 }, { "epoch": 0.6122057246090442, "grad_norm": 4.292976379394531, "learning_rate": 3.467674218941385e-05, "loss": 1.8127, "step": 8133 }, { "epoch": 0.6122809988897043, "grad_norm": 4.518517017364502, "learning_rate": 3.466513835662315e-05, "loss": 1.7762, "step": 8134 }, { "epoch": 0.6123562731703646, "grad_norm": 4.300276279449463, "learning_rate": 3.4653535435446e-05, "loss": 1.7317, "step": 8135 }, { "epoch": 0.6124315474510247, "grad_norm": 4.825788974761963, "learning_rate": 3.464193342657211e-05, "loss": 2.2165, "step": 8136 }, { "epoch": 0.6125068217316848, "grad_norm": 4.445923328399658, "learning_rate": 3.4630332330691224e-05, "loss": 1.944, "step": 8137 }, { "epoch": 0.612582096012345, "grad_norm": 5.350835800170898, "learning_rate": 3.461873214849297e-05, "loss": 2.3772, "step": 8138 }, { "epoch": 0.6126573702930052, "grad_norm": 5.620702743530273, "learning_rate": 3.4607132880666956e-05, "loss": 1.9225, "step": 8139 }, { "epoch": 0.6127326445736653, "grad_norm": 4.805440902709961, "learning_rate": 3.459553452790274e-05, "loss": 1.6635, "step": 8140 }, { "epoch": 0.6128079188543254, "grad_norm": 5.023445129394531, "learning_rate": 3.458393709088979e-05, "loss": 1.6587, "step": 8141 }, { "epoch": 0.6128831931349856, "grad_norm": 4.494292259216309, "learning_rate": 3.457234057031755e-05, "loss": 1.7722, "step": 8142 }, { "epoch": 0.6129584674156457, "grad_norm": 4.2279815673828125, "learning_rate": 3.456074496687538e-05, "loss": 2.3912, "step": 8143 }, { "epoch": 0.6130337416963059, "grad_norm": 5.196355819702148, "learning_rate": 3.4549150281252636e-05, "loss": 1.5514, "step": 8144 }, { "epoch": 0.6131090159769661, "grad_norm": 4.484930992126465, "learning_rate": 3.4537556514138556e-05, "loss": 1.7105, "step": 8145 }, { "epoch": 0.6131842902576262, "grad_norm": 6.017373085021973, "learning_rate": 3.452596366622241e-05, "loss": 1.7412, "step": 8146 }, { "epoch": 0.6132595645382863, "grad_norm": 4.953855514526367, "learning_rate": 3.451437173819329e-05, "loss": 1.9372, "step": 8147 }, { "epoch": 0.6133348388189466, "grad_norm": 4.7046709060668945, "learning_rate": 3.450278073074036e-05, "loss": 1.6947, "step": 8148 }, { "epoch": 0.6134101130996067, "grad_norm": 4.654233455657959, "learning_rate": 3.449119064455263e-05, "loss": 1.9575, "step": 8149 }, { "epoch": 0.6134853873802668, "grad_norm": 4.065881729125977, "learning_rate": 3.447960148031913e-05, "loss": 1.3527, "step": 8150 }, { "epoch": 0.613560661660927, "grad_norm": 3.889578104019165, "learning_rate": 3.4468013238728774e-05, "loss": 1.8721, "step": 8151 }, { "epoch": 0.6136359359415872, "grad_norm": 5.912107944488525, "learning_rate": 3.445642592047049e-05, "loss": 2.0551, "step": 8152 }, { "epoch": 0.6137112102222473, "grad_norm": 4.926999568939209, "learning_rate": 3.444483952623306e-05, "loss": 2.3235, "step": 8153 }, { "epoch": 0.6137864845029075, "grad_norm": 5.576359748840332, "learning_rate": 3.443325405670529e-05, "loss": 1.7611, "step": 8154 }, { "epoch": 0.6138617587835676, "grad_norm": 7.416301250457764, "learning_rate": 3.442166951257591e-05, "loss": 1.8567, "step": 8155 }, { "epoch": 0.6139370330642278, "grad_norm": 5.036648750305176, "learning_rate": 3.4410085894533576e-05, "loss": 2.157, "step": 8156 }, { "epoch": 0.614012307344888, "grad_norm": 3.795140266418457, "learning_rate": 3.439850320326691e-05, "loss": 1.6346, "step": 8157 }, { "epoch": 0.6140875816255481, "grad_norm": 3.8690414428710938, "learning_rate": 3.438692143946445e-05, "loss": 1.8093, "step": 8158 }, { "epoch": 0.6141628559062082, "grad_norm": 5.439431190490723, "learning_rate": 3.437534060381473e-05, "loss": 1.8431, "step": 8159 }, { "epoch": 0.6142381301868683, "grad_norm": 3.9665729999542236, "learning_rate": 3.4363760697006155e-05, "loss": 1.963, "step": 8160 }, { "epoch": 0.6143134044675286, "grad_norm": 4.183095455169678, "learning_rate": 3.4352181719727156e-05, "loss": 1.9875, "step": 8161 }, { "epoch": 0.6143886787481887, "grad_norm": 4.535946846008301, "learning_rate": 3.434060367266604e-05, "loss": 1.634, "step": 8162 }, { "epoch": 0.6144639530288488, "grad_norm": 5.891911506652832, "learning_rate": 3.4329026556511137e-05, "loss": 1.8804, "step": 8163 }, { "epoch": 0.614539227309509, "grad_norm": 6.393861770629883, "learning_rate": 3.431745037195061e-05, "loss": 1.7716, "step": 8164 }, { "epoch": 0.6146145015901692, "grad_norm": 5.3104248046875, "learning_rate": 3.430587511967267e-05, "loss": 1.8541, "step": 8165 }, { "epoch": 0.6146897758708293, "grad_norm": 4.353861331939697, "learning_rate": 3.429430080036541e-05, "loss": 2.017, "step": 8166 }, { "epoch": 0.6147650501514895, "grad_norm": 4.105453968048096, "learning_rate": 3.428272741471693e-05, "loss": 1.8968, "step": 8167 }, { "epoch": 0.6148403244321496, "grad_norm": 3.8583414554595947, "learning_rate": 3.427115496341518e-05, "loss": 1.9697, "step": 8168 }, { "epoch": 0.6149155987128098, "grad_norm": 4.821552753448486, "learning_rate": 3.425958344714816e-05, "loss": 1.6207, "step": 8169 }, { "epoch": 0.61499087299347, "grad_norm": 4.335643768310547, "learning_rate": 3.424801286660372e-05, "loss": 1.6681, "step": 8170 }, { "epoch": 0.6150661472741301, "grad_norm": 5.145238399505615, "learning_rate": 3.4236443222469715e-05, "loss": 1.8731, "step": 8171 }, { "epoch": 0.6151414215547902, "grad_norm": 4.591930389404297, "learning_rate": 3.422487451543395e-05, "loss": 2.0362, "step": 8172 }, { "epoch": 0.6152166958354505, "grad_norm": 4.320077896118164, "learning_rate": 3.421330674618411e-05, "loss": 1.7148, "step": 8173 }, { "epoch": 0.6152919701161106, "grad_norm": 8.470335960388184, "learning_rate": 3.420173991540791e-05, "loss": 1.5307, "step": 8174 }, { "epoch": 0.6153672443967707, "grad_norm": 4.425691604614258, "learning_rate": 3.419017402379292e-05, "loss": 1.7212, "step": 8175 }, { "epoch": 0.6154425186774309, "grad_norm": 4.004261016845703, "learning_rate": 3.4178609072026745e-05, "loss": 1.9242, "step": 8176 }, { "epoch": 0.6155177929580911, "grad_norm": 5.919533729553223, "learning_rate": 3.416704506079684e-05, "loss": 2.2793, "step": 8177 }, { "epoch": 0.6155930672387512, "grad_norm": 5.656198024749756, "learning_rate": 3.415548199079069e-05, "loss": 2.0532, "step": 8178 }, { "epoch": 0.6156683415194114, "grad_norm": 4.965191841125488, "learning_rate": 3.414391986269566e-05, "loss": 1.9619, "step": 8179 }, { "epoch": 0.6157436158000715, "grad_norm": 3.8956573009490967, "learning_rate": 3.413235867719912e-05, "loss": 1.894, "step": 8180 }, { "epoch": 0.6158188900807317, "grad_norm": 4.109010219573975, "learning_rate": 3.4120798434988296e-05, "loss": 1.7619, "step": 8181 }, { "epoch": 0.6158941643613918, "grad_norm": 5.274187088012695, "learning_rate": 3.410923913675047e-05, "loss": 1.5277, "step": 8182 }, { "epoch": 0.615969438642052, "grad_norm": 4.784882068634033, "learning_rate": 3.4097680783172764e-05, "loss": 1.7618, "step": 8183 }, { "epoch": 0.6160447129227121, "grad_norm": 3.6030797958374023, "learning_rate": 3.408612337494233e-05, "loss": 1.9912, "step": 8184 }, { "epoch": 0.6161199872033722, "grad_norm": 4.4571919441223145, "learning_rate": 3.407456691274618e-05, "loss": 1.9, "step": 8185 }, { "epoch": 0.6161952614840325, "grad_norm": 3.6722207069396973, "learning_rate": 3.406301139727132e-05, "loss": 1.7955, "step": 8186 }, { "epoch": 0.6162705357646926, "grad_norm": 5.215631008148193, "learning_rate": 3.405145682920473e-05, "loss": 1.5753, "step": 8187 }, { "epoch": 0.6163458100453527, "grad_norm": 5.818994045257568, "learning_rate": 3.403990320923325e-05, "loss": 2.4141, "step": 8188 }, { "epoch": 0.616421084326013, "grad_norm": 4.393082141876221, "learning_rate": 3.402835053804374e-05, "loss": 1.9337, "step": 8189 }, { "epoch": 0.6164963586066731, "grad_norm": 4.876507759094238, "learning_rate": 3.401679881632296e-05, "loss": 1.9883, "step": 8190 }, { "epoch": 0.6165716328873332, "grad_norm": 6.674361228942871, "learning_rate": 3.400524804475764e-05, "loss": 1.7115, "step": 8191 }, { "epoch": 0.6166469071679934, "grad_norm": 3.4941883087158203, "learning_rate": 3.3993698224034415e-05, "loss": 1.7839, "step": 8192 }, { "epoch": 0.6167221814486535, "grad_norm": 4.829101085662842, "learning_rate": 3.398214935483991e-05, "loss": 1.9081, "step": 8193 }, { "epoch": 0.6167974557293137, "grad_norm": 6.579063415527344, "learning_rate": 3.397060143786067e-05, "loss": 1.8436, "step": 8194 }, { "epoch": 0.6168727300099739, "grad_norm": 5.214309215545654, "learning_rate": 3.3959054473783175e-05, "loss": 1.8023, "step": 8195 }, { "epoch": 0.616948004290634, "grad_norm": 6.181789875030518, "learning_rate": 3.394750846329386e-05, "loss": 1.9845, "step": 8196 }, { "epoch": 0.6170232785712941, "grad_norm": 6.282076358795166, "learning_rate": 3.393596340707912e-05, "loss": 2.4618, "step": 8197 }, { "epoch": 0.6170985528519544, "grad_norm": 4.5025787353515625, "learning_rate": 3.392441930582525e-05, "loss": 1.9083, "step": 8198 }, { "epoch": 0.6171738271326145, "grad_norm": 3.552966356277466, "learning_rate": 3.391287616021853e-05, "loss": 1.9448, "step": 8199 }, { "epoch": 0.6172491014132746, "grad_norm": 4.1839447021484375, "learning_rate": 3.390133397094516e-05, "loss": 1.7578, "step": 8200 }, { "epoch": 0.6173243756939347, "grad_norm": 4.193100929260254, "learning_rate": 3.3889792738691314e-05, "loss": 1.6644, "step": 8201 }, { "epoch": 0.617399649974595, "grad_norm": 3.751049518585205, "learning_rate": 3.387825246414303e-05, "loss": 1.6328, "step": 8202 }, { "epoch": 0.6174749242552551, "grad_norm": 5.038300037384033, "learning_rate": 3.3866713147986394e-05, "loss": 2.0151, "step": 8203 }, { "epoch": 0.6175501985359152, "grad_norm": 8.461604118347168, "learning_rate": 3.385517479090738e-05, "loss": 2.0889, "step": 8204 }, { "epoch": 0.6176254728165754, "grad_norm": 5.9980950355529785, "learning_rate": 3.3843637393591894e-05, "loss": 1.9154, "step": 8205 }, { "epoch": 0.6177007470972355, "grad_norm": 4.784667015075684, "learning_rate": 3.383210095672581e-05, "loss": 2.1451, "step": 8206 }, { "epoch": 0.6177760213778957, "grad_norm": 4.672134876251221, "learning_rate": 3.3820565480994913e-05, "loss": 1.3894, "step": 8207 }, { "epoch": 0.6178512956585559, "grad_norm": 5.144842624664307, "learning_rate": 3.380903096708501e-05, "loss": 2.0122, "step": 8208 }, { "epoch": 0.617926569939216, "grad_norm": 4.93029260635376, "learning_rate": 3.3797497415681725e-05, "loss": 2.104, "step": 8209 }, { "epoch": 0.6180018442198761, "grad_norm": 3.9838309288024902, "learning_rate": 3.378596482747075e-05, "loss": 1.8608, "step": 8210 }, { "epoch": 0.6180771185005364, "grad_norm": 4.014377117156982, "learning_rate": 3.377443320313762e-05, "loss": 1.6941, "step": 8211 }, { "epoch": 0.6181523927811965, "grad_norm": 6.950359344482422, "learning_rate": 3.3762902543367916e-05, "loss": 1.7919, "step": 8212 }, { "epoch": 0.6182276670618566, "grad_norm": 5.2452545166015625, "learning_rate": 3.375137284884703e-05, "loss": 2.0207, "step": 8213 }, { "epoch": 0.6183029413425168, "grad_norm": 5.115932941436768, "learning_rate": 3.3739844120260436e-05, "loss": 1.6745, "step": 8214 }, { "epoch": 0.618378215623177, "grad_norm": 7.520609378814697, "learning_rate": 3.372831635829343e-05, "loss": 2.2043, "step": 8215 }, { "epoch": 0.6184534899038371, "grad_norm": 3.8072352409362793, "learning_rate": 3.3716789563631334e-05, "loss": 1.7571, "step": 8216 }, { "epoch": 0.6185287641844973, "grad_norm": 4.510412693023682, "learning_rate": 3.370526373695937e-05, "loss": 1.634, "step": 8217 }, { "epoch": 0.6186040384651574, "grad_norm": 4.100464344024658, "learning_rate": 3.3693738878962745e-05, "loss": 1.6751, "step": 8218 }, { "epoch": 0.6186793127458176, "grad_norm": 5.649510860443115, "learning_rate": 3.368221499032653e-05, "loss": 1.631, "step": 8219 }, { "epoch": 0.6187545870264777, "grad_norm": 4.242654800415039, "learning_rate": 3.3670692071735824e-05, "loss": 1.9873, "step": 8220 }, { "epoch": 0.6188298613071379, "grad_norm": 5.126852989196777, "learning_rate": 3.365917012387563e-05, "loss": 1.9031, "step": 8221 }, { "epoch": 0.618905135587798, "grad_norm": 6.010108470916748, "learning_rate": 3.364764914743087e-05, "loss": 2.0317, "step": 8222 }, { "epoch": 0.6189804098684581, "grad_norm": 4.9840569496154785, "learning_rate": 3.363612914308645e-05, "loss": 1.8559, "step": 8223 }, { "epoch": 0.6190556841491184, "grad_norm": 5.7103142738342285, "learning_rate": 3.36246101115272e-05, "loss": 2.093, "step": 8224 }, { "epoch": 0.6191309584297785, "grad_norm": 4.976246356964111, "learning_rate": 3.36130920534379e-05, "loss": 1.5783, "step": 8225 }, { "epoch": 0.6192062327104386, "grad_norm": 4.657334327697754, "learning_rate": 3.3601574969503255e-05, "loss": 1.9017, "step": 8226 }, { "epoch": 0.6192815069910989, "grad_norm": 4.421818256378174, "learning_rate": 3.359005886040792e-05, "loss": 1.7282, "step": 8227 }, { "epoch": 0.619356781271759, "grad_norm": 3.9061472415924072, "learning_rate": 3.35785437268365e-05, "loss": 1.7746, "step": 8228 }, { "epoch": 0.6194320555524191, "grad_norm": 5.525216579437256, "learning_rate": 3.356702956947355e-05, "loss": 1.7409, "step": 8229 }, { "epoch": 0.6195073298330793, "grad_norm": 3.159904956817627, "learning_rate": 3.3555516389003525e-05, "loss": 1.7272, "step": 8230 }, { "epoch": 0.6195826041137394, "grad_norm": 5.123450756072998, "learning_rate": 3.3544004186110886e-05, "loss": 2.1347, "step": 8231 }, { "epoch": 0.6196578783943996, "grad_norm": 5.510494232177734, "learning_rate": 3.353249296147997e-05, "loss": 1.9331, "step": 8232 }, { "epoch": 0.6197331526750598, "grad_norm": 4.655089378356934, "learning_rate": 3.352098271579509e-05, "loss": 2.3762, "step": 8233 }, { "epoch": 0.6198084269557199, "grad_norm": 4.618927001953125, "learning_rate": 3.35094734497405e-05, "loss": 2.1618, "step": 8234 }, { "epoch": 0.61988370123638, "grad_norm": 6.0057477951049805, "learning_rate": 3.34979651640004e-05, "loss": 1.8834, "step": 8235 }, { "epoch": 0.6199589755170403, "grad_norm": 8.42966079711914, "learning_rate": 3.3486457859258934e-05, "loss": 1.8661, "step": 8236 }, { "epoch": 0.6200342497977004, "grad_norm": 4.626075267791748, "learning_rate": 3.347495153620015e-05, "loss": 2.2126, "step": 8237 }, { "epoch": 0.6201095240783605, "grad_norm": 5.205615520477295, "learning_rate": 3.3463446195508094e-05, "loss": 1.9239, "step": 8238 }, { "epoch": 0.6201847983590206, "grad_norm": 5.049977779388428, "learning_rate": 3.34519418378667e-05, "loss": 2.08, "step": 8239 }, { "epoch": 0.6202600726396809, "grad_norm": 5.072951316833496, "learning_rate": 3.344043846395991e-05, "loss": 1.7105, "step": 8240 }, { "epoch": 0.620335346920341, "grad_norm": 4.272696495056152, "learning_rate": 3.342893607447151e-05, "loss": 2.5105, "step": 8241 }, { "epoch": 0.6204106212010011, "grad_norm": 4.273044109344482, "learning_rate": 3.3417434670085335e-05, "loss": 2.1126, "step": 8242 }, { "epoch": 0.6204858954816613, "grad_norm": 3.3404288291931152, "learning_rate": 3.340593425148507e-05, "loss": 1.7529, "step": 8243 }, { "epoch": 0.6205611697623215, "grad_norm": 4.337212085723877, "learning_rate": 3.3394434819354404e-05, "loss": 1.7534, "step": 8244 }, { "epoch": 0.6206364440429816, "grad_norm": 4.941805362701416, "learning_rate": 3.3382936374376935e-05, "loss": 1.5829, "step": 8245 }, { "epoch": 0.6207117183236418, "grad_norm": 5.1706976890563965, "learning_rate": 3.337143891723624e-05, "loss": 2.3988, "step": 8246 }, { "epoch": 0.6207869926043019, "grad_norm": 4.825283050537109, "learning_rate": 3.335994244861577e-05, "loss": 1.911, "step": 8247 }, { "epoch": 0.620862266884962, "grad_norm": 3.969494342803955, "learning_rate": 3.3348446969198984e-05, "loss": 1.7953, "step": 8248 }, { "epoch": 0.6209375411656223, "grad_norm": 4.922825336456299, "learning_rate": 3.3336952479669234e-05, "loss": 1.9632, "step": 8249 }, { "epoch": 0.6210128154462824, "grad_norm": 4.728207111358643, "learning_rate": 3.332545898070987e-05, "loss": 1.7429, "step": 8250 }, { "epoch": 0.6210880897269425, "grad_norm": 4.985260009765625, "learning_rate": 3.3313966473004096e-05, "loss": 1.6192, "step": 8251 }, { "epoch": 0.6211633640076027, "grad_norm": 4.827419757843018, "learning_rate": 3.330247495723515e-05, "loss": 1.8011, "step": 8252 }, { "epoch": 0.6212386382882629, "grad_norm": 4.1476826667785645, "learning_rate": 3.329098443408617e-05, "loss": 1.8888, "step": 8253 }, { "epoch": 0.621313912568923, "grad_norm": 4.269753932952881, "learning_rate": 3.3279494904240206e-05, "loss": 2.1614, "step": 8254 }, { "epoch": 0.6213891868495832, "grad_norm": 4.465358734130859, "learning_rate": 3.3268006368380304e-05, "loss": 1.6303, "step": 8255 }, { "epoch": 0.6214644611302433, "grad_norm": 3.966667652130127, "learning_rate": 3.32565188271894e-05, "loss": 1.7639, "step": 8256 }, { "epoch": 0.6215397354109035, "grad_norm": 3.632319211959839, "learning_rate": 3.3245032281350435e-05, "loss": 1.8971, "step": 8257 }, { "epoch": 0.6216150096915636, "grad_norm": 4.717795372009277, "learning_rate": 3.3233546731546204e-05, "loss": 2.2254, "step": 8258 }, { "epoch": 0.6216902839722238, "grad_norm": 3.8923418521881104, "learning_rate": 3.3222062178459526e-05, "loss": 2.115, "step": 8259 }, { "epoch": 0.6217655582528839, "grad_norm": 4.7175092697143555, "learning_rate": 3.32105786227731e-05, "loss": 1.946, "step": 8260 }, { "epoch": 0.621840832533544, "grad_norm": 4.205885887145996, "learning_rate": 3.31990960651696e-05, "loss": 1.6029, "step": 8261 }, { "epoch": 0.6219161068142043, "grad_norm": 6.618117809295654, "learning_rate": 3.318761450633163e-05, "loss": 2.1752, "step": 8262 }, { "epoch": 0.6219913810948644, "grad_norm": 5.104174613952637, "learning_rate": 3.317613394694175e-05, "loss": 2.1454, "step": 8263 }, { "epoch": 0.6220666553755245, "grad_norm": 7.77531623840332, "learning_rate": 3.316465438768242e-05, "loss": 1.325, "step": 8264 }, { "epoch": 0.6221419296561848, "grad_norm": 5.644231796264648, "learning_rate": 3.3153175829236085e-05, "loss": 2.0972, "step": 8265 }, { "epoch": 0.6222172039368449, "grad_norm": 4.986363410949707, "learning_rate": 3.3141698272285107e-05, "loss": 2.1437, "step": 8266 }, { "epoch": 0.622292478217505, "grad_norm": 3.750397205352783, "learning_rate": 3.313022171751179e-05, "loss": 1.8346, "step": 8267 }, { "epoch": 0.6223677524981652, "grad_norm": 5.335587024688721, "learning_rate": 3.31187461655984e-05, "loss": 1.5425, "step": 8268 }, { "epoch": 0.6224430267788253, "grad_norm": 3.871005058288574, "learning_rate": 3.3107271617227096e-05, "loss": 1.8154, "step": 8269 }, { "epoch": 0.6225183010594855, "grad_norm": 4.961533546447754, "learning_rate": 3.3095798073080055e-05, "loss": 1.8041, "step": 8270 }, { "epoch": 0.6225935753401457, "grad_norm": 3.8872344493865967, "learning_rate": 3.3084325533839275e-05, "loss": 1.5672, "step": 8271 }, { "epoch": 0.6226688496208058, "grad_norm": 3.621957778930664, "learning_rate": 3.3072854000186836e-05, "loss": 1.8466, "step": 8272 }, { "epoch": 0.6227441239014659, "grad_norm": 3.66719913482666, "learning_rate": 3.306138347280464e-05, "loss": 1.8955, "step": 8273 }, { "epoch": 0.6228193981821262, "grad_norm": 4.060410499572754, "learning_rate": 3.304991395237461e-05, "loss": 1.9046, "step": 8274 }, { "epoch": 0.6228946724627863, "grad_norm": 4.263533115386963, "learning_rate": 3.3038445439578544e-05, "loss": 2.0354, "step": 8275 }, { "epoch": 0.6229699467434464, "grad_norm": 4.144182205200195, "learning_rate": 3.302697793509825e-05, "loss": 1.8561, "step": 8276 }, { "epoch": 0.6230452210241066, "grad_norm": 4.593705177307129, "learning_rate": 3.3015511439615384e-05, "loss": 1.7105, "step": 8277 }, { "epoch": 0.6231204953047668, "grad_norm": 4.700996398925781, "learning_rate": 3.300404595381165e-05, "loss": 1.9159, "step": 8278 }, { "epoch": 0.6231957695854269, "grad_norm": 7.516633033752441, "learning_rate": 3.2992581478368614e-05, "loss": 2.63, "step": 8279 }, { "epoch": 0.623271043866087, "grad_norm": 4.484390735626221, "learning_rate": 3.2981118013967815e-05, "loss": 2.0504, "step": 8280 }, { "epoch": 0.6233463181467472, "grad_norm": 4.061007022857666, "learning_rate": 3.29696555612907e-05, "loss": 1.6844, "step": 8281 }, { "epoch": 0.6234215924274074, "grad_norm": 4.173439025878906, "learning_rate": 3.295819412101872e-05, "loss": 1.586, "step": 8282 }, { "epoch": 0.6234968667080675, "grad_norm": 4.952429294586182, "learning_rate": 3.294673369383318e-05, "loss": 2.1062, "step": 8283 }, { "epoch": 0.6235721409887277, "grad_norm": 5.733234405517578, "learning_rate": 3.2935274280415385e-05, "loss": 2.3618, "step": 8284 }, { "epoch": 0.6236474152693878, "grad_norm": 3.6483817100524902, "learning_rate": 3.2923815881446594e-05, "loss": 1.8382, "step": 8285 }, { "epoch": 0.623722689550048, "grad_norm": 4.784677028656006, "learning_rate": 3.291235849760794e-05, "loss": 1.767, "step": 8286 }, { "epoch": 0.6237979638307082, "grad_norm": 5.480867385864258, "learning_rate": 3.290090212958056e-05, "loss": 2.1439, "step": 8287 }, { "epoch": 0.6238732381113683, "grad_norm": 5.025787353515625, "learning_rate": 3.2889446778045464e-05, "loss": 1.8821, "step": 8288 }, { "epoch": 0.6239485123920284, "grad_norm": 4.936732769012451, "learning_rate": 3.287799244368367e-05, "loss": 1.8836, "step": 8289 }, { "epoch": 0.6240237866726887, "grad_norm": 3.6807878017425537, "learning_rate": 3.286653912717609e-05, "loss": 1.7756, "step": 8290 }, { "epoch": 0.6240990609533488, "grad_norm": 4.392080307006836, "learning_rate": 3.285508682920361e-05, "loss": 1.8883, "step": 8291 }, { "epoch": 0.6241743352340089, "grad_norm": 4.467647075653076, "learning_rate": 3.2843635550447004e-05, "loss": 1.6686, "step": 8292 }, { "epoch": 0.6242496095146691, "grad_norm": 3.7380247116088867, "learning_rate": 3.283218529158706e-05, "loss": 2.0058, "step": 8293 }, { "epoch": 0.6243248837953292, "grad_norm": 7.30566930770874, "learning_rate": 3.282073605330443e-05, "loss": 1.808, "step": 8294 }, { "epoch": 0.6244001580759894, "grad_norm": 4.505008220672607, "learning_rate": 3.280928783627975e-05, "loss": 1.8222, "step": 8295 }, { "epoch": 0.6244754323566496, "grad_norm": 4.26721715927124, "learning_rate": 3.279784064119357e-05, "loss": 2.1332, "step": 8296 }, { "epoch": 0.6245507066373097, "grad_norm": 5.7599382400512695, "learning_rate": 3.2786394468726436e-05, "loss": 1.5231, "step": 8297 }, { "epoch": 0.6246259809179698, "grad_norm": 4.933229923248291, "learning_rate": 3.277494931955873e-05, "loss": 1.7742, "step": 8298 }, { "epoch": 0.62470125519863, "grad_norm": 4.214962005615234, "learning_rate": 3.2763505194370866e-05, "loss": 2.0699, "step": 8299 }, { "epoch": 0.6247765294792902, "grad_norm": 5.859561920166016, "learning_rate": 3.275206209384317e-05, "loss": 2.0165, "step": 8300 }, { "epoch": 0.6248518037599503, "grad_norm": 5.256788730621338, "learning_rate": 3.2740620018655885e-05, "loss": 1.9123, "step": 8301 }, { "epoch": 0.6249270780406104, "grad_norm": 7.470607280731201, "learning_rate": 3.272917896948923e-05, "loss": 1.8502, "step": 8302 }, { "epoch": 0.6250023523212707, "grad_norm": 4.81989860534668, "learning_rate": 3.2717738947023314e-05, "loss": 2.2194, "step": 8303 }, { "epoch": 0.6250776266019308, "grad_norm": 6.651296138763428, "learning_rate": 3.2706299951938255e-05, "loss": 1.789, "step": 8304 }, { "epoch": 0.6251529008825909, "grad_norm": 4.168320655822754, "learning_rate": 3.269486198491402e-05, "loss": 1.7555, "step": 8305 }, { "epoch": 0.6252281751632511, "grad_norm": 4.957070827484131, "learning_rate": 3.268342504663059e-05, "loss": 2.1281, "step": 8306 }, { "epoch": 0.6253034494439113, "grad_norm": 5.425527572631836, "learning_rate": 3.267198913776785e-05, "loss": 1.7913, "step": 8307 }, { "epoch": 0.6253787237245714, "grad_norm": 4.317502975463867, "learning_rate": 3.266055425900565e-05, "loss": 1.8442, "step": 8308 }, { "epoch": 0.6254539980052316, "grad_norm": 5.058265209197998, "learning_rate": 3.2649120411023734e-05, "loss": 1.7928, "step": 8309 }, { "epoch": 0.6255292722858917, "grad_norm": 7.60575532913208, "learning_rate": 3.263768759450183e-05, "loss": 2.0965, "step": 8310 }, { "epoch": 0.6256045465665518, "grad_norm": 5.555531978607178, "learning_rate": 3.262625581011957e-05, "loss": 2.057, "step": 8311 }, { "epoch": 0.6256798208472121, "grad_norm": 4.312882900238037, "learning_rate": 3.2614825058556573e-05, "loss": 1.5621, "step": 8312 }, { "epoch": 0.6257550951278722, "grad_norm": 4.7362871170043945, "learning_rate": 3.260339534049232e-05, "loss": 1.9293, "step": 8313 }, { "epoch": 0.6258303694085323, "grad_norm": 4.182237148284912, "learning_rate": 3.259196665660632e-05, "loss": 1.8929, "step": 8314 }, { "epoch": 0.6259056436891925, "grad_norm": 4.8938374519348145, "learning_rate": 3.2580539007577934e-05, "loss": 2.2774, "step": 8315 }, { "epoch": 0.6259809179698527, "grad_norm": 4.275259017944336, "learning_rate": 3.256911239408652e-05, "loss": 1.8442, "step": 8316 }, { "epoch": 0.6260561922505128, "grad_norm": 5.5678486824035645, "learning_rate": 3.255768681681137e-05, "loss": 1.799, "step": 8317 }, { "epoch": 0.6261314665311729, "grad_norm": 5.898873329162598, "learning_rate": 3.254626227643168e-05, "loss": 1.6108, "step": 8318 }, { "epoch": 0.6262067408118331, "grad_norm": 5.793529510498047, "learning_rate": 3.253483877362664e-05, "loss": 1.7145, "step": 8319 }, { "epoch": 0.6262820150924933, "grad_norm": 5.791046142578125, "learning_rate": 3.252341630907531e-05, "loss": 1.6596, "step": 8320 }, { "epoch": 0.6263572893731534, "grad_norm": 3.4461171627044678, "learning_rate": 3.251199488345674e-05, "loss": 1.5891, "step": 8321 }, { "epoch": 0.6264325636538136, "grad_norm": 4.364080905914307, "learning_rate": 3.2500574497449874e-05, "loss": 2.0532, "step": 8322 }, { "epoch": 0.6265078379344737, "grad_norm": 4.662264823913574, "learning_rate": 3.2489155151733665e-05, "loss": 2.3847, "step": 8323 }, { "epoch": 0.6265831122151339, "grad_norm": 5.796299934387207, "learning_rate": 3.2477736846986924e-05, "loss": 2.0372, "step": 8324 }, { "epoch": 0.6266583864957941, "grad_norm": 5.66961669921875, "learning_rate": 3.2466319583888464e-05, "loss": 1.543, "step": 8325 }, { "epoch": 0.6267336607764542, "grad_norm": 4.356776714324951, "learning_rate": 3.245490336311698e-05, "loss": 1.6971, "step": 8326 }, { "epoch": 0.6268089350571143, "grad_norm": 4.802499294281006, "learning_rate": 3.2443488185351154e-05, "loss": 2.1443, "step": 8327 }, { "epoch": 0.6268842093377746, "grad_norm": 3.842556953430176, "learning_rate": 3.2432074051269576e-05, "loss": 1.7824, "step": 8328 }, { "epoch": 0.6269594836184347, "grad_norm": 4.327637195587158, "learning_rate": 3.242066096155081e-05, "loss": 1.8276, "step": 8329 }, { "epoch": 0.6270347578990948, "grad_norm": 4.579751014709473, "learning_rate": 3.240924891687328e-05, "loss": 1.7164, "step": 8330 }, { "epoch": 0.627110032179755, "grad_norm": 5.32450008392334, "learning_rate": 3.239783791791546e-05, "loss": 1.871, "step": 8331 }, { "epoch": 0.6271853064604151, "grad_norm": 4.403231143951416, "learning_rate": 3.2386427965355636e-05, "loss": 1.924, "step": 8332 }, { "epoch": 0.6272605807410753, "grad_norm": 7.266706466674805, "learning_rate": 3.2375019059872144e-05, "loss": 2.0374, "step": 8333 }, { "epoch": 0.6273358550217355, "grad_norm": 5.705352783203125, "learning_rate": 3.236361120214319e-05, "loss": 1.929, "step": 8334 }, { "epoch": 0.6274111293023956, "grad_norm": 4.518496513366699, "learning_rate": 3.235220439284695e-05, "loss": 1.7801, "step": 8335 }, { "epoch": 0.6274864035830557, "grad_norm": 5.234706401824951, "learning_rate": 3.234079863266154e-05, "loss": 1.5562, "step": 8336 }, { "epoch": 0.6275616778637159, "grad_norm": 4.721508979797363, "learning_rate": 3.232939392226496e-05, "loss": 1.6654, "step": 8337 }, { "epoch": 0.6276369521443761, "grad_norm": 5.178009510040283, "learning_rate": 3.2317990262335215e-05, "loss": 1.9676, "step": 8338 }, { "epoch": 0.6277122264250362, "grad_norm": 4.755590438842773, "learning_rate": 3.230658765355021e-05, "loss": 1.9271, "step": 8339 }, { "epoch": 0.6277875007056963, "grad_norm": 4.829550266265869, "learning_rate": 3.2295186096587815e-05, "loss": 2.1921, "step": 8340 }, { "epoch": 0.6278627749863566, "grad_norm": 4.162178039550781, "learning_rate": 3.2283785592125785e-05, "loss": 1.689, "step": 8341 }, { "epoch": 0.6279380492670167, "grad_norm": 6.179942607879639, "learning_rate": 3.2272386140841895e-05, "loss": 1.8758, "step": 8342 }, { "epoch": 0.6280133235476768, "grad_norm": 4.728642463684082, "learning_rate": 3.226098774341376e-05, "loss": 1.5701, "step": 8343 }, { "epoch": 0.628088597828337, "grad_norm": 5.58257532119751, "learning_rate": 3.224959040051902e-05, "loss": 1.6969, "step": 8344 }, { "epoch": 0.6281638721089972, "grad_norm": 4.4962897300720215, "learning_rate": 3.223819411283517e-05, "loss": 1.7774, "step": 8345 }, { "epoch": 0.6282391463896573, "grad_norm": 4.467648029327393, "learning_rate": 3.222679888103975e-05, "loss": 1.9221, "step": 8346 }, { "epoch": 0.6283144206703175, "grad_norm": 4.631173133850098, "learning_rate": 3.221540470581011e-05, "loss": 1.8135, "step": 8347 }, { "epoch": 0.6283896949509776, "grad_norm": 7.60060453414917, "learning_rate": 3.220401158782365e-05, "loss": 2.0137, "step": 8348 }, { "epoch": 0.6284649692316377, "grad_norm": 6.273983955383301, "learning_rate": 3.219261952775764e-05, "loss": 1.7037, "step": 8349 }, { "epoch": 0.628540243512298, "grad_norm": 6.07139253616333, "learning_rate": 3.218122852628929e-05, "loss": 2.0992, "step": 8350 }, { "epoch": 0.6286155177929581, "grad_norm": 4.815303802490234, "learning_rate": 3.216983858409579e-05, "loss": 1.4361, "step": 8351 }, { "epoch": 0.6286907920736182, "grad_norm": 6.025692939758301, "learning_rate": 3.215844970185421e-05, "loss": 1.7152, "step": 8352 }, { "epoch": 0.6287660663542785, "grad_norm": 7.418488502502441, "learning_rate": 3.214706188024162e-05, "loss": 1.9619, "step": 8353 }, { "epoch": 0.6288413406349386, "grad_norm": 3.55694842338562, "learning_rate": 3.213567511993497e-05, "loss": 1.9477, "step": 8354 }, { "epoch": 0.6289166149155987, "grad_norm": 3.8427464962005615, "learning_rate": 3.2124289421611175e-05, "loss": 1.7306, "step": 8355 }, { "epoch": 0.6289918891962588, "grad_norm": 4.430111885070801, "learning_rate": 3.211290478594707e-05, "loss": 1.8016, "step": 8356 }, { "epoch": 0.629067163476919, "grad_norm": 4.9731764793396, "learning_rate": 3.210152121361948e-05, "loss": 1.6231, "step": 8357 }, { "epoch": 0.6291424377575792, "grad_norm": 4.950997829437256, "learning_rate": 3.209013870530507e-05, "loss": 1.6704, "step": 8358 }, { "epoch": 0.6292177120382393, "grad_norm": 4.167163372039795, "learning_rate": 3.207875726168055e-05, "loss": 1.9497, "step": 8359 }, { "epoch": 0.6292929863188995, "grad_norm": 3.9677810668945312, "learning_rate": 3.2067376883422464e-05, "loss": 1.9058, "step": 8360 }, { "epoch": 0.6293682605995596, "grad_norm": 4.717792510986328, "learning_rate": 3.205599757120737e-05, "loss": 1.7056, "step": 8361 }, { "epoch": 0.6294435348802198, "grad_norm": 5.162610054016113, "learning_rate": 3.204461932571174e-05, "loss": 1.8545, "step": 8362 }, { "epoch": 0.62951880916088, "grad_norm": 5.355527877807617, "learning_rate": 3.203324214761198e-05, "loss": 1.9754, "step": 8363 }, { "epoch": 0.6295940834415401, "grad_norm": 4.509415149688721, "learning_rate": 3.202186603758442e-05, "loss": 1.9866, "step": 8364 }, { "epoch": 0.6296693577222002, "grad_norm": 4.520984649658203, "learning_rate": 3.201049099630532e-05, "loss": 1.8496, "step": 8365 }, { "epoch": 0.6297446320028605, "grad_norm": 4.6440205574035645, "learning_rate": 3.1999117024450924e-05, "loss": 1.9134, "step": 8366 }, { "epoch": 0.6298199062835206, "grad_norm": 3.9407575130462646, "learning_rate": 3.198774412269736e-05, "loss": 1.7924, "step": 8367 }, { "epoch": 0.6298951805641807, "grad_norm": 3.958294630050659, "learning_rate": 3.197637229172076e-05, "loss": 2.0424, "step": 8368 }, { "epoch": 0.6299704548448409, "grad_norm": 6.674445629119873, "learning_rate": 3.1965001532197075e-05, "loss": 2.0018, "step": 8369 }, { "epoch": 0.630045729125501, "grad_norm": 3.707188367843628, "learning_rate": 3.1953631844802327e-05, "loss": 1.8018, "step": 8370 }, { "epoch": 0.6301210034061612, "grad_norm": 5.819868087768555, "learning_rate": 3.194226323021237e-05, "loss": 1.6668, "step": 8371 }, { "epoch": 0.6301962776868214, "grad_norm": 4.373745918273926, "learning_rate": 3.1930895689103054e-05, "loss": 1.6442, "step": 8372 }, { "epoch": 0.6302715519674815, "grad_norm": 3.326568603515625, "learning_rate": 3.1919529222150136e-05, "loss": 1.7995, "step": 8373 }, { "epoch": 0.6303468262481416, "grad_norm": 6.456649303436279, "learning_rate": 3.190816383002934e-05, "loss": 1.7915, "step": 8374 }, { "epoch": 0.6304221005288019, "grad_norm": 4.06173849105835, "learning_rate": 3.189679951341628e-05, "loss": 1.7129, "step": 8375 }, { "epoch": 0.630497374809462, "grad_norm": 6.115271091461182, "learning_rate": 3.1885436272986566e-05, "loss": 1.6972, "step": 8376 }, { "epoch": 0.6305726490901221, "grad_norm": 4.842566967010498, "learning_rate": 3.1874074109415666e-05, "loss": 1.8554, "step": 8377 }, { "epoch": 0.6306479233707822, "grad_norm": 3.90193772315979, "learning_rate": 3.186271302337906e-05, "loss": 1.9724, "step": 8378 }, { "epoch": 0.6307231976514425, "grad_norm": 4.660115718841553, "learning_rate": 3.185135301555212e-05, "loss": 1.7914, "step": 8379 }, { "epoch": 0.6307984719321026, "grad_norm": 5.187869548797607, "learning_rate": 3.183999408661016e-05, "loss": 1.9176, "step": 8380 }, { "epoch": 0.6308737462127627, "grad_norm": 3.9580442905426025, "learning_rate": 3.182863623722847e-05, "loss": 2.0853, "step": 8381 }, { "epoch": 0.6309490204934229, "grad_norm": 3.8213765621185303, "learning_rate": 3.18172794680822e-05, "loss": 2.0184, "step": 8382 }, { "epoch": 0.6310242947740831, "grad_norm": 4.1299662590026855, "learning_rate": 3.180592377984649e-05, "loss": 1.8766, "step": 8383 }, { "epoch": 0.6310995690547432, "grad_norm": 4.048853874206543, "learning_rate": 3.179456917319641e-05, "loss": 1.7417, "step": 8384 }, { "epoch": 0.6311748433354034, "grad_norm": 4.777251720428467, "learning_rate": 3.178321564880696e-05, "loss": 1.7942, "step": 8385 }, { "epoch": 0.6312501176160635, "grad_norm": 3.9705843925476074, "learning_rate": 3.177186320735306e-05, "loss": 1.6337, "step": 8386 }, { "epoch": 0.6313253918967237, "grad_norm": 3.4834516048431396, "learning_rate": 3.1760511849509616e-05, "loss": 1.9068, "step": 8387 }, { "epoch": 0.6314006661773839, "grad_norm": 4.639775276184082, "learning_rate": 3.174916157595138e-05, "loss": 1.9823, "step": 8388 }, { "epoch": 0.631475940458044, "grad_norm": 4.269222259521484, "learning_rate": 3.173781238735313e-05, "loss": 1.6385, "step": 8389 }, { "epoch": 0.6315512147387041, "grad_norm": 4.1870622634887695, "learning_rate": 3.1726464284389527e-05, "loss": 1.7807, "step": 8390 }, { "epoch": 0.6316264890193644, "grad_norm": 3.809448480606079, "learning_rate": 3.171511726773521e-05, "loss": 1.9132, "step": 8391 }, { "epoch": 0.6317017633000245, "grad_norm": 4.513881683349609, "learning_rate": 3.170377133806469e-05, "loss": 2.1446, "step": 8392 }, { "epoch": 0.6317770375806846, "grad_norm": 4.269097805023193, "learning_rate": 3.1692426496052465e-05, "loss": 1.945, "step": 8393 }, { "epoch": 0.6318523118613448, "grad_norm": 4.71463680267334, "learning_rate": 3.1681082742372955e-05, "loss": 1.6865, "step": 8394 }, { "epoch": 0.631927586142005, "grad_norm": 5.84751033782959, "learning_rate": 3.166974007770053e-05, "loss": 1.8674, "step": 8395 }, { "epoch": 0.6320028604226651, "grad_norm": 5.969925403594971, "learning_rate": 3.1658398502709436e-05, "loss": 1.8413, "step": 8396 }, { "epoch": 0.6320781347033252, "grad_norm": 3.561568021774292, "learning_rate": 3.164705801807393e-05, "loss": 1.8048, "step": 8397 }, { "epoch": 0.6321534089839854, "grad_norm": 5.3204345703125, "learning_rate": 3.163571862446818e-05, "loss": 1.6532, "step": 8398 }, { "epoch": 0.6322286832646455, "grad_norm": 6.0735931396484375, "learning_rate": 3.162438032256625e-05, "loss": 1.8224, "step": 8399 }, { "epoch": 0.6323039575453057, "grad_norm": 4.502913475036621, "learning_rate": 3.1613043113042186e-05, "loss": 1.6187, "step": 8400 }, { "epoch": 0.6323792318259659, "grad_norm": 4.421908855438232, "learning_rate": 3.160170699656995e-05, "loss": 1.8675, "step": 8401 }, { "epoch": 0.632454506106626, "grad_norm": 6.09163236618042, "learning_rate": 3.1590371973823455e-05, "loss": 1.9831, "step": 8402 }, { "epoch": 0.6325297803872861, "grad_norm": 4.58880090713501, "learning_rate": 3.157903804547651e-05, "loss": 2.0097, "step": 8403 }, { "epoch": 0.6326050546679464, "grad_norm": 4.020610809326172, "learning_rate": 3.1567705212202915e-05, "loss": 1.88, "step": 8404 }, { "epoch": 0.6326803289486065, "grad_norm": 5.396985054016113, "learning_rate": 3.155637347467633e-05, "loss": 2.2428, "step": 8405 }, { "epoch": 0.6327556032292666, "grad_norm": 4.30937385559082, "learning_rate": 3.1545042833570435e-05, "loss": 1.9898, "step": 8406 }, { "epoch": 0.6328308775099268, "grad_norm": 4.43129825592041, "learning_rate": 3.1533713289558784e-05, "loss": 1.9185, "step": 8407 }, { "epoch": 0.632906151790587, "grad_norm": 4.572925090789795, "learning_rate": 3.152238484331491e-05, "loss": 1.9966, "step": 8408 }, { "epoch": 0.6329814260712471, "grad_norm": 4.606265068054199, "learning_rate": 3.151105749551222e-05, "loss": 1.4964, "step": 8409 }, { "epoch": 0.6330567003519073, "grad_norm": 4.03704309463501, "learning_rate": 3.1499731246824124e-05, "loss": 1.9266, "step": 8410 }, { "epoch": 0.6331319746325674, "grad_norm": 3.9234256744384766, "learning_rate": 3.1488406097923905e-05, "loss": 2.1381, "step": 8411 }, { "epoch": 0.6332072489132275, "grad_norm": 3.9247612953186035, "learning_rate": 3.147708204948483e-05, "loss": 2.0783, "step": 8412 }, { "epoch": 0.6332825231938878, "grad_norm": 5.843477725982666, "learning_rate": 3.1465759102180106e-05, "loss": 1.7687, "step": 8413 }, { "epoch": 0.6333577974745479, "grad_norm": 5.497551441192627, "learning_rate": 3.145443725668279e-05, "loss": 1.8308, "step": 8414 }, { "epoch": 0.633433071755208, "grad_norm": 3.995081663131714, "learning_rate": 3.1443116513665995e-05, "loss": 2.0809, "step": 8415 }, { "epoch": 0.6335083460358681, "grad_norm": 5.792292594909668, "learning_rate": 3.143179687380265e-05, "loss": 1.5218, "step": 8416 }, { "epoch": 0.6335836203165284, "grad_norm": 5.019339561462402, "learning_rate": 3.1420478337765726e-05, "loss": 2.0032, "step": 8417 }, { "epoch": 0.6336588945971885, "grad_norm": 5.25856876373291, "learning_rate": 3.1409160906228036e-05, "loss": 1.8416, "step": 8418 }, { "epoch": 0.6337341688778486, "grad_norm": 3.918635368347168, "learning_rate": 3.1397844579862404e-05, "loss": 1.658, "step": 8419 }, { "epoch": 0.6338094431585088, "grad_norm": 5.040243148803711, "learning_rate": 3.138652935934152e-05, "loss": 1.622, "step": 8420 }, { "epoch": 0.633884717439169, "grad_norm": 4.454802513122559, "learning_rate": 3.137521524533807e-05, "loss": 1.7687, "step": 8421 }, { "epoch": 0.6339599917198291, "grad_norm": 3.7485463619232178, "learning_rate": 3.1363902238524625e-05, "loss": 1.6373, "step": 8422 }, { "epoch": 0.6340352660004893, "grad_norm": 6.195174217224121, "learning_rate": 3.135259033957373e-05, "loss": 2.1407, "step": 8423 }, { "epoch": 0.6341105402811494, "grad_norm": 3.882117509841919, "learning_rate": 3.134127954915782e-05, "loss": 1.7567, "step": 8424 }, { "epoch": 0.6341858145618096, "grad_norm": 5.763416290283203, "learning_rate": 3.1329969867949316e-05, "loss": 1.5771, "step": 8425 }, { "epoch": 0.6342610888424698, "grad_norm": 4.590452671051025, "learning_rate": 3.131866129662052e-05, "loss": 2.0444, "step": 8426 }, { "epoch": 0.6343363631231299, "grad_norm": 4.413145542144775, "learning_rate": 3.130735383584372e-05, "loss": 2.1113, "step": 8427 }, { "epoch": 0.63441163740379, "grad_norm": 4.523130416870117, "learning_rate": 3.129604748629108e-05, "loss": 1.8543, "step": 8428 }, { "epoch": 0.6344869116844503, "grad_norm": 10.310506820678711, "learning_rate": 3.1284742248634755e-05, "loss": 2.0874, "step": 8429 }, { "epoch": 0.6345621859651104, "grad_norm": 4.193033218383789, "learning_rate": 3.127343812354682e-05, "loss": 1.7977, "step": 8430 }, { "epoch": 0.6346374602457705, "grad_norm": 6.753688812255859, "learning_rate": 3.126213511169924e-05, "loss": 1.9621, "step": 8431 }, { "epoch": 0.6347127345264307, "grad_norm": 5.896317481994629, "learning_rate": 3.125083321376398e-05, "loss": 2.3063, "step": 8432 }, { "epoch": 0.6347880088070909, "grad_norm": 4.846764087677002, "learning_rate": 3.1239532430412866e-05, "loss": 1.7856, "step": 8433 }, { "epoch": 0.634863283087751, "grad_norm": 4.583929061889648, "learning_rate": 3.1228232762317734e-05, "loss": 2.1295, "step": 8434 }, { "epoch": 0.6349385573684111, "grad_norm": 4.837149620056152, "learning_rate": 3.1216934210150286e-05, "loss": 1.7093, "step": 8435 }, { "epoch": 0.6350138316490713, "grad_norm": 4.8804144859313965, "learning_rate": 3.120563677458224e-05, "loss": 2.0385, "step": 8436 }, { "epoch": 0.6350891059297314, "grad_norm": 4.434991836547852, "learning_rate": 3.119434045628512e-05, "loss": 1.8697, "step": 8437 }, { "epoch": 0.6351643802103916, "grad_norm": 6.151554584503174, "learning_rate": 3.118304525593052e-05, "loss": 1.9632, "step": 8438 }, { "epoch": 0.6352396544910518, "grad_norm": 5.7576904296875, "learning_rate": 3.117175117418988e-05, "loss": 1.942, "step": 8439 }, { "epoch": 0.6353149287717119, "grad_norm": 5.128333568572998, "learning_rate": 3.116045821173462e-05, "loss": 1.904, "step": 8440 }, { "epoch": 0.635390203052372, "grad_norm": 5.1462931632995605, "learning_rate": 3.114916636923605e-05, "loss": 2.0165, "step": 8441 }, { "epoch": 0.6354654773330323, "grad_norm": 5.546964645385742, "learning_rate": 3.1137875647365466e-05, "loss": 1.8632, "step": 8442 }, { "epoch": 0.6355407516136924, "grad_norm": 4.864556312561035, "learning_rate": 3.112658604679404e-05, "loss": 1.8112, "step": 8443 }, { "epoch": 0.6356160258943525, "grad_norm": 4.2855634689331055, "learning_rate": 3.1115297568192915e-05, "loss": 2.104, "step": 8444 }, { "epoch": 0.6356913001750127, "grad_norm": 3.92118239402771, "learning_rate": 3.110401021223317e-05, "loss": 1.8094, "step": 8445 }, { "epoch": 0.6357665744556729, "grad_norm": 5.776573181152344, "learning_rate": 3.1092723979585795e-05, "loss": 1.5659, "step": 8446 }, { "epoch": 0.635841848736333, "grad_norm": 5.143870830535889, "learning_rate": 3.108143887092175e-05, "loss": 1.8178, "step": 8447 }, { "epoch": 0.6359171230169932, "grad_norm": 4.2534308433532715, "learning_rate": 3.107015488691185e-05, "loss": 1.919, "step": 8448 }, { "epoch": 0.6359923972976533, "grad_norm": 5.454699993133545, "learning_rate": 3.105887202822696e-05, "loss": 1.9768, "step": 8449 }, { "epoch": 0.6360676715783135, "grad_norm": 4.441861629486084, "learning_rate": 3.1047590295537754e-05, "loss": 1.7199, "step": 8450 }, { "epoch": 0.6361429458589737, "grad_norm": 3.8113224506378174, "learning_rate": 3.103630968951493e-05, "loss": 1.6083, "step": 8451 }, { "epoch": 0.6362182201396338, "grad_norm": 6.349819660186768, "learning_rate": 3.102503021082907e-05, "loss": 1.761, "step": 8452 }, { "epoch": 0.6362934944202939, "grad_norm": 3.772465705871582, "learning_rate": 3.101375186015075e-05, "loss": 1.9929, "step": 8453 }, { "epoch": 0.636368768700954, "grad_norm": 4.067782878875732, "learning_rate": 3.1002474638150376e-05, "loss": 1.946, "step": 8454 }, { "epoch": 0.6364440429816143, "grad_norm": 5.269015312194824, "learning_rate": 3.099119854549839e-05, "loss": 1.9848, "step": 8455 }, { "epoch": 0.6365193172622744, "grad_norm": 5.123625755310059, "learning_rate": 3.097992358286509e-05, "loss": 2.191, "step": 8456 }, { "epoch": 0.6365945915429345, "grad_norm": 4.4515838623046875, "learning_rate": 3.0968649750920784e-05, "loss": 1.8606, "step": 8457 }, { "epoch": 0.6366698658235947, "grad_norm": 5.074526786804199, "learning_rate": 3.0957377050335624e-05, "loss": 2.0305, "step": 8458 }, { "epoch": 0.6367451401042549, "grad_norm": 4.495479583740234, "learning_rate": 3.0946105481779776e-05, "loss": 1.9195, "step": 8459 }, { "epoch": 0.636820414384915, "grad_norm": 3.6234099864959717, "learning_rate": 3.093483504592326e-05, "loss": 2.0614, "step": 8460 }, { "epoch": 0.6368956886655752, "grad_norm": 5.036413192749023, "learning_rate": 3.092356574343611e-05, "loss": 2.0836, "step": 8461 }, { "epoch": 0.6369709629462353, "grad_norm": 5.620336532592773, "learning_rate": 3.091229757498825e-05, "loss": 2.3991, "step": 8462 }, { "epoch": 0.6370462372268955, "grad_norm": 4.834925174713135, "learning_rate": 3.090103054124951e-05, "loss": 1.6901, "step": 8463 }, { "epoch": 0.6371215115075557, "grad_norm": 7.271121501922607, "learning_rate": 3.0889764642889725e-05, "loss": 1.3124, "step": 8464 }, { "epoch": 0.6371967857882158, "grad_norm": 4.830792427062988, "learning_rate": 3.087849988057858e-05, "loss": 1.9993, "step": 8465 }, { "epoch": 0.6372720600688759, "grad_norm": 4.118501663208008, "learning_rate": 3.0867236254985764e-05, "loss": 1.7187, "step": 8466 }, { "epoch": 0.6373473343495362, "grad_norm": 5.906317710876465, "learning_rate": 3.085597376678084e-05, "loss": 1.9745, "step": 8467 }, { "epoch": 0.6374226086301963, "grad_norm": 4.795383930206299, "learning_rate": 3.084471241663337e-05, "loss": 2.0147, "step": 8468 }, { "epoch": 0.6374978829108564, "grad_norm": 5.179449081420898, "learning_rate": 3.083345220521276e-05, "loss": 1.986, "step": 8469 }, { "epoch": 0.6375731571915166, "grad_norm": 6.1793341636657715, "learning_rate": 3.082219313318844e-05, "loss": 1.9492, "step": 8470 }, { "epoch": 0.6376484314721768, "grad_norm": 4.8410420417785645, "learning_rate": 3.0810935201229694e-05, "loss": 1.9789, "step": 8471 }, { "epoch": 0.6377237057528369, "grad_norm": 4.241498947143555, "learning_rate": 3.0799678410005806e-05, "loss": 1.6805, "step": 8472 }, { "epoch": 0.6377989800334971, "grad_norm": 4.631001949310303, "learning_rate": 3.0788422760185926e-05, "loss": 2.0209, "step": 8473 }, { "epoch": 0.6378742543141572, "grad_norm": 4.758254051208496, "learning_rate": 3.0777168252439217e-05, "loss": 2.103, "step": 8474 }, { "epoch": 0.6379495285948173, "grad_norm": 5.047450065612793, "learning_rate": 3.0765914887434665e-05, "loss": 2.0047, "step": 8475 }, { "epoch": 0.6380248028754775, "grad_norm": 5.941407680511475, "learning_rate": 3.075466266584132e-05, "loss": 1.8381, "step": 8476 }, { "epoch": 0.6381000771561377, "grad_norm": 3.9722232818603516, "learning_rate": 3.074341158832803e-05, "loss": 1.9066, "step": 8477 }, { "epoch": 0.6381753514367978, "grad_norm": 7.776758193969727, "learning_rate": 3.073216165556367e-05, "loss": 1.8474, "step": 8478 }, { "epoch": 0.6382506257174579, "grad_norm": 3.987119436264038, "learning_rate": 3.072091286821702e-05, "loss": 2.0514, "step": 8479 }, { "epoch": 0.6383258999981182, "grad_norm": 4.9225664138793945, "learning_rate": 3.070966522695677e-05, "loss": 1.7064, "step": 8480 }, { "epoch": 0.6384011742787783, "grad_norm": 3.492079019546509, "learning_rate": 3.069841873245161e-05, "loss": 1.7445, "step": 8481 }, { "epoch": 0.6384764485594384, "grad_norm": 7.026560306549072, "learning_rate": 3.068717338537004e-05, "loss": 2.3793, "step": 8482 }, { "epoch": 0.6385517228400986, "grad_norm": 6.624011993408203, "learning_rate": 3.06759291863806e-05, "loss": 2.1891, "step": 8483 }, { "epoch": 0.6386269971207588, "grad_norm": 5.279020309448242, "learning_rate": 3.066468613615173e-05, "loss": 1.8185, "step": 8484 }, { "epoch": 0.6387022714014189, "grad_norm": 5.62544059753418, "learning_rate": 3.0653444235351805e-05, "loss": 1.7669, "step": 8485 }, { "epoch": 0.6387775456820791, "grad_norm": 5.458271026611328, "learning_rate": 3.064220348464908e-05, "loss": 1.5607, "step": 8486 }, { "epoch": 0.6388528199627392, "grad_norm": 4.97369384765625, "learning_rate": 3.0630963884711836e-05, "loss": 1.7731, "step": 8487 }, { "epoch": 0.6389280942433994, "grad_norm": 4.403059959411621, "learning_rate": 3.06197254362082e-05, "loss": 1.8589, "step": 8488 }, { "epoch": 0.6390033685240596, "grad_norm": 4.2187628746032715, "learning_rate": 3.060848813980628e-05, "loss": 1.6185, "step": 8489 }, { "epoch": 0.6390786428047197, "grad_norm": 4.045538902282715, "learning_rate": 3.05972519961741e-05, "loss": 2.0817, "step": 8490 }, { "epoch": 0.6391539170853798, "grad_norm": 6.184861183166504, "learning_rate": 3.058601700597963e-05, "loss": 1.9642, "step": 8491 }, { "epoch": 0.6392291913660401, "grad_norm": 8.042470932006836, "learning_rate": 3.0574783169890724e-05, "loss": 2.1537, "step": 8492 }, { "epoch": 0.6393044656467002, "grad_norm": 5.143957614898682, "learning_rate": 3.056355048857522e-05, "loss": 1.6169, "step": 8493 }, { "epoch": 0.6393797399273603, "grad_norm": 9.582860946655273, "learning_rate": 3.0552318962700885e-05, "loss": 1.9245, "step": 8494 }, { "epoch": 0.6394550142080204, "grad_norm": 3.644885540008545, "learning_rate": 3.0541088592935374e-05, "loss": 1.7739, "step": 8495 }, { "epoch": 0.6395302884886807, "grad_norm": 3.9607558250427246, "learning_rate": 3.0529859379946334e-05, "loss": 1.7368, "step": 8496 }, { "epoch": 0.6396055627693408, "grad_norm": 5.704013347625732, "learning_rate": 3.051863132440126e-05, "loss": 1.6579, "step": 8497 }, { "epoch": 0.6396808370500009, "grad_norm": 4.952240943908691, "learning_rate": 3.0507404426967685e-05, "loss": 1.6261, "step": 8498 }, { "epoch": 0.6397561113306611, "grad_norm": 4.565150260925293, "learning_rate": 3.049617868831296e-05, "loss": 1.739, "step": 8499 }, { "epoch": 0.6398313856113212, "grad_norm": 3.678384780883789, "learning_rate": 3.0484954109104462e-05, "loss": 1.8142, "step": 8500 }, { "epoch": 0.6399066598919814, "grad_norm": 4.516523838043213, "learning_rate": 3.047373069000944e-05, "loss": 1.8722, "step": 8501 }, { "epoch": 0.6399819341726416, "grad_norm": 4.493857383728027, "learning_rate": 3.0462508431695112e-05, "loss": 1.9746, "step": 8502 }, { "epoch": 0.6400572084533017, "grad_norm": 3.938154697418213, "learning_rate": 3.045128733482859e-05, "loss": 1.7696, "step": 8503 }, { "epoch": 0.6401324827339618, "grad_norm": 4.596213340759277, "learning_rate": 3.0440067400076954e-05, "loss": 1.6798, "step": 8504 }, { "epoch": 0.6402077570146221, "grad_norm": 7.9469146728515625, "learning_rate": 3.0428848628107176e-05, "loss": 1.7819, "step": 8505 }, { "epoch": 0.6402830312952822, "grad_norm": 5.652976036071777, "learning_rate": 3.0417631019586197e-05, "loss": 1.9123, "step": 8506 }, { "epoch": 0.6403583055759423, "grad_norm": 6.151829242706299, "learning_rate": 3.0406414575180853e-05, "loss": 1.551, "step": 8507 }, { "epoch": 0.6404335798566025, "grad_norm": 5.076486587524414, "learning_rate": 3.0395199295557965e-05, "loss": 1.9937, "step": 8508 }, { "epoch": 0.6405088541372627, "grad_norm": 5.037563323974609, "learning_rate": 3.0383985181384213e-05, "loss": 1.4109, "step": 8509 }, { "epoch": 0.6405841284179228, "grad_norm": 5.346786975860596, "learning_rate": 3.0372772233326242e-05, "loss": 1.751, "step": 8510 }, { "epoch": 0.640659402698583, "grad_norm": 5.752713203430176, "learning_rate": 3.0361560452050664e-05, "loss": 2.059, "step": 8511 }, { "epoch": 0.6407346769792431, "grad_norm": 5.699567794799805, "learning_rate": 3.0350349838223958e-05, "loss": 1.913, "step": 8512 }, { "epoch": 0.6408099512599033, "grad_norm": 3.9459757804870605, "learning_rate": 3.03391403925126e-05, "loss": 2.1368, "step": 8513 }, { "epoch": 0.6408852255405634, "grad_norm": 4.238221168518066, "learning_rate": 3.0327932115582903e-05, "loss": 2.125, "step": 8514 }, { "epoch": 0.6409604998212236, "grad_norm": 5.608497619628906, "learning_rate": 3.0316725008101215e-05, "loss": 1.8591, "step": 8515 }, { "epoch": 0.6410357741018837, "grad_norm": 3.9737725257873535, "learning_rate": 3.0305519070733734e-05, "loss": 1.7274, "step": 8516 }, { "epoch": 0.6411110483825438, "grad_norm": 3.95164155960083, "learning_rate": 3.0294314304146642e-05, "loss": 1.6302, "step": 8517 }, { "epoch": 0.6411863226632041, "grad_norm": 4.32230281829834, "learning_rate": 3.0283110709006014e-05, "loss": 2.0586, "step": 8518 }, { "epoch": 0.6412615969438642, "grad_norm": 4.404703140258789, "learning_rate": 3.027190828597789e-05, "loss": 1.7466, "step": 8519 }, { "epoch": 0.6413368712245243, "grad_norm": 4.992621898651123, "learning_rate": 3.0260707035728208e-05, "loss": 1.5498, "step": 8520 }, { "epoch": 0.6414121455051845, "grad_norm": 4.494771480560303, "learning_rate": 3.0249506958922857e-05, "loss": 2.0752, "step": 8521 }, { "epoch": 0.6414874197858447, "grad_norm": 6.320178031921387, "learning_rate": 3.0238308056227633e-05, "loss": 1.8353, "step": 8522 }, { "epoch": 0.6415626940665048, "grad_norm": 5.9774250984191895, "learning_rate": 3.022711032830832e-05, "loss": 1.7564, "step": 8523 }, { "epoch": 0.641637968347165, "grad_norm": 4.570126056671143, "learning_rate": 3.0215913775830533e-05, "loss": 1.7231, "step": 8524 }, { "epoch": 0.6417132426278251, "grad_norm": 5.786969184875488, "learning_rate": 3.0204718399459913e-05, "loss": 1.9839, "step": 8525 }, { "epoch": 0.6417885169084853, "grad_norm": 3.862994909286499, "learning_rate": 3.0193524199862e-05, "loss": 1.6574, "step": 8526 }, { "epoch": 0.6418637911891455, "grad_norm": 5.468550205230713, "learning_rate": 3.018233117770223e-05, "loss": 1.9569, "step": 8527 }, { "epoch": 0.6419390654698056, "grad_norm": 5.428247928619385, "learning_rate": 3.0171139333646014e-05, "loss": 1.7367, "step": 8528 }, { "epoch": 0.6420143397504657, "grad_norm": 4.250402450561523, "learning_rate": 3.0159948668358662e-05, "loss": 1.6917, "step": 8529 }, { "epoch": 0.642089614031126, "grad_norm": 4.328627586364746, "learning_rate": 3.014875918250545e-05, "loss": 1.5335, "step": 8530 }, { "epoch": 0.6421648883117861, "grad_norm": 6.313094615936279, "learning_rate": 3.0137570876751524e-05, "loss": 1.9671, "step": 8531 }, { "epoch": 0.6422401625924462, "grad_norm": 3.6403791904449463, "learning_rate": 3.0126383751762043e-05, "loss": 1.6429, "step": 8532 }, { "epoch": 0.6423154368731063, "grad_norm": 4.782705307006836, "learning_rate": 3.0115197808202e-05, "loss": 1.7788, "step": 8533 }, { "epoch": 0.6423907111537666, "grad_norm": 7.334542274475098, "learning_rate": 3.0104013046736402e-05, "loss": 1.7528, "step": 8534 }, { "epoch": 0.6424659854344267, "grad_norm": 4.299736499786377, "learning_rate": 3.0092829468030125e-05, "loss": 1.7937, "step": 8535 }, { "epoch": 0.6425412597150868, "grad_norm": 5.448459625244141, "learning_rate": 3.0081647072748043e-05, "loss": 1.7141, "step": 8536 }, { "epoch": 0.642616533995747, "grad_norm": 4.656899452209473, "learning_rate": 3.0070465861554874e-05, "loss": 1.9306, "step": 8537 }, { "epoch": 0.6426918082764072, "grad_norm": 4.9766998291015625, "learning_rate": 3.005928583511533e-05, "loss": 1.8518, "step": 8538 }, { "epoch": 0.6427670825570673, "grad_norm": 7.527698516845703, "learning_rate": 3.0048106994094016e-05, "loss": 1.9763, "step": 8539 }, { "epoch": 0.6428423568377275, "grad_norm": 4.758029937744141, "learning_rate": 3.003692933915552e-05, "loss": 1.859, "step": 8540 }, { "epoch": 0.6429176311183876, "grad_norm": 4.55019998550415, "learning_rate": 3.0025752870964262e-05, "loss": 1.7336, "step": 8541 }, { "epoch": 0.6429929053990477, "grad_norm": 5.973742485046387, "learning_rate": 3.001457759018469e-05, "loss": 2.1553, "step": 8542 }, { "epoch": 0.643068179679708, "grad_norm": 5.923421859741211, "learning_rate": 3.0003403497481152e-05, "loss": 1.7233, "step": 8543 }, { "epoch": 0.6431434539603681, "grad_norm": 4.701004981994629, "learning_rate": 2.999223059351789e-05, "loss": 1.8867, "step": 8544 }, { "epoch": 0.6432187282410282, "grad_norm": 5.493651390075684, "learning_rate": 2.998105887895911e-05, "loss": 2.036, "step": 8545 }, { "epoch": 0.6432940025216884, "grad_norm": 5.680747985839844, "learning_rate": 2.9969888354468933e-05, "loss": 1.808, "step": 8546 }, { "epoch": 0.6433692768023486, "grad_norm": 4.604275226593018, "learning_rate": 2.995871902071144e-05, "loss": 1.9923, "step": 8547 }, { "epoch": 0.6434445510830087, "grad_norm": 7.673112869262695, "learning_rate": 2.9947550878350572e-05, "loss": 1.5103, "step": 8548 }, { "epoch": 0.6435198253636689, "grad_norm": 4.728858947753906, "learning_rate": 2.9936383928050284e-05, "loss": 1.5903, "step": 8549 }, { "epoch": 0.643595099644329, "grad_norm": 4.685051918029785, "learning_rate": 2.9925218170474402e-05, "loss": 1.532, "step": 8550 }, { "epoch": 0.6436703739249892, "grad_norm": 4.883116245269775, "learning_rate": 2.9914053606286695e-05, "loss": 1.9962, "step": 8551 }, { "epoch": 0.6437456482056494, "grad_norm": 5.989914894104004, "learning_rate": 2.990289023615086e-05, "loss": 2.3358, "step": 8552 }, { "epoch": 0.6438209224863095, "grad_norm": 5.500239372253418, "learning_rate": 2.9891728060730555e-05, "loss": 2.0928, "step": 8553 }, { "epoch": 0.6438961967669696, "grad_norm": 4.805877685546875, "learning_rate": 2.9880567080689303e-05, "loss": 2.2692, "step": 8554 }, { "epoch": 0.6439714710476298, "grad_norm": 6.41030216217041, "learning_rate": 2.986940729669061e-05, "loss": 1.9259, "step": 8555 }, { "epoch": 0.64404674532829, "grad_norm": 4.49720573425293, "learning_rate": 2.985824870939789e-05, "loss": 1.8036, "step": 8556 }, { "epoch": 0.6441220196089501, "grad_norm": 4.38686466217041, "learning_rate": 2.9847091319474485e-05, "loss": 1.7433, "step": 8557 }, { "epoch": 0.6441972938896102, "grad_norm": 3.8548953533172607, "learning_rate": 2.9835935127583703e-05, "loss": 2.0497, "step": 8558 }, { "epoch": 0.6442725681702705, "grad_norm": 6.085220813751221, "learning_rate": 2.9824780134388697e-05, "loss": 2.3237, "step": 8559 }, { "epoch": 0.6443478424509306, "grad_norm": 3.575270175933838, "learning_rate": 2.981362634055265e-05, "loss": 1.6333, "step": 8560 }, { "epoch": 0.6444231167315907, "grad_norm": 6.429887294769287, "learning_rate": 2.9802473746738562e-05, "loss": 2.0933, "step": 8561 }, { "epoch": 0.6444983910122509, "grad_norm": 7.841139793395996, "learning_rate": 2.9791322353609473e-05, "loss": 1.752, "step": 8562 }, { "epoch": 0.644573665292911, "grad_norm": 3.8940541744232178, "learning_rate": 2.978017216182828e-05, "loss": 1.7116, "step": 8563 }, { "epoch": 0.6446489395735712, "grad_norm": 5.283813953399658, "learning_rate": 2.976902317205785e-05, "loss": 2.3602, "step": 8564 }, { "epoch": 0.6447242138542314, "grad_norm": 4.955506324768066, "learning_rate": 2.975787538496092e-05, "loss": 1.6618, "step": 8565 }, { "epoch": 0.6447994881348915, "grad_norm": 5.2576375007629395, "learning_rate": 2.974672880120023e-05, "loss": 1.8863, "step": 8566 }, { "epoch": 0.6448747624155516, "grad_norm": 4.595310688018799, "learning_rate": 2.973558342143839e-05, "loss": 1.8035, "step": 8567 }, { "epoch": 0.6449500366962119, "grad_norm": 5.2111077308654785, "learning_rate": 2.9724439246337987e-05, "loss": 2.2897, "step": 8568 }, { "epoch": 0.645025310976872, "grad_norm": 5.0141096115112305, "learning_rate": 2.971329627656148e-05, "loss": 1.5543, "step": 8569 }, { "epoch": 0.6451005852575321, "grad_norm": 3.944983720779419, "learning_rate": 2.970215451277132e-05, "loss": 1.8509, "step": 8570 }, { "epoch": 0.6451758595381923, "grad_norm": 7.747410297393799, "learning_rate": 2.9691013955629808e-05, "loss": 2.1258, "step": 8571 }, { "epoch": 0.6452511338188525, "grad_norm": 4.766421318054199, "learning_rate": 2.9679874605799257e-05, "loss": 1.8184, "step": 8572 }, { "epoch": 0.6453264080995126, "grad_norm": 5.19478178024292, "learning_rate": 2.9668736463941844e-05, "loss": 1.8652, "step": 8573 }, { "epoch": 0.6454016823801727, "grad_norm": 4.1410722732543945, "learning_rate": 2.9657599530719714e-05, "loss": 2.2062, "step": 8574 }, { "epoch": 0.6454769566608329, "grad_norm": 6.578781604766846, "learning_rate": 2.964646380679494e-05, "loss": 2.0871, "step": 8575 }, { "epoch": 0.645552230941493, "grad_norm": 5.385827541351318, "learning_rate": 2.963532929282947e-05, "loss": 1.4649, "step": 8576 }, { "epoch": 0.6456275052221532, "grad_norm": 4.8669562339782715, "learning_rate": 2.9624195989485264e-05, "loss": 1.8195, "step": 8577 }, { "epoch": 0.6457027795028134, "grad_norm": 6.243131160736084, "learning_rate": 2.961306389742412e-05, "loss": 2.0895, "step": 8578 }, { "epoch": 0.6457780537834735, "grad_norm": 7.147341251373291, "learning_rate": 2.9601933017307847e-05, "loss": 1.78, "step": 8579 }, { "epoch": 0.6458533280641336, "grad_norm": 5.092959403991699, "learning_rate": 2.959080334979811e-05, "loss": 1.897, "step": 8580 }, { "epoch": 0.6459286023447939, "grad_norm": 4.118281364440918, "learning_rate": 2.9579674895556574e-05, "loss": 1.9852, "step": 8581 }, { "epoch": 0.646003876625454, "grad_norm": 4.885115146636963, "learning_rate": 2.956854765524476e-05, "loss": 2.0077, "step": 8582 }, { "epoch": 0.6460791509061141, "grad_norm": 5.322532653808594, "learning_rate": 2.9557421629524163e-05, "loss": 2.0309, "step": 8583 }, { "epoch": 0.6461544251867744, "grad_norm": 4.8626556396484375, "learning_rate": 2.9546296819056192e-05, "loss": 1.5482, "step": 8584 }, { "epoch": 0.6462296994674345, "grad_norm": 5.2390875816345215, "learning_rate": 2.95351732245022e-05, "loss": 1.9367, "step": 8585 }, { "epoch": 0.6463049737480946, "grad_norm": 5.341590881347656, "learning_rate": 2.9524050846523427e-05, "loss": 1.8399, "step": 8586 }, { "epoch": 0.6463802480287548, "grad_norm": 6.855342388153076, "learning_rate": 2.9512929685781092e-05, "loss": 2.0165, "step": 8587 }, { "epoch": 0.6464555223094149, "grad_norm": 5.312610149383545, "learning_rate": 2.9501809742936292e-05, "loss": 2.1165, "step": 8588 }, { "epoch": 0.6465307965900751, "grad_norm": 4.534675598144531, "learning_rate": 2.9490691018650084e-05, "loss": 1.5757, "step": 8589 }, { "epoch": 0.6466060708707353, "grad_norm": 5.893584251403809, "learning_rate": 2.9479573513583446e-05, "loss": 1.7665, "step": 8590 }, { "epoch": 0.6466813451513954, "grad_norm": 3.467353105545044, "learning_rate": 2.9468457228397284e-05, "loss": 1.9285, "step": 8591 }, { "epoch": 0.6467566194320555, "grad_norm": 4.947606086730957, "learning_rate": 2.9457342163752443e-05, "loss": 1.7428, "step": 8592 }, { "epoch": 0.6468318937127157, "grad_norm": 5.851820945739746, "learning_rate": 2.9446228320309655e-05, "loss": 1.9027, "step": 8593 }, { "epoch": 0.6469071679933759, "grad_norm": 5.959867477416992, "learning_rate": 2.943511569872962e-05, "loss": 1.684, "step": 8594 }, { "epoch": 0.646982442274036, "grad_norm": 4.563910484313965, "learning_rate": 2.9424004299672948e-05, "loss": 2.0268, "step": 8595 }, { "epoch": 0.6470577165546961, "grad_norm": 7.226961135864258, "learning_rate": 2.9412894123800195e-05, "loss": 1.8213, "step": 8596 }, { "epoch": 0.6471329908353564, "grad_norm": 5.474841594696045, "learning_rate": 2.940178517177179e-05, "loss": 1.6239, "step": 8597 }, { "epoch": 0.6472082651160165, "grad_norm": 4.743842124938965, "learning_rate": 2.939067744424818e-05, "loss": 2.0848, "step": 8598 }, { "epoch": 0.6472835393966766, "grad_norm": 4.671804904937744, "learning_rate": 2.9379570941889643e-05, "loss": 1.757, "step": 8599 }, { "epoch": 0.6473588136773368, "grad_norm": 4.820899963378906, "learning_rate": 2.9368465665356448e-05, "loss": 1.7968, "step": 8600 }, { "epoch": 0.647434087957997, "grad_norm": 3.182859420776367, "learning_rate": 2.935736161530877e-05, "loss": 1.7045, "step": 8601 }, { "epoch": 0.6475093622386571, "grad_norm": 5.053806304931641, "learning_rate": 2.9346258792406734e-05, "loss": 1.7039, "step": 8602 }, { "epoch": 0.6475846365193173, "grad_norm": 4.7595295906066895, "learning_rate": 2.9335157197310327e-05, "loss": 1.7799, "step": 8603 }, { "epoch": 0.6476599107999774, "grad_norm": 4.2374043464660645, "learning_rate": 2.932405683067955e-05, "loss": 1.76, "step": 8604 }, { "epoch": 0.6477351850806375, "grad_norm": 3.7517526149749756, "learning_rate": 2.931295769317425e-05, "loss": 1.9355, "step": 8605 }, { "epoch": 0.6478104593612978, "grad_norm": 6.759345054626465, "learning_rate": 2.9301859785454254e-05, "loss": 2.0159, "step": 8606 }, { "epoch": 0.6478857336419579, "grad_norm": 6.61454439163208, "learning_rate": 2.929076310817932e-05, "loss": 2.0018, "step": 8607 }, { "epoch": 0.647961007922618, "grad_norm": 6.61454439163208, "learning_rate": 2.929076310817932e-05, "loss": 2.1288, "step": 8608 }, { "epoch": 0.6480362822032782, "grad_norm": 5.1581315994262695, "learning_rate": 2.927966766200908e-05, "loss": 1.883, "step": 8609 }, { "epoch": 0.6481115564839384, "grad_norm": 5.625715732574463, "learning_rate": 2.926857344760317e-05, "loss": 1.6717, "step": 8610 }, { "epoch": 0.6481868307645985, "grad_norm": 6.846342086791992, "learning_rate": 2.9257480465621063e-05, "loss": 2.254, "step": 8611 }, { "epoch": 0.6482621050452586, "grad_norm": 6.118210792541504, "learning_rate": 2.9246388716722233e-05, "loss": 1.9584, "step": 8612 }, { "epoch": 0.6483373793259188, "grad_norm": 4.919810771942139, "learning_rate": 2.9235298201566037e-05, "loss": 2.1906, "step": 8613 }, { "epoch": 0.648412653606579, "grad_norm": 3.967045307159424, "learning_rate": 2.9224208920811803e-05, "loss": 1.722, "step": 8614 }, { "epoch": 0.6484879278872391, "grad_norm": 4.292625904083252, "learning_rate": 2.9213120875118715e-05, "loss": 1.8844, "step": 8615 }, { "epoch": 0.6485632021678993, "grad_norm": 4.200898170471191, "learning_rate": 2.9202034065145967e-05, "loss": 2.1818, "step": 8616 }, { "epoch": 0.6486384764485594, "grad_norm": 5.403683185577393, "learning_rate": 2.9190948491552596e-05, "loss": 1.7544, "step": 8617 }, { "epoch": 0.6487137507292196, "grad_norm": 10.114962577819824, "learning_rate": 2.917986415499765e-05, "loss": 1.8368, "step": 8618 }, { "epoch": 0.6487890250098798, "grad_norm": 3.80903959274292, "learning_rate": 2.916878105614002e-05, "loss": 1.8435, "step": 8619 }, { "epoch": 0.6488642992905399, "grad_norm": 4.539679050445557, "learning_rate": 2.9157699195638588e-05, "loss": 1.8655, "step": 8620 }, { "epoch": 0.6489395735712, "grad_norm": 5.5913238525390625, "learning_rate": 2.9146618574152128e-05, "loss": 1.8801, "step": 8621 }, { "epoch": 0.6490148478518603, "grad_norm": 3.4713363647460938, "learning_rate": 2.913553919233939e-05, "loss": 1.9798, "step": 8622 }, { "epoch": 0.6490901221325204, "grad_norm": 5.722151756286621, "learning_rate": 2.9124461050858954e-05, "loss": 2.0308, "step": 8623 }, { "epoch": 0.6491653964131805, "grad_norm": 5.274685382843018, "learning_rate": 2.9113384150369406e-05, "loss": 2.0087, "step": 8624 }, { "epoch": 0.6492406706938407, "grad_norm": 4.676621913909912, "learning_rate": 2.910230849152926e-05, "loss": 1.7031, "step": 8625 }, { "epoch": 0.6493159449745008, "grad_norm": 5.577304840087891, "learning_rate": 2.9091234074996897e-05, "loss": 1.8762, "step": 8626 }, { "epoch": 0.649391219255161, "grad_norm": 4.7884931564331055, "learning_rate": 2.9080160901430698e-05, "loss": 2.0286, "step": 8627 }, { "epoch": 0.6494664935358212, "grad_norm": 5.585773468017578, "learning_rate": 2.9069088971488877e-05, "loss": 1.4661, "step": 8628 }, { "epoch": 0.6495417678164813, "grad_norm": 4.676112174987793, "learning_rate": 2.905801828582969e-05, "loss": 1.8685, "step": 8629 }, { "epoch": 0.6496170420971414, "grad_norm": 5.2993550300598145, "learning_rate": 2.9046948845111198e-05, "loss": 2.2756, "step": 8630 }, { "epoch": 0.6496923163778016, "grad_norm": 5.965620517730713, "learning_rate": 2.9035880649991487e-05, "loss": 1.9021, "step": 8631 }, { "epoch": 0.6497675906584618, "grad_norm": 4.728835105895996, "learning_rate": 2.9024813701128506e-05, "loss": 1.8307, "step": 8632 }, { "epoch": 0.6498428649391219, "grad_norm": 3.898773670196533, "learning_rate": 2.9013747999180195e-05, "loss": 1.9384, "step": 8633 }, { "epoch": 0.649918139219782, "grad_norm": 4.24934196472168, "learning_rate": 2.900268354480432e-05, "loss": 1.8787, "step": 8634 }, { "epoch": 0.6499934135004423, "grad_norm": 3.7674505710601807, "learning_rate": 2.8991620338658687e-05, "loss": 1.7757, "step": 8635 }, { "epoch": 0.6500686877811024, "grad_norm": 5.886000156402588, "learning_rate": 2.8980558381400925e-05, "loss": 2.2199, "step": 8636 }, { "epoch": 0.6501439620617625, "grad_norm": 4.2713165283203125, "learning_rate": 2.8969497673688674e-05, "loss": 2.0352, "step": 8637 }, { "epoch": 0.6502192363424227, "grad_norm": 4.730532169342041, "learning_rate": 2.8958438216179425e-05, "loss": 1.9243, "step": 8638 }, { "epoch": 0.6502945106230829, "grad_norm": 4.13792610168457, "learning_rate": 2.8947380009530644e-05, "loss": 1.5656, "step": 8639 }, { "epoch": 0.650369784903743, "grad_norm": 4.677649974822998, "learning_rate": 2.8936323054399738e-05, "loss": 1.8078, "step": 8640 }, { "epoch": 0.6504450591844032, "grad_norm": 4.231987953186035, "learning_rate": 2.892526735144397e-05, "loss": 2.1614, "step": 8641 }, { "epoch": 0.6505203334650633, "grad_norm": 4.96175479888916, "learning_rate": 2.8914212901320605e-05, "loss": 1.6936, "step": 8642 }, { "epoch": 0.6505956077457234, "grad_norm": 4.54158353805542, "learning_rate": 2.8903159704686762e-05, "loss": 1.7023, "step": 8643 }, { "epoch": 0.6506708820263837, "grad_norm": 4.517205238342285, "learning_rate": 2.8892107762199538e-05, "loss": 1.8842, "step": 8644 }, { "epoch": 0.6507461563070438, "grad_norm": 3.989027261734009, "learning_rate": 2.888105707451595e-05, "loss": 1.4609, "step": 8645 }, { "epoch": 0.6508214305877039, "grad_norm": 4.304959774017334, "learning_rate": 2.8870007642292933e-05, "loss": 2.1232, "step": 8646 }, { "epoch": 0.6508967048683642, "grad_norm": 3.9756412506103516, "learning_rate": 2.8858959466187317e-05, "loss": 1.9693, "step": 8647 }, { "epoch": 0.6509719791490243, "grad_norm": 3.8650543689727783, "learning_rate": 2.884791254685592e-05, "loss": 2.1174, "step": 8648 }, { "epoch": 0.6510472534296844, "grad_norm": 3.9971768856048584, "learning_rate": 2.883686688495541e-05, "loss": 1.9875, "step": 8649 }, { "epoch": 0.6511225277103446, "grad_norm": 4.313498497009277, "learning_rate": 2.882582248114246e-05, "loss": 1.81, "step": 8650 }, { "epoch": 0.6511978019910047, "grad_norm": 3.4087369441986084, "learning_rate": 2.881477933607359e-05, "loss": 1.907, "step": 8651 }, { "epoch": 0.6512730762716649, "grad_norm": 5.429731845855713, "learning_rate": 2.8803737450405322e-05, "loss": 2.0832, "step": 8652 }, { "epoch": 0.651348350552325, "grad_norm": 3.744896173477173, "learning_rate": 2.8792696824794018e-05, "loss": 1.8025, "step": 8653 }, { "epoch": 0.6514236248329852, "grad_norm": 5.274442672729492, "learning_rate": 2.878165745989604e-05, "loss": 1.8795, "step": 8654 }, { "epoch": 0.6514988991136453, "grad_norm": 4.412415981292725, "learning_rate": 2.8770619356367646e-05, "loss": 1.9839, "step": 8655 }, { "epoch": 0.6515741733943055, "grad_norm": 5.6461687088012695, "learning_rate": 2.8759582514865012e-05, "loss": 2.0527, "step": 8656 }, { "epoch": 0.6516494476749657, "grad_norm": 4.013443946838379, "learning_rate": 2.8748546936044275e-05, "loss": 1.9805, "step": 8657 }, { "epoch": 0.6517247219556258, "grad_norm": 5.20570707321167, "learning_rate": 2.8737512620561423e-05, "loss": 1.6596, "step": 8658 }, { "epoch": 0.6517999962362859, "grad_norm": 3.515523910522461, "learning_rate": 2.872647956907246e-05, "loss": 1.5992, "step": 8659 }, { "epoch": 0.6518752705169462, "grad_norm": 5.263373851776123, "learning_rate": 2.8715447782233227e-05, "loss": 1.8315, "step": 8660 }, { "epoch": 0.6519505447976063, "grad_norm": 4.3307881355285645, "learning_rate": 2.870441726069957e-05, "loss": 2.0157, "step": 8661 }, { "epoch": 0.6520258190782664, "grad_norm": 4.524033069610596, "learning_rate": 2.869338800512718e-05, "loss": 1.6649, "step": 8662 }, { "epoch": 0.6521010933589266, "grad_norm": 4.244225978851318, "learning_rate": 2.8682360016171762e-05, "loss": 1.8696, "step": 8663 }, { "epoch": 0.6521763676395868, "grad_norm": 5.265883922576904, "learning_rate": 2.867133329448885e-05, "loss": 2.4691, "step": 8664 }, { "epoch": 0.6522516419202469, "grad_norm": 4.064724445343018, "learning_rate": 2.8660307840733973e-05, "loss": 1.9237, "step": 8665 }, { "epoch": 0.6523269162009071, "grad_norm": 6.914492607116699, "learning_rate": 2.864928365556257e-05, "loss": 2.7163, "step": 8666 }, { "epoch": 0.6524021904815672, "grad_norm": 3.861659526824951, "learning_rate": 2.8638260739630007e-05, "loss": 1.6831, "step": 8667 }, { "epoch": 0.6524774647622273, "grad_norm": 6.618344306945801, "learning_rate": 2.8627239093591536e-05, "loss": 2.0614, "step": 8668 }, { "epoch": 0.6525527390428876, "grad_norm": 4.913801670074463, "learning_rate": 2.8616218718102388e-05, "loss": 1.8187, "step": 8669 }, { "epoch": 0.6526280133235477, "grad_norm": 4.293828964233398, "learning_rate": 2.8605199613817668e-05, "loss": 1.7561, "step": 8670 }, { "epoch": 0.6527032876042078, "grad_norm": 4.456260681152344, "learning_rate": 2.8594181781392437e-05, "loss": 2.0805, "step": 8671 }, { "epoch": 0.6527785618848679, "grad_norm": 7.194111347198486, "learning_rate": 2.85831652214817e-05, "loss": 1.4987, "step": 8672 }, { "epoch": 0.6528538361655282, "grad_norm": 4.761721134185791, "learning_rate": 2.8572149934740317e-05, "loss": 1.526, "step": 8673 }, { "epoch": 0.6529291104461883, "grad_norm": 4.444544792175293, "learning_rate": 2.8561135921823157e-05, "loss": 1.7775, "step": 8674 }, { "epoch": 0.6530043847268484, "grad_norm": 4.1387248039245605, "learning_rate": 2.8550123183384936e-05, "loss": 1.6179, "step": 8675 }, { "epoch": 0.6530796590075086, "grad_norm": 5.734201431274414, "learning_rate": 2.8539111720080343e-05, "loss": 2.1411, "step": 8676 }, { "epoch": 0.6531549332881688, "grad_norm": 5.701095104217529, "learning_rate": 2.8528101532563978e-05, "loss": 2.362, "step": 8677 }, { "epoch": 0.6532302075688289, "grad_norm": 4.324429512023926, "learning_rate": 2.8517092621490388e-05, "loss": 2.0575, "step": 8678 }, { "epoch": 0.6533054818494891, "grad_norm": 4.849112510681152, "learning_rate": 2.8506084987513983e-05, "loss": 1.5267, "step": 8679 }, { "epoch": 0.6533807561301492, "grad_norm": 5.130321502685547, "learning_rate": 2.8495078631289167e-05, "loss": 1.7454, "step": 8680 }, { "epoch": 0.6534560304108094, "grad_norm": 4.246479034423828, "learning_rate": 2.8484073553470203e-05, "loss": 1.9407, "step": 8681 }, { "epoch": 0.6535313046914696, "grad_norm": 5.759989261627197, "learning_rate": 2.8473069754711356e-05, "loss": 1.9653, "step": 8682 }, { "epoch": 0.6536065789721297, "grad_norm": 5.1931257247924805, "learning_rate": 2.8462067235666724e-05, "loss": 1.9794, "step": 8683 }, { "epoch": 0.6536818532527898, "grad_norm": 3.2792458534240723, "learning_rate": 2.845106599699041e-05, "loss": 1.9016, "step": 8684 }, { "epoch": 0.65375712753345, "grad_norm": 3.82450795173645, "learning_rate": 2.8440066039336372e-05, "loss": 1.6829, "step": 8685 }, { "epoch": 0.6538324018141102, "grad_norm": 4.776988983154297, "learning_rate": 2.8429067363358545e-05, "loss": 1.4054, "step": 8686 }, { "epoch": 0.6539076760947703, "grad_norm": 4.953002452850342, "learning_rate": 2.8418069969710776e-05, "loss": 2.0174, "step": 8687 }, { "epoch": 0.6539829503754305, "grad_norm": 5.949739933013916, "learning_rate": 2.840707385904682e-05, "loss": 2.0863, "step": 8688 }, { "epoch": 0.6540582246560906, "grad_norm": 5.9535298347473145, "learning_rate": 2.8396079032020383e-05, "loss": 2.0541, "step": 8689 }, { "epoch": 0.6541334989367508, "grad_norm": 3.6686513423919678, "learning_rate": 2.8385085489285034e-05, "loss": 1.6368, "step": 8690 }, { "epoch": 0.6542087732174109, "grad_norm": 6.072544097900391, "learning_rate": 2.837409323149436e-05, "loss": 1.8019, "step": 8691 }, { "epoch": 0.6542840474980711, "grad_norm": 5.601493835449219, "learning_rate": 2.836310225930177e-05, "loss": 2.1077, "step": 8692 }, { "epoch": 0.6543593217787312, "grad_norm": 4.370206356048584, "learning_rate": 2.835211257336069e-05, "loss": 1.9673, "step": 8693 }, { "epoch": 0.6544345960593914, "grad_norm": 4.355002403259277, "learning_rate": 2.8341124174324372e-05, "loss": 1.7738, "step": 8694 }, { "epoch": 0.6545098703400516, "grad_norm": 5.089869022369385, "learning_rate": 2.8330137062846107e-05, "loss": 1.8935, "step": 8695 }, { "epoch": 0.6545851446207117, "grad_norm": 6.398248195648193, "learning_rate": 2.8319151239578994e-05, "loss": 2.0295, "step": 8696 }, { "epoch": 0.6546604189013718, "grad_norm": 4.059794902801514, "learning_rate": 2.8308166705176147e-05, "loss": 1.8573, "step": 8697 }, { "epoch": 0.6547356931820321, "grad_norm": 5.221839427947998, "learning_rate": 2.8297183460290532e-05, "loss": 1.8594, "step": 8698 }, { "epoch": 0.6548109674626922, "grad_norm": 4.219726085662842, "learning_rate": 2.8286201505575084e-05, "loss": 1.6764, "step": 8699 }, { "epoch": 0.6548862417433523, "grad_norm": 3.922380208969116, "learning_rate": 2.8275220841682658e-05, "loss": 1.7608, "step": 8700 }, { "epoch": 0.6549615160240125, "grad_norm": 4.685485363006592, "learning_rate": 2.8264241469266033e-05, "loss": 1.5478, "step": 8701 }, { "epoch": 0.6550367903046727, "grad_norm": 8.281332969665527, "learning_rate": 2.825326338897787e-05, "loss": 1.666, "step": 8702 }, { "epoch": 0.6551120645853328, "grad_norm": 5.294943809509277, "learning_rate": 2.8242286601470795e-05, "loss": 1.9232, "step": 8703 }, { "epoch": 0.655187338865993, "grad_norm": 5.194580078125, "learning_rate": 2.8231311107397373e-05, "loss": 2.0269, "step": 8704 }, { "epoch": 0.6552626131466531, "grad_norm": 11.073525428771973, "learning_rate": 2.8220336907410028e-05, "loss": 2.0984, "step": 8705 }, { "epoch": 0.6553378874273132, "grad_norm": 6.163061618804932, "learning_rate": 2.8209364002161177e-05, "loss": 1.8944, "step": 8706 }, { "epoch": 0.6554131617079735, "grad_norm": 4.358154773712158, "learning_rate": 2.81983923923031e-05, "loss": 1.7566, "step": 8707 }, { "epoch": 0.6554884359886336, "grad_norm": 4.6281938552856445, "learning_rate": 2.8187422078488045e-05, "loss": 1.7854, "step": 8708 }, { "epoch": 0.6555637102692937, "grad_norm": 5.033481121063232, "learning_rate": 2.8176453061368145e-05, "loss": 1.9167, "step": 8709 }, { "epoch": 0.6556389845499538, "grad_norm": 7.444966793060303, "learning_rate": 2.8165485341595498e-05, "loss": 2.2107, "step": 8710 }, { "epoch": 0.6557142588306141, "grad_norm": 5.250480651855469, "learning_rate": 2.8154518919822092e-05, "loss": 2.078, "step": 8711 }, { "epoch": 0.6557895331112742, "grad_norm": 4.205929279327393, "learning_rate": 2.8143553796699872e-05, "loss": 1.9267, "step": 8712 }, { "epoch": 0.6558648073919343, "grad_norm": 6.08516788482666, "learning_rate": 2.8132589972880653e-05, "loss": 1.8515, "step": 8713 }, { "epoch": 0.6559400816725945, "grad_norm": 5.553742408752441, "learning_rate": 2.812162744901623e-05, "loss": 1.7718, "step": 8714 }, { "epoch": 0.6560153559532547, "grad_norm": 6.910336971282959, "learning_rate": 2.8110666225758264e-05, "loss": 2.3409, "step": 8715 }, { "epoch": 0.6560906302339148, "grad_norm": 3.601201057434082, "learning_rate": 2.80997063037584e-05, "loss": 1.6426, "step": 8716 }, { "epoch": 0.656165904514575, "grad_norm": 3.970806360244751, "learning_rate": 2.8088747683668138e-05, "loss": 2.0209, "step": 8717 }, { "epoch": 0.6562411787952351, "grad_norm": 4.646604537963867, "learning_rate": 2.8077790366138977e-05, "loss": 2.0318, "step": 8718 }, { "epoch": 0.6563164530758953, "grad_norm": 4.490777492523193, "learning_rate": 2.8066834351822257e-05, "loss": 1.6372, "step": 8719 }, { "epoch": 0.6563917273565555, "grad_norm": 3.6624560356140137, "learning_rate": 2.8055879641369298e-05, "loss": 1.6147, "step": 8720 }, { "epoch": 0.6564670016372156, "grad_norm": 5.967345714569092, "learning_rate": 2.8044926235431335e-05, "loss": 1.6397, "step": 8721 }, { "epoch": 0.6565422759178757, "grad_norm": 4.960357666015625, "learning_rate": 2.80339741346595e-05, "loss": 1.8527, "step": 8722 }, { "epoch": 0.656617550198536, "grad_norm": 5.28615140914917, "learning_rate": 2.8023023339704907e-05, "loss": 1.7492, "step": 8723 }, { "epoch": 0.6566928244791961, "grad_norm": 5.680187225341797, "learning_rate": 2.801207385121849e-05, "loss": 1.8242, "step": 8724 }, { "epoch": 0.6567680987598562, "grad_norm": 4.5485968589782715, "learning_rate": 2.800112566985122e-05, "loss": 1.8231, "step": 8725 }, { "epoch": 0.6568433730405164, "grad_norm": 6.216640472412109, "learning_rate": 2.799017879625388e-05, "loss": 1.776, "step": 8726 }, { "epoch": 0.6569186473211766, "grad_norm": 5.127228260040283, "learning_rate": 2.7979233231077278e-05, "loss": 1.6569, "step": 8727 }, { "epoch": 0.6569939216018367, "grad_norm": 3.3271596431732178, "learning_rate": 2.7968288974972058e-05, "loss": 1.8206, "step": 8728 }, { "epoch": 0.6570691958824968, "grad_norm": 4.320346355438232, "learning_rate": 2.795734602858886e-05, "loss": 1.8484, "step": 8729 }, { "epoch": 0.657144470163157, "grad_norm": 5.000116348266602, "learning_rate": 2.794640439257818e-05, "loss": 1.7838, "step": 8730 }, { "epoch": 0.6572197444438171, "grad_norm": 5.192089557647705, "learning_rate": 2.7935464067590473e-05, "loss": 2.2165, "step": 8731 }, { "epoch": 0.6572950187244773, "grad_norm": 4.940530300140381, "learning_rate": 2.7924525054276118e-05, "loss": 1.7177, "step": 8732 }, { "epoch": 0.6573702930051375, "grad_norm": 4.420196533203125, "learning_rate": 2.7913587353285432e-05, "loss": 1.7902, "step": 8733 }, { "epoch": 0.6574455672857976, "grad_norm": 5.060540676116943, "learning_rate": 2.7902650965268572e-05, "loss": 2.1291, "step": 8734 }, { "epoch": 0.6575208415664577, "grad_norm": 4.391613006591797, "learning_rate": 2.7891715890875742e-05, "loss": 1.3926, "step": 8735 }, { "epoch": 0.657596115847118, "grad_norm": 7.196225166320801, "learning_rate": 2.7880782130756937e-05, "loss": 1.8463, "step": 8736 }, { "epoch": 0.6576713901277781, "grad_norm": 4.369712829589844, "learning_rate": 2.786984968556218e-05, "loss": 1.8656, "step": 8737 }, { "epoch": 0.6577466644084382, "grad_norm": 4.403004169464111, "learning_rate": 2.7858918555941365e-05, "loss": 1.738, "step": 8738 }, { "epoch": 0.6578219386890984, "grad_norm": 4.340316295623779, "learning_rate": 2.7847988742544297e-05, "loss": 1.7306, "step": 8739 }, { "epoch": 0.6578972129697586, "grad_norm": 4.489402770996094, "learning_rate": 2.7837060246020762e-05, "loss": 1.6377, "step": 8740 }, { "epoch": 0.6579724872504187, "grad_norm": 4.764037609100342, "learning_rate": 2.7826133067020375e-05, "loss": 2.0081, "step": 8741 }, { "epoch": 0.6580477615310789, "grad_norm": 4.301174640655518, "learning_rate": 2.7815207206192783e-05, "loss": 1.8452, "step": 8742 }, { "epoch": 0.658123035811739, "grad_norm": 5.14947509765625, "learning_rate": 2.7804282664187443e-05, "loss": 1.9155, "step": 8743 }, { "epoch": 0.6581983100923992, "grad_norm": 7.12629508972168, "learning_rate": 2.779335944165381e-05, "loss": 2.0534, "step": 8744 }, { "epoch": 0.6582735843730594, "grad_norm": 4.720340251922607, "learning_rate": 2.7782437539241233e-05, "loss": 2.0739, "step": 8745 }, { "epoch": 0.6583488586537195, "grad_norm": 4.3159661293029785, "learning_rate": 2.777151695759903e-05, "loss": 1.7583, "step": 8746 }, { "epoch": 0.6584241329343796, "grad_norm": 5.066626071929932, "learning_rate": 2.7760597697376335e-05, "loss": 2.2023, "step": 8747 }, { "epoch": 0.6584994072150399, "grad_norm": 4.680880546569824, "learning_rate": 2.7749679759222314e-05, "loss": 1.9215, "step": 8748 }, { "epoch": 0.6585746814957, "grad_norm": 3.7674427032470703, "learning_rate": 2.7738763143785972e-05, "loss": 1.8614, "step": 8749 }, { "epoch": 0.6586499557763601, "grad_norm": 3.798656940460205, "learning_rate": 2.7727847851716305e-05, "loss": 1.7624, "step": 8750 }, { "epoch": 0.6587252300570202, "grad_norm": 5.341363906860352, "learning_rate": 2.7716933883662154e-05, "loss": 2.0625, "step": 8751 }, { "epoch": 0.6588005043376804, "grad_norm": 4.5162272453308105, "learning_rate": 2.770602124027235e-05, "loss": 2.0344, "step": 8752 }, { "epoch": 0.6588757786183406, "grad_norm": 3.985649824142456, "learning_rate": 2.7695109922195632e-05, "loss": 1.7292, "step": 8753 }, { "epoch": 0.6589510528990007, "grad_norm": 5.624774932861328, "learning_rate": 2.7684199930080613e-05, "loss": 1.6561, "step": 8754 }, { "epoch": 0.6590263271796609, "grad_norm": 4.4445109367370605, "learning_rate": 2.7673291264575874e-05, "loss": 1.8889, "step": 8755 }, { "epoch": 0.659101601460321, "grad_norm": 4.026817798614502, "learning_rate": 2.7662383926329903e-05, "loss": 1.7208, "step": 8756 }, { "epoch": 0.6591768757409812, "grad_norm": 5.325575351715088, "learning_rate": 2.765147791599114e-05, "loss": 1.6658, "step": 8757 }, { "epoch": 0.6592521500216414, "grad_norm": 6.273247241973877, "learning_rate": 2.7640573234207866e-05, "loss": 2.0033, "step": 8758 }, { "epoch": 0.6593274243023015, "grad_norm": 3.3490426540374756, "learning_rate": 2.7629669881628384e-05, "loss": 1.9294, "step": 8759 }, { "epoch": 0.6594026985829616, "grad_norm": 5.980242729187012, "learning_rate": 2.7618767858900818e-05, "loss": 2.1203, "step": 8760 }, { "epoch": 0.6594779728636219, "grad_norm": 5.140210151672363, "learning_rate": 2.7607867166673296e-05, "loss": 1.9771, "step": 8761 }, { "epoch": 0.659553247144282, "grad_norm": 4.407763481140137, "learning_rate": 2.759696780559381e-05, "loss": 1.9322, "step": 8762 }, { "epoch": 0.6596285214249421, "grad_norm": 4.496096134185791, "learning_rate": 2.758606977631032e-05, "loss": 1.9758, "step": 8763 }, { "epoch": 0.6597037957056023, "grad_norm": 5.305761337280273, "learning_rate": 2.757517307947065e-05, "loss": 2.0047, "step": 8764 }, { "epoch": 0.6597790699862625, "grad_norm": 4.556825637817383, "learning_rate": 2.7564277715722596e-05, "loss": 2.0209, "step": 8765 }, { "epoch": 0.6598543442669226, "grad_norm": 5.236590385437012, "learning_rate": 2.7553383685713862e-05, "loss": 1.674, "step": 8766 }, { "epoch": 0.6599296185475828, "grad_norm": 4.662038803100586, "learning_rate": 2.7542490990092074e-05, "loss": 2.0531, "step": 8767 }, { "epoch": 0.6600048928282429, "grad_norm": 4.337377071380615, "learning_rate": 2.7531599629504745e-05, "loss": 2.1333, "step": 8768 }, { "epoch": 0.660080167108903, "grad_norm": 4.375157833099365, "learning_rate": 2.752070960459934e-05, "loss": 1.7405, "step": 8769 }, { "epoch": 0.6601554413895632, "grad_norm": 4.503413200378418, "learning_rate": 2.7509820916023276e-05, "loss": 1.9461, "step": 8770 }, { "epoch": 0.6602307156702234, "grad_norm": 4.720217227935791, "learning_rate": 2.749893356442381e-05, "loss": 2.0645, "step": 8771 }, { "epoch": 0.6603059899508835, "grad_norm": 5.786413669586182, "learning_rate": 2.7488047550448193e-05, "loss": 1.7386, "step": 8772 }, { "epoch": 0.6603812642315436, "grad_norm": 3.9931697845458984, "learning_rate": 2.747716287474354e-05, "loss": 2.0254, "step": 8773 }, { "epoch": 0.6604565385122039, "grad_norm": 5.328513145446777, "learning_rate": 2.7466279537956944e-05, "loss": 1.8, "step": 8774 }, { "epoch": 0.660531812792864, "grad_norm": 3.8266711235046387, "learning_rate": 2.745539754073536e-05, "loss": 2.1057, "step": 8775 }, { "epoch": 0.6606070870735241, "grad_norm": 4.392454147338867, "learning_rate": 2.74445168837257e-05, "loss": 2.1317, "step": 8776 }, { "epoch": 0.6606823613541843, "grad_norm": 4.543263912200928, "learning_rate": 2.7433637567574788e-05, "loss": 1.8057, "step": 8777 }, { "epoch": 0.6607576356348445, "grad_norm": 3.929015636444092, "learning_rate": 2.7422759592929405e-05, "loss": 1.7747, "step": 8778 }, { "epoch": 0.6608329099155046, "grad_norm": 4.441057205200195, "learning_rate": 2.7411882960436153e-05, "loss": 1.5652, "step": 8779 }, { "epoch": 0.6609081841961648, "grad_norm": 7.1514129638671875, "learning_rate": 2.7401007670741667e-05, "loss": 1.6769, "step": 8780 }, { "epoch": 0.6609834584768249, "grad_norm": 4.549609661102295, "learning_rate": 2.7390133724492417e-05, "loss": 1.5836, "step": 8781 }, { "epoch": 0.6610587327574851, "grad_norm": 4.804782390594482, "learning_rate": 2.7379261122334855e-05, "loss": 2.2311, "step": 8782 }, { "epoch": 0.6611340070381453, "grad_norm": 4.868736267089844, "learning_rate": 2.7368389864915294e-05, "loss": 1.8926, "step": 8783 }, { "epoch": 0.6612092813188054, "grad_norm": 6.154513359069824, "learning_rate": 2.7357519952880023e-05, "loss": 1.7282, "step": 8784 }, { "epoch": 0.6612845555994655, "grad_norm": 5.211771011352539, "learning_rate": 2.7346651386875237e-05, "loss": 1.7508, "step": 8785 }, { "epoch": 0.6613598298801258, "grad_norm": 4.101115703582764, "learning_rate": 2.7335784167547006e-05, "loss": 1.8346, "step": 8786 }, { "epoch": 0.6614351041607859, "grad_norm": 4.862835884094238, "learning_rate": 2.732491829554138e-05, "loss": 1.835, "step": 8787 }, { "epoch": 0.661510378441446, "grad_norm": 5.2062907218933105, "learning_rate": 2.73140537715043e-05, "loss": 2.0171, "step": 8788 }, { "epoch": 0.6615856527221061, "grad_norm": 5.2599029541015625, "learning_rate": 2.7303190596081645e-05, "loss": 2.2126, "step": 8789 }, { "epoch": 0.6616609270027664, "grad_norm": 4.830210208892822, "learning_rate": 2.7292328769919172e-05, "loss": 1.6777, "step": 8790 }, { "epoch": 0.6617362012834265, "grad_norm": 4.170035362243652, "learning_rate": 2.7281468293662608e-05, "loss": 1.6983, "step": 8791 }, { "epoch": 0.6618114755640866, "grad_norm": 4.532258033752441, "learning_rate": 2.727060916795755e-05, "loss": 1.8308, "step": 8792 }, { "epoch": 0.6618867498447468, "grad_norm": 5.162574291229248, "learning_rate": 2.7259751393449584e-05, "loss": 1.9941, "step": 8793 }, { "epoch": 0.661962024125407, "grad_norm": 5.646231174468994, "learning_rate": 2.724889497078413e-05, "loss": 2.2627, "step": 8794 }, { "epoch": 0.6620372984060671, "grad_norm": 4.764942169189453, "learning_rate": 2.7238039900606605e-05, "loss": 2.3024, "step": 8795 }, { "epoch": 0.6621125726867273, "grad_norm": 4.774278163909912, "learning_rate": 2.7227186183562276e-05, "loss": 1.5764, "step": 8796 }, { "epoch": 0.6621878469673874, "grad_norm": 4.349574089050293, "learning_rate": 2.7216333820296402e-05, "loss": 1.8824, "step": 8797 }, { "epoch": 0.6622631212480475, "grad_norm": 5.4706292152404785, "learning_rate": 2.7205482811454097e-05, "loss": 1.884, "step": 8798 }, { "epoch": 0.6623383955287078, "grad_norm": 3.846074104309082, "learning_rate": 2.7194633157680434e-05, "loss": 2.0543, "step": 8799 }, { "epoch": 0.6624136698093679, "grad_norm": 5.873631000518799, "learning_rate": 2.7183784859620387e-05, "loss": 1.7716, "step": 8800 }, { "epoch": 0.662488944090028, "grad_norm": 5.629774570465088, "learning_rate": 2.7172937917918868e-05, "loss": 1.9605, "step": 8801 }, { "epoch": 0.6625642183706882, "grad_norm": 4.951746940612793, "learning_rate": 2.7162092333220712e-05, "loss": 1.7837, "step": 8802 }, { "epoch": 0.6626394926513484, "grad_norm": 5.148329734802246, "learning_rate": 2.7151248106170613e-05, "loss": 2.1406, "step": 8803 }, { "epoch": 0.6627147669320085, "grad_norm": 5.117009162902832, "learning_rate": 2.714040523741328e-05, "loss": 1.8966, "step": 8804 }, { "epoch": 0.6627900412126687, "grad_norm": 4.9454216957092285, "learning_rate": 2.712956372759324e-05, "loss": 1.851, "step": 8805 }, { "epoch": 0.6628653154933288, "grad_norm": 3.680377244949341, "learning_rate": 2.7118723577355033e-05, "loss": 1.7048, "step": 8806 }, { "epoch": 0.662940589773989, "grad_norm": 6.020511627197266, "learning_rate": 2.7107884787343035e-05, "loss": 1.777, "step": 8807 }, { "epoch": 0.6630158640546491, "grad_norm": 4.53709077835083, "learning_rate": 2.709704735820162e-05, "loss": 1.9225, "step": 8808 }, { "epoch": 0.6630911383353093, "grad_norm": 4.87180233001709, "learning_rate": 2.708621129057501e-05, "loss": 1.4963, "step": 8809 }, { "epoch": 0.6631664126159694, "grad_norm": 4.665671348571777, "learning_rate": 2.7075376585107388e-05, "loss": 1.7045, "step": 8810 }, { "epoch": 0.6632416868966295, "grad_norm": 4.7149763107299805, "learning_rate": 2.706454324244285e-05, "loss": 1.7171, "step": 8811 }, { "epoch": 0.6633169611772898, "grad_norm": 4.419112682342529, "learning_rate": 2.7053711263225427e-05, "loss": 2.0527, "step": 8812 }, { "epoch": 0.6633922354579499, "grad_norm": 5.024907112121582, "learning_rate": 2.7042880648099013e-05, "loss": 1.5522, "step": 8813 }, { "epoch": 0.66346750973861, "grad_norm": 5.341494560241699, "learning_rate": 2.7032051397707492e-05, "loss": 1.8569, "step": 8814 }, { "epoch": 0.6635427840192702, "grad_norm": 4.947847366333008, "learning_rate": 2.7021223512694587e-05, "loss": 2.0411, "step": 8815 }, { "epoch": 0.6636180582999304, "grad_norm": 5.178067207336426, "learning_rate": 2.701039699370401e-05, "loss": 1.8223, "step": 8816 }, { "epoch": 0.6636933325805905, "grad_norm": 4.4944047927856445, "learning_rate": 2.6999571841379394e-05, "loss": 1.8239, "step": 8817 }, { "epoch": 0.6637686068612507, "grad_norm": 4.428443431854248, "learning_rate": 2.6988748056364214e-05, "loss": 1.6589, "step": 8818 }, { "epoch": 0.6638438811419108, "grad_norm": 4.5360517501831055, "learning_rate": 2.697792563930196e-05, "loss": 1.8336, "step": 8819 }, { "epoch": 0.663919155422571, "grad_norm": 6.5190629959106445, "learning_rate": 2.696710459083594e-05, "loss": 1.5053, "step": 8820 }, { "epoch": 0.6639944297032312, "grad_norm": 3.7468550205230713, "learning_rate": 2.695628491160947e-05, "loss": 1.4678, "step": 8821 }, { "epoch": 0.6640697039838913, "grad_norm": 4.403375148773193, "learning_rate": 2.694546660226574e-05, "loss": 1.847, "step": 8822 }, { "epoch": 0.6641449782645514, "grad_norm": 4.647982597351074, "learning_rate": 2.6934649663447885e-05, "loss": 1.4818, "step": 8823 }, { "epoch": 0.6642202525452117, "grad_norm": 3.9248507022857666, "learning_rate": 2.6923834095798916e-05, "loss": 1.9991, "step": 8824 }, { "epoch": 0.6642955268258718, "grad_norm": 4.492358207702637, "learning_rate": 2.691301989996181e-05, "loss": 1.7987, "step": 8825 }, { "epoch": 0.6643708011065319, "grad_norm": 3.765650749206543, "learning_rate": 2.6902207076579406e-05, "loss": 1.3297, "step": 8826 }, { "epoch": 0.6644460753871921, "grad_norm": 4.731827259063721, "learning_rate": 2.6891395626294536e-05, "loss": 1.6934, "step": 8827 }, { "epoch": 0.6645213496678523, "grad_norm": 5.9216628074646, "learning_rate": 2.6880585549749875e-05, "loss": 1.9936, "step": 8828 }, { "epoch": 0.6645966239485124, "grad_norm": 5.2762885093688965, "learning_rate": 2.6869776847588078e-05, "loss": 1.8152, "step": 8829 }, { "epoch": 0.6646718982291725, "grad_norm": 5.487278938293457, "learning_rate": 2.685896952045167e-05, "loss": 1.637, "step": 8830 }, { "epoch": 0.6647471725098327, "grad_norm": 6.06055212020874, "learning_rate": 2.684816356898312e-05, "loss": 2.1396, "step": 8831 }, { "epoch": 0.6648224467904928, "grad_norm": 4.602733612060547, "learning_rate": 2.6837358993824814e-05, "loss": 1.9866, "step": 8832 }, { "epoch": 0.664897721071153, "grad_norm": 4.820003509521484, "learning_rate": 2.6826555795619056e-05, "loss": 2.5, "step": 8833 }, { "epoch": 0.6649729953518132, "grad_norm": 5.898910999298096, "learning_rate": 2.6815753975008085e-05, "loss": 1.7225, "step": 8834 }, { "epoch": 0.6650482696324733, "grad_norm": 5.85598087310791, "learning_rate": 2.6804953532634e-05, "loss": 1.7758, "step": 8835 }, { "epoch": 0.6651235439131334, "grad_norm": 5.7575154304504395, "learning_rate": 2.6794154469138887e-05, "loss": 2.1179, "step": 8836 }, { "epoch": 0.6651988181937937, "grad_norm": 5.69008207321167, "learning_rate": 2.6783356785164686e-05, "loss": 1.7158, "step": 8837 }, { "epoch": 0.6652740924744538, "grad_norm": 3.8738021850585938, "learning_rate": 2.6772560481353332e-05, "loss": 1.5365, "step": 8838 }, { "epoch": 0.6653493667551139, "grad_norm": 5.95831298828125, "learning_rate": 2.6761765558346585e-05, "loss": 1.8869, "step": 8839 }, { "epoch": 0.6654246410357741, "grad_norm": 4.69302225112915, "learning_rate": 2.6750972016786223e-05, "loss": 2.0503, "step": 8840 }, { "epoch": 0.6654999153164343, "grad_norm": 4.687337875366211, "learning_rate": 2.6740179857313845e-05, "loss": 1.9947, "step": 8841 }, { "epoch": 0.6655751895970944, "grad_norm": 4.024134159088135, "learning_rate": 2.6729389080571033e-05, "loss": 1.7627, "step": 8842 }, { "epoch": 0.6656504638777546, "grad_norm": 6.774590969085693, "learning_rate": 2.6718599687199293e-05, "loss": 2.0674, "step": 8843 }, { "epoch": 0.6657257381584147, "grad_norm": 5.074631690979004, "learning_rate": 2.6707811677839978e-05, "loss": 2.0338, "step": 8844 }, { "epoch": 0.6658010124390749, "grad_norm": 6.617143154144287, "learning_rate": 2.669702505313442e-05, "loss": 2.0158, "step": 8845 }, { "epoch": 0.6658762867197351, "grad_norm": 6.428169250488281, "learning_rate": 2.668623981372389e-05, "loss": 1.8422, "step": 8846 }, { "epoch": 0.6659515610003952, "grad_norm": 4.327280044555664, "learning_rate": 2.6675455960249478e-05, "loss": 1.6413, "step": 8847 }, { "epoch": 0.6660268352810553, "grad_norm": 4.7735090255737305, "learning_rate": 2.666467349335231e-05, "loss": 1.7619, "step": 8848 }, { "epoch": 0.6661021095617154, "grad_norm": 4.143545150756836, "learning_rate": 2.665389241367332e-05, "loss": 1.7187, "step": 8849 }, { "epoch": 0.6661773838423757, "grad_norm": 6.336202621459961, "learning_rate": 2.6643112721853446e-05, "loss": 1.9036, "step": 8850 }, { "epoch": 0.6662526581230358, "grad_norm": 4.243865966796875, "learning_rate": 2.6632334418533516e-05, "loss": 1.6973, "step": 8851 }, { "epoch": 0.6663279324036959, "grad_norm": 5.223363399505615, "learning_rate": 2.6621557504354237e-05, "loss": 1.561, "step": 8852 }, { "epoch": 0.6664032066843562, "grad_norm": 5.2146172523498535, "learning_rate": 2.6610781979956307e-05, "loss": 1.7123, "step": 8853 }, { "epoch": 0.6664784809650163, "grad_norm": 4.6776018142700195, "learning_rate": 2.6600007845980257e-05, "loss": 1.8581, "step": 8854 }, { "epoch": 0.6665537552456764, "grad_norm": 5.126739025115967, "learning_rate": 2.6589235103066595e-05, "loss": 1.7989, "step": 8855 }, { "epoch": 0.6666290295263366, "grad_norm": 4.947964668273926, "learning_rate": 2.657846375185573e-05, "loss": 1.859, "step": 8856 }, { "epoch": 0.6667043038069967, "grad_norm": 4.283720016479492, "learning_rate": 2.6567693792988017e-05, "loss": 1.8351, "step": 8857 }, { "epoch": 0.6667795780876569, "grad_norm": 5.613223075866699, "learning_rate": 2.6556925227103656e-05, "loss": 1.8202, "step": 8858 }, { "epoch": 0.6668548523683171, "grad_norm": 4.260611057281494, "learning_rate": 2.654615805484284e-05, "loss": 1.6868, "step": 8859 }, { "epoch": 0.6669301266489772, "grad_norm": 4.571383476257324, "learning_rate": 2.6535392276845612e-05, "loss": 1.8439, "step": 8860 }, { "epoch": 0.6670054009296373, "grad_norm": 4.924087047576904, "learning_rate": 2.652462789375201e-05, "loss": 1.8539, "step": 8861 }, { "epoch": 0.6670806752102976, "grad_norm": 4.206077575683594, "learning_rate": 2.651386490620189e-05, "loss": 1.9324, "step": 8862 }, { "epoch": 0.6671559494909577, "grad_norm": 4.6520676612854, "learning_rate": 2.6503103314835142e-05, "loss": 1.5271, "step": 8863 }, { "epoch": 0.6672312237716178, "grad_norm": 4.078134059906006, "learning_rate": 2.6492343120291453e-05, "loss": 1.6754, "step": 8864 }, { "epoch": 0.667306498052278, "grad_norm": 3.992279529571533, "learning_rate": 2.6481584323210524e-05, "loss": 1.8784, "step": 8865 }, { "epoch": 0.6673817723329382, "grad_norm": 5.328444004058838, "learning_rate": 2.6470826924231918e-05, "loss": 1.8818, "step": 8866 }, { "epoch": 0.6674570466135983, "grad_norm": 5.049443244934082, "learning_rate": 2.646007092399514e-05, "loss": 1.5722, "step": 8867 }, { "epoch": 0.6675323208942584, "grad_norm": 4.65287971496582, "learning_rate": 2.644931632313963e-05, "loss": 2.2848, "step": 8868 }, { "epoch": 0.6676075951749186, "grad_norm": 4.520260810852051, "learning_rate": 2.643856312230466e-05, "loss": 2.0799, "step": 8869 }, { "epoch": 0.6676828694555788, "grad_norm": 6.330007553100586, "learning_rate": 2.6427811322129525e-05, "loss": 2.1194, "step": 8870 }, { "epoch": 0.6677581437362389, "grad_norm": 4.228724002838135, "learning_rate": 2.641706092325336e-05, "loss": 1.9272, "step": 8871 }, { "epoch": 0.6678334180168991, "grad_norm": 4.327436923980713, "learning_rate": 2.6406311926315273e-05, "loss": 1.7915, "step": 8872 }, { "epoch": 0.6679086922975592, "grad_norm": 5.304169178009033, "learning_rate": 2.6395564331954225e-05, "loss": 1.7595, "step": 8873 }, { "epoch": 0.6679839665782193, "grad_norm": 5.184169769287109, "learning_rate": 2.6384818140809166e-05, "loss": 2.2099, "step": 8874 }, { "epoch": 0.6680592408588796, "grad_norm": 6.44237756729126, "learning_rate": 2.6374073353518892e-05, "loss": 1.6844, "step": 8875 }, { "epoch": 0.6681345151395397, "grad_norm": 4.902488708496094, "learning_rate": 2.636332997072216e-05, "loss": 2.0372, "step": 8876 }, { "epoch": 0.6682097894201998, "grad_norm": 8.373483657836914, "learning_rate": 2.6352587993057646e-05, "loss": 1.7565, "step": 8877 }, { "epoch": 0.66828506370086, "grad_norm": 5.195601463317871, "learning_rate": 2.6341847421163946e-05, "loss": 1.8044, "step": 8878 }, { "epoch": 0.6683603379815202, "grad_norm": 5.000908851623535, "learning_rate": 2.633110825567951e-05, "loss": 1.6557, "step": 8879 }, { "epoch": 0.6684356122621803, "grad_norm": 5.616551399230957, "learning_rate": 2.6320370497242798e-05, "loss": 1.8628, "step": 8880 }, { "epoch": 0.6685108865428405, "grad_norm": 4.4209513664245605, "learning_rate": 2.630963414649209e-05, "loss": 1.7255, "step": 8881 }, { "epoch": 0.6685861608235006, "grad_norm": 6.026299953460693, "learning_rate": 2.6298899204065657e-05, "loss": 2.2945, "step": 8882 }, { "epoch": 0.6686614351041608, "grad_norm": 3.0953924655914307, "learning_rate": 2.6288165670601684e-05, "loss": 1.9448, "step": 8883 }, { "epoch": 0.668736709384821, "grad_norm": 6.213339328765869, "learning_rate": 2.6277433546738206e-05, "loss": 1.6914, "step": 8884 }, { "epoch": 0.6688119836654811, "grad_norm": 3.756370782852173, "learning_rate": 2.626670283311325e-05, "loss": 1.9009, "step": 8885 }, { "epoch": 0.6688872579461412, "grad_norm": 6.405398368835449, "learning_rate": 2.6255973530364696e-05, "loss": 1.7966, "step": 8886 }, { "epoch": 0.6689625322268014, "grad_norm": 4.880269527435303, "learning_rate": 2.6245245639130388e-05, "loss": 1.5388, "step": 8887 }, { "epoch": 0.6690378065074616, "grad_norm": 6.3944220542907715, "learning_rate": 2.6234519160048064e-05, "loss": 1.845, "step": 8888 }, { "epoch": 0.6691130807881217, "grad_norm": 5.3372063636779785, "learning_rate": 2.6223794093755405e-05, "loss": 1.9204, "step": 8889 }, { "epoch": 0.6691883550687818, "grad_norm": 4.326444625854492, "learning_rate": 2.6213070440889942e-05, "loss": 1.8462, "step": 8890 }, { "epoch": 0.6692636293494421, "grad_norm": 5.463204383850098, "learning_rate": 2.6202348202089216e-05, "loss": 2.1027, "step": 8891 }, { "epoch": 0.6693389036301022, "grad_norm": 5.259471416473389, "learning_rate": 2.6191627377990586e-05, "loss": 1.9241, "step": 8892 }, { "epoch": 0.6694141779107623, "grad_norm": 6.563348770141602, "learning_rate": 2.618090796923141e-05, "loss": 1.9239, "step": 8893 }, { "epoch": 0.6694894521914225, "grad_norm": 3.9169232845306396, "learning_rate": 2.6170189976448894e-05, "loss": 2.1645, "step": 8894 }, { "epoch": 0.6695647264720826, "grad_norm": 4.303414821624756, "learning_rate": 2.6159473400280227e-05, "loss": 2.0235, "step": 8895 }, { "epoch": 0.6696400007527428, "grad_norm": 4.6599273681640625, "learning_rate": 2.6148758241362436e-05, "loss": 1.9235, "step": 8896 }, { "epoch": 0.669715275033403, "grad_norm": 4.29166841506958, "learning_rate": 2.6138044500332536e-05, "loss": 1.9216, "step": 8897 }, { "epoch": 0.6697905493140631, "grad_norm": 5.0810017585754395, "learning_rate": 2.6127332177827446e-05, "loss": 1.479, "step": 8898 }, { "epoch": 0.6698658235947232, "grad_norm": 5.189507484436035, "learning_rate": 2.611662127448393e-05, "loss": 1.7415, "step": 8899 }, { "epoch": 0.6699410978753835, "grad_norm": 5.042854309082031, "learning_rate": 2.6105911790938763e-05, "loss": 2.018, "step": 8900 }, { "epoch": 0.6700163721560436, "grad_norm": 5.700060844421387, "learning_rate": 2.609520372782857e-05, "loss": 1.7725, "step": 8901 }, { "epoch": 0.6700916464367037, "grad_norm": 4.652759552001953, "learning_rate": 2.608449708578995e-05, "loss": 1.6746, "step": 8902 }, { "epoch": 0.670166920717364, "grad_norm": 5.318171501159668, "learning_rate": 2.607379186545933e-05, "loss": 1.8465, "step": 8903 }, { "epoch": 0.6702421949980241, "grad_norm": 3.932227373123169, "learning_rate": 2.6063088067473156e-05, "loss": 1.5701, "step": 8904 }, { "epoch": 0.6703174692786842, "grad_norm": 4.138515949249268, "learning_rate": 2.6052385692467697e-05, "loss": 1.5684, "step": 8905 }, { "epoch": 0.6703927435593443, "grad_norm": 4.297887802124023, "learning_rate": 2.604168474107921e-05, "loss": 1.8615, "step": 8906 }, { "epoch": 0.6704680178400045, "grad_norm": 4.569864273071289, "learning_rate": 2.6030985213943803e-05, "loss": 1.7772, "step": 8907 }, { "epoch": 0.6705432921206647, "grad_norm": 4.180438995361328, "learning_rate": 2.602028711169757e-05, "loss": 1.4586, "step": 8908 }, { "epoch": 0.6706185664013248, "grad_norm": 4.958812236785889, "learning_rate": 2.6009590434976445e-05, "loss": 1.7733, "step": 8909 }, { "epoch": 0.670693840681985, "grad_norm": 8.969746589660645, "learning_rate": 2.5998895184416327e-05, "loss": 1.9241, "step": 8910 }, { "epoch": 0.6707691149626451, "grad_norm": 4.2592878341674805, "learning_rate": 2.598820136065303e-05, "loss": 1.4884, "step": 8911 }, { "epoch": 0.6708443892433053, "grad_norm": 4.074267864227295, "learning_rate": 2.5977508964322282e-05, "loss": 1.9795, "step": 8912 }, { "epoch": 0.6709196635239655, "grad_norm": 5.606818675994873, "learning_rate": 2.5966817996059677e-05, "loss": 2.001, "step": 8913 }, { "epoch": 0.6709949378046256, "grad_norm": 5.230132579803467, "learning_rate": 2.5956128456500784e-05, "loss": 2.1983, "step": 8914 }, { "epoch": 0.6710702120852857, "grad_norm": 5.076301574707031, "learning_rate": 2.594544034628108e-05, "loss": 2.0636, "step": 8915 }, { "epoch": 0.671145486365946, "grad_norm": 5.236661434173584, "learning_rate": 2.5934753666035917e-05, "loss": 1.9933, "step": 8916 }, { "epoch": 0.6712207606466061, "grad_norm": 7.820615291595459, "learning_rate": 2.592406841640061e-05, "loss": 1.692, "step": 8917 }, { "epoch": 0.6712960349272662, "grad_norm": 5.645308494567871, "learning_rate": 2.5913384598010337e-05, "loss": 2.2752, "step": 8918 }, { "epoch": 0.6713713092079264, "grad_norm": 13.594305992126465, "learning_rate": 2.590270221150025e-05, "loss": 1.9373, "step": 8919 }, { "epoch": 0.6714465834885865, "grad_norm": 4.773320198059082, "learning_rate": 2.589202125750536e-05, "loss": 1.7681, "step": 8920 }, { "epoch": 0.6715218577692467, "grad_norm": 5.516262531280518, "learning_rate": 2.5881341736660635e-05, "loss": 1.7638, "step": 8921 }, { "epoch": 0.6715971320499069, "grad_norm": 6.392311096191406, "learning_rate": 2.587066364960094e-05, "loss": 1.7556, "step": 8922 }, { "epoch": 0.671672406330567, "grad_norm": 5.861118793487549, "learning_rate": 2.5859986996961074e-05, "loss": 1.4801, "step": 8923 }, { "epoch": 0.6717476806112271, "grad_norm": 3.715571641921997, "learning_rate": 2.5849311779375696e-05, "loss": 1.796, "step": 8924 }, { "epoch": 0.6718229548918874, "grad_norm": 5.83088493347168, "learning_rate": 2.5838637997479454e-05, "loss": 2.1076, "step": 8925 }, { "epoch": 0.6718982291725475, "grad_norm": 5.278836727142334, "learning_rate": 2.5827965651906838e-05, "loss": 1.7612, "step": 8926 }, { "epoch": 0.6719735034532076, "grad_norm": 4.42335844039917, "learning_rate": 2.5817294743292324e-05, "loss": 1.7263, "step": 8927 }, { "epoch": 0.6720487777338677, "grad_norm": 6.038539409637451, "learning_rate": 2.5806625272270236e-05, "loss": 2.0524, "step": 8928 }, { "epoch": 0.672124052014528, "grad_norm": 5.974808692932129, "learning_rate": 2.5795957239474856e-05, "loss": 1.7796, "step": 8929 }, { "epoch": 0.6721993262951881, "grad_norm": 5.594521999359131, "learning_rate": 2.5785290645540382e-05, "loss": 1.7868, "step": 8930 }, { "epoch": 0.6722746005758482, "grad_norm": 5.21370792388916, "learning_rate": 2.5774625491100877e-05, "loss": 1.9736, "step": 8931 }, { "epoch": 0.6723498748565084, "grad_norm": 5.262139797210693, "learning_rate": 2.576396177679039e-05, "loss": 2.0341, "step": 8932 }, { "epoch": 0.6724251491371686, "grad_norm": 5.050817489624023, "learning_rate": 2.5753299503242823e-05, "loss": 1.9789, "step": 8933 }, { "epoch": 0.6725004234178287, "grad_norm": 4.763389587402344, "learning_rate": 2.5742638671092058e-05, "loss": 2.1324, "step": 8934 }, { "epoch": 0.6725756976984889, "grad_norm": 4.985348224639893, "learning_rate": 2.5731979280971795e-05, "loss": 1.8983, "step": 8935 }, { "epoch": 0.672650971979149, "grad_norm": 3.7745063304901123, "learning_rate": 2.572132133351575e-05, "loss": 1.7179, "step": 8936 }, { "epoch": 0.6727262462598091, "grad_norm": 4.594761371612549, "learning_rate": 2.5710664829357473e-05, "loss": 2.0431, "step": 8937 }, { "epoch": 0.6728015205404694, "grad_norm": 4.333402156829834, "learning_rate": 2.5700009769130496e-05, "loss": 1.8918, "step": 8938 }, { "epoch": 0.6728767948211295, "grad_norm": 5.764066219329834, "learning_rate": 2.5689356153468192e-05, "loss": 1.9849, "step": 8939 }, { "epoch": 0.6729520691017896, "grad_norm": 5.158320426940918, "learning_rate": 2.567870398300393e-05, "loss": 1.8153, "step": 8940 }, { "epoch": 0.6730273433824498, "grad_norm": 6.412191390991211, "learning_rate": 2.5668053258370918e-05, "loss": 1.8805, "step": 8941 }, { "epoch": 0.67310261766311, "grad_norm": 4.714682102203369, "learning_rate": 2.565740398020231e-05, "loss": 1.4891, "step": 8942 }, { "epoch": 0.6731778919437701, "grad_norm": 5.484572410583496, "learning_rate": 2.5646756149131202e-05, "loss": 1.6467, "step": 8943 }, { "epoch": 0.6732531662244303, "grad_norm": 6.749970436096191, "learning_rate": 2.5636109765790573e-05, "loss": 1.9803, "step": 8944 }, { "epoch": 0.6733284405050904, "grad_norm": 4.412948131561279, "learning_rate": 2.5625464830813295e-05, "loss": 1.6374, "step": 8945 }, { "epoch": 0.6734037147857506, "grad_norm": 4.890275001525879, "learning_rate": 2.5614821344832197e-05, "loss": 2.0223, "step": 8946 }, { "epoch": 0.6734789890664107, "grad_norm": 5.711452484130859, "learning_rate": 2.5604179308480018e-05, "loss": 1.8884, "step": 8947 }, { "epoch": 0.6735542633470709, "grad_norm": 4.520747184753418, "learning_rate": 2.5593538722389364e-05, "loss": 1.9254, "step": 8948 }, { "epoch": 0.673629537627731, "grad_norm": 3.953178644180298, "learning_rate": 2.558289958719282e-05, "loss": 1.7587, "step": 8949 }, { "epoch": 0.6737048119083912, "grad_norm": 5.083440780639648, "learning_rate": 2.557226190352281e-05, "loss": 1.7061, "step": 8950 }, { "epoch": 0.6737800861890514, "grad_norm": 3.774132251739502, "learning_rate": 2.5561625672011767e-05, "loss": 1.8521, "step": 8951 }, { "epoch": 0.6738553604697115, "grad_norm": 5.798524379730225, "learning_rate": 2.5550990893291933e-05, "loss": 2.1594, "step": 8952 }, { "epoch": 0.6739306347503716, "grad_norm": 4.3504719734191895, "learning_rate": 2.5540357567995554e-05, "loss": 1.8972, "step": 8953 }, { "epoch": 0.6740059090310319, "grad_norm": 4.402191162109375, "learning_rate": 2.5529725696754726e-05, "loss": 1.6157, "step": 8954 }, { "epoch": 0.674081183311692, "grad_norm": 4.855814456939697, "learning_rate": 2.551909528020148e-05, "loss": 1.6673, "step": 8955 }, { "epoch": 0.6741564575923521, "grad_norm": 3.893885612487793, "learning_rate": 2.550846631896778e-05, "loss": 1.8216, "step": 8956 }, { "epoch": 0.6742317318730123, "grad_norm": 4.49495267868042, "learning_rate": 2.5497838813685503e-05, "loss": 1.8598, "step": 8957 }, { "epoch": 0.6743070061536725, "grad_norm": 4.231896877288818, "learning_rate": 2.548721276498639e-05, "loss": 1.7879, "step": 8958 }, { "epoch": 0.6743822804343326, "grad_norm": 7.277490139007568, "learning_rate": 2.547658817350216e-05, "loss": 1.909, "step": 8959 }, { "epoch": 0.6744575547149928, "grad_norm": 10.944930076599121, "learning_rate": 2.546596503986437e-05, "loss": 2.3362, "step": 8960 }, { "epoch": 0.6745328289956529, "grad_norm": 4.4685378074646, "learning_rate": 2.5455343364704575e-05, "loss": 1.723, "step": 8961 }, { "epoch": 0.674608103276313, "grad_norm": 4.842438697814941, "learning_rate": 2.5444723148654204e-05, "loss": 1.8035, "step": 8962 }, { "epoch": 0.6746833775569733, "grad_norm": 4.29227876663208, "learning_rate": 2.543410439234456e-05, "loss": 1.6039, "step": 8963 }, { "epoch": 0.6747586518376334, "grad_norm": 4.410990238189697, "learning_rate": 2.542348709640695e-05, "loss": 1.8499, "step": 8964 }, { "epoch": 0.6748339261182935, "grad_norm": 4.420595169067383, "learning_rate": 2.5412871261472494e-05, "loss": 1.8169, "step": 8965 }, { "epoch": 0.6749092003989536, "grad_norm": 3.357102394104004, "learning_rate": 2.5402256888172282e-05, "loss": 1.6254, "step": 8966 }, { "epoch": 0.6749844746796139, "grad_norm": 4.532436370849609, "learning_rate": 2.539164397713733e-05, "loss": 1.5907, "step": 8967 }, { "epoch": 0.675059748960274, "grad_norm": 3.719102621078491, "learning_rate": 2.538103252899855e-05, "loss": 1.8033, "step": 8968 }, { "epoch": 0.6751350232409341, "grad_norm": 4.57113790512085, "learning_rate": 2.537042254438673e-05, "loss": 1.7598, "step": 8969 }, { "epoch": 0.6752102975215943, "grad_norm": 3.956610918045044, "learning_rate": 2.5359814023932637e-05, "loss": 1.7427, "step": 8970 }, { "epoch": 0.6752855718022545, "grad_norm": 4.250957489013672, "learning_rate": 2.534920696826688e-05, "loss": 1.8295, "step": 8971 }, { "epoch": 0.6753608460829146, "grad_norm": 7.939421653747559, "learning_rate": 2.533860137802006e-05, "loss": 2.3515, "step": 8972 }, { "epoch": 0.6754361203635748, "grad_norm": 6.079216957092285, "learning_rate": 2.5327997253822605e-05, "loss": 1.7206, "step": 8973 }, { "epoch": 0.6755113946442349, "grad_norm": 4.681916236877441, "learning_rate": 2.5317394596304943e-05, "loss": 2.1824, "step": 8974 }, { "epoch": 0.675586668924895, "grad_norm": 5.979300498962402, "learning_rate": 2.530679340609733e-05, "loss": 1.9406, "step": 8975 }, { "epoch": 0.6756619432055553, "grad_norm": 4.574625015258789, "learning_rate": 2.5296193683829987e-05, "loss": 1.9338, "step": 8976 }, { "epoch": 0.6757372174862154, "grad_norm": 5.333022594451904, "learning_rate": 2.5285595430133058e-05, "loss": 2.0642, "step": 8977 }, { "epoch": 0.6758124917668755, "grad_norm": 4.997891902923584, "learning_rate": 2.527499864563656e-05, "loss": 1.9122, "step": 8978 }, { "epoch": 0.6758877660475358, "grad_norm": 5.70123291015625, "learning_rate": 2.526440333097047e-05, "loss": 1.9367, "step": 8979 }, { "epoch": 0.6759630403281959, "grad_norm": 4.904244899749756, "learning_rate": 2.5253809486764614e-05, "loss": 1.991, "step": 8980 }, { "epoch": 0.676038314608856, "grad_norm": 4.200491428375244, "learning_rate": 2.5243217113648788e-05, "loss": 1.8222, "step": 8981 }, { "epoch": 0.6761135888895162, "grad_norm": 5.340821266174316, "learning_rate": 2.5232626212252657e-05, "loss": 1.925, "step": 8982 }, { "epoch": 0.6761888631701763, "grad_norm": 4.627140045166016, "learning_rate": 2.522203678320585e-05, "loss": 1.8809, "step": 8983 }, { "epoch": 0.6762641374508365, "grad_norm": 4.27971076965332, "learning_rate": 2.521144882713784e-05, "loss": 2.2423, "step": 8984 }, { "epoch": 0.6763394117314966, "grad_norm": 4.922454833984375, "learning_rate": 2.520086234467809e-05, "loss": 2.032, "step": 8985 }, { "epoch": 0.6764146860121568, "grad_norm": 5.3509039878845215, "learning_rate": 2.5190277336455902e-05, "loss": 2.0768, "step": 8986 }, { "epoch": 0.6764899602928169, "grad_norm": 10.495585441589355, "learning_rate": 2.5179693803100536e-05, "loss": 1.988, "step": 8987 }, { "epoch": 0.6765652345734771, "grad_norm": 4.338545322418213, "learning_rate": 2.516911174524116e-05, "loss": 1.6176, "step": 8988 }, { "epoch": 0.6766405088541373, "grad_norm": 5.272329330444336, "learning_rate": 2.5158531163506854e-05, "loss": 1.5887, "step": 8989 }, { "epoch": 0.6767157831347974, "grad_norm": 4.774064540863037, "learning_rate": 2.514795205852658e-05, "loss": 1.5859, "step": 8990 }, { "epoch": 0.6767910574154575, "grad_norm": 4.001836776733398, "learning_rate": 2.5137374430929262e-05, "loss": 1.9374, "step": 8991 }, { "epoch": 0.6768663316961178, "grad_norm": 5.248881816864014, "learning_rate": 2.5126798281343677e-05, "loss": 2.1088, "step": 8992 }, { "epoch": 0.6769416059767779, "grad_norm": 4.291639804840088, "learning_rate": 2.511622361039858e-05, "loss": 1.8804, "step": 8993 }, { "epoch": 0.677016880257438, "grad_norm": 5.402246475219727, "learning_rate": 2.510565041872258e-05, "loss": 1.766, "step": 8994 }, { "epoch": 0.6770921545380982, "grad_norm": 5.217708110809326, "learning_rate": 2.509507870694423e-05, "loss": 2.2748, "step": 8995 }, { "epoch": 0.6771674288187584, "grad_norm": 4.564388751983643, "learning_rate": 2.5084508475692008e-05, "loss": 1.9711, "step": 8996 }, { "epoch": 0.6772427030994185, "grad_norm": 4.961460590362549, "learning_rate": 2.5073939725594248e-05, "loss": 1.9904, "step": 8997 }, { "epoch": 0.6773179773800787, "grad_norm": 3.4901316165924072, "learning_rate": 2.5063372457279266e-05, "loss": 1.8561, "step": 8998 }, { "epoch": 0.6773932516607388, "grad_norm": 6.30404806137085, "learning_rate": 2.505280667137523e-05, "loss": 1.6779, "step": 8999 }, { "epoch": 0.677468525941399, "grad_norm": 5.243917942047119, "learning_rate": 2.504224236851025e-05, "loss": 1.767, "step": 9000 }, { "epoch": 0.6775438002220592, "grad_norm": 5.209652423858643, "learning_rate": 2.5031679549312347e-05, "loss": 2.1176, "step": 9001 }, { "epoch": 0.6776190745027193, "grad_norm": 6.367055416107178, "learning_rate": 2.5021118214409473e-05, "loss": 1.8135, "step": 9002 }, { "epoch": 0.6776943487833794, "grad_norm": 4.160006046295166, "learning_rate": 2.5010558364429433e-05, "loss": 1.9674, "step": 9003 }, { "epoch": 0.6777696230640395, "grad_norm": 5.699466705322266, "learning_rate": 2.500000000000001e-05, "loss": 1.9511, "step": 9004 }, { "epoch": 0.6778448973446998, "grad_norm": 4.277342319488525, "learning_rate": 2.498944312174884e-05, "loss": 1.6726, "step": 9005 }, { "epoch": 0.6779201716253599, "grad_norm": 4.5288543701171875, "learning_rate": 2.497888773030353e-05, "loss": 1.5794, "step": 9006 }, { "epoch": 0.67799544590602, "grad_norm": 4.378624439239502, "learning_rate": 2.496833382629153e-05, "loss": 1.7962, "step": 9007 }, { "epoch": 0.6780707201866802, "grad_norm": 6.767026901245117, "learning_rate": 2.495778141034028e-05, "loss": 2.0421, "step": 9008 }, { "epoch": 0.6781459944673404, "grad_norm": 3.869217872619629, "learning_rate": 2.4947230483077048e-05, "loss": 1.7959, "step": 9009 }, { "epoch": 0.6782212687480005, "grad_norm": 4.102424621582031, "learning_rate": 2.493668104512909e-05, "loss": 1.8528, "step": 9010 }, { "epoch": 0.6782965430286607, "grad_norm": 5.372541904449463, "learning_rate": 2.492613309712352e-05, "loss": 1.9733, "step": 9011 }, { "epoch": 0.6783718173093208, "grad_norm": 4.331643581390381, "learning_rate": 2.4915586639687394e-05, "loss": 1.497, "step": 9012 }, { "epoch": 0.678447091589981, "grad_norm": 5.577416896820068, "learning_rate": 2.490504167344769e-05, "loss": 1.3954, "step": 9013 }, { "epoch": 0.6785223658706412, "grad_norm": 6.015734672546387, "learning_rate": 2.489449819903123e-05, "loss": 2.0559, "step": 9014 }, { "epoch": 0.6785976401513013, "grad_norm": 4.008953094482422, "learning_rate": 2.4883956217064834e-05, "loss": 1.6231, "step": 9015 }, { "epoch": 0.6786729144319614, "grad_norm": 6.033753395080566, "learning_rate": 2.487341572817516e-05, "loss": 2.1297, "step": 9016 }, { "epoch": 0.6787481887126217, "grad_norm": 4.549692630767822, "learning_rate": 2.4862876732988844e-05, "loss": 1.8499, "step": 9017 }, { "epoch": 0.6788234629932818, "grad_norm": 4.264279365539551, "learning_rate": 2.485233923213236e-05, "loss": 1.7449, "step": 9018 }, { "epoch": 0.6788987372739419, "grad_norm": 4.737195014953613, "learning_rate": 2.4841803226232168e-05, "loss": 1.9426, "step": 9019 }, { "epoch": 0.6789740115546021, "grad_norm": 6.565869331359863, "learning_rate": 2.4831268715914568e-05, "loss": 1.3662, "step": 9020 }, { "epoch": 0.6790492858352623, "grad_norm": 5.90390682220459, "learning_rate": 2.482073570180583e-05, "loss": 2.0404, "step": 9021 }, { "epoch": 0.6791245601159224, "grad_norm": 3.933093547821045, "learning_rate": 2.4810204184532104e-05, "loss": 2.0474, "step": 9022 }, { "epoch": 0.6791998343965826, "grad_norm": 4.74329948425293, "learning_rate": 2.4799674164719483e-05, "loss": 1.4304, "step": 9023 }, { "epoch": 0.6792751086772427, "grad_norm": 6.14803409576416, "learning_rate": 2.4789145642993904e-05, "loss": 2.17, "step": 9024 }, { "epoch": 0.6793503829579028, "grad_norm": 5.256182670593262, "learning_rate": 2.4778618619981296e-05, "loss": 1.7758, "step": 9025 }, { "epoch": 0.679425657238563, "grad_norm": 8.053637504577637, "learning_rate": 2.4768093096307428e-05, "loss": 2.4638, "step": 9026 }, { "epoch": 0.6795009315192232, "grad_norm": 5.0397820472717285, "learning_rate": 2.4757569072598025e-05, "loss": 2.1023, "step": 9027 }, { "epoch": 0.6795762057998833, "grad_norm": 4.719660758972168, "learning_rate": 2.474704654947873e-05, "loss": 1.6651, "step": 9028 }, { "epoch": 0.6796514800805434, "grad_norm": 5.490492343902588, "learning_rate": 2.4736525527575038e-05, "loss": 2.0088, "step": 9029 }, { "epoch": 0.6797267543612037, "grad_norm": 3.843472719192505, "learning_rate": 2.4726006007512437e-05, "loss": 2.576, "step": 9030 }, { "epoch": 0.6798020286418638, "grad_norm": 5.109609127044678, "learning_rate": 2.4715487989916237e-05, "loss": 1.6954, "step": 9031 }, { "epoch": 0.6798773029225239, "grad_norm": 4.13579797744751, "learning_rate": 2.470497147541173e-05, "loss": 1.6793, "step": 9032 }, { "epoch": 0.6799525772031841, "grad_norm": 5.528165817260742, "learning_rate": 2.469445646462409e-05, "loss": 1.9381, "step": 9033 }, { "epoch": 0.6800278514838443, "grad_norm": 4.903003692626953, "learning_rate": 2.468394295817842e-05, "loss": 1.7115, "step": 9034 }, { "epoch": 0.6801031257645044, "grad_norm": 4.704830169677734, "learning_rate": 2.4673430956699684e-05, "loss": 1.7333, "step": 9035 }, { "epoch": 0.6801784000451646, "grad_norm": 4.738926410675049, "learning_rate": 2.466292046081282e-05, "loss": 1.8686, "step": 9036 }, { "epoch": 0.6802536743258247, "grad_norm": 5.204471111297607, "learning_rate": 2.4652411471142624e-05, "loss": 1.7435, "step": 9037 }, { "epoch": 0.6803289486064849, "grad_norm": 4.219439506530762, "learning_rate": 2.4641903988313852e-05, "loss": 2.0567, "step": 9038 }, { "epoch": 0.6804042228871451, "grad_norm": 3.5801472663879395, "learning_rate": 2.4631398012951107e-05, "loss": 1.8702, "step": 9039 }, { "epoch": 0.6804794971678052, "grad_norm": 5.407886981964111, "learning_rate": 2.462089354567898e-05, "loss": 2.1262, "step": 9040 }, { "epoch": 0.6805547714484653, "grad_norm": 4.085020542144775, "learning_rate": 2.461039058712189e-05, "loss": 1.5482, "step": 9041 }, { "epoch": 0.6806300457291256, "grad_norm": 3.4622628688812256, "learning_rate": 2.459988913790423e-05, "loss": 2.2664, "step": 9042 }, { "epoch": 0.6807053200097857, "grad_norm": 4.15674352645874, "learning_rate": 2.4589389198650282e-05, "loss": 1.7201, "step": 9043 }, { "epoch": 0.6807805942904458, "grad_norm": 4.034127712249756, "learning_rate": 2.457889076998423e-05, "loss": 1.8085, "step": 9044 }, { "epoch": 0.6808558685711059, "grad_norm": 5.499331474304199, "learning_rate": 2.4568393852530202e-05, "loss": 1.5786, "step": 9045 }, { "epoch": 0.6809311428517661, "grad_norm": 4.728934288024902, "learning_rate": 2.455789844691217e-05, "loss": 1.9485, "step": 9046 }, { "epoch": 0.6810064171324263, "grad_norm": 3.9879322052001953, "learning_rate": 2.454740455375409e-05, "loss": 2.0854, "step": 9047 }, { "epoch": 0.6810816914130864, "grad_norm": 4.168044090270996, "learning_rate": 2.453691217367976e-05, "loss": 1.6686, "step": 9048 }, { "epoch": 0.6811569656937466, "grad_norm": 3.6258819103240967, "learning_rate": 2.452642130731296e-05, "loss": 1.8926, "step": 9049 }, { "epoch": 0.6812322399744067, "grad_norm": 5.833902835845947, "learning_rate": 2.45159319552773e-05, "loss": 1.8476, "step": 9050 }, { "epoch": 0.6813075142550669, "grad_norm": 4.166037082672119, "learning_rate": 2.450544411819638e-05, "loss": 1.6482, "step": 9051 }, { "epoch": 0.6813827885357271, "grad_norm": 4.751802444458008, "learning_rate": 2.449495779669364e-05, "loss": 1.791, "step": 9052 }, { "epoch": 0.6814580628163872, "grad_norm": 4.893528461456299, "learning_rate": 2.4484472991392497e-05, "loss": 1.6024, "step": 9053 }, { "epoch": 0.6815333370970473, "grad_norm": 4.66733455657959, "learning_rate": 2.44739897029162e-05, "loss": 1.5003, "step": 9054 }, { "epoch": 0.6816086113777076, "grad_norm": 6.2677106857299805, "learning_rate": 2.4463507931887976e-05, "loss": 1.7657, "step": 9055 }, { "epoch": 0.6816838856583677, "grad_norm": 8.477002143859863, "learning_rate": 2.445302767893093e-05, "loss": 1.86, "step": 9056 }, { "epoch": 0.6817591599390278, "grad_norm": 6.425327777862549, "learning_rate": 2.4442548944668107e-05, "loss": 2.0036, "step": 9057 }, { "epoch": 0.681834434219688, "grad_norm": 5.1553473472595215, "learning_rate": 2.44320717297224e-05, "loss": 1.9077, "step": 9058 }, { "epoch": 0.6819097085003482, "grad_norm": 5.682281970977783, "learning_rate": 2.442159603471666e-05, "loss": 1.9357, "step": 9059 }, { "epoch": 0.6819849827810083, "grad_norm": 4.62346887588501, "learning_rate": 2.4411121860273666e-05, "loss": 2.0843, "step": 9060 }, { "epoch": 0.6820602570616685, "grad_norm": 5.829575538635254, "learning_rate": 2.4400649207016037e-05, "loss": 1.5117, "step": 9061 }, { "epoch": 0.6821355313423286, "grad_norm": 4.86073637008667, "learning_rate": 2.4390178075566373e-05, "loss": 1.8396, "step": 9062 }, { "epoch": 0.6822108056229887, "grad_norm": 4.742579936981201, "learning_rate": 2.4379708466547125e-05, "loss": 1.8558, "step": 9063 }, { "epoch": 0.6822860799036489, "grad_norm": 4.661652565002441, "learning_rate": 2.4369240380580716e-05, "loss": 1.6086, "step": 9064 }, { "epoch": 0.6823613541843091, "grad_norm": 4.343211650848389, "learning_rate": 2.43587738182894e-05, "loss": 1.6114, "step": 9065 }, { "epoch": 0.6824366284649692, "grad_norm": 5.326225757598877, "learning_rate": 2.4348308780295408e-05, "loss": 1.8733, "step": 9066 }, { "epoch": 0.6825119027456293, "grad_norm": 5.697606086730957, "learning_rate": 2.433784526722086e-05, "loss": 1.827, "step": 9067 }, { "epoch": 0.6825871770262896, "grad_norm": 5.193353652954102, "learning_rate": 2.43273832796878e-05, "loss": 1.7111, "step": 9068 }, { "epoch": 0.6826624513069497, "grad_norm": 3.550020456314087, "learning_rate": 2.4316922818318115e-05, "loss": 1.9625, "step": 9069 }, { "epoch": 0.6827377255876098, "grad_norm": 4.3767852783203125, "learning_rate": 2.4306463883733693e-05, "loss": 1.7519, "step": 9070 }, { "epoch": 0.68281299986827, "grad_norm": 4.287188529968262, "learning_rate": 2.4296006476556256e-05, "loss": 2.1424, "step": 9071 }, { "epoch": 0.6828882741489302, "grad_norm": 4.752734661102295, "learning_rate": 2.4285550597407503e-05, "loss": 2.0816, "step": 9072 }, { "epoch": 0.6829635484295903, "grad_norm": 4.153080940246582, "learning_rate": 2.427509624690896e-05, "loss": 1.7652, "step": 9073 }, { "epoch": 0.6830388227102505, "grad_norm": 4.861576080322266, "learning_rate": 2.4264643425682133e-05, "loss": 1.8001, "step": 9074 }, { "epoch": 0.6831140969909106, "grad_norm": 5.290843486785889, "learning_rate": 2.4254192134348425e-05, "loss": 1.5115, "step": 9075 }, { "epoch": 0.6831893712715708, "grad_norm": 7.709638595581055, "learning_rate": 2.424374237352911e-05, "loss": 2.0455, "step": 9076 }, { "epoch": 0.683264645552231, "grad_norm": 4.527714729309082, "learning_rate": 2.4233294143845403e-05, "loss": 1.9473, "step": 9077 }, { "epoch": 0.6833399198328911, "grad_norm": 4.879895210266113, "learning_rate": 2.4222847445918428e-05, "loss": 1.5161, "step": 9078 }, { "epoch": 0.6834151941135512, "grad_norm": 4.958390712738037, "learning_rate": 2.4212402280369235e-05, "loss": 1.7933, "step": 9079 }, { "epoch": 0.6834904683942115, "grad_norm": 4.639610290527344, "learning_rate": 2.4201958647818707e-05, "loss": 1.9909, "step": 9080 }, { "epoch": 0.6835657426748716, "grad_norm": 5.387855052947998, "learning_rate": 2.4191516548887732e-05, "loss": 1.7857, "step": 9081 }, { "epoch": 0.6836410169555317, "grad_norm": 5.309325695037842, "learning_rate": 2.4181075984197034e-05, "loss": 1.8503, "step": 9082 }, { "epoch": 0.6837162912361918, "grad_norm": 4.217898368835449, "learning_rate": 2.41706369543673e-05, "loss": 1.7031, "step": 9083 }, { "epoch": 0.683791565516852, "grad_norm": 4.389471530914307, "learning_rate": 2.4160199460019067e-05, "loss": 1.8445, "step": 9084 }, { "epoch": 0.6838668397975122, "grad_norm": 4.237769603729248, "learning_rate": 2.4149763501772847e-05, "loss": 1.5626, "step": 9085 }, { "epoch": 0.6839421140781723, "grad_norm": 4.457744598388672, "learning_rate": 2.4139329080249006e-05, "loss": 1.4935, "step": 9086 }, { "epoch": 0.6840173883588325, "grad_norm": 5.8319902420043945, "learning_rate": 2.412889619606784e-05, "loss": 1.6587, "step": 9087 }, { "epoch": 0.6840926626394926, "grad_norm": 5.9280476570129395, "learning_rate": 2.411846484984957e-05, "loss": 1.734, "step": 9088 }, { "epoch": 0.6841679369201528, "grad_norm": 4.0684332847595215, "learning_rate": 2.4108035042214316e-05, "loss": 1.9654, "step": 9089 }, { "epoch": 0.684243211200813, "grad_norm": 4.031683921813965, "learning_rate": 2.409760677378207e-05, "loss": 1.8936, "step": 9090 }, { "epoch": 0.6843184854814731, "grad_norm": 6.697076320648193, "learning_rate": 2.408718004517278e-05, "loss": 2.219, "step": 9091 }, { "epoch": 0.6843937597621332, "grad_norm": 4.2221174240112305, "learning_rate": 2.40767548570063e-05, "loss": 1.5519, "step": 9092 }, { "epoch": 0.6844690340427935, "grad_norm": 5.143556118011475, "learning_rate": 2.4066331209902342e-05, "loss": 1.7326, "step": 9093 }, { "epoch": 0.6845443083234536, "grad_norm": 4.825675964355469, "learning_rate": 2.4055909104480602e-05, "loss": 1.8874, "step": 9094 }, { "epoch": 0.6846195826041137, "grad_norm": 5.928195953369141, "learning_rate": 2.4045488541360606e-05, "loss": 1.8395, "step": 9095 }, { "epoch": 0.6846948568847739, "grad_norm": 4.8983659744262695, "learning_rate": 2.4035069521161864e-05, "loss": 1.8492, "step": 9096 }, { "epoch": 0.6847701311654341, "grad_norm": 3.9793169498443604, "learning_rate": 2.4024652044503713e-05, "loss": 1.4956, "step": 9097 }, { "epoch": 0.6848454054460942, "grad_norm": 6.323598384857178, "learning_rate": 2.4014236112005474e-05, "loss": 1.7211, "step": 9098 }, { "epoch": 0.6849206797267544, "grad_norm": 4.206787586212158, "learning_rate": 2.4003821724286353e-05, "loss": 1.7448, "step": 9099 }, { "epoch": 0.6849959540074145, "grad_norm": 5.844894886016846, "learning_rate": 2.3993408881965418e-05, "loss": 1.6057, "step": 9100 }, { "epoch": 0.6850712282880747, "grad_norm": 4.612281322479248, "learning_rate": 2.3982997585661703e-05, "loss": 1.7822, "step": 9101 }, { "epoch": 0.6851465025687348, "grad_norm": 4.586778163909912, "learning_rate": 2.397258783599415e-05, "loss": 1.6179, "step": 9102 }, { "epoch": 0.685221776849395, "grad_norm": 4.259588718414307, "learning_rate": 2.396217963358155e-05, "loss": 1.6766, "step": 9103 }, { "epoch": 0.6852970511300551, "grad_norm": 5.42435359954834, "learning_rate": 2.395177297904268e-05, "loss": 1.8473, "step": 9104 }, { "epoch": 0.6853723254107152, "grad_norm": 5.430781841278076, "learning_rate": 2.3941367872996144e-05, "loss": 1.7342, "step": 9105 }, { "epoch": 0.6854475996913755, "grad_norm": 8.5025053024292, "learning_rate": 2.393096431606054e-05, "loss": 2.1931, "step": 9106 }, { "epoch": 0.6855228739720356, "grad_norm": 4.162214279174805, "learning_rate": 2.392056230885429e-05, "loss": 1.9266, "step": 9107 }, { "epoch": 0.6855981482526957, "grad_norm": 5.234429836273193, "learning_rate": 2.3910161851995778e-05, "loss": 1.9606, "step": 9108 }, { "epoch": 0.685673422533356, "grad_norm": 4.853522300720215, "learning_rate": 2.38997629461033e-05, "loss": 1.7711, "step": 9109 }, { "epoch": 0.6857486968140161, "grad_norm": 4.913171768188477, "learning_rate": 2.388936559179501e-05, "loss": 1.9255, "step": 9110 }, { "epoch": 0.6858239710946762, "grad_norm": 4.298588752746582, "learning_rate": 2.387896978968901e-05, "loss": 1.9373, "step": 9111 }, { "epoch": 0.6858992453753364, "grad_norm": 8.830284118652344, "learning_rate": 2.386857554040331e-05, "loss": 1.7534, "step": 9112 }, { "epoch": 0.6859745196559965, "grad_norm": 5.750880718231201, "learning_rate": 2.3858182844555832e-05, "loss": 1.7645, "step": 9113 }, { "epoch": 0.6860497939366567, "grad_norm": 4.981136798858643, "learning_rate": 2.3847791702764362e-05, "loss": 1.6493, "step": 9114 }, { "epoch": 0.6861250682173169, "grad_norm": 4.677420616149902, "learning_rate": 2.3837402115646647e-05, "loss": 2.0298, "step": 9115 }, { "epoch": 0.686200342497977, "grad_norm": 4.720395565032959, "learning_rate": 2.3827014083820292e-05, "loss": 1.4519, "step": 9116 }, { "epoch": 0.6862756167786371, "grad_norm": 4.0699968338012695, "learning_rate": 2.381662760790287e-05, "loss": 1.4495, "step": 9117 }, { "epoch": 0.6863508910592974, "grad_norm": 3.9409024715423584, "learning_rate": 2.3806242688511786e-05, "loss": 1.8021, "step": 9118 }, { "epoch": 0.6864261653399575, "grad_norm": 8.845688819885254, "learning_rate": 2.379585932626444e-05, "loss": 2.0402, "step": 9119 }, { "epoch": 0.6865014396206176, "grad_norm": 5.33734655380249, "learning_rate": 2.378547752177805e-05, "loss": 1.6132, "step": 9120 }, { "epoch": 0.6865767139012778, "grad_norm": 5.080447673797607, "learning_rate": 2.37750972756698e-05, "loss": 1.7566, "step": 9121 }, { "epoch": 0.686651988181938, "grad_norm": 6.031424045562744, "learning_rate": 2.3764718588556773e-05, "loss": 1.9316, "step": 9122 }, { "epoch": 0.6867272624625981, "grad_norm": 8.825675010681152, "learning_rate": 2.3754341461055947e-05, "loss": 1.4095, "step": 9123 }, { "epoch": 0.6868025367432582, "grad_norm": 5.342495441436768, "learning_rate": 2.3743965893784225e-05, "loss": 1.7483, "step": 9124 }, { "epoch": 0.6868778110239184, "grad_norm": 4.1590142250061035, "learning_rate": 2.3733591887358385e-05, "loss": 1.4803, "step": 9125 }, { "epoch": 0.6869530853045785, "grad_norm": 4.0848846435546875, "learning_rate": 2.3723219442395156e-05, "loss": 1.9417, "step": 9126 }, { "epoch": 0.6870283595852387, "grad_norm": 6.331859111785889, "learning_rate": 2.3712848559511112e-05, "loss": 2.1291, "step": 9127 }, { "epoch": 0.6871036338658989, "grad_norm": 3.932668924331665, "learning_rate": 2.3702479239322818e-05, "loss": 1.4551, "step": 9128 }, { "epoch": 0.687178908146559, "grad_norm": 7.706721305847168, "learning_rate": 2.369211148244666e-05, "loss": 1.7745, "step": 9129 }, { "epoch": 0.6872541824272191, "grad_norm": 5.1186676025390625, "learning_rate": 2.3681745289499002e-05, "loss": 2.1512, "step": 9130 }, { "epoch": 0.6873294567078794, "grad_norm": 3.926673173904419, "learning_rate": 2.3671380661096053e-05, "loss": 1.8025, "step": 9131 }, { "epoch": 0.6874047309885395, "grad_norm": 5.5438971519470215, "learning_rate": 2.3661017597853987e-05, "loss": 1.5459, "step": 9132 }, { "epoch": 0.6874800052691996, "grad_norm": 6.186820983886719, "learning_rate": 2.365065610038884e-05, "loss": 1.9546, "step": 9133 }, { "epoch": 0.6875552795498598, "grad_norm": 4.476471900939941, "learning_rate": 2.3640296169316604e-05, "loss": 1.78, "step": 9134 }, { "epoch": 0.68763055383052, "grad_norm": 4.045302867889404, "learning_rate": 2.362993780525311e-05, "loss": 1.7067, "step": 9135 }, { "epoch": 0.6877058281111801, "grad_norm": 6.452706813812256, "learning_rate": 2.361958100881417e-05, "loss": 1.5105, "step": 9136 }, { "epoch": 0.6877811023918403, "grad_norm": 6.263516902923584, "learning_rate": 2.360922578061542e-05, "loss": 2.1166, "step": 9137 }, { "epoch": 0.6878563766725004, "grad_norm": 4.467607021331787, "learning_rate": 2.3598872121272498e-05, "loss": 1.7217, "step": 9138 }, { "epoch": 0.6879316509531606, "grad_norm": 4.40142297744751, "learning_rate": 2.358852003140085e-05, "loss": 1.8081, "step": 9139 }, { "epoch": 0.6880069252338208, "grad_norm": 4.108103275299072, "learning_rate": 2.3578169511615912e-05, "loss": 1.8588, "step": 9140 }, { "epoch": 0.6880821995144809, "grad_norm": 4.90578556060791, "learning_rate": 2.3567820562532994e-05, "loss": 1.6132, "step": 9141 }, { "epoch": 0.688157473795141, "grad_norm": 4.553472518920898, "learning_rate": 2.3557473184767287e-05, "loss": 1.5817, "step": 9142 }, { "epoch": 0.6882327480758011, "grad_norm": 4.37747049331665, "learning_rate": 2.3547127378933925e-05, "loss": 1.803, "step": 9143 }, { "epoch": 0.6883080223564614, "grad_norm": 4.457742691040039, "learning_rate": 2.353678314564794e-05, "loss": 1.6272, "step": 9144 }, { "epoch": 0.6883832966371215, "grad_norm": 4.232684135437012, "learning_rate": 2.352644048552428e-05, "loss": 1.9434, "step": 9145 }, { "epoch": 0.6884585709177816, "grad_norm": 4.716200828552246, "learning_rate": 2.351609939917776e-05, "loss": 2.2549, "step": 9146 }, { "epoch": 0.6885338451984419, "grad_norm": 4.657998561859131, "learning_rate": 2.3505759887223155e-05, "loss": 1.828, "step": 9147 }, { "epoch": 0.688609119479102, "grad_norm": 5.67838716506958, "learning_rate": 2.3495421950275086e-05, "loss": 1.9363, "step": 9148 }, { "epoch": 0.6886843937597621, "grad_norm": 8.302669525146484, "learning_rate": 2.3485085588948153e-05, "loss": 2.1315, "step": 9149 }, { "epoch": 0.6887596680404223, "grad_norm": 5.238595962524414, "learning_rate": 2.3474750803856782e-05, "loss": 2.1672, "step": 9150 }, { "epoch": 0.6888349423210824, "grad_norm": 4.266178131103516, "learning_rate": 2.346441759561538e-05, "loss": 1.8642, "step": 9151 }, { "epoch": 0.6889102166017426, "grad_norm": 6.604078769683838, "learning_rate": 2.34540859648382e-05, "loss": 1.9481, "step": 9152 }, { "epoch": 0.6889854908824028, "grad_norm": 5.05017614364624, "learning_rate": 2.3443755912139458e-05, "loss": 1.8284, "step": 9153 }, { "epoch": 0.6890607651630629, "grad_norm": 4.7816162109375, "learning_rate": 2.343342743813321e-05, "loss": 2.0192, "step": 9154 }, { "epoch": 0.689136039443723, "grad_norm": 4.639046669006348, "learning_rate": 2.342310054343347e-05, "loss": 1.9001, "step": 9155 }, { "epoch": 0.6892113137243833, "grad_norm": 5.766395568847656, "learning_rate": 2.3412775228654147e-05, "loss": 2.4821, "step": 9156 }, { "epoch": 0.6892865880050434, "grad_norm": 4.912259578704834, "learning_rate": 2.340245149440905e-05, "loss": 1.6332, "step": 9157 }, { "epoch": 0.6893618622857035, "grad_norm": 4.39493465423584, "learning_rate": 2.3392129341311914e-05, "loss": 1.7889, "step": 9158 }, { "epoch": 0.6894371365663637, "grad_norm": 3.6425859928131104, "learning_rate": 2.3381808769976323e-05, "loss": 1.9316, "step": 9159 }, { "epoch": 0.6895124108470239, "grad_norm": 5.146086692810059, "learning_rate": 2.3371489781015838e-05, "loss": 2.1955, "step": 9160 }, { "epoch": 0.689587685127684, "grad_norm": 3.5690157413482666, "learning_rate": 2.3361172375043867e-05, "loss": 1.7187, "step": 9161 }, { "epoch": 0.6896629594083441, "grad_norm": 6.691940784454346, "learning_rate": 2.335085655267378e-05, "loss": 1.8649, "step": 9162 }, { "epoch": 0.6897382336890043, "grad_norm": 4.657131671905518, "learning_rate": 2.3340542314518786e-05, "loss": 1.7588, "step": 9163 }, { "epoch": 0.6898135079696645, "grad_norm": 5.437439441680908, "learning_rate": 2.3330229661192077e-05, "loss": 1.406, "step": 9164 }, { "epoch": 0.6898887822503246, "grad_norm": 4.345561981201172, "learning_rate": 2.3319918593306673e-05, "loss": 1.7277, "step": 9165 }, { "epoch": 0.6899640565309848, "grad_norm": 5.805276393890381, "learning_rate": 2.330960911147555e-05, "loss": 1.8672, "step": 9166 }, { "epoch": 0.6900393308116449, "grad_norm": 4.972979545593262, "learning_rate": 2.3299301216311586e-05, "loss": 1.5813, "step": 9167 }, { "epoch": 0.690114605092305, "grad_norm": 6.759117126464844, "learning_rate": 2.328899490842757e-05, "loss": 1.8435, "step": 9168 }, { "epoch": 0.6901898793729653, "grad_norm": 6.600821495056152, "learning_rate": 2.3278690188436147e-05, "loss": 2.086, "step": 9169 }, { "epoch": 0.6902651536536254, "grad_norm": 4.582935810089111, "learning_rate": 2.3268387056949935e-05, "loss": 1.7489, "step": 9170 }, { "epoch": 0.6903404279342855, "grad_norm": 4.0333991050720215, "learning_rate": 2.3258085514581395e-05, "loss": 1.7854, "step": 9171 }, { "epoch": 0.6904157022149457, "grad_norm": 5.5755085945129395, "learning_rate": 2.324778556194294e-05, "loss": 1.3656, "step": 9172 }, { "epoch": 0.6904909764956059, "grad_norm": 3.8935749530792236, "learning_rate": 2.3237487199646896e-05, "loss": 1.7583, "step": 9173 }, { "epoch": 0.690566250776266, "grad_norm": 4.003864288330078, "learning_rate": 2.3227190428305425e-05, "loss": 1.7924, "step": 9174 }, { "epoch": 0.6906415250569262, "grad_norm": 6.712199687957764, "learning_rate": 2.321689524853068e-05, "loss": 1.9053, "step": 9175 }, { "epoch": 0.6907167993375863, "grad_norm": 4.886847019195557, "learning_rate": 2.320660166093464e-05, "loss": 1.9904, "step": 9176 }, { "epoch": 0.6907920736182465, "grad_norm": 6.222190856933594, "learning_rate": 2.3196309666129263e-05, "loss": 1.5595, "step": 9177 }, { "epoch": 0.6908673478989067, "grad_norm": 4.808961868286133, "learning_rate": 2.318601926472636e-05, "loss": 2.0759, "step": 9178 }, { "epoch": 0.6909426221795668, "grad_norm": 9.17426872253418, "learning_rate": 2.3175730457337697e-05, "loss": 1.9261, "step": 9179 }, { "epoch": 0.6910178964602269, "grad_norm": 5.523231029510498, "learning_rate": 2.316544324457487e-05, "loss": 1.4542, "step": 9180 }, { "epoch": 0.691093170740887, "grad_norm": 4.6755781173706055, "learning_rate": 2.3155157627049462e-05, "loss": 1.5624, "step": 9181 }, { "epoch": 0.6911684450215473, "grad_norm": 4.937200546264648, "learning_rate": 2.314487360537289e-05, "loss": 1.9299, "step": 9182 }, { "epoch": 0.6912437193022074, "grad_norm": 5.608829975128174, "learning_rate": 2.3134591180156545e-05, "loss": 1.5029, "step": 9183 }, { "epoch": 0.6913189935828675, "grad_norm": 3.9532153606414795, "learning_rate": 2.3124310352011652e-05, "loss": 1.4616, "step": 9184 }, { "epoch": 0.6913942678635278, "grad_norm": 6.401953220367432, "learning_rate": 2.3114031121549405e-05, "loss": 1.7897, "step": 9185 }, { "epoch": 0.6914695421441879, "grad_norm": 5.974150657653809, "learning_rate": 2.3103753489380853e-05, "loss": 2.3324, "step": 9186 }, { "epoch": 0.691544816424848, "grad_norm": 5.477693557739258, "learning_rate": 2.3093477456116978e-05, "loss": 1.5144, "step": 9187 }, { "epoch": 0.6916200907055082, "grad_norm": 5.690422058105469, "learning_rate": 2.3083203022368667e-05, "loss": 1.7013, "step": 9188 }, { "epoch": 0.6916953649861683, "grad_norm": 6.343783855438232, "learning_rate": 2.30729301887467e-05, "loss": 1.5204, "step": 9189 }, { "epoch": 0.6917706392668285, "grad_norm": 4.771541595458984, "learning_rate": 2.3062658955861794e-05, "loss": 1.9007, "step": 9190 }, { "epoch": 0.6918459135474887, "grad_norm": 5.155393600463867, "learning_rate": 2.3052389324324504e-05, "loss": 1.7802, "step": 9191 }, { "epoch": 0.6919211878281488, "grad_norm": 5.258251190185547, "learning_rate": 2.3042121294745356e-05, "loss": 2.188, "step": 9192 }, { "epoch": 0.6919964621088089, "grad_norm": 5.419024467468262, "learning_rate": 2.3031854867734737e-05, "loss": 1.7274, "step": 9193 }, { "epoch": 0.6920717363894692, "grad_norm": 4.655803203582764, "learning_rate": 2.302159004390298e-05, "loss": 1.8137, "step": 9194 }, { "epoch": 0.6921470106701293, "grad_norm": 4.632664680480957, "learning_rate": 2.301132682386027e-05, "loss": 1.7877, "step": 9195 }, { "epoch": 0.6922222849507894, "grad_norm": 4.912025451660156, "learning_rate": 2.3001065208216755e-05, "loss": 1.7493, "step": 9196 }, { "epoch": 0.6922975592314496, "grad_norm": 8.843585014343262, "learning_rate": 2.2990805197582426e-05, "loss": 1.8767, "step": 9197 }, { "epoch": 0.6923728335121098, "grad_norm": 5.6237897872924805, "learning_rate": 2.298054679256724e-05, "loss": 1.9113, "step": 9198 }, { "epoch": 0.6924481077927699, "grad_norm": 10.498770713806152, "learning_rate": 2.2970289993781025e-05, "loss": 2.0073, "step": 9199 }, { "epoch": 0.6925233820734301, "grad_norm": 4.830541610717773, "learning_rate": 2.29600348018335e-05, "loss": 1.9106, "step": 9200 }, { "epoch": 0.6925986563540902, "grad_norm": 3.9322240352630615, "learning_rate": 2.2949781217334322e-05, "loss": 1.6409, "step": 9201 }, { "epoch": 0.6926739306347504, "grad_norm": 5.151636123657227, "learning_rate": 2.293952924089305e-05, "loss": 1.8522, "step": 9202 }, { "epoch": 0.6927492049154105, "grad_norm": 4.6234130859375, "learning_rate": 2.2929278873119103e-05, "loss": 1.7615, "step": 9203 }, { "epoch": 0.6928244791960707, "grad_norm": 4.71243143081665, "learning_rate": 2.2919030114621848e-05, "loss": 1.9931, "step": 9204 }, { "epoch": 0.6928997534767308, "grad_norm": 5.4978346824646, "learning_rate": 2.2908782966010574e-05, "loss": 2.0309, "step": 9205 }, { "epoch": 0.692975027757391, "grad_norm": 4.959962844848633, "learning_rate": 2.2898537427894396e-05, "loss": 1.8914, "step": 9206 }, { "epoch": 0.6930503020380512, "grad_norm": 6.575296878814697, "learning_rate": 2.2888293500882423e-05, "loss": 2.2001, "step": 9207 }, { "epoch": 0.6931255763187113, "grad_norm": 4.736110687255859, "learning_rate": 2.2878051185583594e-05, "loss": 1.6847, "step": 9208 }, { "epoch": 0.6932008505993714, "grad_norm": 5.7614030838012695, "learning_rate": 2.2867810482606816e-05, "loss": 1.6137, "step": 9209 }, { "epoch": 0.6932761248800317, "grad_norm": 4.224489688873291, "learning_rate": 2.2857571392560838e-05, "loss": 1.5964, "step": 9210 }, { "epoch": 0.6933513991606918, "grad_norm": 5.394630432128906, "learning_rate": 2.284733391605436e-05, "loss": 1.9322, "step": 9211 }, { "epoch": 0.6934266734413519, "grad_norm": 4.493192672729492, "learning_rate": 2.283709805369597e-05, "loss": 1.96, "step": 9212 }, { "epoch": 0.6935019477220121, "grad_norm": 5.20558500289917, "learning_rate": 2.282686380609418e-05, "loss": 1.625, "step": 9213 }, { "epoch": 0.6935772220026722, "grad_norm": 10.132219314575195, "learning_rate": 2.281663117385735e-05, "loss": 1.9751, "step": 9214 }, { "epoch": 0.6936524962833324, "grad_norm": 5.027317523956299, "learning_rate": 2.2806400157593816e-05, "loss": 1.9312, "step": 9215 }, { "epoch": 0.6937277705639926, "grad_norm": 7.338446140289307, "learning_rate": 2.279617075791175e-05, "loss": 2.1066, "step": 9216 }, { "epoch": 0.6938030448446527, "grad_norm": 6.198676109313965, "learning_rate": 2.2785942975419293e-05, "loss": 1.7719, "step": 9217 }, { "epoch": 0.6938783191253128, "grad_norm": 5.513648509979248, "learning_rate": 2.2775716810724422e-05, "loss": 2.3094, "step": 9218 }, { "epoch": 0.6939535934059731, "grad_norm": 4.731198310852051, "learning_rate": 2.276549226443509e-05, "loss": 1.6878, "step": 9219 }, { "epoch": 0.6940288676866332, "grad_norm": 4.2922186851501465, "learning_rate": 2.275526933715908e-05, "loss": 1.9455, "step": 9220 }, { "epoch": 0.6941041419672933, "grad_norm": 9.820531845092773, "learning_rate": 2.274504802950413e-05, "loss": 1.9895, "step": 9221 }, { "epoch": 0.6941794162479534, "grad_norm": 5.64205265045166, "learning_rate": 2.2734828342077875e-05, "loss": 1.9654, "step": 9222 }, { "epoch": 0.6942546905286137, "grad_norm": 4.4885687828063965, "learning_rate": 2.2724610275487844e-05, "loss": 1.9458, "step": 9223 }, { "epoch": 0.6943299648092738, "grad_norm": 5.2593770027160645, "learning_rate": 2.2714393830341483e-05, "loss": 1.9413, "step": 9224 }, { "epoch": 0.6944052390899339, "grad_norm": 5.17440938949585, "learning_rate": 2.2704179007246107e-05, "loss": 1.6935, "step": 9225 }, { "epoch": 0.6944805133705941, "grad_norm": 4.944150447845459, "learning_rate": 2.269396580680898e-05, "loss": 1.5861, "step": 9226 }, { "epoch": 0.6945557876512543, "grad_norm": 4.647194862365723, "learning_rate": 2.268375422963722e-05, "loss": 1.7095, "step": 9227 }, { "epoch": 0.6946310619319144, "grad_norm": 6.682551383972168, "learning_rate": 2.2673544276337915e-05, "loss": 1.8163, "step": 9228 }, { "epoch": 0.6947063362125746, "grad_norm": 5.43378210067749, "learning_rate": 2.2663335947517973e-05, "loss": 1.8934, "step": 9229 }, { "epoch": 0.6947816104932347, "grad_norm": 4.208450794219971, "learning_rate": 2.265312924378429e-05, "loss": 1.7279, "step": 9230 }, { "epoch": 0.6948568847738948, "grad_norm": 11.26501750946045, "learning_rate": 2.2642924165743584e-05, "loss": 1.81, "step": 9231 }, { "epoch": 0.6949321590545551, "grad_norm": 4.731900215148926, "learning_rate": 2.2632720714002542e-05, "loss": 1.949, "step": 9232 }, { "epoch": 0.6950074333352152, "grad_norm": 7.432902812957764, "learning_rate": 2.2622518889167725e-05, "loss": 1.8894, "step": 9233 }, { "epoch": 0.6950827076158753, "grad_norm": 7.676168441772461, "learning_rate": 2.2612318691845628e-05, "loss": 1.9798, "step": 9234 }, { "epoch": 0.6951579818965355, "grad_norm": 4.9224090576171875, "learning_rate": 2.2602120122642585e-05, "loss": 1.4561, "step": 9235 }, { "epoch": 0.6952332561771957, "grad_norm": 3.893045663833618, "learning_rate": 2.259192318216488e-05, "loss": 1.9676, "step": 9236 }, { "epoch": 0.6953085304578558, "grad_norm": 6.134203910827637, "learning_rate": 2.2581727871018726e-05, "loss": 2.0661, "step": 9237 }, { "epoch": 0.695383804738516, "grad_norm": 3.6348259449005127, "learning_rate": 2.257153418981015e-05, "loss": 1.9444, "step": 9238 }, { "epoch": 0.6954590790191761, "grad_norm": 4.642723083496094, "learning_rate": 2.256134213914519e-05, "loss": 2.0677, "step": 9239 }, { "epoch": 0.6955343532998363, "grad_norm": 5.865072727203369, "learning_rate": 2.2551151719629693e-05, "loss": 1.9967, "step": 9240 }, { "epoch": 0.6956096275804964, "grad_norm": 5.17803430557251, "learning_rate": 2.254096293186948e-05, "loss": 2.0331, "step": 9241 }, { "epoch": 0.6956849018611566, "grad_norm": 3.7839152812957764, "learning_rate": 2.253077577647022e-05, "loss": 1.9727, "step": 9242 }, { "epoch": 0.6957601761418167, "grad_norm": 5.9798054695129395, "learning_rate": 2.2520590254037516e-05, "loss": 2.0086, "step": 9243 }, { "epoch": 0.6958354504224769, "grad_norm": 5.171562671661377, "learning_rate": 2.251040636517688e-05, "loss": 1.9071, "step": 9244 }, { "epoch": 0.6959107247031371, "grad_norm": 5.818791389465332, "learning_rate": 2.2500224110493717e-05, "loss": 1.8327, "step": 9245 }, { "epoch": 0.6959859989837972, "grad_norm": 4.878377914428711, "learning_rate": 2.2490043490593315e-05, "loss": 1.7789, "step": 9246 }, { "epoch": 0.6960612732644573, "grad_norm": 6.003511905670166, "learning_rate": 2.247986450608091e-05, "loss": 2.2119, "step": 9247 }, { "epoch": 0.6961365475451176, "grad_norm": 4.520191669464111, "learning_rate": 2.2469687157561576e-05, "loss": 1.7008, "step": 9248 }, { "epoch": 0.6962118218257777, "grad_norm": 7.781517505645752, "learning_rate": 2.2459511445640362e-05, "loss": 2.1897, "step": 9249 }, { "epoch": 0.6962870961064378, "grad_norm": 4.689202785491943, "learning_rate": 2.2449337370922158e-05, "loss": 2.0805, "step": 9250 }, { "epoch": 0.696362370387098, "grad_norm": 6.558923721313477, "learning_rate": 2.2439164934011815e-05, "loss": 2.0677, "step": 9251 }, { "epoch": 0.6964376446677581, "grad_norm": 8.834991455078125, "learning_rate": 2.2428994135514015e-05, "loss": 1.8102, "step": 9252 }, { "epoch": 0.6965129189484183, "grad_norm": 5.984163761138916, "learning_rate": 2.2418824976033397e-05, "loss": 1.8725, "step": 9253 }, { "epoch": 0.6965881932290785, "grad_norm": 4.9911346435546875, "learning_rate": 2.240865745617452e-05, "loss": 1.7862, "step": 9254 }, { "epoch": 0.6966634675097386, "grad_norm": 5.888492584228516, "learning_rate": 2.239849157654177e-05, "loss": 1.7328, "step": 9255 }, { "epoch": 0.6967387417903987, "grad_norm": 4.192131042480469, "learning_rate": 2.23883273377395e-05, "loss": 1.4821, "step": 9256 }, { "epoch": 0.696814016071059, "grad_norm": 3.871842384338379, "learning_rate": 2.2378164740371936e-05, "loss": 1.7818, "step": 9257 }, { "epoch": 0.6968892903517191, "grad_norm": 4.933742046356201, "learning_rate": 2.2368003785043253e-05, "loss": 1.9669, "step": 9258 }, { "epoch": 0.6969645646323792, "grad_norm": 4.600714683532715, "learning_rate": 2.2357844472357432e-05, "loss": 1.4692, "step": 9259 }, { "epoch": 0.6970398389130393, "grad_norm": 5.010395050048828, "learning_rate": 2.234768680291846e-05, "loss": 2.1681, "step": 9260 }, { "epoch": 0.6971151131936996, "grad_norm": 4.872340202331543, "learning_rate": 2.233753077733015e-05, "loss": 1.9431, "step": 9261 }, { "epoch": 0.6971903874743597, "grad_norm": 4.049929618835449, "learning_rate": 2.232737639619628e-05, "loss": 1.7836, "step": 9262 }, { "epoch": 0.6972656617550198, "grad_norm": 6.61251974105835, "learning_rate": 2.2317223660120464e-05, "loss": 1.7196, "step": 9263 }, { "epoch": 0.69734093603568, "grad_norm": 4.973830699920654, "learning_rate": 2.230707256970629e-05, "loss": 1.9496, "step": 9264 }, { "epoch": 0.6974162103163402, "grad_norm": 5.179134845733643, "learning_rate": 2.2296923125557167e-05, "loss": 1.4152, "step": 9265 }, { "epoch": 0.6974914845970003, "grad_norm": 5.262170791625977, "learning_rate": 2.2286775328276482e-05, "loss": 1.526, "step": 9266 }, { "epoch": 0.6975667588776605, "grad_norm": 5.391235828399658, "learning_rate": 2.2276629178467483e-05, "loss": 1.7981, "step": 9267 }, { "epoch": 0.6976420331583206, "grad_norm": 4.867656707763672, "learning_rate": 2.2266484676733328e-05, "loss": 1.6827, "step": 9268 }, { "epoch": 0.6977173074389807, "grad_norm": 4.321715354919434, "learning_rate": 2.2256341823677106e-05, "loss": 2.0111, "step": 9269 }, { "epoch": 0.697792581719641, "grad_norm": 5.91670560836792, "learning_rate": 2.224620061990174e-05, "loss": 2.2746, "step": 9270 }, { "epoch": 0.6978678560003011, "grad_norm": 3.856194019317627, "learning_rate": 2.2236061066010127e-05, "loss": 1.3904, "step": 9271 }, { "epoch": 0.6979431302809612, "grad_norm": 4.685153007507324, "learning_rate": 2.2225923162605006e-05, "loss": 1.8802, "step": 9272 }, { "epoch": 0.6980184045616215, "grad_norm": 7.412583827972412, "learning_rate": 2.2215786910289076e-05, "loss": 1.8379, "step": 9273 }, { "epoch": 0.6980936788422816, "grad_norm": 4.549882411956787, "learning_rate": 2.2205652309664876e-05, "loss": 1.7944, "step": 9274 }, { "epoch": 0.6981689531229417, "grad_norm": 6.999424934387207, "learning_rate": 2.219551936133491e-05, "loss": 1.6265, "step": 9275 }, { "epoch": 0.6982442274036019, "grad_norm": 5.160253524780273, "learning_rate": 2.2185388065901518e-05, "loss": 1.922, "step": 9276 }, { "epoch": 0.698319501684262, "grad_norm": 4.559370517730713, "learning_rate": 2.2175258423967003e-05, "loss": 1.5671, "step": 9277 }, { "epoch": 0.6983947759649222, "grad_norm": 3.975020408630371, "learning_rate": 2.2165130436133535e-05, "loss": 1.9614, "step": 9278 }, { "epoch": 0.6984700502455823, "grad_norm": 4.767719745635986, "learning_rate": 2.2155004103003206e-05, "loss": 1.7707, "step": 9279 }, { "epoch": 0.6985453245262425, "grad_norm": 4.912820339202881, "learning_rate": 2.2144879425177977e-05, "loss": 2.1508, "step": 9280 }, { "epoch": 0.6986205988069026, "grad_norm": 6.164126873016357, "learning_rate": 2.213475640325976e-05, "loss": 2.0283, "step": 9281 }, { "epoch": 0.6986958730875628, "grad_norm": 5.1750712394714355, "learning_rate": 2.21246350378503e-05, "loss": 1.8956, "step": 9282 }, { "epoch": 0.698771147368223, "grad_norm": 4.71627950668335, "learning_rate": 2.2114515329551315e-05, "loss": 1.6123, "step": 9283 }, { "epoch": 0.6988464216488831, "grad_norm": 6.4594244956970215, "learning_rate": 2.2104397278964372e-05, "loss": 2.0762, "step": 9284 }, { "epoch": 0.6989216959295432, "grad_norm": 4.881511211395264, "learning_rate": 2.2094280886690967e-05, "loss": 1.858, "step": 9285 }, { "epoch": 0.6989969702102035, "grad_norm": 6.458160877227783, "learning_rate": 2.2084166153332508e-05, "loss": 1.8266, "step": 9286 }, { "epoch": 0.6990722444908636, "grad_norm": 5.805734157562256, "learning_rate": 2.2074053079490255e-05, "loss": 1.6864, "step": 9287 }, { "epoch": 0.6991475187715237, "grad_norm": 5.414831161499023, "learning_rate": 2.206394166576542e-05, "loss": 1.6567, "step": 9288 }, { "epoch": 0.6992227930521839, "grad_norm": 4.865509033203125, "learning_rate": 2.2053831912759093e-05, "loss": 1.95, "step": 9289 }, { "epoch": 0.699298067332844, "grad_norm": 3.63382887840271, "learning_rate": 2.2043723821072292e-05, "loss": 1.7686, "step": 9290 }, { "epoch": 0.6993733416135042, "grad_norm": 6.146778583526611, "learning_rate": 2.2033617391305876e-05, "loss": 2.2629, "step": 9291 }, { "epoch": 0.6994486158941644, "grad_norm": 4.015007495880127, "learning_rate": 2.2023512624060678e-05, "loss": 1.4133, "step": 9292 }, { "epoch": 0.6995238901748245, "grad_norm": 7.483290672302246, "learning_rate": 2.201340951993736e-05, "loss": 1.7149, "step": 9293 }, { "epoch": 0.6995991644554846, "grad_norm": 6.540182590484619, "learning_rate": 2.2003308079536572e-05, "loss": 1.8213, "step": 9294 }, { "epoch": 0.6996744387361449, "grad_norm": 4.4751057624816895, "learning_rate": 2.1993208303458762e-05, "loss": 2.0089, "step": 9295 }, { "epoch": 0.699749713016805, "grad_norm": 4.6467084884643555, "learning_rate": 2.1983110192304374e-05, "loss": 2.1168, "step": 9296 }, { "epoch": 0.6998249872974651, "grad_norm": 4.232470512390137, "learning_rate": 2.1973013746673682e-05, "loss": 1.7915, "step": 9297 }, { "epoch": 0.6999002615781253, "grad_norm": 5.053588390350342, "learning_rate": 2.19629189671669e-05, "loss": 2.2114, "step": 9298 }, { "epoch": 0.6999755358587855, "grad_norm": 5.083146095275879, "learning_rate": 2.195282585438414e-05, "loss": 1.6179, "step": 9299 }, { "epoch": 0.7000508101394456, "grad_norm": 5.449784278869629, "learning_rate": 2.1942734408925425e-05, "loss": 1.6886, "step": 9300 }, { "epoch": 0.7001260844201057, "grad_norm": 5.43645715713501, "learning_rate": 2.1932644631390625e-05, "loss": 1.641, "step": 9301 }, { "epoch": 0.7002013587007659, "grad_norm": 10.92508602142334, "learning_rate": 2.1922556522379573e-05, "loss": 2.171, "step": 9302 }, { "epoch": 0.7002766329814261, "grad_norm": 6.381450176239014, "learning_rate": 2.191247008249198e-05, "loss": 1.9996, "step": 9303 }, { "epoch": 0.7003519072620862, "grad_norm": 4.0064473152160645, "learning_rate": 2.190238531232744e-05, "loss": 1.758, "step": 9304 }, { "epoch": 0.7004271815427464, "grad_norm": 5.375153064727783, "learning_rate": 2.189230221248549e-05, "loss": 1.6912, "step": 9305 }, { "epoch": 0.7005024558234065, "grad_norm": 5.514552116394043, "learning_rate": 2.18822207835655e-05, "loss": 1.669, "step": 9306 }, { "epoch": 0.7005777301040667, "grad_norm": 4.428057670593262, "learning_rate": 2.1872141026166825e-05, "loss": 2.0135, "step": 9307 }, { "epoch": 0.7006530043847269, "grad_norm": 4.594017505645752, "learning_rate": 2.1862062940888638e-05, "loss": 1.6488, "step": 9308 }, { "epoch": 0.700728278665387, "grad_norm": 5.2304277420043945, "learning_rate": 2.1851986528330094e-05, "loss": 1.5904, "step": 9309 }, { "epoch": 0.7008035529460471, "grad_norm": 4.830564022064209, "learning_rate": 2.1841911789090163e-05, "loss": 2.1304, "step": 9310 }, { "epoch": 0.7008788272267074, "grad_norm": 4.99942684173584, "learning_rate": 2.183183872376779e-05, "loss": 1.5426, "step": 9311 }, { "epoch": 0.7009541015073675, "grad_norm": 4.5725908279418945, "learning_rate": 2.1821767332961772e-05, "loss": 2.0446, "step": 9312 }, { "epoch": 0.7010293757880276, "grad_norm": 5.205332279205322, "learning_rate": 2.1811697617270854e-05, "loss": 2.5718, "step": 9313 }, { "epoch": 0.7011046500686878, "grad_norm": 4.680220603942871, "learning_rate": 2.180162957729362e-05, "loss": 1.7823, "step": 9314 }, { "epoch": 0.701179924349348, "grad_norm": 4.48225212097168, "learning_rate": 2.1791563213628606e-05, "loss": 1.9265, "step": 9315 }, { "epoch": 0.7012551986300081, "grad_norm": 5.262147903442383, "learning_rate": 2.1781498526874205e-05, "loss": 1.9798, "step": 9316 }, { "epoch": 0.7013304729106683, "grad_norm": 5.336678981781006, "learning_rate": 2.177143551762875e-05, "loss": 1.5556, "step": 9317 }, { "epoch": 0.7014057471913284, "grad_norm": 5.078948020935059, "learning_rate": 2.176137418649048e-05, "loss": 2.076, "step": 9318 }, { "epoch": 0.7014810214719885, "grad_norm": 5.765548229217529, "learning_rate": 2.175131453405746e-05, "loss": 1.9074, "step": 9319 }, { "epoch": 0.7015562957526487, "grad_norm": 3.9967434406280518, "learning_rate": 2.1741256560927763e-05, "loss": 1.6182, "step": 9320 }, { "epoch": 0.7016315700333089, "grad_norm": 4.858323574066162, "learning_rate": 2.173120026769926e-05, "loss": 1.9258, "step": 9321 }, { "epoch": 0.701706844313969, "grad_norm": 4.378138065338135, "learning_rate": 2.172114565496979e-05, "loss": 1.7107, "step": 9322 }, { "epoch": 0.7017821185946291, "grad_norm": 4.164498805999756, "learning_rate": 2.171109272333706e-05, "loss": 1.818, "step": 9323 }, { "epoch": 0.7018573928752894, "grad_norm": 4.1086320877075195, "learning_rate": 2.1701041473398725e-05, "loss": 2.1962, "step": 9324 }, { "epoch": 0.7019326671559495, "grad_norm": 4.772989273071289, "learning_rate": 2.1690991905752254e-05, "loss": 1.9398, "step": 9325 }, { "epoch": 0.7020079414366096, "grad_norm": 4.492087364196777, "learning_rate": 2.168094402099511e-05, "loss": 1.8252, "step": 9326 }, { "epoch": 0.7020832157172698, "grad_norm": 4.383864879608154, "learning_rate": 2.1670897819724562e-05, "loss": 2.2872, "step": 9327 }, { "epoch": 0.70215848999793, "grad_norm": 4.97402811050415, "learning_rate": 2.1660853302537874e-05, "loss": 1.8907, "step": 9328 }, { "epoch": 0.7022337642785901, "grad_norm": 4.69892692565918, "learning_rate": 2.165081047003213e-05, "loss": 1.8133, "step": 9329 }, { "epoch": 0.7023090385592503, "grad_norm": 3.776805877685547, "learning_rate": 2.164076932280437e-05, "loss": 1.8718, "step": 9330 }, { "epoch": 0.7023843128399104, "grad_norm": 7.233206272125244, "learning_rate": 2.1630729861451493e-05, "loss": 1.7294, "step": 9331 }, { "epoch": 0.7024595871205705, "grad_norm": 4.274624824523926, "learning_rate": 2.162069208657032e-05, "loss": 1.4394, "step": 9332 }, { "epoch": 0.7025348614012308, "grad_norm": 4.72705602645874, "learning_rate": 2.161065599875757e-05, "loss": 1.7681, "step": 9333 }, { "epoch": 0.7026101356818909, "grad_norm": 4.526385307312012, "learning_rate": 2.1600621598609865e-05, "loss": 1.8165, "step": 9334 }, { "epoch": 0.702685409962551, "grad_norm": 5.886971950531006, "learning_rate": 2.159058888672374e-05, "loss": 1.7325, "step": 9335 }, { "epoch": 0.7027606842432113, "grad_norm": 4.353018283843994, "learning_rate": 2.1580557863695573e-05, "loss": 1.5334, "step": 9336 }, { "epoch": 0.7028359585238714, "grad_norm": 4.670185089111328, "learning_rate": 2.157052853012171e-05, "loss": 1.8523, "step": 9337 }, { "epoch": 0.7029112328045315, "grad_norm": 8.775164604187012, "learning_rate": 2.156050088659834e-05, "loss": 2.1419, "step": 9338 }, { "epoch": 0.7029865070851916, "grad_norm": 4.2399396896362305, "learning_rate": 2.1550474933721605e-05, "loss": 1.9868, "step": 9339 }, { "epoch": 0.7030617813658518, "grad_norm": 4.301163673400879, "learning_rate": 2.1540450672087482e-05, "loss": 2.0163, "step": 9340 }, { "epoch": 0.703137055646512, "grad_norm": 5.607899188995361, "learning_rate": 2.1530428102291927e-05, "loss": 2.165, "step": 9341 }, { "epoch": 0.7032123299271721, "grad_norm": 5.423983097076416, "learning_rate": 2.152040722493072e-05, "loss": 1.805, "step": 9342 }, { "epoch": 0.7032876042078323, "grad_norm": 6.00930118560791, "learning_rate": 2.1510388040599584e-05, "loss": 1.9848, "step": 9343 }, { "epoch": 0.7033628784884924, "grad_norm": 6.215510845184326, "learning_rate": 2.1500370549894135e-05, "loss": 1.8873, "step": 9344 }, { "epoch": 0.7034381527691526, "grad_norm": 5.830587387084961, "learning_rate": 2.1490354753409892e-05, "loss": 1.6592, "step": 9345 }, { "epoch": 0.7035134270498128, "grad_norm": 6.64855432510376, "learning_rate": 2.148034065174224e-05, "loss": 1.7833, "step": 9346 }, { "epoch": 0.7035887013304729, "grad_norm": 4.266633987426758, "learning_rate": 2.1470328245486533e-05, "loss": 1.7095, "step": 9347 }, { "epoch": 0.703663975611133, "grad_norm": 4.774324893951416, "learning_rate": 2.1460317535237922e-05, "loss": 1.9698, "step": 9348 }, { "epoch": 0.7037392498917933, "grad_norm": 6.444777011871338, "learning_rate": 2.145030852159155e-05, "loss": 1.5569, "step": 9349 }, { "epoch": 0.7038145241724534, "grad_norm": 4.9345784187316895, "learning_rate": 2.1440301205142433e-05, "loss": 1.8729, "step": 9350 }, { "epoch": 0.7038897984531135, "grad_norm": 5.914407730102539, "learning_rate": 2.1430295586485443e-05, "loss": 1.873, "step": 9351 }, { "epoch": 0.7039650727337737, "grad_norm": 4.735976696014404, "learning_rate": 2.1420291666215426e-05, "loss": 1.6529, "step": 9352 }, { "epoch": 0.7040403470144339, "grad_norm": 3.983957052230835, "learning_rate": 2.1410289444927045e-05, "loss": 1.4232, "step": 9353 }, { "epoch": 0.704115621295094, "grad_norm": 4.668962478637695, "learning_rate": 2.140028892321494e-05, "loss": 1.5796, "step": 9354 }, { "epoch": 0.7041908955757542, "grad_norm": 6.854010105133057, "learning_rate": 2.1390290101673583e-05, "loss": 1.8398, "step": 9355 }, { "epoch": 0.7042661698564143, "grad_norm": 3.9459760189056396, "learning_rate": 2.1380292980897388e-05, "loss": 1.787, "step": 9356 }, { "epoch": 0.7043414441370744, "grad_norm": 5.830298900604248, "learning_rate": 2.137029756148065e-05, "loss": 1.6778, "step": 9357 }, { "epoch": 0.7044167184177346, "grad_norm": 5.088885307312012, "learning_rate": 2.13603038440176e-05, "loss": 1.6525, "step": 9358 }, { "epoch": 0.7044919926983948, "grad_norm": 4.401206970214844, "learning_rate": 2.1350311829102288e-05, "loss": 1.9909, "step": 9359 }, { "epoch": 0.7045672669790549, "grad_norm": 5.848589897155762, "learning_rate": 2.1340321517328754e-05, "loss": 1.584, "step": 9360 }, { "epoch": 0.704642541259715, "grad_norm": 5.143894672393799, "learning_rate": 2.133033290929085e-05, "loss": 1.6961, "step": 9361 }, { "epoch": 0.7047178155403753, "grad_norm": 7.149900913238525, "learning_rate": 2.132034600558241e-05, "loss": 1.8017, "step": 9362 }, { "epoch": 0.7047930898210354, "grad_norm": 6.186385631561279, "learning_rate": 2.1310360806797092e-05, "loss": 2.0581, "step": 9363 }, { "epoch": 0.7048683641016955, "grad_norm": 3.7667362689971924, "learning_rate": 2.1300377313528524e-05, "loss": 2.2862, "step": 9364 }, { "epoch": 0.7049436383823557, "grad_norm": 5.4394683837890625, "learning_rate": 2.1290395526370155e-05, "loss": 1.9493, "step": 9365 }, { "epoch": 0.7050189126630159, "grad_norm": 3.6797165870666504, "learning_rate": 2.1280415445915397e-05, "loss": 1.7459, "step": 9366 }, { "epoch": 0.705094186943676, "grad_norm": 4.405654430389404, "learning_rate": 2.1270437072757538e-05, "loss": 2.0053, "step": 9367 }, { "epoch": 0.7051694612243362, "grad_norm": 3.4335784912109375, "learning_rate": 2.1260460407489758e-05, "loss": 1.8559, "step": 9368 }, { "epoch": 0.7052447355049963, "grad_norm": 4.21601676940918, "learning_rate": 2.125048545070516e-05, "loss": 2.3365, "step": 9369 }, { "epoch": 0.7053200097856565, "grad_norm": 4.850498676300049, "learning_rate": 2.1240512202996695e-05, "loss": 1.9836, "step": 9370 }, { "epoch": 0.7053952840663167, "grad_norm": 4.358738422393799, "learning_rate": 2.123054066495727e-05, "loss": 1.7281, "step": 9371 }, { "epoch": 0.7054705583469768, "grad_norm": 5.4170732498168945, "learning_rate": 2.1220570837179643e-05, "loss": 1.8855, "step": 9372 }, { "epoch": 0.7055458326276369, "grad_norm": 4.894082546234131, "learning_rate": 2.1210602720256522e-05, "loss": 1.6757, "step": 9373 }, { "epoch": 0.7056211069082972, "grad_norm": 4.747068881988525, "learning_rate": 2.1200636314780442e-05, "loss": 1.604, "step": 9374 }, { "epoch": 0.7056963811889573, "grad_norm": 5.248499393463135, "learning_rate": 2.1190671621343916e-05, "loss": 1.642, "step": 9375 }, { "epoch": 0.7057716554696174, "grad_norm": 4.000081539154053, "learning_rate": 2.118070864053928e-05, "loss": 1.8255, "step": 9376 }, { "epoch": 0.7058469297502775, "grad_norm": 4.539329528808594, "learning_rate": 2.1170747372958828e-05, "loss": 2.0555, "step": 9377 }, { "epoch": 0.7059222040309377, "grad_norm": 4.026722431182861, "learning_rate": 2.1160787819194715e-05, "loss": 1.7773, "step": 9378 }, { "epoch": 0.7059974783115979, "grad_norm": 6.095086574554443, "learning_rate": 2.1150829979839043e-05, "loss": 2.4515, "step": 9379 }, { "epoch": 0.706072752592258, "grad_norm": 7.773655414581299, "learning_rate": 2.1140873855483733e-05, "loss": 1.6775, "step": 9380 }, { "epoch": 0.7061480268729182, "grad_norm": 4.406905174255371, "learning_rate": 2.1130919446720667e-05, "loss": 1.6362, "step": 9381 }, { "epoch": 0.7062233011535783, "grad_norm": 10.06230640411377, "learning_rate": 2.112096675414162e-05, "loss": 1.8967, "step": 9382 }, { "epoch": 0.7062985754342385, "grad_norm": 4.2922797203063965, "learning_rate": 2.1111015778338215e-05, "loss": 1.7575, "step": 9383 }, { "epoch": 0.7063738497148987, "grad_norm": 4.28108549118042, "learning_rate": 2.110106651990205e-05, "loss": 1.5583, "step": 9384 }, { "epoch": 0.7064491239955588, "grad_norm": 4.165553092956543, "learning_rate": 2.109111897942455e-05, "loss": 1.8619, "step": 9385 }, { "epoch": 0.7065243982762189, "grad_norm": 4.458963394165039, "learning_rate": 2.1081173157497086e-05, "loss": 1.7861, "step": 9386 }, { "epoch": 0.7065996725568792, "grad_norm": 5.161516189575195, "learning_rate": 2.107122905471089e-05, "loss": 2.3224, "step": 9387 }, { "epoch": 0.7066749468375393, "grad_norm": 6.569104194641113, "learning_rate": 2.1061286671657116e-05, "loss": 1.8312, "step": 9388 }, { "epoch": 0.7067502211181994, "grad_norm": 5.383288383483887, "learning_rate": 2.1051346008926815e-05, "loss": 1.7546, "step": 9389 }, { "epoch": 0.7068254953988596, "grad_norm": 4.633711338043213, "learning_rate": 2.1041407067110953e-05, "loss": 1.7025, "step": 9390 }, { "epoch": 0.7069007696795198, "grad_norm": 5.346320152282715, "learning_rate": 2.1031469846800327e-05, "loss": 2.0629, "step": 9391 }, { "epoch": 0.7069760439601799, "grad_norm": 6.171438217163086, "learning_rate": 2.1021534348585718e-05, "loss": 1.841, "step": 9392 }, { "epoch": 0.7070513182408401, "grad_norm": 7.632879257202148, "learning_rate": 2.1011600573057722e-05, "loss": 1.7749, "step": 9393 }, { "epoch": 0.7071265925215002, "grad_norm": 4.841483116149902, "learning_rate": 2.1001668520806917e-05, "loss": 1.7153, "step": 9394 }, { "epoch": 0.7072018668021604, "grad_norm": 4.584401607513428, "learning_rate": 2.0991738192423695e-05, "loss": 1.6034, "step": 9395 }, { "epoch": 0.7072771410828206, "grad_norm": 4.426340579986572, "learning_rate": 2.0981809588498418e-05, "loss": 1.6639, "step": 9396 }, { "epoch": 0.7073524153634807, "grad_norm": 4.835805416107178, "learning_rate": 2.0971882709621288e-05, "loss": 2.2606, "step": 9397 }, { "epoch": 0.7074276896441408, "grad_norm": 6.390164852142334, "learning_rate": 2.0961957556382434e-05, "loss": 1.9499, "step": 9398 }, { "epoch": 0.7075029639248009, "grad_norm": 3.7949137687683105, "learning_rate": 2.0952034129371884e-05, "loss": 1.6472, "step": 9399 }, { "epoch": 0.7075782382054612, "grad_norm": 4.826058864593506, "learning_rate": 2.0942112429179556e-05, "loss": 2.2777, "step": 9400 }, { "epoch": 0.7076535124861213, "grad_norm": 5.310537815093994, "learning_rate": 2.0932192456395288e-05, "loss": 1.84, "step": 9401 }, { "epoch": 0.7077287867667814, "grad_norm": 5.666449069976807, "learning_rate": 2.0922274211608756e-05, "loss": 1.8859, "step": 9402 }, { "epoch": 0.7078040610474416, "grad_norm": 4.422976016998291, "learning_rate": 2.09123576954096e-05, "loss": 1.4833, "step": 9403 }, { "epoch": 0.7078793353281018, "grad_norm": 5.033746719360352, "learning_rate": 2.090244290838731e-05, "loss": 1.602, "step": 9404 }, { "epoch": 0.7079546096087619, "grad_norm": 4.90593147277832, "learning_rate": 2.089252985113131e-05, "loss": 1.6487, "step": 9405 }, { "epoch": 0.7080298838894221, "grad_norm": 4.035327434539795, "learning_rate": 2.0882618524230875e-05, "loss": 1.6887, "step": 9406 }, { "epoch": 0.7081051581700822, "grad_norm": 4.4100661277771, "learning_rate": 2.087270892827524e-05, "loss": 1.9973, "step": 9407 }, { "epoch": 0.7081804324507424, "grad_norm": 9.177873611450195, "learning_rate": 2.0862801063853466e-05, "loss": 2.2082, "step": 9408 }, { "epoch": 0.7082557067314026, "grad_norm": 5.6072211265563965, "learning_rate": 2.0852894931554586e-05, "loss": 1.7898, "step": 9409 }, { "epoch": 0.7083309810120627, "grad_norm": 4.769901275634766, "learning_rate": 2.0842990531967444e-05, "loss": 1.7207, "step": 9410 }, { "epoch": 0.7084062552927228, "grad_norm": 5.831104278564453, "learning_rate": 2.0833087865680855e-05, "loss": 2.3435, "step": 9411 }, { "epoch": 0.7084815295733831, "grad_norm": 5.289443492889404, "learning_rate": 2.0823186933283513e-05, "loss": 1.6374, "step": 9412 }, { "epoch": 0.7085568038540432, "grad_norm": 5.817784309387207, "learning_rate": 2.081328773536398e-05, "loss": 2.1398, "step": 9413 }, { "epoch": 0.7086320781347033, "grad_norm": 4.402369976043701, "learning_rate": 2.0803390272510764e-05, "loss": 1.6205, "step": 9414 }, { "epoch": 0.7087073524153635, "grad_norm": 5.286246299743652, "learning_rate": 2.079349454531221e-05, "loss": 1.8302, "step": 9415 }, { "epoch": 0.7087826266960237, "grad_norm": 4.683670997619629, "learning_rate": 2.0783600554356615e-05, "loss": 1.5501, "step": 9416 }, { "epoch": 0.7088579009766838, "grad_norm": 4.5598063468933105, "learning_rate": 2.0773708300232118e-05, "loss": 1.9481, "step": 9417 }, { "epoch": 0.7089331752573439, "grad_norm": 5.628935813903809, "learning_rate": 2.0763817783526824e-05, "loss": 1.6645, "step": 9418 }, { "epoch": 0.7090084495380041, "grad_norm": 5.669116020202637, "learning_rate": 2.0753929004828654e-05, "loss": 1.6413, "step": 9419 }, { "epoch": 0.7090837238186642, "grad_norm": 5.136711120605469, "learning_rate": 2.0744041964725508e-05, "loss": 1.4798, "step": 9420 }, { "epoch": 0.7091589980993244, "grad_norm": 6.001125812530518, "learning_rate": 2.0734156663805103e-05, "loss": 1.8518, "step": 9421 }, { "epoch": 0.7092342723799846, "grad_norm": 6.930325508117676, "learning_rate": 2.072427310265511e-05, "loss": 1.9453, "step": 9422 }, { "epoch": 0.7093095466606447, "grad_norm": 4.790477275848389, "learning_rate": 2.071439128186309e-05, "loss": 1.9081, "step": 9423 }, { "epoch": 0.7093848209413048, "grad_norm": 6.618725299835205, "learning_rate": 2.0704511202016485e-05, "loss": 2.0128, "step": 9424 }, { "epoch": 0.7094600952219651, "grad_norm": 5.4649271965026855, "learning_rate": 2.0694632863702618e-05, "loss": 1.6879, "step": 9425 }, { "epoch": 0.7095353695026252, "grad_norm": 5.447333335876465, "learning_rate": 2.0684756267508755e-05, "loss": 1.9284, "step": 9426 }, { "epoch": 0.7096106437832853, "grad_norm": 4.140979766845703, "learning_rate": 2.0674881414022002e-05, "loss": 1.5889, "step": 9427 }, { "epoch": 0.7096859180639455, "grad_norm": 4.824054718017578, "learning_rate": 2.066500830382942e-05, "loss": 1.7807, "step": 9428 }, { "epoch": 0.7097611923446057, "grad_norm": 4.64283561706543, "learning_rate": 2.0655136937517906e-05, "loss": 1.951, "step": 9429 }, { "epoch": 0.7098364666252658, "grad_norm": 4.86910343170166, "learning_rate": 2.0645267315674306e-05, "loss": 1.773, "step": 9430 }, { "epoch": 0.709911740905926, "grad_norm": 5.271092891693115, "learning_rate": 2.063539943888535e-05, "loss": 2.2061, "step": 9431 }, { "epoch": 0.7099870151865861, "grad_norm": 4.815905570983887, "learning_rate": 2.062553330773762e-05, "loss": 2.1344, "step": 9432 }, { "epoch": 0.7100622894672463, "grad_norm": 4.7830491065979, "learning_rate": 2.0615668922817655e-05, "loss": 1.9825, "step": 9433 }, { "epoch": 0.7101375637479065, "grad_norm": 6.408608436584473, "learning_rate": 2.0605806284711858e-05, "loss": 1.6837, "step": 9434 }, { "epoch": 0.7102128380285666, "grad_norm": 4.737805366516113, "learning_rate": 2.0595945394006554e-05, "loss": 1.6237, "step": 9435 }, { "epoch": 0.7102881123092267, "grad_norm": 4.678584575653076, "learning_rate": 2.0586086251287907e-05, "loss": 2.0914, "step": 9436 }, { "epoch": 0.7103633865898868, "grad_norm": 5.823984146118164, "learning_rate": 2.0576228857142056e-05, "loss": 1.4322, "step": 9437 }, { "epoch": 0.7104386608705471, "grad_norm": 5.274572372436523, "learning_rate": 2.0566373212154956e-05, "loss": 1.9612, "step": 9438 }, { "epoch": 0.7105139351512072, "grad_norm": 4.736893653869629, "learning_rate": 2.0556519316912526e-05, "loss": 1.9395, "step": 9439 }, { "epoch": 0.7105892094318673, "grad_norm": 4.674492835998535, "learning_rate": 2.0546667172000524e-05, "loss": 1.6518, "step": 9440 }, { "epoch": 0.7106644837125276, "grad_norm": 4.059900760650635, "learning_rate": 2.0536816778004665e-05, "loss": 1.452, "step": 9441 }, { "epoch": 0.7107397579931877, "grad_norm": 5.007218360900879, "learning_rate": 2.0526968135510493e-05, "loss": 1.7674, "step": 9442 }, { "epoch": 0.7108150322738478, "grad_norm": 4.747188568115234, "learning_rate": 2.05171212451035e-05, "loss": 1.6817, "step": 9443 }, { "epoch": 0.710890306554508, "grad_norm": 5.4470744132995605, "learning_rate": 2.050727610736905e-05, "loss": 2.077, "step": 9444 }, { "epoch": 0.7109655808351681, "grad_norm": 6.768156051635742, "learning_rate": 2.0497432722892414e-05, "loss": 1.9952, "step": 9445 }, { "epoch": 0.7110408551158283, "grad_norm": 3.845045804977417, "learning_rate": 2.0487591092258763e-05, "loss": 1.8572, "step": 9446 }, { "epoch": 0.7111161293964885, "grad_norm": 5.5883917808532715, "learning_rate": 2.0477751216053126e-05, "loss": 1.7998, "step": 9447 }, { "epoch": 0.7111914036771486, "grad_norm": 4.677051544189453, "learning_rate": 2.0467913094860487e-05, "loss": 1.9686, "step": 9448 }, { "epoch": 0.7112666779578087, "grad_norm": 6.796509265899658, "learning_rate": 2.045807672926566e-05, "loss": 1.8988, "step": 9449 }, { "epoch": 0.711341952238469, "grad_norm": 3.9641034603118896, "learning_rate": 2.0448242119853427e-05, "loss": 1.7176, "step": 9450 }, { "epoch": 0.7114172265191291, "grad_norm": 5.551223278045654, "learning_rate": 2.0438409267208387e-05, "loss": 2.0897, "step": 9451 }, { "epoch": 0.7114925007997892, "grad_norm": 5.59065580368042, "learning_rate": 2.0428578171915107e-05, "loss": 1.8998, "step": 9452 }, { "epoch": 0.7115677750804494, "grad_norm": 5.542457103729248, "learning_rate": 2.0418748834557995e-05, "loss": 1.7374, "step": 9453 }, { "epoch": 0.7116430493611096, "grad_norm": 4.429009914398193, "learning_rate": 2.040892125572138e-05, "loss": 1.8505, "step": 9454 }, { "epoch": 0.7117183236417697, "grad_norm": 4.183013439178467, "learning_rate": 2.0399095435989514e-05, "loss": 1.7087, "step": 9455 }, { "epoch": 0.7117935979224298, "grad_norm": 6.364041805267334, "learning_rate": 2.038927137594647e-05, "loss": 1.8004, "step": 9456 }, { "epoch": 0.71186887220309, "grad_norm": 4.549670219421387, "learning_rate": 2.0379449076176276e-05, "loss": 1.877, "step": 9457 }, { "epoch": 0.7119441464837502, "grad_norm": 6.37774658203125, "learning_rate": 2.0369628537262863e-05, "loss": 1.7572, "step": 9458 }, { "epoch": 0.7120194207644103, "grad_norm": 5.045804023742676, "learning_rate": 2.035980975979e-05, "loss": 1.9443, "step": 9459 }, { "epoch": 0.7120946950450705, "grad_norm": 4.541904449462891, "learning_rate": 2.034999274434142e-05, "loss": 1.7683, "step": 9460 }, { "epoch": 0.7121699693257306, "grad_norm": 5.265013694763184, "learning_rate": 2.034017749150067e-05, "loss": 2.1479, "step": 9461 }, { "epoch": 0.7122452436063907, "grad_norm": 4.115753173828125, "learning_rate": 2.0330364001851264e-05, "loss": 1.6876, "step": 9462 }, { "epoch": 0.712320517887051, "grad_norm": 4.402643203735352, "learning_rate": 2.032055227597661e-05, "loss": 2.11, "step": 9463 }, { "epoch": 0.7123957921677111, "grad_norm": 4.408322811126709, "learning_rate": 2.0310742314459945e-05, "loss": 1.9864, "step": 9464 }, { "epoch": 0.7124710664483712, "grad_norm": 3.5569489002227783, "learning_rate": 2.030093411788448e-05, "loss": 1.6481, "step": 9465 }, { "epoch": 0.7125463407290314, "grad_norm": 5.080889701843262, "learning_rate": 2.0291127686833244e-05, "loss": 1.6673, "step": 9466 }, { "epoch": 0.7126216150096916, "grad_norm": 3.850985527038574, "learning_rate": 2.028132302188922e-05, "loss": 1.6713, "step": 9467 }, { "epoch": 0.7126968892903517, "grad_norm": 4.193704605102539, "learning_rate": 2.027152012363528e-05, "loss": 1.9128, "step": 9468 }, { "epoch": 0.7127721635710119, "grad_norm": 5.77872896194458, "learning_rate": 2.0261718992654176e-05, "loss": 1.7688, "step": 9469 }, { "epoch": 0.712847437851672, "grad_norm": 4.845407009124756, "learning_rate": 2.0251919629528538e-05, "loss": 1.7781, "step": 9470 }, { "epoch": 0.7129227121323322, "grad_norm": 3.637472629547119, "learning_rate": 2.0242122034840932e-05, "loss": 1.4049, "step": 9471 }, { "epoch": 0.7129979864129924, "grad_norm": 4.914551258087158, "learning_rate": 2.0232326209173774e-05, "loss": 1.8648, "step": 9472 }, { "epoch": 0.7130732606936525, "grad_norm": 4.670671463012695, "learning_rate": 2.022253215310943e-05, "loss": 1.8214, "step": 9473 }, { "epoch": 0.7131485349743126, "grad_norm": 5.561976432800293, "learning_rate": 2.0212739867230086e-05, "loss": 1.8266, "step": 9474 }, { "epoch": 0.7132238092549729, "grad_norm": 4.460993766784668, "learning_rate": 2.0202949352117905e-05, "loss": 1.6355, "step": 9475 }, { "epoch": 0.713299083535633, "grad_norm": 8.429607391357422, "learning_rate": 2.019316060835487e-05, "loss": 1.7074, "step": 9476 }, { "epoch": 0.7133743578162931, "grad_norm": 4.11983585357666, "learning_rate": 2.0183373636522912e-05, "loss": 1.5695, "step": 9477 }, { "epoch": 0.7134496320969532, "grad_norm": 5.1690263748168945, "learning_rate": 2.0173588437203838e-05, "loss": 1.9743, "step": 9478 }, { "epoch": 0.7135249063776135, "grad_norm": 5.531281471252441, "learning_rate": 2.016380501097935e-05, "loss": 1.9025, "step": 9479 }, { "epoch": 0.7136001806582736, "grad_norm": 4.883945941925049, "learning_rate": 2.015402335843107e-05, "loss": 1.7413, "step": 9480 }, { "epoch": 0.7136754549389337, "grad_norm": 4.979677200317383, "learning_rate": 2.0144243480140433e-05, "loss": 1.7923, "step": 9481 }, { "epoch": 0.7137507292195939, "grad_norm": 4.138659477233887, "learning_rate": 2.0134465376688877e-05, "loss": 1.5244, "step": 9482 }, { "epoch": 0.713826003500254, "grad_norm": 11.317208290100098, "learning_rate": 2.0124689048657646e-05, "loss": 2.1525, "step": 9483 }, { "epoch": 0.7139012777809142, "grad_norm": 5.241421222686768, "learning_rate": 2.0114914496627944e-05, "loss": 1.9196, "step": 9484 }, { "epoch": 0.7139765520615744, "grad_norm": 5.762665271759033, "learning_rate": 2.010514172118081e-05, "loss": 1.9304, "step": 9485 }, { "epoch": 0.7140518263422345, "grad_norm": 4.502680778503418, "learning_rate": 2.009537072289724e-05, "loss": 2.0734, "step": 9486 }, { "epoch": 0.7141271006228946, "grad_norm": 5.363126277923584, "learning_rate": 2.0085601502358052e-05, "loss": 1.6732, "step": 9487 }, { "epoch": 0.7142023749035549, "grad_norm": 4.757355690002441, "learning_rate": 2.0075834060144027e-05, "loss": 1.7796, "step": 9488 }, { "epoch": 0.714277649184215, "grad_norm": 4.702545642852783, "learning_rate": 2.0066068396835803e-05, "loss": 1.9702, "step": 9489 }, { "epoch": 0.7143529234648751, "grad_norm": 6.604905605316162, "learning_rate": 2.005630451301394e-05, "loss": 1.8952, "step": 9490 }, { "epoch": 0.7144281977455353, "grad_norm": 4.455034255981445, "learning_rate": 2.004654240925884e-05, "loss": 1.6805, "step": 9491 }, { "epoch": 0.7145034720261955, "grad_norm": 4.227853775024414, "learning_rate": 2.0036782086150862e-05, "loss": 1.5846, "step": 9492 }, { "epoch": 0.7145787463068556, "grad_norm": 6.459262847900391, "learning_rate": 2.0027023544270195e-05, "loss": 1.7413, "step": 9493 }, { "epoch": 0.7146540205875158, "grad_norm": 4.390810966491699, "learning_rate": 2.001726678419698e-05, "loss": 1.9777, "step": 9494 }, { "epoch": 0.7147292948681759, "grad_norm": 5.537712097167969, "learning_rate": 2.000751180651124e-05, "loss": 1.9674, "step": 9495 }, { "epoch": 0.714804569148836, "grad_norm": 5.226698398590088, "learning_rate": 1.9997758611792855e-05, "loss": 1.6521, "step": 9496 }, { "epoch": 0.7148798434294962, "grad_norm": 5.052948474884033, "learning_rate": 1.9988007200621646e-05, "loss": 1.7016, "step": 9497 }, { "epoch": 0.7149551177101564, "grad_norm": 4.696854591369629, "learning_rate": 1.9978257573577275e-05, "loss": 1.376, "step": 9498 }, { "epoch": 0.7150303919908165, "grad_norm": 5.07398796081543, "learning_rate": 1.9968509731239354e-05, "loss": 2.3853, "step": 9499 }, { "epoch": 0.7151056662714766, "grad_norm": 3.914128065109253, "learning_rate": 1.9958763674187358e-05, "loss": 1.883, "step": 9500 }, { "epoch": 0.7151809405521369, "grad_norm": 6.058406829833984, "learning_rate": 1.994901940300068e-05, "loss": 1.6567, "step": 9501 }, { "epoch": 0.715256214832797, "grad_norm": 5.501729488372803, "learning_rate": 1.9939276918258554e-05, "loss": 1.6225, "step": 9502 }, { "epoch": 0.7153314891134571, "grad_norm": 4.761053562164307, "learning_rate": 1.992953622054018e-05, "loss": 2.359, "step": 9503 }, { "epoch": 0.7154067633941174, "grad_norm": 4.187493324279785, "learning_rate": 1.9919797310424582e-05, "loss": 1.7836, "step": 9504 }, { "epoch": 0.7154820376747775, "grad_norm": 6.203763008117676, "learning_rate": 1.9910060188490738e-05, "loss": 2.0934, "step": 9505 }, { "epoch": 0.7155573119554376, "grad_norm": 4.951480388641357, "learning_rate": 1.990032485531746e-05, "loss": 1.8963, "step": 9506 }, { "epoch": 0.7156325862360978, "grad_norm": 4.882652282714844, "learning_rate": 1.989059131148352e-05, "loss": 1.7381, "step": 9507 }, { "epoch": 0.7157078605167579, "grad_norm": 5.824948787689209, "learning_rate": 1.9880859557567517e-05, "loss": 2.1631, "step": 9508 }, { "epoch": 0.7157831347974181, "grad_norm": 5.714610576629639, "learning_rate": 1.9871129594148015e-05, "loss": 2.5054, "step": 9509 }, { "epoch": 0.7158584090780783, "grad_norm": 4.927806854248047, "learning_rate": 1.9861401421803382e-05, "loss": 1.8776, "step": 9510 }, { "epoch": 0.7159336833587384, "grad_norm": 4.6382527351379395, "learning_rate": 1.985167504111196e-05, "loss": 1.735, "step": 9511 }, { "epoch": 0.7160089576393985, "grad_norm": 6.377290725708008, "learning_rate": 1.984195045265195e-05, "loss": 1.6773, "step": 9512 }, { "epoch": 0.7160842319200588, "grad_norm": 4.6818108558654785, "learning_rate": 1.983222765700146e-05, "loss": 1.809, "step": 9513 }, { "epoch": 0.7161595062007189, "grad_norm": 4.784926414489746, "learning_rate": 1.9822506654738486e-05, "loss": 1.5798, "step": 9514 }, { "epoch": 0.716234780481379, "grad_norm": 5.976828575134277, "learning_rate": 1.981278744644089e-05, "loss": 1.9079, "step": 9515 }, { "epoch": 0.7163100547620391, "grad_norm": 6.271202564239502, "learning_rate": 1.9803070032686478e-05, "loss": 1.4672, "step": 9516 }, { "epoch": 0.7163853290426994, "grad_norm": 5.964905261993408, "learning_rate": 1.979335441405289e-05, "loss": 1.8177, "step": 9517 }, { "epoch": 0.7164606033233595, "grad_norm": 4.530181407928467, "learning_rate": 1.9783640591117725e-05, "loss": 2.0666, "step": 9518 }, { "epoch": 0.7165358776040196, "grad_norm": 5.328892230987549, "learning_rate": 1.977392856445841e-05, "loss": 1.9104, "step": 9519 }, { "epoch": 0.7166111518846798, "grad_norm": 5.724018573760986, "learning_rate": 1.9764218334652335e-05, "loss": 1.8381, "step": 9520 }, { "epoch": 0.71668642616534, "grad_norm": 4.3129448890686035, "learning_rate": 1.9754509902276704e-05, "loss": 1.7737, "step": 9521 }, { "epoch": 0.7167617004460001, "grad_norm": 4.701634883880615, "learning_rate": 1.9744803267908674e-05, "loss": 1.704, "step": 9522 }, { "epoch": 0.7168369747266603, "grad_norm": 5.569914817810059, "learning_rate": 1.9735098432125277e-05, "loss": 1.79, "step": 9523 }, { "epoch": 0.7169122490073204, "grad_norm": 4.8248677253723145, "learning_rate": 1.972539539550346e-05, "loss": 1.7106, "step": 9524 }, { "epoch": 0.7169875232879805, "grad_norm": 3.9295737743377686, "learning_rate": 1.9715694158619996e-05, "loss": 1.844, "step": 9525 }, { "epoch": 0.7170627975686408, "grad_norm": 4.6467132568359375, "learning_rate": 1.9705994722051624e-05, "loss": 1.793, "step": 9526 }, { "epoch": 0.7171380718493009, "grad_norm": 4.346904754638672, "learning_rate": 1.969629708637496e-05, "loss": 1.5909, "step": 9527 }, { "epoch": 0.717213346129961, "grad_norm": 4.667377471923828, "learning_rate": 1.968660125216646e-05, "loss": 1.7459, "step": 9528 }, { "epoch": 0.7172886204106212, "grad_norm": 5.513657093048096, "learning_rate": 1.967690722000256e-05, "loss": 1.9629, "step": 9529 }, { "epoch": 0.7173638946912814, "grad_norm": 4.792205810546875, "learning_rate": 1.9667214990459497e-05, "loss": 1.8842, "step": 9530 }, { "epoch": 0.7174391689719415, "grad_norm": 3.7832374572753906, "learning_rate": 1.965752456411349e-05, "loss": 1.6079, "step": 9531 }, { "epoch": 0.7175144432526017, "grad_norm": 5.25220251083374, "learning_rate": 1.9647835941540565e-05, "loss": 2.1688, "step": 9532 }, { "epoch": 0.7175897175332618, "grad_norm": 4.870365619659424, "learning_rate": 1.9638149123316706e-05, "loss": 1.7067, "step": 9533 }, { "epoch": 0.717664991813922, "grad_norm": 3.4470694065093994, "learning_rate": 1.9628464110017757e-05, "loss": 1.9056, "step": 9534 }, { "epoch": 0.7177402660945821, "grad_norm": 5.867374897003174, "learning_rate": 1.9618780902219496e-05, "loss": 2.0299, "step": 9535 }, { "epoch": 0.7178155403752423, "grad_norm": 6.339066505432129, "learning_rate": 1.9609099500497517e-05, "loss": 2.0115, "step": 9536 }, { "epoch": 0.7178908146559024, "grad_norm": 4.336844444274902, "learning_rate": 1.959941990542739e-05, "loss": 1.8723, "step": 9537 }, { "epoch": 0.7179660889365626, "grad_norm": 4.932903289794922, "learning_rate": 1.9589742117584502e-05, "loss": 2.3154, "step": 9538 }, { "epoch": 0.7180413632172228, "grad_norm": 4.143261432647705, "learning_rate": 1.9580066137544208e-05, "loss": 2.0187, "step": 9539 }, { "epoch": 0.7181166374978829, "grad_norm": 4.2491302490234375, "learning_rate": 1.9570391965881675e-05, "loss": 1.5832, "step": 9540 }, { "epoch": 0.718191911778543, "grad_norm": 5.4467949867248535, "learning_rate": 1.9560719603172046e-05, "loss": 1.771, "step": 9541 }, { "epoch": 0.7182671860592033, "grad_norm": 4.698729515075684, "learning_rate": 1.955104904999028e-05, "loss": 1.6989, "step": 9542 }, { "epoch": 0.7183424603398634, "grad_norm": 4.595734119415283, "learning_rate": 1.9541380306911276e-05, "loss": 2.1554, "step": 9543 }, { "epoch": 0.7184177346205235, "grad_norm": 6.739954471588135, "learning_rate": 1.9531713374509824e-05, "loss": 1.6398, "step": 9544 }, { "epoch": 0.7184930089011837, "grad_norm": 4.812310218811035, "learning_rate": 1.9522048253360587e-05, "loss": 1.9355, "step": 9545 }, { "epoch": 0.7185682831818438, "grad_norm": 6.03534460067749, "learning_rate": 1.9512384944038148e-05, "loss": 2.0226, "step": 9546 }, { "epoch": 0.718643557462504, "grad_norm": 4.772028923034668, "learning_rate": 1.9502723447116927e-05, "loss": 2.3704, "step": 9547 }, { "epoch": 0.7187188317431642, "grad_norm": 6.009061813354492, "learning_rate": 1.949306376317131e-05, "loss": 1.5869, "step": 9548 }, { "epoch": 0.7187941060238243, "grad_norm": 5.287210464477539, "learning_rate": 1.9483405892775495e-05, "loss": 1.9085, "step": 9549 }, { "epoch": 0.7188693803044844, "grad_norm": 4.43877649307251, "learning_rate": 1.9473749836503657e-05, "loss": 1.271, "step": 9550 }, { "epoch": 0.7189446545851447, "grad_norm": 4.632543087005615, "learning_rate": 1.9464095594929788e-05, "loss": 2.108, "step": 9551 }, { "epoch": 0.7190199288658048, "grad_norm": 4.28782844543457, "learning_rate": 1.9454443168627833e-05, "loss": 1.6824, "step": 9552 }, { "epoch": 0.7190952031464649, "grad_norm": 4.903390407562256, "learning_rate": 1.944479255817157e-05, "loss": 1.7923, "step": 9553 }, { "epoch": 0.719170477427125, "grad_norm": 7.645961761474609, "learning_rate": 1.9435143764134718e-05, "loss": 2.0172, "step": 9554 }, { "epoch": 0.7192457517077853, "grad_norm": 5.691678524017334, "learning_rate": 1.942549678709087e-05, "loss": 1.8204, "step": 9555 }, { "epoch": 0.7193210259884454, "grad_norm": 4.54675817489624, "learning_rate": 1.9415851627613525e-05, "loss": 1.9056, "step": 9556 }, { "epoch": 0.7193963002691055, "grad_norm": 4.945567607879639, "learning_rate": 1.9406208286276024e-05, "loss": 1.7552, "step": 9557 }, { "epoch": 0.7194715745497657, "grad_norm": 4.792803764343262, "learning_rate": 1.9396566763651658e-05, "loss": 1.6443, "step": 9558 }, { "epoch": 0.7195468488304259, "grad_norm": 4.259121417999268, "learning_rate": 1.9386927060313607e-05, "loss": 1.5547, "step": 9559 }, { "epoch": 0.719622123111086, "grad_norm": 4.082737445831299, "learning_rate": 1.9377289176834884e-05, "loss": 2.0998, "step": 9560 }, { "epoch": 0.7196973973917462, "grad_norm": 6.311650276184082, "learning_rate": 1.9367653113788474e-05, "loss": 1.7414, "step": 9561 }, { "epoch": 0.7197726716724063, "grad_norm": 5.185740947723389, "learning_rate": 1.9358018871747176e-05, "loss": 1.7923, "step": 9562 }, { "epoch": 0.7198479459530664, "grad_norm": 4.590246677398682, "learning_rate": 1.9348386451283746e-05, "loss": 1.6235, "step": 9563 }, { "epoch": 0.7199232202337267, "grad_norm": 5.305851936340332, "learning_rate": 1.933875585297078e-05, "loss": 1.6704, "step": 9564 }, { "epoch": 0.7199984945143868, "grad_norm": 4.344723701477051, "learning_rate": 1.9329127077380815e-05, "loss": 1.9337, "step": 9565 }, { "epoch": 0.7200737687950469, "grad_norm": 6.741992950439453, "learning_rate": 1.9319500125086228e-05, "loss": 2.4713, "step": 9566 }, { "epoch": 0.7201490430757072, "grad_norm": 5.562639236450195, "learning_rate": 1.9309874996659317e-05, "loss": 1.7452, "step": 9567 }, { "epoch": 0.7202243173563673, "grad_norm": 8.103625297546387, "learning_rate": 1.9300251692672283e-05, "loss": 2.4275, "step": 9568 }, { "epoch": 0.7202995916370274, "grad_norm": 4.430222034454346, "learning_rate": 1.929063021369722e-05, "loss": 1.8206, "step": 9569 }, { "epoch": 0.7203748659176876, "grad_norm": 5.527270793914795, "learning_rate": 1.928101056030605e-05, "loss": 1.8373, "step": 9570 }, { "epoch": 0.7204501401983477, "grad_norm": 4.450514793395996, "learning_rate": 1.9271392733070686e-05, "loss": 1.7859, "step": 9571 }, { "epoch": 0.7205254144790079, "grad_norm": 4.106017112731934, "learning_rate": 1.926177673256283e-05, "loss": 1.785, "step": 9572 }, { "epoch": 0.7206006887596681, "grad_norm": 4.553153038024902, "learning_rate": 1.925216255935417e-05, "loss": 1.7082, "step": 9573 }, { "epoch": 0.7206759630403282, "grad_norm": 4.588836193084717, "learning_rate": 1.9242550214016204e-05, "loss": 2.0324, "step": 9574 }, { "epoch": 0.7207512373209883, "grad_norm": 6.4269328117370605, "learning_rate": 1.9232939697120374e-05, "loss": 1.5279, "step": 9575 }, { "epoch": 0.7208265116016485, "grad_norm": 4.636991500854492, "learning_rate": 1.922333100923801e-05, "loss": 1.8487, "step": 9576 }, { "epoch": 0.7209017858823087, "grad_norm": 4.073963165283203, "learning_rate": 1.92137241509403e-05, "loss": 2.1126, "step": 9577 }, { "epoch": 0.7209770601629688, "grad_norm": 5.726802825927734, "learning_rate": 1.9204119122798347e-05, "loss": 1.8359, "step": 9578 }, { "epoch": 0.7210523344436289, "grad_norm": 6.7845377922058105, "learning_rate": 1.919451592538315e-05, "loss": 1.7283, "step": 9579 }, { "epoch": 0.7211276087242892, "grad_norm": 5.296535015106201, "learning_rate": 1.918491455926561e-05, "loss": 1.7735, "step": 9580 }, { "epoch": 0.7212028830049493, "grad_norm": 5.237893581390381, "learning_rate": 1.9175315025016464e-05, "loss": 2.0306, "step": 9581 }, { "epoch": 0.7212781572856094, "grad_norm": 4.582086086273193, "learning_rate": 1.9165717323206412e-05, "loss": 1.8575, "step": 9582 }, { "epoch": 0.7213534315662696, "grad_norm": 4.404659271240234, "learning_rate": 1.9156121454405968e-05, "loss": 2.1049, "step": 9583 }, { "epoch": 0.7214287058469298, "grad_norm": 4.021386623382568, "learning_rate": 1.914652741918563e-05, "loss": 1.4943, "step": 9584 }, { "epoch": 0.7215039801275899, "grad_norm": 3.68868088722229, "learning_rate": 1.9136935218115682e-05, "loss": 1.7914, "step": 9585 }, { "epoch": 0.7215792544082501, "grad_norm": 6.577298164367676, "learning_rate": 1.91273448517664e-05, "loss": 1.7602, "step": 9586 }, { "epoch": 0.7216545286889102, "grad_norm": 6.713287830352783, "learning_rate": 1.9117756320707865e-05, "loss": 1.9236, "step": 9587 }, { "epoch": 0.7217298029695703, "grad_norm": 4.912992000579834, "learning_rate": 1.9108169625510107e-05, "loss": 1.7037, "step": 9588 }, { "epoch": 0.7218050772502306, "grad_norm": 4.865211009979248, "learning_rate": 1.9098584766743026e-05, "loss": 1.9806, "step": 9589 }, { "epoch": 0.7218803515308907, "grad_norm": 5.134067535400391, "learning_rate": 1.9089001744976413e-05, "loss": 1.8654, "step": 9590 }, { "epoch": 0.7219556258115508, "grad_norm": 5.546294689178467, "learning_rate": 1.907942056077997e-05, "loss": 1.8915, "step": 9591 }, { "epoch": 0.722030900092211, "grad_norm": 4.9889349937438965, "learning_rate": 1.906984121472324e-05, "loss": 1.8894, "step": 9592 }, { "epoch": 0.7221061743728712, "grad_norm": 5.335707187652588, "learning_rate": 1.9060263707375713e-05, "loss": 2.0343, "step": 9593 }, { "epoch": 0.7221814486535313, "grad_norm": 7.060311794281006, "learning_rate": 1.9050688039306717e-05, "loss": 1.5834, "step": 9594 }, { "epoch": 0.7222567229341914, "grad_norm": 4.028903484344482, "learning_rate": 1.9041114211085538e-05, "loss": 1.6511, "step": 9595 }, { "epoch": 0.7223319972148516, "grad_norm": 6.402505874633789, "learning_rate": 1.9031542223281263e-05, "loss": 1.6092, "step": 9596 }, { "epoch": 0.7224072714955118, "grad_norm": 6.058710098266602, "learning_rate": 1.9021972076462964e-05, "loss": 1.8923, "step": 9597 }, { "epoch": 0.7224825457761719, "grad_norm": 5.561830520629883, "learning_rate": 1.9012403771199517e-05, "loss": 1.7782, "step": 9598 }, { "epoch": 0.7225578200568321, "grad_norm": 4.070313930511475, "learning_rate": 1.9002837308059755e-05, "loss": 2.0121, "step": 9599 }, { "epoch": 0.7226330943374922, "grad_norm": 3.963165760040283, "learning_rate": 1.8993272687612378e-05, "loss": 1.7335, "step": 9600 }, { "epoch": 0.7227083686181524, "grad_norm": 3.5139055252075195, "learning_rate": 1.898370991042599e-05, "loss": 2.0153, "step": 9601 }, { "epoch": 0.7227836428988126, "grad_norm": 4.647072792053223, "learning_rate": 1.897414897706903e-05, "loss": 1.628, "step": 9602 }, { "epoch": 0.7228589171794727, "grad_norm": 3.811833381652832, "learning_rate": 1.8964589888109906e-05, "loss": 1.8338, "step": 9603 }, { "epoch": 0.7229341914601328, "grad_norm": 4.302700519561768, "learning_rate": 1.8955032644116848e-05, "loss": 1.5848, "step": 9604 }, { "epoch": 0.7230094657407931, "grad_norm": 4.486805438995361, "learning_rate": 1.8945477245658038e-05, "loss": 1.6467, "step": 9605 }, { "epoch": 0.7230847400214532, "grad_norm": 4.820157527923584, "learning_rate": 1.8935923693301488e-05, "loss": 2.0539, "step": 9606 }, { "epoch": 0.7231600143021133, "grad_norm": 4.296202182769775, "learning_rate": 1.8926371987615137e-05, "loss": 1.9226, "step": 9607 }, { "epoch": 0.7232352885827735, "grad_norm": 3.899815082550049, "learning_rate": 1.8916822129166827e-05, "loss": 2.0188, "step": 9608 }, { "epoch": 0.7233105628634336, "grad_norm": 4.494897842407227, "learning_rate": 1.8907274118524236e-05, "loss": 1.8445, "step": 9609 }, { "epoch": 0.7233858371440938, "grad_norm": 3.7157018184661865, "learning_rate": 1.8897727956255007e-05, "loss": 1.6226, "step": 9610 }, { "epoch": 0.723461111424754, "grad_norm": 5.906121253967285, "learning_rate": 1.888818364292659e-05, "loss": 1.8541, "step": 9611 }, { "epoch": 0.7235363857054141, "grad_norm": 5.056562900543213, "learning_rate": 1.8878641179106375e-05, "loss": 1.7235, "step": 9612 }, { "epoch": 0.7236116599860742, "grad_norm": 5.222574234008789, "learning_rate": 1.8869100565361657e-05, "loss": 1.3679, "step": 9613 }, { "epoch": 0.7236869342667344, "grad_norm": 4.466900825500488, "learning_rate": 1.8859561802259597e-05, "loss": 1.7114, "step": 9614 }, { "epoch": 0.7237622085473946, "grad_norm": 5.444098472595215, "learning_rate": 1.8850024890367224e-05, "loss": 1.954, "step": 9615 }, { "epoch": 0.7238374828280547, "grad_norm": 4.00093936920166, "learning_rate": 1.88404898302515e-05, "loss": 1.9861, "step": 9616 }, { "epoch": 0.7239127571087148, "grad_norm": 5.224597454071045, "learning_rate": 1.883095662247924e-05, "loss": 1.8943, "step": 9617 }, { "epoch": 0.7239880313893751, "grad_norm": 4.640960216522217, "learning_rate": 1.8821425267617187e-05, "loss": 1.9859, "step": 9618 }, { "epoch": 0.7240633056700352, "grad_norm": 3.755033016204834, "learning_rate": 1.8811895766231928e-05, "loss": 1.6073, "step": 9619 }, { "epoch": 0.7241385799506953, "grad_norm": 4.830538272857666, "learning_rate": 1.880236811888999e-05, "loss": 2.0587, "step": 9620 }, { "epoch": 0.7242138542313555, "grad_norm": 4.16685676574707, "learning_rate": 1.879284232615774e-05, "loss": 2.0139, "step": 9621 }, { "epoch": 0.7242891285120157, "grad_norm": 5.567697048187256, "learning_rate": 1.8783318388601463e-05, "loss": 1.554, "step": 9622 }, { "epoch": 0.7243644027926758, "grad_norm": 4.302929401397705, "learning_rate": 1.8773796306787343e-05, "loss": 1.6313, "step": 9623 }, { "epoch": 0.724439677073336, "grad_norm": 5.352593421936035, "learning_rate": 1.8764276081281428e-05, "loss": 1.971, "step": 9624 }, { "epoch": 0.7245149513539961, "grad_norm": 3.3538074493408203, "learning_rate": 1.87547577126497e-05, "loss": 1.9039, "step": 9625 }, { "epoch": 0.7245902256346562, "grad_norm": 6.26210355758667, "learning_rate": 1.8745241201457954e-05, "loss": 1.8066, "step": 9626 }, { "epoch": 0.7246654999153165, "grad_norm": 3.9136078357696533, "learning_rate": 1.8735726548271947e-05, "loss": 1.6886, "step": 9627 }, { "epoch": 0.7247407741959766, "grad_norm": 5.439061641693115, "learning_rate": 1.8726213753657278e-05, "loss": 1.982, "step": 9628 }, { "epoch": 0.7248160484766367, "grad_norm": 5.446580410003662, "learning_rate": 1.8716702818179487e-05, "loss": 1.8119, "step": 9629 }, { "epoch": 0.724891322757297, "grad_norm": 4.451972007751465, "learning_rate": 1.8707193742403935e-05, "loss": 1.602, "step": 9630 }, { "epoch": 0.7249665970379571, "grad_norm": 5.40526008605957, "learning_rate": 1.869768652689594e-05, "loss": 2.2619, "step": 9631 }, { "epoch": 0.7250418713186172, "grad_norm": 4.169073581695557, "learning_rate": 1.868818117222065e-05, "loss": 1.8202, "step": 9632 }, { "epoch": 0.7251171455992773, "grad_norm": 7.562443733215332, "learning_rate": 1.867867767894314e-05, "loss": 2.2829, "step": 9633 }, { "epoch": 0.7251924198799375, "grad_norm": 4.879371643066406, "learning_rate": 1.8669176047628373e-05, "loss": 1.7564, "step": 9634 }, { "epoch": 0.7252676941605977, "grad_norm": 5.123662948608398, "learning_rate": 1.8659676278841215e-05, "loss": 1.8249, "step": 9635 }, { "epoch": 0.7253429684412578, "grad_norm": 4.29550313949585, "learning_rate": 1.8650178373146355e-05, "loss": 2.0688, "step": 9636 }, { "epoch": 0.725418242721918, "grad_norm": 5.675379276275635, "learning_rate": 1.864068233110845e-05, "loss": 1.7642, "step": 9637 }, { "epoch": 0.7254935170025781, "grad_norm": 5.180209159851074, "learning_rate": 1.863118815329199e-05, "loss": 1.7766, "step": 9638 }, { "epoch": 0.7255687912832383, "grad_norm": 4.255703926086426, "learning_rate": 1.8621695840261388e-05, "loss": 1.683, "step": 9639 }, { "epoch": 0.7256440655638985, "grad_norm": 6.847949028015137, "learning_rate": 1.8612205392580945e-05, "loss": 1.9474, "step": 9640 }, { "epoch": 0.7257193398445586, "grad_norm": 4.82269811630249, "learning_rate": 1.860271681081482e-05, "loss": 1.7569, "step": 9641 }, { "epoch": 0.7257946141252187, "grad_norm": 4.468365669250488, "learning_rate": 1.8593230095527104e-05, "loss": 1.5133, "step": 9642 }, { "epoch": 0.725869888405879, "grad_norm": 5.581293106079102, "learning_rate": 1.8583745247281724e-05, "loss": 2.1109, "step": 9643 }, { "epoch": 0.7259451626865391, "grad_norm": 4.230678081512451, "learning_rate": 1.8574262266642552e-05, "loss": 1.8581, "step": 9644 }, { "epoch": 0.7260204369671992, "grad_norm": 3.937662363052368, "learning_rate": 1.8564781154173312e-05, "loss": 1.6738, "step": 9645 }, { "epoch": 0.7260957112478594, "grad_norm": 4.213057518005371, "learning_rate": 1.855530191043765e-05, "loss": 1.7502, "step": 9646 }, { "epoch": 0.7261709855285196, "grad_norm": 4.31326150894165, "learning_rate": 1.854582453599905e-05, "loss": 1.363, "step": 9647 }, { "epoch": 0.7262462598091797, "grad_norm": 5.665443420410156, "learning_rate": 1.8536349031420953e-05, "loss": 1.7954, "step": 9648 }, { "epoch": 0.7263215340898399, "grad_norm": 8.148431777954102, "learning_rate": 1.8526875397266603e-05, "loss": 2.0201, "step": 9649 }, { "epoch": 0.7263968083705, "grad_norm": 5.401271343231201, "learning_rate": 1.8517403634099217e-05, "loss": 1.5663, "step": 9650 }, { "epoch": 0.7264720826511601, "grad_norm": 5.100400924682617, "learning_rate": 1.850793374248184e-05, "loss": 1.6419, "step": 9651 }, { "epoch": 0.7265473569318203, "grad_norm": 5.196130752563477, "learning_rate": 1.849846572297746e-05, "loss": 2.0133, "step": 9652 }, { "epoch": 0.7266226312124805, "grad_norm": 4.777595043182373, "learning_rate": 1.8488999576148886e-05, "loss": 1.9229, "step": 9653 }, { "epoch": 0.7266979054931406, "grad_norm": 6.994591236114502, "learning_rate": 1.8479535302558872e-05, "loss": 2.1819, "step": 9654 }, { "epoch": 0.7267731797738007, "grad_norm": 6.8679938316345215, "learning_rate": 1.8470072902770037e-05, "loss": 2.2083, "step": 9655 }, { "epoch": 0.726848454054461, "grad_norm": 5.348995208740234, "learning_rate": 1.846061237734492e-05, "loss": 1.9756, "step": 9656 }, { "epoch": 0.7269237283351211, "grad_norm": 3.789236068725586, "learning_rate": 1.8451153726845882e-05, "loss": 1.7119, "step": 9657 }, { "epoch": 0.7269990026157812, "grad_norm": 6.561370849609375, "learning_rate": 1.8441696951835235e-05, "loss": 2.1884, "step": 9658 }, { "epoch": 0.7270742768964414, "grad_norm": 6.422976493835449, "learning_rate": 1.8432242052875165e-05, "loss": 2.0826, "step": 9659 }, { "epoch": 0.7271495511771016, "grad_norm": 5.728564739227295, "learning_rate": 1.8422789030527714e-05, "loss": 2.246, "step": 9660 }, { "epoch": 0.7272248254577617, "grad_norm": 5.061489105224609, "learning_rate": 1.8413337885354866e-05, "loss": 1.924, "step": 9661 }, { "epoch": 0.7273000997384219, "grad_norm": 6.333934783935547, "learning_rate": 1.840388861791843e-05, "loss": 1.4921, "step": 9662 }, { "epoch": 0.727375374019082, "grad_norm": 4.267570495605469, "learning_rate": 1.8394441228780174e-05, "loss": 1.4773, "step": 9663 }, { "epoch": 0.7274506482997422, "grad_norm": 5.911327838897705, "learning_rate": 1.8384995718501685e-05, "loss": 1.7595, "step": 9664 }, { "epoch": 0.7275259225804024, "grad_norm": 6.126576900482178, "learning_rate": 1.8375552087644497e-05, "loss": 2.3626, "step": 9665 }, { "epoch": 0.7276011968610625, "grad_norm": 4.967687606811523, "learning_rate": 1.8366110336769987e-05, "loss": 1.9229, "step": 9666 }, { "epoch": 0.7276764711417226, "grad_norm": 4.9898271560668945, "learning_rate": 1.8356670466439446e-05, "loss": 1.6429, "step": 9667 }, { "epoch": 0.7277517454223829, "grad_norm": 5.991692066192627, "learning_rate": 1.8347232477214053e-05, "loss": 1.9946, "step": 9668 }, { "epoch": 0.727827019703043, "grad_norm": 5.030762672424316, "learning_rate": 1.8337796369654887e-05, "loss": 1.8292, "step": 9669 }, { "epoch": 0.7279022939837031, "grad_norm": 4.66921329498291, "learning_rate": 1.832836214432286e-05, "loss": 1.7523, "step": 9670 }, { "epoch": 0.7279775682643633, "grad_norm": 4.7627410888671875, "learning_rate": 1.8318929801778824e-05, "loss": 1.7108, "step": 9671 }, { "epoch": 0.7280528425450234, "grad_norm": 4.6498308181762695, "learning_rate": 1.8309499342583525e-05, "loss": 1.4522, "step": 9672 }, { "epoch": 0.7281281168256836, "grad_norm": 4.665681838989258, "learning_rate": 1.830007076729754e-05, "loss": 1.7993, "step": 9673 }, { "epoch": 0.7282033911063437, "grad_norm": 4.798788547515869, "learning_rate": 1.829064407648141e-05, "loss": 2.2603, "step": 9674 }, { "epoch": 0.7282786653870039, "grad_norm": 5.3134026527404785, "learning_rate": 1.8281219270695493e-05, "loss": 1.9267, "step": 9675 }, { "epoch": 0.728353939667664, "grad_norm": 4.871895790100098, "learning_rate": 1.827179635050009e-05, "loss": 1.816, "step": 9676 }, { "epoch": 0.7284292139483242, "grad_norm": 4.945578098297119, "learning_rate": 1.8262375316455343e-05, "loss": 1.8274, "step": 9677 }, { "epoch": 0.7285044882289844, "grad_norm": 4.161609172821045, "learning_rate": 1.8252956169121315e-05, "loss": 1.6447, "step": 9678 }, { "epoch": 0.7285797625096445, "grad_norm": 4.553129196166992, "learning_rate": 1.8243538909057954e-05, "loss": 1.8793, "step": 9679 }, { "epoch": 0.7286550367903046, "grad_norm": 4.220051288604736, "learning_rate": 1.82341235368251e-05, "loss": 1.6566, "step": 9680 }, { "epoch": 0.7287303110709649, "grad_norm": 6.925411701202393, "learning_rate": 1.8224710052982435e-05, "loss": 1.7613, "step": 9681 }, { "epoch": 0.728805585351625, "grad_norm": 4.924813270568848, "learning_rate": 1.8215298458089607e-05, "loss": 1.8584, "step": 9682 }, { "epoch": 0.7288808596322851, "grad_norm": 5.307071685791016, "learning_rate": 1.8205888752706062e-05, "loss": 2.2315, "step": 9683 }, { "epoch": 0.7289561339129453, "grad_norm": 7.626509189605713, "learning_rate": 1.8196480937391223e-05, "loss": 2.2469, "step": 9684 }, { "epoch": 0.7290314081936055, "grad_norm": 4.821575164794922, "learning_rate": 1.8187075012704324e-05, "loss": 1.5392, "step": 9685 }, { "epoch": 0.7291066824742656, "grad_norm": 4.358816623687744, "learning_rate": 1.8177670979204546e-05, "loss": 2.1884, "step": 9686 }, { "epoch": 0.7291819567549258, "grad_norm": 6.0131635665893555, "learning_rate": 1.8168268837450907e-05, "loss": 2.1701, "step": 9687 }, { "epoch": 0.7292572310355859, "grad_norm": 4.657010555267334, "learning_rate": 1.815886858800235e-05, "loss": 2.1627, "step": 9688 }, { "epoch": 0.729332505316246, "grad_norm": 5.907837390899658, "learning_rate": 1.8149470231417686e-05, "loss": 1.6824, "step": 9689 }, { "epoch": 0.7294077795969063, "grad_norm": 4.379014015197754, "learning_rate": 1.8140073768255633e-05, "loss": 1.753, "step": 9690 }, { "epoch": 0.7294830538775664, "grad_norm": 4.511151313781738, "learning_rate": 1.8130679199074798e-05, "loss": 2.2083, "step": 9691 }, { "epoch": 0.7295583281582265, "grad_norm": 4.889375686645508, "learning_rate": 1.8121286524433616e-05, "loss": 1.4597, "step": 9692 }, { "epoch": 0.7296336024388866, "grad_norm": 4.540733814239502, "learning_rate": 1.81118957448905e-05, "loss": 1.8036, "step": 9693 }, { "epoch": 0.7297088767195469, "grad_norm": 4.11193323135376, "learning_rate": 1.810250686100367e-05, "loss": 1.7872, "step": 9694 }, { "epoch": 0.729784151000207, "grad_norm": 5.5439629554748535, "learning_rate": 1.8093119873331297e-05, "loss": 1.9098, "step": 9695 }, { "epoch": 0.7298594252808671, "grad_norm": 6.257260322570801, "learning_rate": 1.808373478243138e-05, "loss": 2.3189, "step": 9696 }, { "epoch": 0.7299346995615273, "grad_norm": 4.942529201507568, "learning_rate": 1.8074351588861876e-05, "loss": 2.1724, "step": 9697 }, { "epoch": 0.7300099738421875, "grad_norm": 4.4367804527282715, "learning_rate": 1.8064970293180545e-05, "loss": 2.2091, "step": 9698 }, { "epoch": 0.7300852481228476, "grad_norm": 4.344898223876953, "learning_rate": 1.805559089594509e-05, "loss": 1.8996, "step": 9699 }, { "epoch": 0.7301605224035078, "grad_norm": 4.409204006195068, "learning_rate": 1.8046213397713108e-05, "loss": 2.0279, "step": 9700 }, { "epoch": 0.7302357966841679, "grad_norm": 8.010478019714355, "learning_rate": 1.803683779904206e-05, "loss": 1.8559, "step": 9701 }, { "epoch": 0.7303110709648281, "grad_norm": 7.777335166931152, "learning_rate": 1.8027464100489283e-05, "loss": 1.7544, "step": 9702 }, { "epoch": 0.7303863452454883, "grad_norm": 7.196082592010498, "learning_rate": 1.801809230261203e-05, "loss": 1.9297, "step": 9703 }, { "epoch": 0.7304616195261484, "grad_norm": 4.2694878578186035, "learning_rate": 1.800872240596743e-05, "loss": 1.9816, "step": 9704 }, { "epoch": 0.7305368938068085, "grad_norm": 3.8258280754089355, "learning_rate": 1.799935441111248e-05, "loss": 1.7919, "step": 9705 }, { "epoch": 0.7306121680874688, "grad_norm": 6.191116809844971, "learning_rate": 1.79899883186041e-05, "loss": 1.7088, "step": 9706 }, { "epoch": 0.7306874423681289, "grad_norm": 3.6788432598114014, "learning_rate": 1.7980624128999056e-05, "loss": 1.8622, "step": 9707 }, { "epoch": 0.730762716648789, "grad_norm": 5.6116414070129395, "learning_rate": 1.7971261842854048e-05, "loss": 1.8853, "step": 9708 }, { "epoch": 0.7308379909294492, "grad_norm": 5.4416704177856445, "learning_rate": 1.7961901460725605e-05, "loss": 1.7398, "step": 9709 }, { "epoch": 0.7309132652101094, "grad_norm": 3.781179666519165, "learning_rate": 1.7952542983170213e-05, "loss": 1.7704, "step": 9710 }, { "epoch": 0.7309885394907695, "grad_norm": 3.867990016937256, "learning_rate": 1.794318641074417e-05, "loss": 2.0546, "step": 9711 }, { "epoch": 0.7310638137714296, "grad_norm": 5.500983715057373, "learning_rate": 1.793383174400371e-05, "loss": 1.7079, "step": 9712 }, { "epoch": 0.7311390880520898, "grad_norm": 4.387624740600586, "learning_rate": 1.7924478983504945e-05, "loss": 1.8676, "step": 9713 }, { "epoch": 0.73121436233275, "grad_norm": 5.4258904457092285, "learning_rate": 1.7915128129803886e-05, "loss": 1.71, "step": 9714 }, { "epoch": 0.7312896366134101, "grad_norm": 4.6748199462890625, "learning_rate": 1.7905779183456382e-05, "loss": 1.9719, "step": 9715 }, { "epoch": 0.7313649108940703, "grad_norm": 3.597334623336792, "learning_rate": 1.789643214501824e-05, "loss": 1.5988, "step": 9716 }, { "epoch": 0.7314401851747304, "grad_norm": 5.096696376800537, "learning_rate": 1.788708701504506e-05, "loss": 1.83, "step": 9717 }, { "epoch": 0.7315154594553905, "grad_norm": 4.292730331420898, "learning_rate": 1.7877743794092444e-05, "loss": 1.6436, "step": 9718 }, { "epoch": 0.7315907337360508, "grad_norm": 5.707505226135254, "learning_rate": 1.7868402482715768e-05, "loss": 1.7773, "step": 9719 }, { "epoch": 0.7316660080167109, "grad_norm": 4.0598978996276855, "learning_rate": 1.7859063081470372e-05, "loss": 1.9463, "step": 9720 }, { "epoch": 0.731741282297371, "grad_norm": 4.274533748626709, "learning_rate": 1.784972559091147e-05, "loss": 1.5552, "step": 9721 }, { "epoch": 0.7318165565780312, "grad_norm": 4.773648262023926, "learning_rate": 1.784039001159411e-05, "loss": 1.8461, "step": 9722 }, { "epoch": 0.7318918308586914, "grad_norm": 4.201850891113281, "learning_rate": 1.7831056344073293e-05, "loss": 1.5214, "step": 9723 }, { "epoch": 0.7319671051393515, "grad_norm": 4.602932929992676, "learning_rate": 1.782172458890387e-05, "loss": 1.5414, "step": 9724 }, { "epoch": 0.7320423794200117, "grad_norm": 4.259728908538818, "learning_rate": 1.781239474664061e-05, "loss": 1.784, "step": 9725 }, { "epoch": 0.7321176537006718, "grad_norm": 5.31164026260376, "learning_rate": 1.7803066817838103e-05, "loss": 1.758, "step": 9726 }, { "epoch": 0.732192927981332, "grad_norm": 6.805521488189697, "learning_rate": 1.779374080305092e-05, "loss": 1.6763, "step": 9727 }, { "epoch": 0.7322682022619922, "grad_norm": 6.2445597648620605, "learning_rate": 1.7784416702833407e-05, "loss": 1.6983, "step": 9728 }, { "epoch": 0.7323434765426523, "grad_norm": 6.488143444061279, "learning_rate": 1.7775094517739903e-05, "loss": 1.7871, "step": 9729 }, { "epoch": 0.7324187508233124, "grad_norm": 7.087213039398193, "learning_rate": 1.776577424832455e-05, "loss": 1.7209, "step": 9730 }, { "epoch": 0.7324940251039725, "grad_norm": 5.352892875671387, "learning_rate": 1.775645589514145e-05, "loss": 1.7292, "step": 9731 }, { "epoch": 0.7325692993846328, "grad_norm": 4.766453266143799, "learning_rate": 1.7747139458744504e-05, "loss": 1.8335, "step": 9732 }, { "epoch": 0.7326445736652929, "grad_norm": 5.376926422119141, "learning_rate": 1.773782493968758e-05, "loss": 1.9848, "step": 9733 }, { "epoch": 0.732719847945953, "grad_norm": 5.157107353210449, "learning_rate": 1.772851233852439e-05, "loss": 1.7593, "step": 9734 }, { "epoch": 0.7327951222266132, "grad_norm": 4.221771717071533, "learning_rate": 1.7719201655808565e-05, "loss": 1.8166, "step": 9735 }, { "epoch": 0.7328703965072734, "grad_norm": 5.821368217468262, "learning_rate": 1.7709892892093554e-05, "loss": 2.0513, "step": 9736 }, { "epoch": 0.7329456707879335, "grad_norm": 6.510801315307617, "learning_rate": 1.7700586047932764e-05, "loss": 2.1425, "step": 9737 }, { "epoch": 0.7330209450685937, "grad_norm": 5.537794589996338, "learning_rate": 1.7691281123879472e-05, "loss": 1.7634, "step": 9738 }, { "epoch": 0.7330962193492538, "grad_norm": 4.1630425453186035, "learning_rate": 1.76819781204868e-05, "loss": 2.039, "step": 9739 }, { "epoch": 0.733171493629914, "grad_norm": 4.676449298858643, "learning_rate": 1.7672677038307812e-05, "loss": 1.7387, "step": 9740 }, { "epoch": 0.7332467679105742, "grad_norm": 4.5910234451293945, "learning_rate": 1.7663377877895397e-05, "loss": 1.4709, "step": 9741 }, { "epoch": 0.7333220421912343, "grad_norm": 4.184149265289307, "learning_rate": 1.76540806398024e-05, "loss": 1.5164, "step": 9742 }, { "epoch": 0.7333973164718944, "grad_norm": 4.522687911987305, "learning_rate": 1.7644785324581487e-05, "loss": 1.8831, "step": 9743 }, { "epoch": 0.7334725907525547, "grad_norm": 4.9783525466918945, "learning_rate": 1.763549193278524e-05, "loss": 2.0299, "step": 9744 }, { "epoch": 0.7335478650332148, "grad_norm": 5.061306953430176, "learning_rate": 1.762620046496614e-05, "loss": 2.1781, "step": 9745 }, { "epoch": 0.7336231393138749, "grad_norm": 4.065035820007324, "learning_rate": 1.7616910921676548e-05, "loss": 2.1733, "step": 9746 }, { "epoch": 0.7336984135945351, "grad_norm": 4.660484313964844, "learning_rate": 1.760762330346867e-05, "loss": 1.7157, "step": 9747 }, { "epoch": 0.7337736878751953, "grad_norm": 5.202456951141357, "learning_rate": 1.759833761089465e-05, "loss": 1.5034, "step": 9748 }, { "epoch": 0.7338489621558554, "grad_norm": 6.001447677612305, "learning_rate": 1.7589053844506477e-05, "loss": 1.4525, "step": 9749 }, { "epoch": 0.7339242364365156, "grad_norm": 3.924381971359253, "learning_rate": 1.757977200485607e-05, "loss": 1.9319, "step": 9750 }, { "epoch": 0.7339995107171757, "grad_norm": 3.994356870651245, "learning_rate": 1.7570492092495183e-05, "loss": 2.0428, "step": 9751 }, { "epoch": 0.7340747849978358, "grad_norm": 4.514043807983398, "learning_rate": 1.7561214107975483e-05, "loss": 1.7472, "step": 9752 }, { "epoch": 0.734150059278496, "grad_norm": 4.36171817779541, "learning_rate": 1.7551938051848548e-05, "loss": 1.7168, "step": 9753 }, { "epoch": 0.7342253335591562, "grad_norm": 5.348937511444092, "learning_rate": 1.7542663924665776e-05, "loss": 1.7727, "step": 9754 }, { "epoch": 0.7343006078398163, "grad_norm": 4.585260391235352, "learning_rate": 1.7533391726978498e-05, "loss": 1.4679, "step": 9755 }, { "epoch": 0.7343758821204764, "grad_norm": 4.222022533416748, "learning_rate": 1.752412145933793e-05, "loss": 1.7319, "step": 9756 }, { "epoch": 0.7344511564011367, "grad_norm": 4.450934410095215, "learning_rate": 1.751485312229517e-05, "loss": 1.6003, "step": 9757 }, { "epoch": 0.7345264306817968, "grad_norm": 5.085152626037598, "learning_rate": 1.7505586716401167e-05, "loss": 1.7326, "step": 9758 }, { "epoch": 0.7346017049624569, "grad_norm": 4.06351900100708, "learning_rate": 1.7496322242206815e-05, "loss": 1.9483, "step": 9759 }, { "epoch": 0.7346769792431171, "grad_norm": 4.651455879211426, "learning_rate": 1.7487059700262824e-05, "loss": 1.6702, "step": 9760 }, { "epoch": 0.7347522535237773, "grad_norm": 5.352548122406006, "learning_rate": 1.7477799091119862e-05, "loss": 2.3562, "step": 9761 }, { "epoch": 0.7348275278044374, "grad_norm": 4.365647315979004, "learning_rate": 1.7468540415328406e-05, "loss": 2.031, "step": 9762 }, { "epoch": 0.7349028020850976, "grad_norm": 5.19638204574585, "learning_rate": 1.74592836734389e-05, "loss": 1.5893, "step": 9763 }, { "epoch": 0.7349780763657577, "grad_norm": 4.678807735443115, "learning_rate": 1.745002886600159e-05, "loss": 1.376, "step": 9764 }, { "epoch": 0.7350533506464179, "grad_norm": 5.133519172668457, "learning_rate": 1.744077599356669e-05, "loss": 1.8811, "step": 9765 }, { "epoch": 0.7351286249270781, "grad_norm": 5.054455280303955, "learning_rate": 1.7431525056684213e-05, "loss": 1.7761, "step": 9766 }, { "epoch": 0.7352038992077382, "grad_norm": 4.485023021697998, "learning_rate": 1.742227605590412e-05, "loss": 2.095, "step": 9767 }, { "epoch": 0.7352791734883983, "grad_norm": 4.016222953796387, "learning_rate": 1.7413028991776242e-05, "loss": 1.985, "step": 9768 }, { "epoch": 0.7353544477690586, "grad_norm": 6.328843116760254, "learning_rate": 1.7403783864850286e-05, "loss": 1.4483, "step": 9769 }, { "epoch": 0.7354297220497187, "grad_norm": 5.10483980178833, "learning_rate": 1.7394540675675868e-05, "loss": 1.6889, "step": 9770 }, { "epoch": 0.7355049963303788, "grad_norm": 4.845269203186035, "learning_rate": 1.7385299424802437e-05, "loss": 1.6641, "step": 9771 }, { "epoch": 0.7355802706110389, "grad_norm": 5.702883720397949, "learning_rate": 1.737606011277939e-05, "loss": 1.9045, "step": 9772 }, { "epoch": 0.7356555448916992, "grad_norm": 4.234776020050049, "learning_rate": 1.736682274015594e-05, "loss": 1.524, "step": 9773 }, { "epoch": 0.7357308191723593, "grad_norm": 6.878330230712891, "learning_rate": 1.7357587307481267e-05, "loss": 1.9828, "step": 9774 }, { "epoch": 0.7358060934530194, "grad_norm": 4.601244926452637, "learning_rate": 1.7348353815304343e-05, "loss": 2.043, "step": 9775 }, { "epoch": 0.7358813677336796, "grad_norm": 5.564022541046143, "learning_rate": 1.7339122264174118e-05, "loss": 1.8573, "step": 9776 }, { "epoch": 0.7359566420143397, "grad_norm": 3.8240888118743896, "learning_rate": 1.732989265463934e-05, "loss": 1.6579, "step": 9777 }, { "epoch": 0.7360319162949999, "grad_norm": 3.6997196674346924, "learning_rate": 1.73206649872487e-05, "loss": 1.6569, "step": 9778 }, { "epoch": 0.7361071905756601, "grad_norm": 4.304880619049072, "learning_rate": 1.7311439262550766e-05, "loss": 1.9989, "step": 9779 }, { "epoch": 0.7361824648563202, "grad_norm": 4.956548690795898, "learning_rate": 1.7302215481093985e-05, "loss": 2.0161, "step": 9780 }, { "epoch": 0.7362577391369803, "grad_norm": 4.1241350173950195, "learning_rate": 1.7292993643426657e-05, "loss": 1.8653, "step": 9781 }, { "epoch": 0.7363330134176406, "grad_norm": 4.550619602203369, "learning_rate": 1.728377375009703e-05, "loss": 2.3279, "step": 9782 }, { "epoch": 0.7364082876983007, "grad_norm": 4.133872985839844, "learning_rate": 1.7274555801653163e-05, "loss": 1.6952, "step": 9783 }, { "epoch": 0.7364835619789608, "grad_norm": 3.9653403759002686, "learning_rate": 1.7265339798643048e-05, "loss": 1.8654, "step": 9784 }, { "epoch": 0.736558836259621, "grad_norm": 5.002777576446533, "learning_rate": 1.725612574161458e-05, "loss": 1.8781, "step": 9785 }, { "epoch": 0.7366341105402812, "grad_norm": 3.9798200130462646, "learning_rate": 1.7246913631115465e-05, "loss": 1.5089, "step": 9786 }, { "epoch": 0.7367093848209413, "grad_norm": 7.075782775878906, "learning_rate": 1.7237703467693366e-05, "loss": 1.9177, "step": 9787 }, { "epoch": 0.7367846591016015, "grad_norm": 5.200432300567627, "learning_rate": 1.7228495251895783e-05, "loss": 2.0511, "step": 9788 }, { "epoch": 0.7368599333822616, "grad_norm": 6.1699676513671875, "learning_rate": 1.721928898427012e-05, "loss": 1.432, "step": 9789 }, { "epoch": 0.7369352076629218, "grad_norm": 4.753431797027588, "learning_rate": 1.7210084665363668e-05, "loss": 1.91, "step": 9790 }, { "epoch": 0.7370104819435819, "grad_norm": 4.626429080963135, "learning_rate": 1.720088229572362e-05, "loss": 1.4399, "step": 9791 }, { "epoch": 0.7370857562242421, "grad_norm": 6.596329212188721, "learning_rate": 1.7191681875896986e-05, "loss": 2.0601, "step": 9792 }, { "epoch": 0.7371610305049022, "grad_norm": 3.7581191062927246, "learning_rate": 1.7182483406430745e-05, "loss": 2.0018, "step": 9793 }, { "epoch": 0.7372363047855623, "grad_norm": 3.9835758209228516, "learning_rate": 1.7173286887871687e-05, "loss": 1.7088, "step": 9794 }, { "epoch": 0.7373115790662226, "grad_norm": 4.646708965301514, "learning_rate": 1.7164092320766544e-05, "loss": 2.053, "step": 9795 }, { "epoch": 0.7373868533468827, "grad_norm": 4.294607162475586, "learning_rate": 1.7154899705661886e-05, "loss": 1.8916, "step": 9796 }, { "epoch": 0.7374621276275428, "grad_norm": 4.639212131500244, "learning_rate": 1.7145709043104208e-05, "loss": 2.0017, "step": 9797 }, { "epoch": 0.737537401908203, "grad_norm": 4.923370838165283, "learning_rate": 1.7136520333639843e-05, "loss": 1.8616, "step": 9798 }, { "epoch": 0.7376126761888632, "grad_norm": 4.515198230743408, "learning_rate": 1.7127333577815053e-05, "loss": 1.6887, "step": 9799 }, { "epoch": 0.7376879504695233, "grad_norm": 6.550492286682129, "learning_rate": 1.711814877617595e-05, "loss": 1.637, "step": 9800 }, { "epoch": 0.7377632247501835, "grad_norm": 4.596217632293701, "learning_rate": 1.710896592926856e-05, "loss": 1.8319, "step": 9801 }, { "epoch": 0.7378384990308436, "grad_norm": 5.986010551452637, "learning_rate": 1.7099785037638787e-05, "loss": 1.8664, "step": 9802 }, { "epoch": 0.7379137733115038, "grad_norm": 3.6857333183288574, "learning_rate": 1.709060610183238e-05, "loss": 1.8447, "step": 9803 }, { "epoch": 0.737989047592164, "grad_norm": 6.030239582061768, "learning_rate": 1.7081429122395022e-05, "loss": 1.8531, "step": 9804 }, { "epoch": 0.7380643218728241, "grad_norm": 5.257287979125977, "learning_rate": 1.707225409987224e-05, "loss": 1.6938, "step": 9805 }, { "epoch": 0.7381395961534842, "grad_norm": 4.69661283493042, "learning_rate": 1.706308103480948e-05, "loss": 1.7411, "step": 9806 }, { "epoch": 0.7382148704341445, "grad_norm": 5.9631028175354, "learning_rate": 1.7053909927752034e-05, "loss": 1.5424, "step": 9807 }, { "epoch": 0.7382901447148046, "grad_norm": 4.716734886169434, "learning_rate": 1.7044740779245123e-05, "loss": 2.1388, "step": 9808 }, { "epoch": 0.7383654189954647, "grad_norm": 6.152210235595703, "learning_rate": 1.7035573589833798e-05, "loss": 1.8094, "step": 9809 }, { "epoch": 0.7384406932761248, "grad_norm": 4.928935527801514, "learning_rate": 1.7026408360063035e-05, "loss": 1.7463, "step": 9810 }, { "epoch": 0.7385159675567851, "grad_norm": 5.057109355926514, "learning_rate": 1.70172450904777e-05, "loss": 1.9769, "step": 9811 }, { "epoch": 0.7385912418374452, "grad_norm": 4.73891019821167, "learning_rate": 1.7008083781622496e-05, "loss": 1.936, "step": 9812 }, { "epoch": 0.7386665161181053, "grad_norm": 5.9416584968566895, "learning_rate": 1.699892443404204e-05, "loss": 1.8201, "step": 9813 }, { "epoch": 0.7387417903987655, "grad_norm": 5.524312496185303, "learning_rate": 1.698976704828086e-05, "loss": 2.0471, "step": 9814 }, { "epoch": 0.7388170646794257, "grad_norm": 6.035019397735596, "learning_rate": 1.6980611624883285e-05, "loss": 1.5005, "step": 9815 }, { "epoch": 0.7388923389600858, "grad_norm": 5.996881484985352, "learning_rate": 1.6971458164393618e-05, "loss": 1.8045, "step": 9816 }, { "epoch": 0.738967613240746, "grad_norm": 4.0374274253845215, "learning_rate": 1.6962306667355997e-05, "loss": 1.6018, "step": 9817 }, { "epoch": 0.7390428875214061, "grad_norm": 5.851326942443848, "learning_rate": 1.695315713431444e-05, "loss": 2.1033, "step": 9818 }, { "epoch": 0.7391181618020662, "grad_norm": 4.750574111938477, "learning_rate": 1.6944009565812886e-05, "loss": 2.0676, "step": 9819 }, { "epoch": 0.7391934360827265, "grad_norm": 4.734914302825928, "learning_rate": 1.69348639623951e-05, "loss": 1.9219, "step": 9820 }, { "epoch": 0.7392687103633866, "grad_norm": 6.682633876800537, "learning_rate": 1.692572032460479e-05, "loss": 1.6547, "step": 9821 }, { "epoch": 0.7393439846440467, "grad_norm": 4.324955940246582, "learning_rate": 1.6916578652985488e-05, "loss": 2.0777, "step": 9822 }, { "epoch": 0.739419258924707, "grad_norm": 5.25320291519165, "learning_rate": 1.690743894808066e-05, "loss": 1.7514, "step": 9823 }, { "epoch": 0.7394945332053671, "grad_norm": 4.350616931915283, "learning_rate": 1.6898301210433633e-05, "loss": 1.7402, "step": 9824 }, { "epoch": 0.7395698074860272, "grad_norm": 7.803241729736328, "learning_rate": 1.6889165440587635e-05, "loss": 1.9206, "step": 9825 }, { "epoch": 0.7396450817666874, "grad_norm": 3.8721065521240234, "learning_rate": 1.688003163908573e-05, "loss": 1.613, "step": 9826 }, { "epoch": 0.7397203560473475, "grad_norm": 8.960091590881348, "learning_rate": 1.687089980647093e-05, "loss": 1.8164, "step": 9827 }, { "epoch": 0.7397956303280077, "grad_norm": 5.168846130371094, "learning_rate": 1.6861769943286065e-05, "loss": 1.5543, "step": 9828 }, { "epoch": 0.7398709046086678, "grad_norm": 4.666390895843506, "learning_rate": 1.68526420500739e-05, "loss": 1.8044, "step": 9829 }, { "epoch": 0.739946178889328, "grad_norm": 5.713004112243652, "learning_rate": 1.6843516127377044e-05, "loss": 1.8832, "step": 9830 }, { "epoch": 0.7400214531699881, "grad_norm": 3.6045401096343994, "learning_rate": 1.683439217573804e-05, "loss": 1.8955, "step": 9831 }, { "epoch": 0.7400967274506483, "grad_norm": 4.702240467071533, "learning_rate": 1.682527019569923e-05, "loss": 2.1439, "step": 9832 }, { "epoch": 0.7401720017313085, "grad_norm": 3.8796756267547607, "learning_rate": 1.6816150187802925e-05, "loss": 1.9255, "step": 9833 }, { "epoch": 0.7402472760119686, "grad_norm": 4.2519073486328125, "learning_rate": 1.680703215259128e-05, "loss": 1.6422, "step": 9834 }, { "epoch": 0.7403225502926287, "grad_norm": 4.791501045227051, "learning_rate": 1.6797916090606326e-05, "loss": 2.0549, "step": 9835 }, { "epoch": 0.740397824573289, "grad_norm": 5.626745700836182, "learning_rate": 1.6788802002390008e-05, "loss": 2.0549, "step": 9836 }, { "epoch": 0.7404730988539491, "grad_norm": 4.230198383331299, "learning_rate": 1.677968988848411e-05, "loss": 1.8099, "step": 9837 }, { "epoch": 0.7405483731346092, "grad_norm": 4.774646282196045, "learning_rate": 1.677057974943033e-05, "loss": 1.9989, "step": 9838 }, { "epoch": 0.7406236474152694, "grad_norm": 4.388845920562744, "learning_rate": 1.6761471585770232e-05, "loss": 1.8398, "step": 9839 }, { "epoch": 0.7406989216959295, "grad_norm": 3.661468267440796, "learning_rate": 1.6752365398045295e-05, "loss": 2.0451, "step": 9840 }, { "epoch": 0.7407741959765897, "grad_norm": 4.158090114593506, "learning_rate": 1.674326118679681e-05, "loss": 1.8393, "step": 9841 }, { "epoch": 0.7408494702572499, "grad_norm": 4.40130090713501, "learning_rate": 1.6734158952566048e-05, "loss": 1.593, "step": 9842 }, { "epoch": 0.74092474453791, "grad_norm": 4.243234157562256, "learning_rate": 1.672505869589407e-05, "loss": 1.8338, "step": 9843 }, { "epoch": 0.7410000188185701, "grad_norm": 4.289233207702637, "learning_rate": 1.671596041732187e-05, "loss": 2.0266, "step": 9844 }, { "epoch": 0.7410752930992304, "grad_norm": 7.550484657287598, "learning_rate": 1.6706864117390326e-05, "loss": 1.896, "step": 9845 }, { "epoch": 0.7411505673798905, "grad_norm": 3.981187105178833, "learning_rate": 1.6697769796640196e-05, "loss": 1.1863, "step": 9846 }, { "epoch": 0.7412258416605506, "grad_norm": 5.1406097412109375, "learning_rate": 1.668867745561208e-05, "loss": 1.6656, "step": 9847 }, { "epoch": 0.7413011159412108, "grad_norm": 6.1259307861328125, "learning_rate": 1.6679587094846504e-05, "loss": 2.0484, "step": 9848 }, { "epoch": 0.741376390221871, "grad_norm": 3.8600540161132812, "learning_rate": 1.667049871488389e-05, "loss": 2.0193, "step": 9849 }, { "epoch": 0.7414516645025311, "grad_norm": 4.152215957641602, "learning_rate": 1.666141231626448e-05, "loss": 1.6299, "step": 9850 }, { "epoch": 0.7415269387831912, "grad_norm": 5.832450866699219, "learning_rate": 1.6652327899528458e-05, "loss": 1.6373, "step": 9851 }, { "epoch": 0.7416022130638514, "grad_norm": 4.742039680480957, "learning_rate": 1.6643245465215845e-05, "loss": 1.8856, "step": 9852 }, { "epoch": 0.7416774873445116, "grad_norm": 4.237407207489014, "learning_rate": 1.6634165013866592e-05, "loss": 1.8743, "step": 9853 }, { "epoch": 0.7417527616251717, "grad_norm": 7.049525737762451, "learning_rate": 1.662508654602048e-05, "loss": 1.8939, "step": 9854 }, { "epoch": 0.7418280359058319, "grad_norm": 5.102390766143799, "learning_rate": 1.6616010062217208e-05, "loss": 1.8585, "step": 9855 }, { "epoch": 0.741903310186492, "grad_norm": 6.493330001831055, "learning_rate": 1.6606935562996346e-05, "loss": 1.9909, "step": 9856 }, { "epoch": 0.7419785844671521, "grad_norm": 5.144820213317871, "learning_rate": 1.6597863048897367e-05, "loss": 1.8987, "step": 9857 }, { "epoch": 0.7420538587478124, "grad_norm": 4.827053546905518, "learning_rate": 1.658879252045957e-05, "loss": 1.7674, "step": 9858 }, { "epoch": 0.7421291330284725, "grad_norm": 4.613465785980225, "learning_rate": 1.6579723978222213e-05, "loss": 1.9365, "step": 9859 }, { "epoch": 0.7422044073091326, "grad_norm": 6.175017833709717, "learning_rate": 1.6570657422724345e-05, "loss": 1.8502, "step": 9860 }, { "epoch": 0.7422796815897929, "grad_norm": 4.701926231384277, "learning_rate": 1.6561592854504996e-05, "loss": 1.3468, "step": 9861 }, { "epoch": 0.742354955870453, "grad_norm": 4.537257671356201, "learning_rate": 1.655253027410299e-05, "loss": 2.1463, "step": 9862 }, { "epoch": 0.7424302301511131, "grad_norm": 3.98941707611084, "learning_rate": 1.6543469682057106e-05, "loss": 2.0973, "step": 9863 }, { "epoch": 0.7425055044317733, "grad_norm": 6.910943984985352, "learning_rate": 1.6534411078905932e-05, "loss": 2.0741, "step": 9864 }, { "epoch": 0.7425807787124334, "grad_norm": 4.984281063079834, "learning_rate": 1.6525354465187998e-05, "loss": 1.9611, "step": 9865 }, { "epoch": 0.7426560529930936, "grad_norm": 3.760453701019287, "learning_rate": 1.6516299841441707e-05, "loss": 1.7254, "step": 9866 }, { "epoch": 0.7427313272737538, "grad_norm": 4.243823051452637, "learning_rate": 1.6507247208205295e-05, "loss": 1.5871, "step": 9867 }, { "epoch": 0.7428066015544139, "grad_norm": 4.406725883483887, "learning_rate": 1.649819656601694e-05, "loss": 2.0743, "step": 9868 }, { "epoch": 0.742881875835074, "grad_norm": 5.711879253387451, "learning_rate": 1.6489147915414672e-05, "loss": 1.5666, "step": 9869 }, { "epoch": 0.7429571501157342, "grad_norm": 6.708004474639893, "learning_rate": 1.648010125693642e-05, "loss": 1.7104, "step": 9870 }, { "epoch": 0.7430324243963944, "grad_norm": 6.33036470413208, "learning_rate": 1.6471056591119955e-05, "loss": 1.7219, "step": 9871 }, { "epoch": 0.7431076986770545, "grad_norm": 4.447892665863037, "learning_rate": 1.6462013918502983e-05, "loss": 2.6098, "step": 9872 }, { "epoch": 0.7431829729577146, "grad_norm": 7.324742317199707, "learning_rate": 1.645297323962304e-05, "loss": 2.0948, "step": 9873 }, { "epoch": 0.7432582472383749, "grad_norm": 5.258800029754639, "learning_rate": 1.6443934555017593e-05, "loss": 2.0331, "step": 9874 }, { "epoch": 0.743333521519035, "grad_norm": 5.579085826873779, "learning_rate": 1.643489786522394e-05, "loss": 1.8244, "step": 9875 }, { "epoch": 0.7434087957996951, "grad_norm": 4.505311965942383, "learning_rate": 1.6425863170779315e-05, "loss": 1.9142, "step": 9876 }, { "epoch": 0.7434840700803553, "grad_norm": 6.410801410675049, "learning_rate": 1.641683047222078e-05, "loss": 1.7909, "step": 9877 }, { "epoch": 0.7435593443610155, "grad_norm": 4.629251480102539, "learning_rate": 1.6407799770085303e-05, "loss": 2.0321, "step": 9878 }, { "epoch": 0.7436346186416756, "grad_norm": 5.248072624206543, "learning_rate": 1.6398771064909744e-05, "loss": 1.7495, "step": 9879 }, { "epoch": 0.7437098929223358, "grad_norm": 4.81912088394165, "learning_rate": 1.6389744357230857e-05, "loss": 1.9599, "step": 9880 }, { "epoch": 0.7437851672029959, "grad_norm": 4.297432899475098, "learning_rate": 1.6380719647585202e-05, "loss": 1.7311, "step": 9881 }, { "epoch": 0.743860441483656, "grad_norm": 4.145941734313965, "learning_rate": 1.6371696936509307e-05, "loss": 1.8869, "step": 9882 }, { "epoch": 0.7439357157643163, "grad_norm": 3.571012020111084, "learning_rate": 1.6362676224539553e-05, "loss": 1.65, "step": 9883 }, { "epoch": 0.7440109900449764, "grad_norm": 3.5506057739257812, "learning_rate": 1.6353657512212168e-05, "loss": 1.8912, "step": 9884 }, { "epoch": 0.7440862643256365, "grad_norm": 5.5435686111450195, "learning_rate": 1.634464080006331e-05, "loss": 1.9438, "step": 9885 }, { "epoch": 0.7441615386062967, "grad_norm": 4.103124141693115, "learning_rate": 1.6335626088628982e-05, "loss": 1.4913, "step": 9886 }, { "epoch": 0.7442368128869569, "grad_norm": 4.7531914710998535, "learning_rate": 1.6326613378445094e-05, "loss": 1.8622, "step": 9887 }, { "epoch": 0.744312087167617, "grad_norm": 4.540632724761963, "learning_rate": 1.6317602670047412e-05, "loss": 1.7091, "step": 9888 }, { "epoch": 0.7443873614482771, "grad_norm": 3.917654037475586, "learning_rate": 1.63085939639716e-05, "loss": 1.7902, "step": 9889 }, { "epoch": 0.7444626357289373, "grad_norm": 4.325102806091309, "learning_rate": 1.6299587260753214e-05, "loss": 1.7013, "step": 9890 }, { "epoch": 0.7445379100095975, "grad_norm": 4.560401916503906, "learning_rate": 1.629058256092768e-05, "loss": 1.8116, "step": 9891 }, { "epoch": 0.7446131842902576, "grad_norm": 6.286881446838379, "learning_rate": 1.6281579865030272e-05, "loss": 1.635, "step": 9892 }, { "epoch": 0.7446884585709178, "grad_norm": 5.761804103851318, "learning_rate": 1.6272579173596207e-05, "loss": 1.866, "step": 9893 }, { "epoch": 0.7447637328515779, "grad_norm": 6.114563941955566, "learning_rate": 1.6263580487160523e-05, "loss": 1.892, "step": 9894 }, { "epoch": 0.744839007132238, "grad_norm": 4.459379196166992, "learning_rate": 1.6254583806258196e-05, "loss": 2.255, "step": 9895 }, { "epoch": 0.7449142814128983, "grad_norm": 3.4857254028320312, "learning_rate": 1.6245589131424015e-05, "loss": 1.5535, "step": 9896 }, { "epoch": 0.7449895556935584, "grad_norm": 5.383429050445557, "learning_rate": 1.6236596463192712e-05, "loss": 1.9829, "step": 9897 }, { "epoch": 0.7450648299742185, "grad_norm": 4.698866844177246, "learning_rate": 1.6227605802098883e-05, "loss": 2.0038, "step": 9898 }, { "epoch": 0.7451401042548788, "grad_norm": 5.783909320831299, "learning_rate": 1.6218617148676966e-05, "loss": 2.0986, "step": 9899 }, { "epoch": 0.7452153785355389, "grad_norm": 5.782463550567627, "learning_rate": 1.6209630503461337e-05, "loss": 1.537, "step": 9900 }, { "epoch": 0.745290652816199, "grad_norm": 5.257208347320557, "learning_rate": 1.620064586698622e-05, "loss": 2.3814, "step": 9901 }, { "epoch": 0.7453659270968592, "grad_norm": 5.572715759277344, "learning_rate": 1.619166323978574e-05, "loss": 1.9776, "step": 9902 }, { "epoch": 0.7454412013775193, "grad_norm": 6.007562160491943, "learning_rate": 1.618268262239385e-05, "loss": 2.3918, "step": 9903 }, { "epoch": 0.7455164756581795, "grad_norm": 5.239382743835449, "learning_rate": 1.6173704015344464e-05, "loss": 1.8424, "step": 9904 }, { "epoch": 0.7455917499388397, "grad_norm": 6.2795023918151855, "learning_rate": 1.6164727419171305e-05, "loss": 2.275, "step": 9905 }, { "epoch": 0.7456670242194998, "grad_norm": 4.568172931671143, "learning_rate": 1.6155752834408027e-05, "loss": 2.0041, "step": 9906 }, { "epoch": 0.7457422985001599, "grad_norm": 4.719252586364746, "learning_rate": 1.6146780261588123e-05, "loss": 1.9264, "step": 9907 }, { "epoch": 0.7458175727808201, "grad_norm": 5.133445739746094, "learning_rate": 1.613780970124501e-05, "loss": 1.6788, "step": 9908 }, { "epoch": 0.7458928470614803, "grad_norm": 5.094578742980957, "learning_rate": 1.6128841153911934e-05, "loss": 1.9862, "step": 9909 }, { "epoch": 0.7459681213421404, "grad_norm": 3.811624526977539, "learning_rate": 1.611987462012206e-05, "loss": 1.6791, "step": 9910 }, { "epoch": 0.7460433956228005, "grad_norm": 4.280270576477051, "learning_rate": 1.6110910100408427e-05, "loss": 2.0025, "step": 9911 }, { "epoch": 0.7461186699034608, "grad_norm": 4.275609493255615, "learning_rate": 1.610194759530397e-05, "loss": 1.5968, "step": 9912 }, { "epoch": 0.7461939441841209, "grad_norm": 4.728058815002441, "learning_rate": 1.6092987105341443e-05, "loss": 1.7052, "step": 9913 }, { "epoch": 0.746269218464781, "grad_norm": 5.2638373374938965, "learning_rate": 1.608402863105355e-05, "loss": 1.6152, "step": 9914 }, { "epoch": 0.7463444927454412, "grad_norm": 4.976005554199219, "learning_rate": 1.607507217297285e-05, "loss": 1.6614, "step": 9915 }, { "epoch": 0.7464197670261014, "grad_norm": 5.303394794464111, "learning_rate": 1.6066117731631754e-05, "loss": 2.3206, "step": 9916 }, { "epoch": 0.7464950413067615, "grad_norm": 5.240142822265625, "learning_rate": 1.60571653075626e-05, "loss": 1.8974, "step": 9917 }, { "epoch": 0.7465703155874217, "grad_norm": 5.302753925323486, "learning_rate": 1.6048214901297566e-05, "loss": 1.973, "step": 9918 }, { "epoch": 0.7466455898680818, "grad_norm": 4.430035591125488, "learning_rate": 1.6039266513368757e-05, "loss": 2.1102, "step": 9919 }, { "epoch": 0.746720864148742, "grad_norm": 5.703297138214111, "learning_rate": 1.6030320144308088e-05, "loss": 1.9649, "step": 9920 }, { "epoch": 0.7467961384294022, "grad_norm": 4.121432781219482, "learning_rate": 1.602137579464743e-05, "loss": 1.6882, "step": 9921 }, { "epoch": 0.7468714127100623, "grad_norm": 5.7158074378967285, "learning_rate": 1.6012433464918475e-05, "loss": 2.111, "step": 9922 }, { "epoch": 0.7469466869907224, "grad_norm": 3.5301871299743652, "learning_rate": 1.6003493155652825e-05, "loss": 1.8535, "step": 9923 }, { "epoch": 0.7470219612713827, "grad_norm": 5.5596489906311035, "learning_rate": 1.5994554867381967e-05, "loss": 2.0635, "step": 9924 }, { "epoch": 0.7470972355520428, "grad_norm": 4.481655120849609, "learning_rate": 1.5985618600637257e-05, "loss": 1.6222, "step": 9925 }, { "epoch": 0.7471725098327029, "grad_norm": 4.996507167816162, "learning_rate": 1.597668435594991e-05, "loss": 1.9297, "step": 9926 }, { "epoch": 0.747247784113363, "grad_norm": 4.631219387054443, "learning_rate": 1.5967752133851076e-05, "loss": 2.0041, "step": 9927 }, { "epoch": 0.7473230583940232, "grad_norm": 4.944079875946045, "learning_rate": 1.595882193487171e-05, "loss": 1.7581, "step": 9928 }, { "epoch": 0.7473983326746834, "grad_norm": 4.562366962432861, "learning_rate": 1.59498937595427e-05, "loss": 2.0134, "step": 9929 }, { "epoch": 0.7474736069553435, "grad_norm": 3.90766978263855, "learning_rate": 1.5940967608394823e-05, "loss": 1.7661, "step": 9930 }, { "epoch": 0.7475488812360037, "grad_norm": 4.513801097869873, "learning_rate": 1.593204348195868e-05, "loss": 2.156, "step": 9931 }, { "epoch": 0.7476241555166638, "grad_norm": 5.679186820983887, "learning_rate": 1.5923121380764818e-05, "loss": 1.7024, "step": 9932 }, { "epoch": 0.747699429797324, "grad_norm": 4.746894836425781, "learning_rate": 1.591420130534359e-05, "loss": 1.9402, "step": 9933 }, { "epoch": 0.7477747040779842, "grad_norm": 7.584643840789795, "learning_rate": 1.59052832562253e-05, "loss": 2.0297, "step": 9934 }, { "epoch": 0.7478499783586443, "grad_norm": 4.102123260498047, "learning_rate": 1.5896367233940084e-05, "loss": 1.7823, "step": 9935 }, { "epoch": 0.7479252526393044, "grad_norm": 4.667409896850586, "learning_rate": 1.5887453239018e-05, "loss": 2.0671, "step": 9936 }, { "epoch": 0.7480005269199647, "grad_norm": 4.708219051361084, "learning_rate": 1.587854127198893e-05, "loss": 1.9376, "step": 9937 }, { "epoch": 0.7480758012006248, "grad_norm": 6.039755821228027, "learning_rate": 1.5869631333382684e-05, "loss": 2.0222, "step": 9938 }, { "epoch": 0.7481510754812849, "grad_norm": 5.210358619689941, "learning_rate": 1.586072342372891e-05, "loss": 1.7184, "step": 9939 }, { "epoch": 0.7482263497619451, "grad_norm": 6.337641716003418, "learning_rate": 1.5851817543557196e-05, "loss": 2.1556, "step": 9940 }, { "epoch": 0.7483016240426053, "grad_norm": 4.71444845199585, "learning_rate": 1.5842913693396928e-05, "loss": 1.3112, "step": 9941 }, { "epoch": 0.7483768983232654, "grad_norm": 3.6353938579559326, "learning_rate": 1.5834011873777445e-05, "loss": 1.8482, "step": 9942 }, { "epoch": 0.7484521726039256, "grad_norm": 6.729156494140625, "learning_rate": 1.5825112085227904e-05, "loss": 1.8334, "step": 9943 }, { "epoch": 0.7485274468845857, "grad_norm": 5.832686901092529, "learning_rate": 1.5816214328277395e-05, "loss": 1.86, "step": 9944 }, { "epoch": 0.7486027211652458, "grad_norm": 4.744770526885986, "learning_rate": 1.5807318603454864e-05, "loss": 2.0968, "step": 9945 }, { "epoch": 0.7486779954459061, "grad_norm": 5.062871932983398, "learning_rate": 1.5798424911289128e-05, "loss": 1.6747, "step": 9946 }, { "epoch": 0.7487532697265662, "grad_norm": 4.214980602264404, "learning_rate": 1.5789533252308908e-05, "loss": 1.7126, "step": 9947 }, { "epoch": 0.7488285440072263, "grad_norm": 6.616235256195068, "learning_rate": 1.5780643627042763e-05, "loss": 1.7633, "step": 9948 }, { "epoch": 0.7489038182878864, "grad_norm": 4.031871795654297, "learning_rate": 1.5771756036019185e-05, "loss": 1.8728, "step": 9949 }, { "epoch": 0.7489790925685467, "grad_norm": 4.752917766571045, "learning_rate": 1.5762870479766484e-05, "loss": 1.7278, "step": 9950 }, { "epoch": 0.7490543668492068, "grad_norm": 3.939545154571533, "learning_rate": 1.575398695881291e-05, "loss": 1.8063, "step": 9951 }, { "epoch": 0.7491296411298669, "grad_norm": 4.217740535736084, "learning_rate": 1.5745105473686533e-05, "loss": 2.0681, "step": 9952 }, { "epoch": 0.7492049154105271, "grad_norm": 4.378330230712891, "learning_rate": 1.5736226024915364e-05, "loss": 1.6649, "step": 9953 }, { "epoch": 0.7492801896911873, "grad_norm": 3.8178975582122803, "learning_rate": 1.572734861302722e-05, "loss": 1.7891, "step": 9954 }, { "epoch": 0.7493554639718474, "grad_norm": 4.729191303253174, "learning_rate": 1.571847323854987e-05, "loss": 2.1144, "step": 9955 }, { "epoch": 0.7494307382525076, "grad_norm": 4.716762065887451, "learning_rate": 1.570959990201092e-05, "loss": 1.9679, "step": 9956 }, { "epoch": 0.7495060125331677, "grad_norm": 4.810666561126709, "learning_rate": 1.570072860393788e-05, "loss": 1.773, "step": 9957 }, { "epoch": 0.7495812868138279, "grad_norm": 4.066127300262451, "learning_rate": 1.5691859344858096e-05, "loss": 1.9217, "step": 9958 }, { "epoch": 0.7496565610944881, "grad_norm": 5.161827087402344, "learning_rate": 1.5682992125298847e-05, "loss": 2.239, "step": 9959 }, { "epoch": 0.7497318353751482, "grad_norm": 4.892882347106934, "learning_rate": 1.5674126945787232e-05, "loss": 2.1145, "step": 9960 }, { "epoch": 0.7498071096558083, "grad_norm": 4.724384307861328, "learning_rate": 1.5665263806850276e-05, "loss": 1.9097, "step": 9961 }, { "epoch": 0.7498823839364686, "grad_norm": 5.255415916442871, "learning_rate": 1.565640270901489e-05, "loss": 1.837, "step": 9962 }, { "epoch": 0.7499576582171287, "grad_norm": 6.142913818359375, "learning_rate": 1.5647543652807804e-05, "loss": 1.6247, "step": 9963 }, { "epoch": 0.7500329324977888, "grad_norm": 6.005850791931152, "learning_rate": 1.5638686638755696e-05, "loss": 1.6901, "step": 9964 }, { "epoch": 0.750108206778449, "grad_norm": 5.859500885009766, "learning_rate": 1.5629831667385054e-05, "loss": 1.7682, "step": 9965 }, { "epoch": 0.7501834810591091, "grad_norm": 6.2174577713012695, "learning_rate": 1.562097873922232e-05, "loss": 1.8863, "step": 9966 }, { "epoch": 0.7502587553397693, "grad_norm": 5.0533366203308105, "learning_rate": 1.5612127854793734e-05, "loss": 2.0477, "step": 9967 }, { "epoch": 0.7503340296204294, "grad_norm": 4.6626200675964355, "learning_rate": 1.5603279014625484e-05, "loss": 1.7795, "step": 9968 }, { "epoch": 0.7504093039010896, "grad_norm": 5.1897406578063965, "learning_rate": 1.5594432219243598e-05, "loss": 1.7437, "step": 9969 }, { "epoch": 0.7504845781817497, "grad_norm": 4.275720119476318, "learning_rate": 1.5585587469174012e-05, "loss": 1.5333, "step": 9970 }, { "epoch": 0.7505598524624099, "grad_norm": 4.640401363372803, "learning_rate": 1.557674476494249e-05, "loss": 1.6171, "step": 9971 }, { "epoch": 0.7506351267430701, "grad_norm": 5.083990573883057, "learning_rate": 1.556790410707473e-05, "loss": 1.7867, "step": 9972 }, { "epoch": 0.7507104010237302, "grad_norm": 4.3151373863220215, "learning_rate": 1.555906549609627e-05, "loss": 1.7202, "step": 9973 }, { "epoch": 0.7507856753043903, "grad_norm": 5.8796820640563965, "learning_rate": 1.555022893253255e-05, "loss": 1.7207, "step": 9974 }, { "epoch": 0.7508609495850506, "grad_norm": 4.458544731140137, "learning_rate": 1.554139441690886e-05, "loss": 1.7106, "step": 9975 }, { "epoch": 0.7509362238657107, "grad_norm": 4.028659343719482, "learning_rate": 1.5532561949750418e-05, "loss": 1.6165, "step": 9976 }, { "epoch": 0.7510114981463708, "grad_norm": 3.9202828407287598, "learning_rate": 1.552373153158225e-05, "loss": 1.9387, "step": 9977 }, { "epoch": 0.751086772427031, "grad_norm": 5.001904487609863, "learning_rate": 1.5514903162929323e-05, "loss": 2.3045, "step": 9978 }, { "epoch": 0.7511620467076912, "grad_norm": 5.902695655822754, "learning_rate": 1.5506076844316446e-05, "loss": 1.6151, "step": 9979 }, { "epoch": 0.7512373209883513, "grad_norm": 6.135552883148193, "learning_rate": 1.5497252576268336e-05, "loss": 1.7529, "step": 9980 }, { "epoch": 0.7513125952690115, "grad_norm": 3.423927068710327, "learning_rate": 1.5488430359309568e-05, "loss": 1.8646, "step": 9981 }, { "epoch": 0.7513878695496716, "grad_norm": 4.520468235015869, "learning_rate": 1.5479610193964582e-05, "loss": 1.5353, "step": 9982 }, { "epoch": 0.7514631438303317, "grad_norm": 4.159463405609131, "learning_rate": 1.5470792080757733e-05, "loss": 1.5023, "step": 9983 }, { "epoch": 0.751538418110992, "grad_norm": 4.193337917327881, "learning_rate": 1.5461976020213204e-05, "loss": 2.0218, "step": 9984 }, { "epoch": 0.7516136923916521, "grad_norm": 4.588779449462891, "learning_rate": 1.5453162012855117e-05, "loss": 1.5502, "step": 9985 }, { "epoch": 0.7516889666723122, "grad_norm": 6.364390850067139, "learning_rate": 1.54443500592074e-05, "loss": 1.7093, "step": 9986 }, { "epoch": 0.7517642409529723, "grad_norm": 5.398507595062256, "learning_rate": 1.5435540159793942e-05, "loss": 1.6269, "step": 9987 }, { "epoch": 0.7518395152336326, "grad_norm": 5.735795021057129, "learning_rate": 1.5426732315138425e-05, "loss": 1.9006, "step": 9988 }, { "epoch": 0.7519147895142927, "grad_norm": 5.758598327636719, "learning_rate": 1.5417926525764475e-05, "loss": 2.0928, "step": 9989 }, { "epoch": 0.7519900637949528, "grad_norm": 5.055922031402588, "learning_rate": 1.540912279219556e-05, "loss": 1.6823, "step": 9990 }, { "epoch": 0.752065338075613, "grad_norm": 6.7720818519592285, "learning_rate": 1.540032111495507e-05, "loss": 1.9203, "step": 9991 }, { "epoch": 0.7521406123562732, "grad_norm": 5.812138557434082, "learning_rate": 1.5391521494566184e-05, "loss": 1.7873, "step": 9992 }, { "epoch": 0.7522158866369333, "grad_norm": 5.028049945831299, "learning_rate": 1.5382723931552067e-05, "loss": 1.8459, "step": 9993 }, { "epoch": 0.7522911609175935, "grad_norm": 5.53501033782959, "learning_rate": 1.5373928426435663e-05, "loss": 1.7893, "step": 9994 }, { "epoch": 0.7523664351982536, "grad_norm": 3.7731287479400635, "learning_rate": 1.5365134979739858e-05, "loss": 1.9724, "step": 9995 }, { "epoch": 0.7524417094789138, "grad_norm": 5.036252498626709, "learning_rate": 1.5356343591987422e-05, "loss": 1.832, "step": 9996 }, { "epoch": 0.752516983759574, "grad_norm": 5.82282829284668, "learning_rate": 1.5347554263700936e-05, "loss": 2.0169, "step": 9997 }, { "epoch": 0.7525922580402341, "grad_norm": 6.087488651275635, "learning_rate": 1.533876699540294e-05, "loss": 1.9611, "step": 9998 }, { "epoch": 0.7526675323208942, "grad_norm": 4.167436122894287, "learning_rate": 1.532998178761577e-05, "loss": 1.8921, "step": 9999 }, { "epoch": 0.7527428066015545, "grad_norm": 4.24816370010376, "learning_rate": 1.5321198640861712e-05, "loss": 1.7124, "step": 10000 }, { "epoch": 0.7528180808822146, "grad_norm": 4.933469295501709, "learning_rate": 1.5312417555662882e-05, "loss": 1.65, "step": 10001 }, { "epoch": 0.7528933551628747, "grad_norm": 4.974427223205566, "learning_rate": 1.530363853254132e-05, "loss": 1.7783, "step": 10002 }, { "epoch": 0.7529686294435349, "grad_norm": 6.356522083282471, "learning_rate": 1.529486157201887e-05, "loss": 2.1736, "step": 10003 }, { "epoch": 0.753043903724195, "grad_norm": 6.629919528961182, "learning_rate": 1.528608667461734e-05, "loss": 2.1371, "step": 10004 }, { "epoch": 0.7531191780048552, "grad_norm": 5.3332343101501465, "learning_rate": 1.5277313840858337e-05, "loss": 1.5657, "step": 10005 }, { "epoch": 0.7531944522855153, "grad_norm": 7.4482808113098145, "learning_rate": 1.5268543071263408e-05, "loss": 1.9898, "step": 10006 }, { "epoch": 0.7532697265661755, "grad_norm": 6.14054012298584, "learning_rate": 1.5259774366353925e-05, "loss": 1.7172, "step": 10007 }, { "epoch": 0.7533450008468356, "grad_norm": 4.487518787384033, "learning_rate": 1.5251007726651184e-05, "loss": 1.7558, "step": 10008 }, { "epoch": 0.7534202751274958, "grad_norm": 4.30145263671875, "learning_rate": 1.5242243152676317e-05, "loss": 1.69, "step": 10009 }, { "epoch": 0.753495549408156, "grad_norm": 4.363803386688232, "learning_rate": 1.5233480644950365e-05, "loss": 1.669, "step": 10010 }, { "epoch": 0.7535708236888161, "grad_norm": 4.698695659637451, "learning_rate": 1.5224720203994231e-05, "loss": 1.905, "step": 10011 }, { "epoch": 0.7536460979694762, "grad_norm": 4.231746196746826, "learning_rate": 1.5215961830328695e-05, "loss": 1.9179, "step": 10012 }, { "epoch": 0.7537213722501365, "grad_norm": 5.356712341308594, "learning_rate": 1.5207205524474443e-05, "loss": 2.1216, "step": 10013 }, { "epoch": 0.7537966465307966, "grad_norm": 3.891530752182007, "learning_rate": 1.5198451286951975e-05, "loss": 1.9005, "step": 10014 }, { "epoch": 0.7538719208114567, "grad_norm": 4.404372215270996, "learning_rate": 1.5189699118281736e-05, "loss": 1.5977, "step": 10015 }, { "epoch": 0.7539471950921169, "grad_norm": 5.879538059234619, "learning_rate": 1.5180949018983987e-05, "loss": 1.8772, "step": 10016 }, { "epoch": 0.7540224693727771, "grad_norm": 4.035519599914551, "learning_rate": 1.5172200989578921e-05, "loss": 1.6174, "step": 10017 }, { "epoch": 0.7540977436534372, "grad_norm": 3.8961410522460938, "learning_rate": 1.5163455030586565e-05, "loss": 1.5893, "step": 10018 }, { "epoch": 0.7541730179340974, "grad_norm": 5.440550804138184, "learning_rate": 1.5154711142526867e-05, "loss": 1.4628, "step": 10019 }, { "epoch": 0.7542482922147575, "grad_norm": 6.2042436599731445, "learning_rate": 1.514596932591959e-05, "loss": 1.6468, "step": 10020 }, { "epoch": 0.7543235664954177, "grad_norm": 6.319267749786377, "learning_rate": 1.5137229581284445e-05, "loss": 1.4374, "step": 10021 }, { "epoch": 0.7543988407760779, "grad_norm": 4.681739807128906, "learning_rate": 1.5128491909140952e-05, "loss": 1.9577, "step": 10022 }, { "epoch": 0.754474115056738, "grad_norm": 5.44571590423584, "learning_rate": 1.511975631000856e-05, "loss": 1.7032, "step": 10023 }, { "epoch": 0.7545493893373981, "grad_norm": 6.700099468231201, "learning_rate": 1.5111022784406575e-05, "loss": 1.6449, "step": 10024 }, { "epoch": 0.7546246636180582, "grad_norm": 5.16166877746582, "learning_rate": 1.510229133285419e-05, "loss": 1.6617, "step": 10025 }, { "epoch": 0.7546999378987185, "grad_norm": 4.2618632316589355, "learning_rate": 1.5093561955870439e-05, "loss": 1.698, "step": 10026 }, { "epoch": 0.7547752121793786, "grad_norm": 5.441774845123291, "learning_rate": 1.5084834653974267e-05, "loss": 1.8413, "step": 10027 }, { "epoch": 0.7548504864600387, "grad_norm": 5.109415054321289, "learning_rate": 1.5076109427684515e-05, "loss": 1.7759, "step": 10028 }, { "epoch": 0.754925760740699, "grad_norm": 7.502496719360352, "learning_rate": 1.506738627751983e-05, "loss": 2.0623, "step": 10029 }, { "epoch": 0.7550010350213591, "grad_norm": 5.332769393920898, "learning_rate": 1.5058665203998812e-05, "loss": 2.0068, "step": 10030 }, { "epoch": 0.7550763093020192, "grad_norm": 4.8725504875183105, "learning_rate": 1.5049946207639875e-05, "loss": 1.9227, "step": 10031 }, { "epoch": 0.7551515835826794, "grad_norm": 4.100222587585449, "learning_rate": 1.5041229288961367e-05, "loss": 2.0567, "step": 10032 }, { "epoch": 0.7552268578633395, "grad_norm": 4.386989116668701, "learning_rate": 1.503251444848146e-05, "loss": 1.7172, "step": 10033 }, { "epoch": 0.7553021321439997, "grad_norm": 3.874253988265991, "learning_rate": 1.5023801686718231e-05, "loss": 1.6771, "step": 10034 }, { "epoch": 0.7553774064246599, "grad_norm": 5.213954925537109, "learning_rate": 1.5015091004189635e-05, "loss": 1.7787, "step": 10035 }, { "epoch": 0.75545268070532, "grad_norm": 5.724802017211914, "learning_rate": 1.500638240141351e-05, "loss": 1.9594, "step": 10036 }, { "epoch": 0.7555279549859801, "grad_norm": 5.363083362579346, "learning_rate": 1.499767587890753e-05, "loss": 2.1877, "step": 10037 }, { "epoch": 0.7556032292666404, "grad_norm": 4.180286407470703, "learning_rate": 1.4988971437189304e-05, "loss": 2.0091, "step": 10038 }, { "epoch": 0.7556785035473005, "grad_norm": 5.6729044914245605, "learning_rate": 1.4980269076776249e-05, "loss": 1.7128, "step": 10039 }, { "epoch": 0.7557537778279606, "grad_norm": 4.191105365753174, "learning_rate": 1.4971568798185731e-05, "loss": 1.7467, "step": 10040 }, { "epoch": 0.7558290521086208, "grad_norm": 8.155600547790527, "learning_rate": 1.4962870601934924e-05, "loss": 2.5319, "step": 10041 }, { "epoch": 0.755904326389281, "grad_norm": 4.792415142059326, "learning_rate": 1.4954174488540923e-05, "loss": 1.6589, "step": 10042 }, { "epoch": 0.7559796006699411, "grad_norm": 4.4061126708984375, "learning_rate": 1.4945480458520711e-05, "loss": 1.762, "step": 10043 }, { "epoch": 0.7560548749506013, "grad_norm": 5.4761857986450195, "learning_rate": 1.4936788512391087e-05, "loss": 1.7334, "step": 10044 }, { "epoch": 0.7561301492312614, "grad_norm": 5.121610164642334, "learning_rate": 1.4928098650668782e-05, "loss": 1.6016, "step": 10045 }, { "epoch": 0.7562054235119215, "grad_norm": 4.5385050773620605, "learning_rate": 1.4919410873870377e-05, "loss": 1.9712, "step": 10046 }, { "epoch": 0.7562806977925817, "grad_norm": 6.4989166259765625, "learning_rate": 1.4910725182512354e-05, "loss": 1.9882, "step": 10047 }, { "epoch": 0.7563559720732419, "grad_norm": 5.446986675262451, "learning_rate": 1.4902041577111014e-05, "loss": 1.7309, "step": 10048 }, { "epoch": 0.756431246353902, "grad_norm": 4.288657188415527, "learning_rate": 1.4893360058182621e-05, "loss": 1.8747, "step": 10049 }, { "epoch": 0.7565065206345621, "grad_norm": 4.878511428833008, "learning_rate": 1.4884680626243219e-05, "loss": 1.6474, "step": 10050 }, { "epoch": 0.7565817949152224, "grad_norm": 4.053687572479248, "learning_rate": 1.487600328180881e-05, "loss": 1.6522, "step": 10051 }, { "epoch": 0.7566570691958825, "grad_norm": 5.102494239807129, "learning_rate": 1.486732802539521e-05, "loss": 1.6672, "step": 10052 }, { "epoch": 0.7567323434765426, "grad_norm": 4.51108455657959, "learning_rate": 1.4858654857518173e-05, "loss": 1.7443, "step": 10053 }, { "epoch": 0.7568076177572028, "grad_norm": 4.4213337898254395, "learning_rate": 1.484998377869325e-05, "loss": 1.6428, "step": 10054 }, { "epoch": 0.756882892037863, "grad_norm": 4.611510753631592, "learning_rate": 1.484131478943594e-05, "loss": 1.8679, "step": 10055 }, { "epoch": 0.7569581663185231, "grad_norm": 5.248319149017334, "learning_rate": 1.483264789026158e-05, "loss": 1.7786, "step": 10056 }, { "epoch": 0.7570334405991833, "grad_norm": 4.49108362197876, "learning_rate": 1.4823983081685417e-05, "loss": 2.1219, "step": 10057 }, { "epoch": 0.7571087148798434, "grad_norm": 4.832563400268555, "learning_rate": 1.4815320364222507e-05, "loss": 1.6951, "step": 10058 }, { "epoch": 0.7571839891605036, "grad_norm": 8.878467559814453, "learning_rate": 1.4806659738387846e-05, "loss": 2.3644, "step": 10059 }, { "epoch": 0.7572592634411638, "grad_norm": 5.238609313964844, "learning_rate": 1.4798001204696305e-05, "loss": 2.0396, "step": 10060 }, { "epoch": 0.7573345377218239, "grad_norm": 4.765303611755371, "learning_rate": 1.4789344763662565e-05, "loss": 1.6369, "step": 10061 }, { "epoch": 0.757409812002484, "grad_norm": 5.658217430114746, "learning_rate": 1.4780690415801262e-05, "loss": 1.8445, "step": 10062 }, { "epoch": 0.7574850862831443, "grad_norm": 7.026732444763184, "learning_rate": 1.4772038161626845e-05, "loss": 1.5348, "step": 10063 }, { "epoch": 0.7575603605638044, "grad_norm": 5.15501070022583, "learning_rate": 1.47633880016537e-05, "loss": 1.8285, "step": 10064 }, { "epoch": 0.7576356348444645, "grad_norm": 5.152629375457764, "learning_rate": 1.4754739936396012e-05, "loss": 2.4846, "step": 10065 }, { "epoch": 0.7577109091251246, "grad_norm": 4.669248104095459, "learning_rate": 1.474609396636792e-05, "loss": 1.9223, "step": 10066 }, { "epoch": 0.7577861834057849, "grad_norm": 5.185349941253662, "learning_rate": 1.4737450092083371e-05, "loss": 1.8226, "step": 10067 }, { "epoch": 0.757861457686445, "grad_norm": 3.7794644832611084, "learning_rate": 1.4728808314056236e-05, "loss": 1.7302, "step": 10068 }, { "epoch": 0.7579367319671051, "grad_norm": 4.549095153808594, "learning_rate": 1.472016863280024e-05, "loss": 1.6542, "step": 10069 }, { "epoch": 0.7580120062477653, "grad_norm": 4.128381729125977, "learning_rate": 1.4711531048829008e-05, "loss": 1.851, "step": 10070 }, { "epoch": 0.7580872805284254, "grad_norm": 4.7417826652526855, "learning_rate": 1.4702895562655983e-05, "loss": 1.4187, "step": 10071 }, { "epoch": 0.7581625548090856, "grad_norm": 6.507777214050293, "learning_rate": 1.469426217479456e-05, "loss": 1.6092, "step": 10072 }, { "epoch": 0.7582378290897458, "grad_norm": 4.4352803230285645, "learning_rate": 1.4685630885757922e-05, "loss": 1.6687, "step": 10073 }, { "epoch": 0.7583131033704059, "grad_norm": 4.384361267089844, "learning_rate": 1.4677001696059206e-05, "loss": 1.8644, "step": 10074 }, { "epoch": 0.758388377651066, "grad_norm": 6.202516078948975, "learning_rate": 1.46683746062114e-05, "loss": 1.825, "step": 10075 }, { "epoch": 0.7584636519317263, "grad_norm": 4.29433012008667, "learning_rate": 1.465974961672733e-05, "loss": 1.9568, "step": 10076 }, { "epoch": 0.7585389262123864, "grad_norm": 7.799400329589844, "learning_rate": 1.4651126728119763e-05, "loss": 1.8897, "step": 10077 }, { "epoch": 0.7586142004930465, "grad_norm": 5.073342323303223, "learning_rate": 1.4642505940901263e-05, "loss": 2.2903, "step": 10078 }, { "epoch": 0.7586894747737067, "grad_norm": 6.123051166534424, "learning_rate": 1.4633887255584333e-05, "loss": 1.4637, "step": 10079 }, { "epoch": 0.7587647490543669, "grad_norm": 4.969133377075195, "learning_rate": 1.4625270672681329e-05, "loss": 1.7672, "step": 10080 }, { "epoch": 0.758840023335027, "grad_norm": 4.791050434112549, "learning_rate": 1.4616656192704503e-05, "loss": 2.0636, "step": 10081 }, { "epoch": 0.7589152976156872, "grad_norm": 4.908662796020508, "learning_rate": 1.4608043816165922e-05, "loss": 2.1467, "step": 10082 }, { "epoch": 0.7589905718963473, "grad_norm": 4.78682279586792, "learning_rate": 1.4599433543577595e-05, "loss": 1.7868, "step": 10083 }, { "epoch": 0.7590658461770075, "grad_norm": 4.780208110809326, "learning_rate": 1.4590825375451361e-05, "loss": 1.7034, "step": 10084 }, { "epoch": 0.7591411204576676, "grad_norm": 4.949016094207764, "learning_rate": 1.4582219312298967e-05, "loss": 1.9641, "step": 10085 }, { "epoch": 0.7592163947383278, "grad_norm": 4.622855186462402, "learning_rate": 1.4573615354631997e-05, "loss": 2.0881, "step": 10086 }, { "epoch": 0.7592916690189879, "grad_norm": 7.076879024505615, "learning_rate": 1.4565013502961961e-05, "loss": 1.7142, "step": 10087 }, { "epoch": 0.759366943299648, "grad_norm": 4.772395610809326, "learning_rate": 1.4556413757800185e-05, "loss": 1.7488, "step": 10088 }, { "epoch": 0.7594422175803083, "grad_norm": 4.258056640625, "learning_rate": 1.4547816119657909e-05, "loss": 2.0981, "step": 10089 }, { "epoch": 0.7595174918609684, "grad_norm": 4.140929222106934, "learning_rate": 1.4539220589046237e-05, "loss": 1.6252, "step": 10090 }, { "epoch": 0.7595927661416285, "grad_norm": 5.462441444396973, "learning_rate": 1.4530627166476157e-05, "loss": 1.6531, "step": 10091 }, { "epoch": 0.7596680404222887, "grad_norm": 6.058199405670166, "learning_rate": 1.4522035852458532e-05, "loss": 2.0438, "step": 10092 }, { "epoch": 0.7597433147029489, "grad_norm": 3.7801451683044434, "learning_rate": 1.4513446647504064e-05, "loss": 2.0075, "step": 10093 }, { "epoch": 0.759818588983609, "grad_norm": 4.6017231941223145, "learning_rate": 1.450485955212339e-05, "loss": 1.7479, "step": 10094 }, { "epoch": 0.7598938632642692, "grad_norm": 5.249778747558594, "learning_rate": 1.4496274566826945e-05, "loss": 1.9271, "step": 10095 }, { "epoch": 0.7599691375449293, "grad_norm": 3.9409539699554443, "learning_rate": 1.4487691692125126e-05, "loss": 1.5207, "step": 10096 }, { "epoch": 0.7600444118255895, "grad_norm": 6.422845363616943, "learning_rate": 1.4479110928528122e-05, "loss": 2.0724, "step": 10097 }, { "epoch": 0.7601196861062497, "grad_norm": 3.645026445388794, "learning_rate": 1.4470532276546073e-05, "loss": 1.8209, "step": 10098 }, { "epoch": 0.7601949603869098, "grad_norm": 5.043045520782471, "learning_rate": 1.4461955736688915e-05, "loss": 2.0898, "step": 10099 }, { "epoch": 0.7602702346675699, "grad_norm": 4.188684463500977, "learning_rate": 1.4453381309466519e-05, "loss": 1.6992, "step": 10100 }, { "epoch": 0.7603455089482302, "grad_norm": 4.888040065765381, "learning_rate": 1.444480899538861e-05, "loss": 2.0255, "step": 10101 }, { "epoch": 0.7604207832288903, "grad_norm": 4.585510730743408, "learning_rate": 1.4436238794964802e-05, "loss": 1.6398, "step": 10102 }, { "epoch": 0.7604960575095504, "grad_norm": 4.1049652099609375, "learning_rate": 1.4427670708704532e-05, "loss": 1.7256, "step": 10103 }, { "epoch": 0.7605713317902105, "grad_norm": 5.418903827667236, "learning_rate": 1.4419104737117195e-05, "loss": 1.933, "step": 10104 }, { "epoch": 0.7606466060708708, "grad_norm": 4.789968013763428, "learning_rate": 1.441054088071197e-05, "loss": 1.5497, "step": 10105 }, { "epoch": 0.7607218803515309, "grad_norm": 5.453304767608643, "learning_rate": 1.4401979139997968e-05, "loss": 1.9004, "step": 10106 }, { "epoch": 0.760797154632191, "grad_norm": 4.689150333404541, "learning_rate": 1.4393419515484186e-05, "loss": 1.8116, "step": 10107 }, { "epoch": 0.7608724289128512, "grad_norm": 4.344583511352539, "learning_rate": 1.4384862007679429e-05, "loss": 1.8466, "step": 10108 }, { "epoch": 0.7609477031935113, "grad_norm": 4.653151512145996, "learning_rate": 1.4376306617092445e-05, "loss": 2.0953, "step": 10109 }, { "epoch": 0.7610229774741715, "grad_norm": 5.969259738922119, "learning_rate": 1.4367753344231804e-05, "loss": 1.857, "step": 10110 }, { "epoch": 0.7610982517548317, "grad_norm": 3.999704599380493, "learning_rate": 1.4359202189605991e-05, "loss": 1.6641, "step": 10111 }, { "epoch": 0.7611735260354918, "grad_norm": 4.767504692077637, "learning_rate": 1.4350653153723337e-05, "loss": 1.9794, "step": 10112 }, { "epoch": 0.7612488003161519, "grad_norm": 8.0087251663208, "learning_rate": 1.4342106237092085e-05, "loss": 2.0896, "step": 10113 }, { "epoch": 0.7613240745968122, "grad_norm": 5.4577860832214355, "learning_rate": 1.4333561440220283e-05, "loss": 1.83, "step": 10114 }, { "epoch": 0.7613993488774723, "grad_norm": 4.4710211753845215, "learning_rate": 1.4325018763615933e-05, "loss": 2.0372, "step": 10115 }, { "epoch": 0.7614746231581324, "grad_norm": 7.323471546173096, "learning_rate": 1.4316478207786837e-05, "loss": 1.814, "step": 10116 }, { "epoch": 0.7615498974387926, "grad_norm": 4.940953254699707, "learning_rate": 1.4307939773240735e-05, "loss": 1.5845, "step": 10117 }, { "epoch": 0.7616251717194528, "grad_norm": 6.005489349365234, "learning_rate": 1.4299403460485195e-05, "loss": 1.705, "step": 10118 }, { "epoch": 0.7617004460001129, "grad_norm": 7.866281986236572, "learning_rate": 1.4290869270027696e-05, "loss": 2.193, "step": 10119 }, { "epoch": 0.7617757202807731, "grad_norm": 5.98714542388916, "learning_rate": 1.428233720237554e-05, "loss": 1.8539, "step": 10120 }, { "epoch": 0.7618509945614332, "grad_norm": 5.611191272735596, "learning_rate": 1.4273807258035964e-05, "loss": 1.9976, "step": 10121 }, { "epoch": 0.7619262688420934, "grad_norm": 6.04744291305542, "learning_rate": 1.4265279437516027e-05, "loss": 2.0185, "step": 10122 }, { "epoch": 0.7620015431227536, "grad_norm": 4.674132823944092, "learning_rate": 1.4256753741322692e-05, "loss": 1.8721, "step": 10123 }, { "epoch": 0.7620768174034137, "grad_norm": 3.6567792892456055, "learning_rate": 1.424823016996279e-05, "loss": 1.9501, "step": 10124 }, { "epoch": 0.7621520916840738, "grad_norm": 5.554230690002441, "learning_rate": 1.4239708723943018e-05, "loss": 1.7415, "step": 10125 }, { "epoch": 0.762227365964734, "grad_norm": 7.334959983825684, "learning_rate": 1.4231189403769967e-05, "loss": 1.6385, "step": 10126 }, { "epoch": 0.7623026402453942, "grad_norm": 6.000788688659668, "learning_rate": 1.4222672209950066e-05, "loss": 1.4593, "step": 10127 }, { "epoch": 0.7623779145260543, "grad_norm": 9.119935035705566, "learning_rate": 1.4214157142989654e-05, "loss": 1.7431, "step": 10128 }, { "epoch": 0.7624531888067144, "grad_norm": 4.112301349639893, "learning_rate": 1.4205644203394908e-05, "loss": 1.7218, "step": 10129 }, { "epoch": 0.7625284630873747, "grad_norm": 4.8725080490112305, "learning_rate": 1.4197133391671925e-05, "loss": 2.0033, "step": 10130 }, { "epoch": 0.7626037373680348, "grad_norm": 4.427544116973877, "learning_rate": 1.4188624708326609e-05, "loss": 1.7914, "step": 10131 }, { "epoch": 0.7626790116486949, "grad_norm": 3.9803240299224854, "learning_rate": 1.4180118153864818e-05, "loss": 2.1275, "step": 10132 }, { "epoch": 0.7627542859293551, "grad_norm": 4.8816375732421875, "learning_rate": 1.4171613728792215e-05, "loss": 1.7891, "step": 10133 }, { "epoch": 0.7628295602100152, "grad_norm": 4.174935817718506, "learning_rate": 1.4163111433614369e-05, "loss": 1.513, "step": 10134 }, { "epoch": 0.7629048344906754, "grad_norm": 4.618239879608154, "learning_rate": 1.4154611268836714e-05, "loss": 2.1072, "step": 10135 }, { "epoch": 0.7629801087713356, "grad_norm": 4.536387920379639, "learning_rate": 1.4146113234964592e-05, "loss": 2.0055, "step": 10136 }, { "epoch": 0.7630553830519957, "grad_norm": 5.468199253082275, "learning_rate": 1.4137617332503144e-05, "loss": 1.9225, "step": 10137 }, { "epoch": 0.7631306573326558, "grad_norm": 5.075319290161133, "learning_rate": 1.4129123561957457e-05, "loss": 1.94, "step": 10138 }, { "epoch": 0.7632059316133161, "grad_norm": 4.103004455566406, "learning_rate": 1.4120631923832433e-05, "loss": 1.9509, "step": 10139 }, { "epoch": 0.7632812058939762, "grad_norm": 5.256185054779053, "learning_rate": 1.4112142418632895e-05, "loss": 1.8984, "step": 10140 }, { "epoch": 0.7633564801746363, "grad_norm": 4.984541893005371, "learning_rate": 1.4103655046863535e-05, "loss": 1.729, "step": 10141 }, { "epoch": 0.7634317544552965, "grad_norm": 4.6674041748046875, "learning_rate": 1.4095169809028863e-05, "loss": 1.7435, "step": 10142 }, { "epoch": 0.7635070287359567, "grad_norm": 4.191567897796631, "learning_rate": 1.408668670563334e-05, "loss": 2.0525, "step": 10143 }, { "epoch": 0.7635823030166168, "grad_norm": 5.630195617675781, "learning_rate": 1.4078205737181233e-05, "loss": 1.486, "step": 10144 }, { "epoch": 0.7636575772972769, "grad_norm": 5.826077938079834, "learning_rate": 1.4069726904176722e-05, "loss": 1.9778, "step": 10145 }, { "epoch": 0.7637328515779371, "grad_norm": 4.659607410430908, "learning_rate": 1.4061250207123849e-05, "loss": 1.4408, "step": 10146 }, { "epoch": 0.7638081258585973, "grad_norm": 5.411288738250732, "learning_rate": 1.405277564652655e-05, "loss": 1.6467, "step": 10147 }, { "epoch": 0.7638834001392574, "grad_norm": 5.05604362487793, "learning_rate": 1.4044303222888577e-05, "loss": 1.9486, "step": 10148 }, { "epoch": 0.7639586744199176, "grad_norm": 4.694107532501221, "learning_rate": 1.4035832936713622e-05, "loss": 1.9845, "step": 10149 }, { "epoch": 0.7640339487005777, "grad_norm": 4.309806823730469, "learning_rate": 1.4027364788505199e-05, "loss": 1.354, "step": 10150 }, { "epoch": 0.7641092229812378, "grad_norm": 5.2016377449035645, "learning_rate": 1.4018898778766732e-05, "loss": 1.9064, "step": 10151 }, { "epoch": 0.7641844972618981, "grad_norm": 6.1968674659729, "learning_rate": 1.4010434908001474e-05, "loss": 1.6488, "step": 10152 }, { "epoch": 0.7642597715425582, "grad_norm": 4.028750419616699, "learning_rate": 1.4001973176712613e-05, "loss": 1.7321, "step": 10153 }, { "epoch": 0.7643350458232183, "grad_norm": 5.01830530166626, "learning_rate": 1.399351358540314e-05, "loss": 2.1989, "step": 10154 }, { "epoch": 0.7644103201038785, "grad_norm": 5.428408145904541, "learning_rate": 1.3985056134575975e-05, "loss": 1.9105, "step": 10155 }, { "epoch": 0.7644855943845387, "grad_norm": 5.1468400955200195, "learning_rate": 1.3976600824733876e-05, "loss": 1.8674, "step": 10156 }, { "epoch": 0.7645608686651988, "grad_norm": 7.597207069396973, "learning_rate": 1.3968147656379498e-05, "loss": 1.9232, "step": 10157 }, { "epoch": 0.764636142945859, "grad_norm": 4.367791175842285, "learning_rate": 1.3959696630015373e-05, "loss": 1.8208, "step": 10158 }, { "epoch": 0.7647114172265191, "grad_norm": 4.914565563201904, "learning_rate": 1.3951247746143859e-05, "loss": 1.9465, "step": 10159 }, { "epoch": 0.7647866915071793, "grad_norm": 4.326786041259766, "learning_rate": 1.3942801005267241e-05, "loss": 1.8901, "step": 10160 }, { "epoch": 0.7648619657878395, "grad_norm": 6.845241546630859, "learning_rate": 1.3934356407887633e-05, "loss": 1.753, "step": 10161 }, { "epoch": 0.7649372400684996, "grad_norm": 6.695394039154053, "learning_rate": 1.3925913954507063e-05, "loss": 1.9097, "step": 10162 }, { "epoch": 0.7650125143491597, "grad_norm": 4.475179672241211, "learning_rate": 1.3917473645627387e-05, "loss": 1.7474, "step": 10163 }, { "epoch": 0.7650877886298199, "grad_norm": 4.231069087982178, "learning_rate": 1.3909035481750388e-05, "loss": 2.0426, "step": 10164 }, { "epoch": 0.7651630629104801, "grad_norm": 4.919641017913818, "learning_rate": 1.3900599463377655e-05, "loss": 1.9045, "step": 10165 }, { "epoch": 0.7652383371911402, "grad_norm": 3.9251348972320557, "learning_rate": 1.3892165591010703e-05, "loss": 2.0674, "step": 10166 }, { "epoch": 0.7653136114718003, "grad_norm": 6.994894981384277, "learning_rate": 1.3883733865150917e-05, "loss": 1.73, "step": 10167 }, { "epoch": 0.7653888857524606, "grad_norm": 4.049494743347168, "learning_rate": 1.3875304286299511e-05, "loss": 1.903, "step": 10168 }, { "epoch": 0.7654641600331207, "grad_norm": 6.0920796394348145, "learning_rate": 1.3866876854957612e-05, "loss": 1.9131, "step": 10169 }, { "epoch": 0.7655394343137808, "grad_norm": 4.842472553253174, "learning_rate": 1.385845157162622e-05, "loss": 2.3039, "step": 10170 }, { "epoch": 0.765614708594441, "grad_norm": 4.513106822967529, "learning_rate": 1.3850028436806163e-05, "loss": 2.2758, "step": 10171 }, { "epoch": 0.7656899828751011, "grad_norm": 4.150333881378174, "learning_rate": 1.384160745099819e-05, "loss": 1.8097, "step": 10172 }, { "epoch": 0.7657652571557613, "grad_norm": 4.194115161895752, "learning_rate": 1.3833188614702918e-05, "loss": 1.8377, "step": 10173 }, { "epoch": 0.7658405314364215, "grad_norm": 4.844198226928711, "learning_rate": 1.3824771928420798e-05, "loss": 1.8957, "step": 10174 }, { "epoch": 0.7659158057170816, "grad_norm": 5.60052490234375, "learning_rate": 1.3816357392652202e-05, "loss": 2.0665, "step": 10175 }, { "epoch": 0.7659910799977417, "grad_norm": 4.106018543243408, "learning_rate": 1.3807945007897315e-05, "loss": 1.663, "step": 10176 }, { "epoch": 0.766066354278402, "grad_norm": 5.395410537719727, "learning_rate": 1.3799534774656276e-05, "loss": 2.1665, "step": 10177 }, { "epoch": 0.7661416285590621, "grad_norm": 4.695087909698486, "learning_rate": 1.3791126693429001e-05, "loss": 1.9174, "step": 10178 }, { "epoch": 0.7662169028397222, "grad_norm": 5.752013683319092, "learning_rate": 1.3782720764715357e-05, "loss": 1.7325, "step": 10179 }, { "epoch": 0.7662921771203824, "grad_norm": 4.553603172302246, "learning_rate": 1.3774316989015034e-05, "loss": 1.9477, "step": 10180 }, { "epoch": 0.7663674514010426, "grad_norm": 4.580021858215332, "learning_rate": 1.3765915366827648e-05, "loss": 1.6122, "step": 10181 }, { "epoch": 0.7664427256817027, "grad_norm": 5.4661712646484375, "learning_rate": 1.3757515898652607e-05, "loss": 1.7131, "step": 10182 }, { "epoch": 0.7665179999623628, "grad_norm": 4.642796516418457, "learning_rate": 1.3749118584989267e-05, "loss": 1.7412, "step": 10183 }, { "epoch": 0.766593274243023, "grad_norm": 4.853013515472412, "learning_rate": 1.3740723426336799e-05, "loss": 1.6331, "step": 10184 }, { "epoch": 0.7666685485236832, "grad_norm": 4.144401550292969, "learning_rate": 1.37323304231943e-05, "loss": 1.8196, "step": 10185 }, { "epoch": 0.7667438228043433, "grad_norm": 5.098297119140625, "learning_rate": 1.372393957606068e-05, "loss": 1.8074, "step": 10186 }, { "epoch": 0.7668190970850035, "grad_norm": 4.6307220458984375, "learning_rate": 1.3715550885434758e-05, "loss": 1.5264, "step": 10187 }, { "epoch": 0.7668943713656636, "grad_norm": 4.551353931427002, "learning_rate": 1.3707164351815244e-05, "loss": 1.6491, "step": 10188 }, { "epoch": 0.7669696456463237, "grad_norm": 5.007851600646973, "learning_rate": 1.369877997570066e-05, "loss": 1.6406, "step": 10189 }, { "epoch": 0.767044919926984, "grad_norm": 3.551795482635498, "learning_rate": 1.369039775758944e-05, "loss": 1.7617, "step": 10190 }, { "epoch": 0.7671201942076441, "grad_norm": 4.31390905380249, "learning_rate": 1.3682017697979899e-05, "loss": 1.7804, "step": 10191 }, { "epoch": 0.7671954684883042, "grad_norm": 4.623659610748291, "learning_rate": 1.3673639797370202e-05, "loss": 1.5581, "step": 10192 }, { "epoch": 0.7672707427689645, "grad_norm": 5.457238674163818, "learning_rate": 1.3665264056258381e-05, "loss": 1.5742, "step": 10193 }, { "epoch": 0.7673460170496246, "grad_norm": 4.827406883239746, "learning_rate": 1.3656890475142365e-05, "loss": 1.8533, "step": 10194 }, { "epoch": 0.7674212913302847, "grad_norm": 3.542780876159668, "learning_rate": 1.3648519054519921e-05, "loss": 1.7031, "step": 10195 }, { "epoch": 0.7674965656109449, "grad_norm": 5.623861789703369, "learning_rate": 1.3640149794888724e-05, "loss": 1.5998, "step": 10196 }, { "epoch": 0.767571839891605, "grad_norm": 5.4970879554748535, "learning_rate": 1.3631782696746281e-05, "loss": 1.8656, "step": 10197 }, { "epoch": 0.7676471141722652, "grad_norm": 6.528900623321533, "learning_rate": 1.3623417760590019e-05, "loss": 1.859, "step": 10198 }, { "epoch": 0.7677223884529254, "grad_norm": 5.136155128479004, "learning_rate": 1.3615054986917181e-05, "loss": 1.8869, "step": 10199 }, { "epoch": 0.7677976627335855, "grad_norm": 4.395875453948975, "learning_rate": 1.3606694376224927e-05, "loss": 1.4866, "step": 10200 }, { "epoch": 0.7678729370142456, "grad_norm": 5.447878360748291, "learning_rate": 1.3598335929010264e-05, "loss": 1.9389, "step": 10201 }, { "epoch": 0.7679482112949058, "grad_norm": 4.584583282470703, "learning_rate": 1.3589979645770095e-05, "loss": 1.672, "step": 10202 }, { "epoch": 0.768023485575566, "grad_norm": 3.8034114837646484, "learning_rate": 1.3581625527001152e-05, "loss": 1.7425, "step": 10203 }, { "epoch": 0.7680987598562261, "grad_norm": 5.853212356567383, "learning_rate": 1.3573273573200074e-05, "loss": 2.0961, "step": 10204 }, { "epoch": 0.7681740341368862, "grad_norm": 4.55974817276001, "learning_rate": 1.3564923784863381e-05, "loss": 1.535, "step": 10205 }, { "epoch": 0.7682493084175465, "grad_norm": 4.903714656829834, "learning_rate": 1.3556576162487405e-05, "loss": 1.8728, "step": 10206 }, { "epoch": 0.7683245826982066, "grad_norm": 4.199012279510498, "learning_rate": 1.3548230706568427e-05, "loss": 1.7993, "step": 10207 }, { "epoch": 0.7683998569788667, "grad_norm": 5.503488063812256, "learning_rate": 1.3539887417602525e-05, "loss": 1.962, "step": 10208 }, { "epoch": 0.7684751312595269, "grad_norm": 4.729506492614746, "learning_rate": 1.3531546296085718e-05, "loss": 1.5379, "step": 10209 }, { "epoch": 0.768550405540187, "grad_norm": 4.008634567260742, "learning_rate": 1.3523207342513827e-05, "loss": 1.68, "step": 10210 }, { "epoch": 0.7686256798208472, "grad_norm": 5.311148166656494, "learning_rate": 1.3514870557382592e-05, "loss": 1.9655, "step": 10211 }, { "epoch": 0.7687009541015074, "grad_norm": 5.845650672912598, "learning_rate": 1.350653594118762e-05, "loss": 1.8099, "step": 10212 }, { "epoch": 0.7687762283821675, "grad_norm": 4.755159378051758, "learning_rate": 1.3498203494424389e-05, "loss": 1.6694, "step": 10213 }, { "epoch": 0.7688515026628276, "grad_norm": 4.569772243499756, "learning_rate": 1.3489873217588206e-05, "loss": 1.4385, "step": 10214 }, { "epoch": 0.7689267769434879, "grad_norm": 6.986860275268555, "learning_rate": 1.348154511117432e-05, "loss": 1.6177, "step": 10215 }, { "epoch": 0.769002051224148, "grad_norm": 4.347231864929199, "learning_rate": 1.3473219175677776e-05, "loss": 1.4043, "step": 10216 }, { "epoch": 0.7690773255048081, "grad_norm": 6.113827705383301, "learning_rate": 1.3464895411593565e-05, "loss": 2.1064, "step": 10217 }, { "epoch": 0.7691525997854683, "grad_norm": 6.7090911865234375, "learning_rate": 1.3456573819416468e-05, "loss": 2.2535, "step": 10218 }, { "epoch": 0.7692278740661285, "grad_norm": 4.872086048126221, "learning_rate": 1.3448254399641207e-05, "loss": 1.6807, "step": 10219 }, { "epoch": 0.7693031483467886, "grad_norm": 5.076318740844727, "learning_rate": 1.3439937152762361e-05, "loss": 2.1116, "step": 10220 }, { "epoch": 0.7693784226274488, "grad_norm": 6.067685127258301, "learning_rate": 1.343162207927433e-05, "loss": 1.7678, "step": 10221 }, { "epoch": 0.7694536969081089, "grad_norm": 6.3673787117004395, "learning_rate": 1.3423309179671457e-05, "loss": 1.6488, "step": 10222 }, { "epoch": 0.7695289711887691, "grad_norm": 5.070117473602295, "learning_rate": 1.3414998454447886e-05, "loss": 2.1144, "step": 10223 }, { "epoch": 0.7696042454694292, "grad_norm": 4.137439250946045, "learning_rate": 1.340668990409768e-05, "loss": 1.8164, "step": 10224 }, { "epoch": 0.7696795197500894, "grad_norm": 4.388188362121582, "learning_rate": 1.3398383529114766e-05, "loss": 1.7565, "step": 10225 }, { "epoch": 0.7697547940307495, "grad_norm": 7.138183116912842, "learning_rate": 1.3390079329992943e-05, "loss": 2.1584, "step": 10226 }, { "epoch": 0.7698300683114097, "grad_norm": 5.42036247253418, "learning_rate": 1.338177730722584e-05, "loss": 1.7831, "step": 10227 }, { "epoch": 0.7699053425920699, "grad_norm": 6.899005889892578, "learning_rate": 1.3373477461307027e-05, "loss": 1.511, "step": 10228 }, { "epoch": 0.76998061687273, "grad_norm": 4.203088760375977, "learning_rate": 1.3365179792729871e-05, "loss": 1.5412, "step": 10229 }, { "epoch": 0.7700558911533901, "grad_norm": 5.174337863922119, "learning_rate": 1.3356884301987671e-05, "loss": 2.1092, "step": 10230 }, { "epoch": 0.7701311654340504, "grad_norm": 4.662639141082764, "learning_rate": 1.3348590989573544e-05, "loss": 1.5012, "step": 10231 }, { "epoch": 0.7702064397147105, "grad_norm": 5.123978614807129, "learning_rate": 1.3340299855980531e-05, "loss": 1.9079, "step": 10232 }, { "epoch": 0.7702817139953706, "grad_norm": 3.7078208923339844, "learning_rate": 1.3332010901701492e-05, "loss": 1.8383, "step": 10233 }, { "epoch": 0.7703569882760308, "grad_norm": 5.786520957946777, "learning_rate": 1.3323724127229192e-05, "loss": 1.8397, "step": 10234 }, { "epoch": 0.770432262556691, "grad_norm": 5.370747089385986, "learning_rate": 1.3315439533056251e-05, "loss": 1.9631, "step": 10235 }, { "epoch": 0.7705075368373511, "grad_norm": 5.924793720245361, "learning_rate": 1.3307157119675179e-05, "loss": 2.1299, "step": 10236 }, { "epoch": 0.7705828111180113, "grad_norm": 4.776413917541504, "learning_rate": 1.3298876887578338e-05, "loss": 1.9125, "step": 10237 }, { "epoch": 0.7706580853986714, "grad_norm": 5.023410797119141, "learning_rate": 1.329059883725795e-05, "loss": 1.8723, "step": 10238 }, { "epoch": 0.7707333596793315, "grad_norm": 4.753942966461182, "learning_rate": 1.3282322969206145e-05, "loss": 1.9613, "step": 10239 }, { "epoch": 0.7708086339599918, "grad_norm": 4.498771667480469, "learning_rate": 1.3274049283914869e-05, "loss": 2.2984, "step": 10240 }, { "epoch": 0.7708839082406519, "grad_norm": 7.124361038208008, "learning_rate": 1.3265777781875998e-05, "loss": 2.0021, "step": 10241 }, { "epoch": 0.770959182521312, "grad_norm": 3.7062511444091797, "learning_rate": 1.325750846358122e-05, "loss": 1.8647, "step": 10242 }, { "epoch": 0.7710344568019721, "grad_norm": 4.498233795166016, "learning_rate": 1.3249241329522155e-05, "loss": 1.6721, "step": 10243 }, { "epoch": 0.7711097310826324, "grad_norm": 6.511932373046875, "learning_rate": 1.3240976380190229e-05, "loss": 2.442, "step": 10244 }, { "epoch": 0.7711850053632925, "grad_norm": 6.102153778076172, "learning_rate": 1.323271361607678e-05, "loss": 1.7448, "step": 10245 }, { "epoch": 0.7712602796439526, "grad_norm": 4.479430198669434, "learning_rate": 1.3224453037673012e-05, "loss": 2.1887, "step": 10246 }, { "epoch": 0.7713355539246128, "grad_norm": 5.22567081451416, "learning_rate": 1.3216194645470003e-05, "loss": 1.6235, "step": 10247 }, { "epoch": 0.771410828205273, "grad_norm": 6.178092002868652, "learning_rate": 1.3207938439958667e-05, "loss": 1.5667, "step": 10248 }, { "epoch": 0.7714861024859331, "grad_norm": 6.74956750869751, "learning_rate": 1.3199684421629833e-05, "loss": 1.6232, "step": 10249 }, { "epoch": 0.7715613767665933, "grad_norm": 4.291245937347412, "learning_rate": 1.3191432590974157e-05, "loss": 1.8429, "step": 10250 }, { "epoch": 0.7716366510472534, "grad_norm": 4.1864495277404785, "learning_rate": 1.3183182948482209e-05, "loss": 1.7356, "step": 10251 }, { "epoch": 0.7717119253279136, "grad_norm": 8.126055717468262, "learning_rate": 1.3174935494644385e-05, "loss": 2.3598, "step": 10252 }, { "epoch": 0.7717871996085738, "grad_norm": 4.174500942230225, "learning_rate": 1.3166690229950978e-05, "loss": 2.4416, "step": 10253 }, { "epoch": 0.7718624738892339, "grad_norm": 5.007815837860107, "learning_rate": 1.3158447154892168e-05, "loss": 1.9911, "step": 10254 }, { "epoch": 0.771937748169894, "grad_norm": 4.2170023918151855, "learning_rate": 1.3150206269957948e-05, "loss": 1.938, "step": 10255 }, { "epoch": 0.7720130224505543, "grad_norm": 4.838845252990723, "learning_rate": 1.3141967575638231e-05, "loss": 2.2108, "step": 10256 }, { "epoch": 0.7720882967312144, "grad_norm": 5.322053909301758, "learning_rate": 1.313373107242279e-05, "loss": 1.744, "step": 10257 }, { "epoch": 0.7721635710118745, "grad_norm": 4.062689781188965, "learning_rate": 1.3125496760801265e-05, "loss": 1.8338, "step": 10258 }, { "epoch": 0.7722388452925347, "grad_norm": 6.558047294616699, "learning_rate": 1.3117264641263139e-05, "loss": 1.6632, "step": 10259 }, { "epoch": 0.7723141195731948, "grad_norm": 5.126962661743164, "learning_rate": 1.3109034714297813e-05, "loss": 1.578, "step": 10260 }, { "epoch": 0.772389393853855, "grad_norm": 4.497790336608887, "learning_rate": 1.3100806980394508e-05, "loss": 1.838, "step": 10261 }, { "epoch": 0.7724646681345151, "grad_norm": 4.56712532043457, "learning_rate": 1.3092581440042368e-05, "loss": 1.5657, "step": 10262 }, { "epoch": 0.7725399424151753, "grad_norm": 5.281686782836914, "learning_rate": 1.3084358093730348e-05, "loss": 1.8436, "step": 10263 }, { "epoch": 0.7726152166958354, "grad_norm": 4.230472564697266, "learning_rate": 1.3076136941947331e-05, "loss": 1.5697, "step": 10264 }, { "epoch": 0.7726904909764956, "grad_norm": 4.151181697845459, "learning_rate": 1.3067917985182016e-05, "loss": 1.9169, "step": 10265 }, { "epoch": 0.7727657652571558, "grad_norm": 4.191083908081055, "learning_rate": 1.3059701223923e-05, "loss": 2.2028, "step": 10266 }, { "epoch": 0.7728410395378159, "grad_norm": 5.646610736846924, "learning_rate": 1.3051486658658756e-05, "loss": 2.0996, "step": 10267 }, { "epoch": 0.772916313818476, "grad_norm": 5.301667213439941, "learning_rate": 1.3043274289877627e-05, "loss": 1.7049, "step": 10268 }, { "epoch": 0.7729915880991363, "grad_norm": 6.5089240074157715, "learning_rate": 1.3035064118067785e-05, "loss": 1.537, "step": 10269 }, { "epoch": 0.7730668623797964, "grad_norm": 4.733782768249512, "learning_rate": 1.3026856143717314e-05, "loss": 1.9638, "step": 10270 }, { "epoch": 0.7731421366604565, "grad_norm": 4.956386089324951, "learning_rate": 1.3018650367314179e-05, "loss": 1.643, "step": 10271 }, { "epoch": 0.7732174109411167, "grad_norm": 4.07127046585083, "learning_rate": 1.3010446789346149e-05, "loss": 2.0078, "step": 10272 }, { "epoch": 0.7732926852217769, "grad_norm": 4.3423919677734375, "learning_rate": 1.3002245410300939e-05, "loss": 1.675, "step": 10273 }, { "epoch": 0.773367959502437, "grad_norm": 3.9609344005584717, "learning_rate": 1.2994046230666068e-05, "loss": 2.0238, "step": 10274 }, { "epoch": 0.7734432337830972, "grad_norm": 4.841676235198975, "learning_rate": 1.2985849250928978e-05, "loss": 1.8763, "step": 10275 }, { "epoch": 0.7735185080637573, "grad_norm": 7.237201690673828, "learning_rate": 1.2977654471576929e-05, "loss": 1.8126, "step": 10276 }, { "epoch": 0.7735937823444174, "grad_norm": 5.282929420471191, "learning_rate": 1.2969461893097113e-05, "loss": 2.0821, "step": 10277 }, { "epoch": 0.7736690566250777, "grad_norm": 4.099306583404541, "learning_rate": 1.2961271515976515e-05, "loss": 1.6924, "step": 10278 }, { "epoch": 0.7737443309057378, "grad_norm": 4.538743019104004, "learning_rate": 1.2953083340702049e-05, "loss": 2.0157, "step": 10279 }, { "epoch": 0.7738196051863979, "grad_norm": 4.927515506744385, "learning_rate": 1.2944897367760478e-05, "loss": 1.6174, "step": 10280 }, { "epoch": 0.773894879467058, "grad_norm": 6.47377347946167, "learning_rate": 1.2936713597638456e-05, "loss": 2.0051, "step": 10281 }, { "epoch": 0.7739701537477183, "grad_norm": 4.01516580581665, "learning_rate": 1.2928532030822444e-05, "loss": 1.7607, "step": 10282 }, { "epoch": 0.7740454280283784, "grad_norm": 4.944225311279297, "learning_rate": 1.2920352667798852e-05, "loss": 1.7969, "step": 10283 }, { "epoch": 0.7741207023090385, "grad_norm": 4.958349227905273, "learning_rate": 1.291217550905388e-05, "loss": 1.919, "step": 10284 }, { "epoch": 0.7741959765896987, "grad_norm": 6.7434186935424805, "learning_rate": 1.2904000555073664e-05, "loss": 1.7989, "step": 10285 }, { "epoch": 0.7742712508703589, "grad_norm": 4.951135635375977, "learning_rate": 1.2895827806344185e-05, "loss": 1.8776, "step": 10286 }, { "epoch": 0.774346525151019, "grad_norm": 4.431406497955322, "learning_rate": 1.2887657263351265e-05, "loss": 1.8321, "step": 10287 }, { "epoch": 0.7744217994316792, "grad_norm": 5.4372334480285645, "learning_rate": 1.2879488926580647e-05, "loss": 2.1528, "step": 10288 }, { "epoch": 0.7744970737123393, "grad_norm": 3.8832361698150635, "learning_rate": 1.2871322796517888e-05, "loss": 1.6191, "step": 10289 }, { "epoch": 0.7745723479929995, "grad_norm": 4.155890464782715, "learning_rate": 1.2863158873648456e-05, "loss": 1.6831, "step": 10290 }, { "epoch": 0.7746476222736597, "grad_norm": 3.9918711185455322, "learning_rate": 1.285499715845767e-05, "loss": 1.2899, "step": 10291 }, { "epoch": 0.7747228965543198, "grad_norm": 4.175673484802246, "learning_rate": 1.2846837651430737e-05, "loss": 2.0, "step": 10292 }, { "epoch": 0.7747981708349799, "grad_norm": 5.092034816741943, "learning_rate": 1.283868035305269e-05, "loss": 1.568, "step": 10293 }, { "epoch": 0.7748734451156402, "grad_norm": 4.143556118011475, "learning_rate": 1.283052526380848e-05, "loss": 2.292, "step": 10294 }, { "epoch": 0.7749487193963003, "grad_norm": 6.161681175231934, "learning_rate": 1.2822372384182874e-05, "loss": 1.6774, "step": 10295 }, { "epoch": 0.7750239936769604, "grad_norm": 5.420450210571289, "learning_rate": 1.2814221714660574e-05, "loss": 1.7932, "step": 10296 }, { "epoch": 0.7750992679576206, "grad_norm": 5.200966835021973, "learning_rate": 1.2806073255726076e-05, "loss": 1.9951, "step": 10297 }, { "epoch": 0.7751745422382808, "grad_norm": 4.236879348754883, "learning_rate": 1.2797927007863819e-05, "loss": 1.6228, "step": 10298 }, { "epoch": 0.7752498165189409, "grad_norm": 4.418315887451172, "learning_rate": 1.2789782971558046e-05, "loss": 1.8796, "step": 10299 }, { "epoch": 0.775325090799601, "grad_norm": 4.553370475769043, "learning_rate": 1.27816411472929e-05, "loss": 1.8835, "step": 10300 }, { "epoch": 0.7754003650802612, "grad_norm": 5.340518951416016, "learning_rate": 1.2773501535552402e-05, "loss": 1.6591, "step": 10301 }, { "epoch": 0.7754756393609213, "grad_norm": 4.0605974197387695, "learning_rate": 1.2765364136820418e-05, "loss": 1.7191, "step": 10302 }, { "epoch": 0.7755509136415815, "grad_norm": 5.216078758239746, "learning_rate": 1.2757228951580718e-05, "loss": 1.322, "step": 10303 }, { "epoch": 0.7756261879222417, "grad_norm": 4.079835414886475, "learning_rate": 1.2749095980316877e-05, "loss": 1.7638, "step": 10304 }, { "epoch": 0.7757014622029018, "grad_norm": 4.159818172454834, "learning_rate": 1.2740965223512413e-05, "loss": 1.7083, "step": 10305 }, { "epoch": 0.7757767364835619, "grad_norm": 4.0179338455200195, "learning_rate": 1.273283668165064e-05, "loss": 1.9264, "step": 10306 }, { "epoch": 0.7758520107642222, "grad_norm": 5.417019367218018, "learning_rate": 1.2724710355214809e-05, "loss": 1.6962, "step": 10307 }, { "epoch": 0.7759272850448823, "grad_norm": 4.357333660125732, "learning_rate": 1.2716586244687984e-05, "loss": 1.6524, "step": 10308 }, { "epoch": 0.7760025593255424, "grad_norm": 6.675785541534424, "learning_rate": 1.2708464350553134e-05, "loss": 2.1753, "step": 10309 }, { "epoch": 0.7760778336062026, "grad_norm": 6.394741058349609, "learning_rate": 1.2700344673293069e-05, "loss": 1.9177, "step": 10310 }, { "epoch": 0.7761531078868628, "grad_norm": 3.9632022380828857, "learning_rate": 1.269222721339049e-05, "loss": 1.4868, "step": 10311 }, { "epoch": 0.7762283821675229, "grad_norm": 4.983537673950195, "learning_rate": 1.268411197132795e-05, "loss": 1.9336, "step": 10312 }, { "epoch": 0.7763036564481831, "grad_norm": 4.459327697753906, "learning_rate": 1.2675998947587898e-05, "loss": 2.0253, "step": 10313 }, { "epoch": 0.7763789307288432, "grad_norm": 7.048152446746826, "learning_rate": 1.2667888142652601e-05, "loss": 1.9392, "step": 10314 }, { "epoch": 0.7764542050095034, "grad_norm": 5.153658866882324, "learning_rate": 1.2659779557004248e-05, "loss": 1.6958, "step": 10315 }, { "epoch": 0.7765294792901636, "grad_norm": 4.004358768463135, "learning_rate": 1.2651673191124847e-05, "loss": 2.0261, "step": 10316 }, { "epoch": 0.7766047535708237, "grad_norm": 5.73649787902832, "learning_rate": 1.2643569045496312e-05, "loss": 2.1094, "step": 10317 }, { "epoch": 0.7766800278514838, "grad_norm": 6.659619331359863, "learning_rate": 1.263546712060042e-05, "loss": 1.8192, "step": 10318 }, { "epoch": 0.776755302132144, "grad_norm": 5.289032936096191, "learning_rate": 1.2627367416918783e-05, "loss": 1.7693, "step": 10319 }, { "epoch": 0.7768305764128042, "grad_norm": 5.239470958709717, "learning_rate": 1.2619269934932937e-05, "loss": 1.8994, "step": 10320 }, { "epoch": 0.7769058506934643, "grad_norm": 4.476828575134277, "learning_rate": 1.2611174675124222e-05, "loss": 1.6465, "step": 10321 }, { "epoch": 0.7769811249741244, "grad_norm": 4.582423210144043, "learning_rate": 1.26030816379739e-05, "loss": 1.8044, "step": 10322 }, { "epoch": 0.7770563992547846, "grad_norm": 4.477261543273926, "learning_rate": 1.259499082396306e-05, "loss": 1.5441, "step": 10323 }, { "epoch": 0.7771316735354448, "grad_norm": 4.7444071769714355, "learning_rate": 1.258690223357269e-05, "loss": 1.3621, "step": 10324 }, { "epoch": 0.7772069478161049, "grad_norm": 4.55178689956665, "learning_rate": 1.2578815867283628e-05, "loss": 2.0456, "step": 10325 }, { "epoch": 0.7772822220967651, "grad_norm": 4.64877986907959, "learning_rate": 1.2570731725576606e-05, "loss": 1.779, "step": 10326 }, { "epoch": 0.7773574963774252, "grad_norm": 5.220911502838135, "learning_rate": 1.2562649808932175e-05, "loss": 1.5407, "step": 10327 }, { "epoch": 0.7774327706580854, "grad_norm": 4.980185031890869, "learning_rate": 1.2554570117830805e-05, "loss": 1.9126, "step": 10328 }, { "epoch": 0.7775080449387456, "grad_norm": 5.095905780792236, "learning_rate": 1.2546492652752784e-05, "loss": 1.8974, "step": 10329 }, { "epoch": 0.7775833192194057, "grad_norm": 5.476496696472168, "learning_rate": 1.2538417414178322e-05, "loss": 1.8037, "step": 10330 }, { "epoch": 0.7776585935000658, "grad_norm": 4.375895977020264, "learning_rate": 1.2530344402587441e-05, "loss": 1.8401, "step": 10331 }, { "epoch": 0.7777338677807261, "grad_norm": 4.5472893714904785, "learning_rate": 1.2522273618460073e-05, "loss": 1.8458, "step": 10332 }, { "epoch": 0.7778091420613862, "grad_norm": 7.180160999298096, "learning_rate": 1.2514205062276019e-05, "loss": 2.0163, "step": 10333 }, { "epoch": 0.7778844163420463, "grad_norm": 4.065024375915527, "learning_rate": 1.25061387345149e-05, "loss": 1.9184, "step": 10334 }, { "epoch": 0.7779596906227065, "grad_norm": 7.553528785705566, "learning_rate": 1.2498074635656248e-05, "loss": 2.3549, "step": 10335 }, { "epoch": 0.7780349649033667, "grad_norm": 4.331940174102783, "learning_rate": 1.249001276617946e-05, "loss": 1.7035, "step": 10336 }, { "epoch": 0.7781102391840268, "grad_norm": 4.262084007263184, "learning_rate": 1.2481953126563795e-05, "loss": 1.6999, "step": 10337 }, { "epoch": 0.778185513464687, "grad_norm": 4.418400287628174, "learning_rate": 1.2473895717288353e-05, "loss": 1.3919, "step": 10338 }, { "epoch": 0.7782607877453471, "grad_norm": 4.201472759246826, "learning_rate": 1.2465840538832147e-05, "loss": 1.6041, "step": 10339 }, { "epoch": 0.7783360620260072, "grad_norm": 4.977296829223633, "learning_rate": 1.2457787591674014e-05, "loss": 2.0329, "step": 10340 }, { "epoch": 0.7784113363066674, "grad_norm": 5.484498023986816, "learning_rate": 1.2449736876292695e-05, "loss": 2.1246, "step": 10341 }, { "epoch": 0.7784866105873276, "grad_norm": 9.492502212524414, "learning_rate": 1.2441688393166767e-05, "loss": 1.8732, "step": 10342 }, { "epoch": 0.7785618848679877, "grad_norm": 4.51957893371582, "learning_rate": 1.243364214277471e-05, "loss": 1.5185, "step": 10343 }, { "epoch": 0.7786371591486478, "grad_norm": 3.8268020153045654, "learning_rate": 1.2425598125594823e-05, "loss": 1.6918, "step": 10344 }, { "epoch": 0.7787124334293081, "grad_norm": 4.446866512298584, "learning_rate": 1.2417556342105314e-05, "loss": 1.7997, "step": 10345 }, { "epoch": 0.7787877077099682, "grad_norm": 4.687377452850342, "learning_rate": 1.2409516792784242e-05, "loss": 1.6313, "step": 10346 }, { "epoch": 0.7788629819906283, "grad_norm": 3.6155290603637695, "learning_rate": 1.2401479478109557e-05, "loss": 1.8244, "step": 10347 }, { "epoch": 0.7789382562712885, "grad_norm": 4.625990390777588, "learning_rate": 1.2393444398559018e-05, "loss": 2.1789, "step": 10348 }, { "epoch": 0.7790135305519487, "grad_norm": 7.053936004638672, "learning_rate": 1.2385411554610304e-05, "loss": 1.8304, "step": 10349 }, { "epoch": 0.7790888048326088, "grad_norm": 6.356447696685791, "learning_rate": 1.2377380946740958e-05, "loss": 2.0246, "step": 10350 }, { "epoch": 0.779164079113269, "grad_norm": 4.5922698974609375, "learning_rate": 1.236935257542835e-05, "loss": 1.85, "step": 10351 }, { "epoch": 0.7792393533939291, "grad_norm": 4.202937126159668, "learning_rate": 1.2361326441149773e-05, "loss": 1.8956, "step": 10352 }, { "epoch": 0.7793146276745893, "grad_norm": 6.506840705871582, "learning_rate": 1.2353302544382329e-05, "loss": 1.9281, "step": 10353 }, { "epoch": 0.7793899019552495, "grad_norm": 4.456070423126221, "learning_rate": 1.2345280885603039e-05, "loss": 1.8459, "step": 10354 }, { "epoch": 0.7794651762359096, "grad_norm": 5.902978897094727, "learning_rate": 1.2337261465288742e-05, "loss": 1.9373, "step": 10355 }, { "epoch": 0.7795404505165697, "grad_norm": 6.452949047088623, "learning_rate": 1.2329244283916187e-05, "loss": 1.959, "step": 10356 }, { "epoch": 0.77961572479723, "grad_norm": 5.0876784324646, "learning_rate": 1.2321229341961965e-05, "loss": 2.0362, "step": 10357 }, { "epoch": 0.7796909990778901, "grad_norm": 6.603688716888428, "learning_rate": 1.2313216639902564e-05, "loss": 1.4617, "step": 10358 }, { "epoch": 0.7797662733585502, "grad_norm": 6.878844261169434, "learning_rate": 1.2305206178214279e-05, "loss": 2.064, "step": 10359 }, { "epoch": 0.7798415476392103, "grad_norm": 4.643365859985352, "learning_rate": 1.2297197957373353e-05, "loss": 1.7397, "step": 10360 }, { "epoch": 0.7799168219198706, "grad_norm": 5.111471652984619, "learning_rate": 1.2289191977855801e-05, "loss": 1.8848, "step": 10361 }, { "epoch": 0.7799920962005307, "grad_norm": 4.444356918334961, "learning_rate": 1.2281188240137604e-05, "loss": 1.6391, "step": 10362 }, { "epoch": 0.7800673704811908, "grad_norm": 4.751591205596924, "learning_rate": 1.2273186744694521e-05, "loss": 1.8169, "step": 10363 }, { "epoch": 0.780142644761851, "grad_norm": 4.49291467666626, "learning_rate": 1.2265187492002239e-05, "loss": 1.7499, "step": 10364 }, { "epoch": 0.7802179190425111, "grad_norm": 7.507765293121338, "learning_rate": 1.2257190482536296e-05, "loss": 1.9189, "step": 10365 }, { "epoch": 0.7802931933231713, "grad_norm": 5.165387153625488, "learning_rate": 1.2249195716772071e-05, "loss": 1.5551, "step": 10366 }, { "epoch": 0.7803684676038315, "grad_norm": 7.934799671173096, "learning_rate": 1.2241203195184842e-05, "loss": 1.7629, "step": 10367 }, { "epoch": 0.7804437418844916, "grad_norm": 4.580837249755859, "learning_rate": 1.2233212918249742e-05, "loss": 1.9449, "step": 10368 }, { "epoch": 0.7805190161651517, "grad_norm": 4.414484977722168, "learning_rate": 1.2225224886441784e-05, "loss": 2.0188, "step": 10369 }, { "epoch": 0.780594290445812, "grad_norm": 6.710933685302734, "learning_rate": 1.22172391002358e-05, "loss": 1.935, "step": 10370 }, { "epoch": 0.7806695647264721, "grad_norm": 6.412884712219238, "learning_rate": 1.220925556010656e-05, "loss": 1.9024, "step": 10371 }, { "epoch": 0.7807448390071322, "grad_norm": 4.9875874519348145, "learning_rate": 1.2201274266528622e-05, "loss": 1.5656, "step": 10372 }, { "epoch": 0.7808201132877924, "grad_norm": 8.993953704833984, "learning_rate": 1.2193295219976491e-05, "loss": 2.255, "step": 10373 }, { "epoch": 0.7808953875684526, "grad_norm": 4.583989143371582, "learning_rate": 1.2185318420924462e-05, "loss": 1.6245, "step": 10374 }, { "epoch": 0.7809706618491127, "grad_norm": 5.050039291381836, "learning_rate": 1.2177343869846769e-05, "loss": 1.8833, "step": 10375 }, { "epoch": 0.7810459361297729, "grad_norm": 5.000247955322266, "learning_rate": 1.216937156721744e-05, "loss": 1.9465, "step": 10376 }, { "epoch": 0.781121210410433, "grad_norm": 4.74833869934082, "learning_rate": 1.2161401513510439e-05, "loss": 1.8926, "step": 10377 }, { "epoch": 0.7811964846910932, "grad_norm": 4.246920108795166, "learning_rate": 1.2153433709199536e-05, "loss": 1.7957, "step": 10378 }, { "epoch": 0.7812717589717533, "grad_norm": 6.072246551513672, "learning_rate": 1.21454681547584e-05, "loss": 1.8872, "step": 10379 }, { "epoch": 0.7813470332524135, "grad_norm": 4.116084098815918, "learning_rate": 1.2137504850660564e-05, "loss": 1.8675, "step": 10380 }, { "epoch": 0.7814223075330736, "grad_norm": 5.24302339553833, "learning_rate": 1.2129543797379428e-05, "loss": 1.6275, "step": 10381 }, { "epoch": 0.7814975818137337, "grad_norm": 4.412202835083008, "learning_rate": 1.2121584995388268e-05, "loss": 1.6991, "step": 10382 }, { "epoch": 0.781572856094394, "grad_norm": 4.712615489959717, "learning_rate": 1.2113628445160175e-05, "loss": 1.6041, "step": 10383 }, { "epoch": 0.7816481303750541, "grad_norm": 4.713449001312256, "learning_rate": 1.2105674147168177e-05, "loss": 1.9424, "step": 10384 }, { "epoch": 0.7817234046557142, "grad_norm": 5.1208014488220215, "learning_rate": 1.2097722101885112e-05, "loss": 1.757, "step": 10385 }, { "epoch": 0.7817986789363744, "grad_norm": 4.6546630859375, "learning_rate": 1.208977230978372e-05, "loss": 1.7464, "step": 10386 }, { "epoch": 0.7818739532170346, "grad_norm": 5.556705474853516, "learning_rate": 1.208182477133658e-05, "loss": 2.2877, "step": 10387 }, { "epoch": 0.7819492274976947, "grad_norm": 4.568760871887207, "learning_rate": 1.2073879487016171e-05, "loss": 1.7213, "step": 10388 }, { "epoch": 0.7820245017783549, "grad_norm": 4.479852676391602, "learning_rate": 1.2065936457294785e-05, "loss": 1.6587, "step": 10389 }, { "epoch": 0.782099776059015, "grad_norm": 4.923946857452393, "learning_rate": 1.205799568264464e-05, "loss": 1.9684, "step": 10390 }, { "epoch": 0.7821750503396752, "grad_norm": 3.9960007667541504, "learning_rate": 1.2050057163537775e-05, "loss": 1.746, "step": 10391 }, { "epoch": 0.7822503246203354, "grad_norm": 4.612290382385254, "learning_rate": 1.2042120900446141e-05, "loss": 1.8248, "step": 10392 }, { "epoch": 0.7823255989009955, "grad_norm": 6.955273151397705, "learning_rate": 1.2034186893841493e-05, "loss": 1.9221, "step": 10393 }, { "epoch": 0.7824008731816556, "grad_norm": 4.870214939117432, "learning_rate": 1.2026255144195508e-05, "loss": 1.6902, "step": 10394 }, { "epoch": 0.7824761474623159, "grad_norm": 4.886004447937012, "learning_rate": 1.2018325651979684e-05, "loss": 1.9584, "step": 10395 }, { "epoch": 0.782551421742976, "grad_norm": 4.791713237762451, "learning_rate": 1.201039841766543e-05, "loss": 2.0985, "step": 10396 }, { "epoch": 0.7826266960236361, "grad_norm": 4.8301920890808105, "learning_rate": 1.2002473441723972e-05, "loss": 1.5059, "step": 10397 }, { "epoch": 0.7827019703042963, "grad_norm": 5.076519966125488, "learning_rate": 1.1994550724626441e-05, "loss": 1.7431, "step": 10398 }, { "epoch": 0.7827772445849565, "grad_norm": 4.058077812194824, "learning_rate": 1.1986630266843835e-05, "loss": 1.6869, "step": 10399 }, { "epoch": 0.7828525188656166, "grad_norm": 4.515242099761963, "learning_rate": 1.1978712068846976e-05, "loss": 1.1763, "step": 10400 }, { "epoch": 0.7829277931462767, "grad_norm": 10.128247261047363, "learning_rate": 1.1970796131106582e-05, "loss": 2.315, "step": 10401 }, { "epoch": 0.7830030674269369, "grad_norm": 4.249345779418945, "learning_rate": 1.1962882454093244e-05, "loss": 2.0062, "step": 10402 }, { "epoch": 0.783078341707597, "grad_norm": 7.606321334838867, "learning_rate": 1.1954971038277418e-05, "loss": 1.9329, "step": 10403 }, { "epoch": 0.7831536159882572, "grad_norm": 4.880064487457275, "learning_rate": 1.1947061884129385e-05, "loss": 2.4716, "step": 10404 }, { "epoch": 0.7832288902689174, "grad_norm": 4.390942096710205, "learning_rate": 1.1939154992119356e-05, "loss": 1.6747, "step": 10405 }, { "epoch": 0.7833041645495775, "grad_norm": 4.513258934020996, "learning_rate": 1.1931250362717333e-05, "loss": 2.0628, "step": 10406 }, { "epoch": 0.7833794388302376, "grad_norm": 6.1467742919921875, "learning_rate": 1.1923347996393259e-05, "loss": 1.569, "step": 10407 }, { "epoch": 0.7834547131108979, "grad_norm": 6.095576763153076, "learning_rate": 1.1915447893616883e-05, "loss": 1.8377, "step": 10408 }, { "epoch": 0.783529987391558, "grad_norm": 6.952891826629639, "learning_rate": 1.1907550054857863e-05, "loss": 2.0903, "step": 10409 }, { "epoch": 0.7836052616722181, "grad_norm": 5.326104640960693, "learning_rate": 1.189965448058568e-05, "loss": 1.6673, "step": 10410 }, { "epoch": 0.7836805359528783, "grad_norm": 4.444530963897705, "learning_rate": 1.1891761171269717e-05, "loss": 1.6171, "step": 10411 }, { "epoch": 0.7837558102335385, "grad_norm": 5.441180229187012, "learning_rate": 1.1883870127379204e-05, "loss": 1.7582, "step": 10412 }, { "epoch": 0.7838310845141986, "grad_norm": 5.535923957824707, "learning_rate": 1.1875981349383248e-05, "loss": 1.5503, "step": 10413 }, { "epoch": 0.7839063587948588, "grad_norm": 3.8517143726348877, "learning_rate": 1.186809483775082e-05, "loss": 1.8827, "step": 10414 }, { "epoch": 0.7839816330755189, "grad_norm": 5.057762622833252, "learning_rate": 1.1860210592950732e-05, "loss": 1.6214, "step": 10415 }, { "epoch": 0.7840569073561791, "grad_norm": 6.8897318840026855, "learning_rate": 1.1852328615451702e-05, "loss": 2.1726, "step": 10416 }, { "epoch": 0.7841321816368393, "grad_norm": 5.734986782073975, "learning_rate": 1.1844448905722267e-05, "loss": 1.6404, "step": 10417 }, { "epoch": 0.7842074559174994, "grad_norm": 5.420649528503418, "learning_rate": 1.1836571464230873e-05, "loss": 1.9012, "step": 10418 }, { "epoch": 0.7842827301981595, "grad_norm": 6.2140069007873535, "learning_rate": 1.182869629144579e-05, "loss": 2.1754, "step": 10419 }, { "epoch": 0.7843580044788196, "grad_norm": 4.264763832092285, "learning_rate": 1.18208233878352e-05, "loss": 2.1013, "step": 10420 }, { "epoch": 0.7844332787594799, "grad_norm": 4.4977312088012695, "learning_rate": 1.1812952753867096e-05, "loss": 1.7753, "step": 10421 }, { "epoch": 0.78450855304014, "grad_norm": 5.05929708480835, "learning_rate": 1.180508439000938e-05, "loss": 2.11, "step": 10422 }, { "epoch": 0.7845838273208001, "grad_norm": 6.788955211639404, "learning_rate": 1.1797218296729823e-05, "loss": 1.8552, "step": 10423 }, { "epoch": 0.7846591016014604, "grad_norm": 4.922468185424805, "learning_rate": 1.1789354474496e-05, "loss": 1.8283, "step": 10424 }, { "epoch": 0.7847343758821205, "grad_norm": 4.120331287384033, "learning_rate": 1.1781492923775422e-05, "loss": 1.8723, "step": 10425 }, { "epoch": 0.7848096501627806, "grad_norm": 5.124722957611084, "learning_rate": 1.1773633645035443e-05, "loss": 1.9874, "step": 10426 }, { "epoch": 0.7848849244434408, "grad_norm": 5.40078592300415, "learning_rate": 1.1765776638743242e-05, "loss": 1.8642, "step": 10427 }, { "epoch": 0.7849601987241009, "grad_norm": 4.8090925216674805, "learning_rate": 1.1757921905365927e-05, "loss": 1.6028, "step": 10428 }, { "epoch": 0.7850354730047611, "grad_norm": 5.225217342376709, "learning_rate": 1.175006944537041e-05, "loss": 1.9427, "step": 10429 }, { "epoch": 0.7851107472854213, "grad_norm": 4.434376239776611, "learning_rate": 1.174221925922352e-05, "loss": 1.912, "step": 10430 }, { "epoch": 0.7851860215660814, "grad_norm": 5.809786796569824, "learning_rate": 1.1734371347391932e-05, "loss": 1.7531, "step": 10431 }, { "epoch": 0.7852612958467415, "grad_norm": 4.614532470703125, "learning_rate": 1.1726525710342157e-05, "loss": 1.6316, "step": 10432 }, { "epoch": 0.7853365701274018, "grad_norm": 4.726720333099365, "learning_rate": 1.1718682348540621e-05, "loss": 2.2078, "step": 10433 }, { "epoch": 0.7854118444080619, "grad_norm": 3.8608624935150146, "learning_rate": 1.1710841262453564e-05, "loss": 1.9004, "step": 10434 }, { "epoch": 0.785487118688722, "grad_norm": 4.820503234863281, "learning_rate": 1.1703002452547135e-05, "loss": 1.6875, "step": 10435 }, { "epoch": 0.7855623929693822, "grad_norm": 5.726373195648193, "learning_rate": 1.1695165919287321e-05, "loss": 2.0559, "step": 10436 }, { "epoch": 0.7856376672500424, "grad_norm": 7.237523555755615, "learning_rate": 1.1687331663139994e-05, "loss": 1.9674, "step": 10437 }, { "epoch": 0.7857129415307025, "grad_norm": 4.502978801727295, "learning_rate": 1.167949968457086e-05, "loss": 1.8373, "step": 10438 }, { "epoch": 0.7857882158113626, "grad_norm": 5.338217735290527, "learning_rate": 1.167166998404553e-05, "loss": 1.8209, "step": 10439 }, { "epoch": 0.7858634900920228, "grad_norm": 5.1488423347473145, "learning_rate": 1.1663842562029425e-05, "loss": 2.2347, "step": 10440 }, { "epoch": 0.785938764372683, "grad_norm": 4.778382301330566, "learning_rate": 1.1656017418987896e-05, "loss": 1.5278, "step": 10441 }, { "epoch": 0.7860140386533431, "grad_norm": 4.175078392028809, "learning_rate": 1.16481945553861e-05, "loss": 1.7219, "step": 10442 }, { "epoch": 0.7860893129340033, "grad_norm": 4.212646007537842, "learning_rate": 1.1640373971689106e-05, "loss": 1.7759, "step": 10443 }, { "epoch": 0.7861645872146634, "grad_norm": 4.006810188293457, "learning_rate": 1.1632555668361799e-05, "loss": 1.7194, "step": 10444 }, { "epoch": 0.7862398614953235, "grad_norm": 4.763530254364014, "learning_rate": 1.1624739645868966e-05, "loss": 2.0957, "step": 10445 }, { "epoch": 0.7863151357759838, "grad_norm": 5.659134864807129, "learning_rate": 1.1616925904675253e-05, "loss": 1.6673, "step": 10446 }, { "epoch": 0.7863904100566439, "grad_norm": 5.201047897338867, "learning_rate": 1.1609114445245161e-05, "loss": 1.722, "step": 10447 }, { "epoch": 0.786465684337304, "grad_norm": 5.603242874145508, "learning_rate": 1.1601305268043079e-05, "loss": 1.9005, "step": 10448 }, { "epoch": 0.7865409586179642, "grad_norm": 6.72194766998291, "learning_rate": 1.1593498373533196e-05, "loss": 2.188, "step": 10449 }, { "epoch": 0.7866162328986244, "grad_norm": 4.8227152824401855, "learning_rate": 1.1585693762179656e-05, "loss": 2.0663, "step": 10450 }, { "epoch": 0.7866915071792845, "grad_norm": 3.574622392654419, "learning_rate": 1.157789143444638e-05, "loss": 1.7861, "step": 10451 }, { "epoch": 0.7867667814599447, "grad_norm": 4.818136692047119, "learning_rate": 1.1570091390797232e-05, "loss": 1.7715, "step": 10452 }, { "epoch": 0.7868420557406048, "grad_norm": 5.031071662902832, "learning_rate": 1.1562293631695864e-05, "loss": 1.6933, "step": 10453 }, { "epoch": 0.786917330021265, "grad_norm": 7.095625877380371, "learning_rate": 1.1554498157605864e-05, "loss": 2.1685, "step": 10454 }, { "epoch": 0.7869926043019252, "grad_norm": 4.141712665557861, "learning_rate": 1.1546704968990623e-05, "loss": 1.9408, "step": 10455 }, { "epoch": 0.7870678785825853, "grad_norm": 4.2152862548828125, "learning_rate": 1.153891406631344e-05, "loss": 1.9961, "step": 10456 }, { "epoch": 0.7871431528632454, "grad_norm": 4.70906925201416, "learning_rate": 1.1531125450037455e-05, "loss": 1.8631, "step": 10457 }, { "epoch": 0.7872184271439056, "grad_norm": 4.998145580291748, "learning_rate": 1.1523339120625698e-05, "loss": 2.0323, "step": 10458 }, { "epoch": 0.7872937014245658, "grad_norm": 5.832976818084717, "learning_rate": 1.1515555078541012e-05, "loss": 1.8167, "step": 10459 }, { "epoch": 0.7873689757052259, "grad_norm": 5.334981441497803, "learning_rate": 1.1507773324246168e-05, "loss": 1.8554, "step": 10460 }, { "epoch": 0.787444249985886, "grad_norm": 4.192061901092529, "learning_rate": 1.1499993858203739e-05, "loss": 1.7941, "step": 10461 }, { "epoch": 0.7875195242665463, "grad_norm": 4.096248149871826, "learning_rate": 1.1492216680876212e-05, "loss": 1.5605, "step": 10462 }, { "epoch": 0.7875947985472064, "grad_norm": 4.156639575958252, "learning_rate": 1.148444179272592e-05, "loss": 1.8762, "step": 10463 }, { "epoch": 0.7876700728278665, "grad_norm": 8.860349655151367, "learning_rate": 1.1476669194215034e-05, "loss": 1.9457, "step": 10464 }, { "epoch": 0.7877453471085267, "grad_norm": 4.781268119812012, "learning_rate": 1.1468898885805646e-05, "loss": 1.8275, "step": 10465 }, { "epoch": 0.7878206213891868, "grad_norm": 5.10231876373291, "learning_rate": 1.1461130867959647e-05, "loss": 1.9615, "step": 10466 }, { "epoch": 0.787895895669847, "grad_norm": 5.899445056915283, "learning_rate": 1.1453365141138834e-05, "loss": 1.5259, "step": 10467 }, { "epoch": 0.7879711699505072, "grad_norm": 6.294816970825195, "learning_rate": 1.1445601705804864e-05, "loss": 2.1219, "step": 10468 }, { "epoch": 0.7880464442311673, "grad_norm": 4.7341532707214355, "learning_rate": 1.143784056241926e-05, "loss": 1.7959, "step": 10469 }, { "epoch": 0.7881217185118274, "grad_norm": 4.954004287719727, "learning_rate": 1.1430081711443375e-05, "loss": 1.6121, "step": 10470 }, { "epoch": 0.7881969927924877, "grad_norm": 4.701150894165039, "learning_rate": 1.1422325153338475e-05, "loss": 1.8551, "step": 10471 }, { "epoch": 0.7882722670731478, "grad_norm": 4.160506725311279, "learning_rate": 1.141457088856564e-05, "loss": 1.8352, "step": 10472 }, { "epoch": 0.7883475413538079, "grad_norm": 4.863999843597412, "learning_rate": 1.1406818917585865e-05, "loss": 1.8032, "step": 10473 }, { "epoch": 0.7884228156344681, "grad_norm": 6.536059379577637, "learning_rate": 1.1399069240859955e-05, "loss": 1.9012, "step": 10474 }, { "epoch": 0.7884980899151283, "grad_norm": 5.541990756988525, "learning_rate": 1.1391321858848636e-05, "loss": 1.453, "step": 10475 }, { "epoch": 0.7885733641957884, "grad_norm": 5.1296186447143555, "learning_rate": 1.1383576772012438e-05, "loss": 1.9429, "step": 10476 }, { "epoch": 0.7886486384764485, "grad_norm": 3.924842357635498, "learning_rate": 1.1375833980811795e-05, "loss": 1.4478, "step": 10477 }, { "epoch": 0.7887239127571087, "grad_norm": 4.032488822937012, "learning_rate": 1.1368093485707015e-05, "loss": 1.6431, "step": 10478 }, { "epoch": 0.7887991870377689, "grad_norm": 4.2381134033203125, "learning_rate": 1.1360355287158214e-05, "loss": 1.8313, "step": 10479 }, { "epoch": 0.788874461318429, "grad_norm": 4.998696327209473, "learning_rate": 1.1352619385625418e-05, "loss": 1.6417, "step": 10480 }, { "epoch": 0.7889497355990892, "grad_norm": 4.638940334320068, "learning_rate": 1.134488578156851e-05, "loss": 2.2269, "step": 10481 }, { "epoch": 0.7890250098797493, "grad_norm": 4.457995414733887, "learning_rate": 1.1337154475447243e-05, "loss": 1.831, "step": 10482 }, { "epoch": 0.7891002841604094, "grad_norm": 6.432929039001465, "learning_rate": 1.1329425467721194e-05, "loss": 2.1138, "step": 10483 }, { "epoch": 0.7891755584410697, "grad_norm": 5.354211330413818, "learning_rate": 1.1321698758849853e-05, "loss": 1.4494, "step": 10484 }, { "epoch": 0.7892508327217298, "grad_norm": 4.447618007659912, "learning_rate": 1.1313974349292528e-05, "loss": 1.8217, "step": 10485 }, { "epoch": 0.7893261070023899, "grad_norm": 4.113140106201172, "learning_rate": 1.1306252239508441e-05, "loss": 1.9076, "step": 10486 }, { "epoch": 0.7894013812830502, "grad_norm": 5.005544662475586, "learning_rate": 1.1298532429956615e-05, "loss": 1.9262, "step": 10487 }, { "epoch": 0.7894766555637103, "grad_norm": 6.521586894989014, "learning_rate": 1.1290814921096011e-05, "loss": 1.8046, "step": 10488 }, { "epoch": 0.7895519298443704, "grad_norm": 5.025541305541992, "learning_rate": 1.128309971338537e-05, "loss": 1.7758, "step": 10489 }, { "epoch": 0.7896272041250306, "grad_norm": 4.610903739929199, "learning_rate": 1.1275386807283362e-05, "loss": 1.8613, "step": 10490 }, { "epoch": 0.7897024784056907, "grad_norm": 5.4119553565979, "learning_rate": 1.126767620324849e-05, "loss": 2.0248, "step": 10491 }, { "epoch": 0.7897777526863509, "grad_norm": 5.321690082550049, "learning_rate": 1.1259967901739155e-05, "loss": 1.5541, "step": 10492 }, { "epoch": 0.7898530269670111, "grad_norm": 4.882321357727051, "learning_rate": 1.1252261903213552e-05, "loss": 1.8298, "step": 10493 }, { "epoch": 0.7899283012476712, "grad_norm": 4.279666423797607, "learning_rate": 1.1244558208129796e-05, "loss": 2.0404, "step": 10494 }, { "epoch": 0.7900035755283313, "grad_norm": 4.211197376251221, "learning_rate": 1.1236856816945873e-05, "loss": 1.7322, "step": 10495 }, { "epoch": 0.7900788498089916, "grad_norm": 5.199526309967041, "learning_rate": 1.1229157730119571e-05, "loss": 2.3412, "step": 10496 }, { "epoch": 0.7901541240896517, "grad_norm": 5.16616678237915, "learning_rate": 1.122146094810861e-05, "loss": 1.8211, "step": 10497 }, { "epoch": 0.7902293983703118, "grad_norm": 5.986471176147461, "learning_rate": 1.1213766471370513e-05, "loss": 1.8823, "step": 10498 }, { "epoch": 0.7903046726509719, "grad_norm": 4.395902156829834, "learning_rate": 1.1206074300362723e-05, "loss": 1.893, "step": 10499 }, { "epoch": 0.7903799469316322, "grad_norm": 6.345595836639404, "learning_rate": 1.1198384435542492e-05, "loss": 2.0792, "step": 10500 }, { "epoch": 0.7904552212122923, "grad_norm": 4.27605676651001, "learning_rate": 1.119069687736697e-05, "loss": 1.7656, "step": 10501 }, { "epoch": 0.7905304954929524, "grad_norm": 4.311114311218262, "learning_rate": 1.118301162629316e-05, "loss": 1.9608, "step": 10502 }, { "epoch": 0.7906057697736126, "grad_norm": 5.204464435577393, "learning_rate": 1.1175328682777952e-05, "loss": 1.7361, "step": 10503 }, { "epoch": 0.7906810440542728, "grad_norm": 4.836484909057617, "learning_rate": 1.1167648047278034e-05, "loss": 1.7384, "step": 10504 }, { "epoch": 0.7907563183349329, "grad_norm": 4.623745441436768, "learning_rate": 1.115996972025003e-05, "loss": 2.1583, "step": 10505 }, { "epoch": 0.7908315926155931, "grad_norm": 5.195585250854492, "learning_rate": 1.1152293702150368e-05, "loss": 1.7657, "step": 10506 }, { "epoch": 0.7909068668962532, "grad_norm": 4.8195624351501465, "learning_rate": 1.1144619993435396e-05, "loss": 2.0471, "step": 10507 }, { "epoch": 0.7909821411769133, "grad_norm": 4.018946647644043, "learning_rate": 1.1136948594561258e-05, "loss": 1.8078, "step": 10508 }, { "epoch": 0.7910574154575736, "grad_norm": 4.304199695587158, "learning_rate": 1.1129279505984036e-05, "loss": 1.854, "step": 10509 }, { "epoch": 0.7911326897382337, "grad_norm": 4.6675705909729, "learning_rate": 1.1121612728159598e-05, "loss": 1.6067, "step": 10510 }, { "epoch": 0.7912079640188938, "grad_norm": 6.016103267669678, "learning_rate": 1.1113948261543727e-05, "loss": 1.7509, "step": 10511 }, { "epoch": 0.791283238299554, "grad_norm": 6.147856712341309, "learning_rate": 1.1106286106592057e-05, "loss": 1.978, "step": 10512 }, { "epoch": 0.7913585125802142, "grad_norm": 4.859924793243408, "learning_rate": 1.1098626263760077e-05, "loss": 1.8824, "step": 10513 }, { "epoch": 0.7914337868608743, "grad_norm": 4.324882507324219, "learning_rate": 1.109096873350316e-05, "loss": 1.7933, "step": 10514 }, { "epoch": 0.7915090611415345, "grad_norm": 5.222978115081787, "learning_rate": 1.1083313516276495e-05, "loss": 2.0191, "step": 10515 }, { "epoch": 0.7915843354221946, "grad_norm": 4.9351420402526855, "learning_rate": 1.1075660612535193e-05, "loss": 1.7355, "step": 10516 }, { "epoch": 0.7916596097028548, "grad_norm": 5.0382280349731445, "learning_rate": 1.1068010022734165e-05, "loss": 1.8204, "step": 10517 }, { "epoch": 0.7917348839835149, "grad_norm": 4.4992995262146, "learning_rate": 1.1060361747328247e-05, "loss": 1.8748, "step": 10518 }, { "epoch": 0.7918101582641751, "grad_norm": 8.730916023254395, "learning_rate": 1.1052715786772077e-05, "loss": 1.6461, "step": 10519 }, { "epoch": 0.7918854325448352, "grad_norm": 6.294488906860352, "learning_rate": 1.1045072141520219e-05, "loss": 1.7078, "step": 10520 }, { "epoch": 0.7919607068254954, "grad_norm": 5.546000003814697, "learning_rate": 1.1037430812027033e-05, "loss": 1.878, "step": 10521 }, { "epoch": 0.7920359811061556, "grad_norm": 5.042482852935791, "learning_rate": 1.1029791798746792e-05, "loss": 1.6006, "step": 10522 }, { "epoch": 0.7921112553868157, "grad_norm": 4.686640739440918, "learning_rate": 1.1022155102133603e-05, "loss": 1.7746, "step": 10523 }, { "epoch": 0.7921865296674758, "grad_norm": 5.204477310180664, "learning_rate": 1.1014520722641476e-05, "loss": 2.017, "step": 10524 }, { "epoch": 0.7922618039481361, "grad_norm": 4.673209190368652, "learning_rate": 1.1006888660724207e-05, "loss": 1.4741, "step": 10525 }, { "epoch": 0.7923370782287962, "grad_norm": 5.0145769119262695, "learning_rate": 1.0999258916835531e-05, "loss": 2.0601, "step": 10526 }, { "epoch": 0.7924123525094563, "grad_norm": 4.081797122955322, "learning_rate": 1.0991631491429017e-05, "loss": 1.6026, "step": 10527 }, { "epoch": 0.7924876267901165, "grad_norm": 4.916884422302246, "learning_rate": 1.0984006384958068e-05, "loss": 1.8145, "step": 10528 }, { "epoch": 0.7925629010707766, "grad_norm": 4.483363628387451, "learning_rate": 1.0976383597876006e-05, "loss": 1.9069, "step": 10529 }, { "epoch": 0.7926381753514368, "grad_norm": 5.311499118804932, "learning_rate": 1.0968763130635951e-05, "loss": 1.8131, "step": 10530 }, { "epoch": 0.792713449632097, "grad_norm": 4.320664405822754, "learning_rate": 1.0961144983690952e-05, "loss": 2.151, "step": 10531 }, { "epoch": 0.7927887239127571, "grad_norm": 4.578085422515869, "learning_rate": 1.0953529157493852e-05, "loss": 1.9826, "step": 10532 }, { "epoch": 0.7928639981934172, "grad_norm": 3.807785987854004, "learning_rate": 1.0945915652497424e-05, "loss": 1.8156, "step": 10533 }, { "epoch": 0.7929392724740775, "grad_norm": 6.542794227600098, "learning_rate": 1.0938304469154237e-05, "loss": 1.7456, "step": 10534 }, { "epoch": 0.7930145467547376, "grad_norm": 5.614415168762207, "learning_rate": 1.093069560791677e-05, "loss": 2.1634, "step": 10535 }, { "epoch": 0.7930898210353977, "grad_norm": 4.348136901855469, "learning_rate": 1.0923089069237341e-05, "loss": 1.6034, "step": 10536 }, { "epoch": 0.7931650953160578, "grad_norm": 5.081855297088623, "learning_rate": 1.0915484853568164e-05, "loss": 1.786, "step": 10537 }, { "epoch": 0.7932403695967181, "grad_norm": 4.064887046813965, "learning_rate": 1.0907882961361248e-05, "loss": 1.5823, "step": 10538 }, { "epoch": 0.7933156438773782, "grad_norm": 5.364055633544922, "learning_rate": 1.090028339306854e-05, "loss": 1.8118, "step": 10539 }, { "epoch": 0.7933909181580383, "grad_norm": 5.807515621185303, "learning_rate": 1.0892686149141773e-05, "loss": 1.8948, "step": 10540 }, { "epoch": 0.7934661924386985, "grad_norm": 5.550662994384766, "learning_rate": 1.0885091230032624e-05, "loss": 1.9071, "step": 10541 }, { "epoch": 0.7935414667193587, "grad_norm": 5.765417098999023, "learning_rate": 1.0877498636192552e-05, "loss": 1.8936, "step": 10542 }, { "epoch": 0.7936167410000188, "grad_norm": 5.395507335662842, "learning_rate": 1.0869908368072928e-05, "loss": 1.8087, "step": 10543 }, { "epoch": 0.793692015280679, "grad_norm": 5.470346927642822, "learning_rate": 1.0862320426124989e-05, "loss": 2.0436, "step": 10544 }, { "epoch": 0.7937672895613391, "grad_norm": 4.59730339050293, "learning_rate": 1.0854734810799794e-05, "loss": 1.9371, "step": 10545 }, { "epoch": 0.7938425638419992, "grad_norm": 6.259925842285156, "learning_rate": 1.0847151522548287e-05, "loss": 1.8583, "step": 10546 }, { "epoch": 0.7939178381226595, "grad_norm": 5.511729717254639, "learning_rate": 1.0839570561821282e-05, "loss": 2.0519, "step": 10547 }, { "epoch": 0.7939931124033196, "grad_norm": 4.435070514678955, "learning_rate": 1.0831991929069463e-05, "loss": 2.1082, "step": 10548 }, { "epoch": 0.7940683866839797, "grad_norm": 4.86382532119751, "learning_rate": 1.0824415624743316e-05, "loss": 1.7053, "step": 10549 }, { "epoch": 0.79414366096464, "grad_norm": 6.5566630363464355, "learning_rate": 1.0816841649293275e-05, "loss": 1.915, "step": 10550 }, { "epoch": 0.7942189352453001, "grad_norm": 5.961004257202148, "learning_rate": 1.0809270003169548e-05, "loss": 1.5648, "step": 10551 }, { "epoch": 0.7942942095259602, "grad_norm": 5.56597375869751, "learning_rate": 1.0801700686822286e-05, "loss": 1.941, "step": 10552 }, { "epoch": 0.7943694838066204, "grad_norm": 4.410744667053223, "learning_rate": 1.0794133700701432e-05, "loss": 1.8975, "step": 10553 }, { "epoch": 0.7944447580872805, "grad_norm": 4.48292875289917, "learning_rate": 1.0786569045256845e-05, "loss": 1.7614, "step": 10554 }, { "epoch": 0.7945200323679407, "grad_norm": 4.816855430603027, "learning_rate": 1.0779006720938201e-05, "loss": 1.6495, "step": 10555 }, { "epoch": 0.7945953066486008, "grad_norm": 4.901763439178467, "learning_rate": 1.0771446728195073e-05, "loss": 2.1345, "step": 10556 }, { "epoch": 0.794670580929261, "grad_norm": 4.171257972717285, "learning_rate": 1.0763889067476873e-05, "loss": 1.6871, "step": 10557 }, { "epoch": 0.7947458552099211, "grad_norm": 5.240748405456543, "learning_rate": 1.0756333739232888e-05, "loss": 1.7006, "step": 10558 }, { "epoch": 0.7948211294905813, "grad_norm": 4.132002830505371, "learning_rate": 1.0748780743912273e-05, "loss": 1.9255, "step": 10559 }, { "epoch": 0.7948964037712415, "grad_norm": 5.472195625305176, "learning_rate": 1.0741230081964004e-05, "loss": 1.8117, "step": 10560 }, { "epoch": 0.7949716780519016, "grad_norm": 5.17474365234375, "learning_rate": 1.0733681753836977e-05, "loss": 1.805, "step": 10561 }, { "epoch": 0.7950469523325617, "grad_norm": 5.187584400177002, "learning_rate": 1.0726135759979888e-05, "loss": 1.7891, "step": 10562 }, { "epoch": 0.795122226613222, "grad_norm": 4.606949806213379, "learning_rate": 1.0718592100841352e-05, "loss": 1.9286, "step": 10563 }, { "epoch": 0.7951975008938821, "grad_norm": 5.152125835418701, "learning_rate": 1.0711050776869791e-05, "loss": 1.8753, "step": 10564 }, { "epoch": 0.7952727751745422, "grad_norm": 4.856875419616699, "learning_rate": 1.0703511788513543e-05, "loss": 1.8397, "step": 10565 }, { "epoch": 0.7953480494552024, "grad_norm": 6.894735813140869, "learning_rate": 1.0695975136220749e-05, "loss": 2.0339, "step": 10566 }, { "epoch": 0.7954233237358626, "grad_norm": 4.199718952178955, "learning_rate": 1.0688440820439455e-05, "loss": 1.9094, "step": 10567 }, { "epoch": 0.7954985980165227, "grad_norm": 4.852376461029053, "learning_rate": 1.0680908841617559e-05, "loss": 1.8294, "step": 10568 }, { "epoch": 0.7955738722971829, "grad_norm": 5.831381797790527, "learning_rate": 1.0673379200202831e-05, "loss": 1.5391, "step": 10569 }, { "epoch": 0.795649146577843, "grad_norm": 4.5340423583984375, "learning_rate": 1.0665851896642854e-05, "loss": 2.0763, "step": 10570 }, { "epoch": 0.7957244208585031, "grad_norm": 4.982690334320068, "learning_rate": 1.0658326931385127e-05, "loss": 1.7669, "step": 10571 }, { "epoch": 0.7957996951391634, "grad_norm": 4.154668807983398, "learning_rate": 1.065080430487697e-05, "loss": 1.952, "step": 10572 }, { "epoch": 0.7958749694198235, "grad_norm": 4.445095062255859, "learning_rate": 1.0643284017565608e-05, "loss": 2.0379, "step": 10573 }, { "epoch": 0.7959502437004836, "grad_norm": 5.1501240730285645, "learning_rate": 1.0635766069898068e-05, "loss": 1.8313, "step": 10574 }, { "epoch": 0.7960255179811437, "grad_norm": 3.782294988632202, "learning_rate": 1.0628250462321287e-05, "loss": 1.7792, "step": 10575 }, { "epoch": 0.796100792261804, "grad_norm": 4.5195794105529785, "learning_rate": 1.062073719528206e-05, "loss": 1.8392, "step": 10576 }, { "epoch": 0.7961760665424641, "grad_norm": 5.385557174682617, "learning_rate": 1.0613226269227e-05, "loss": 2.0307, "step": 10577 }, { "epoch": 0.7962513408231242, "grad_norm": 4.283694267272949, "learning_rate": 1.060571768460264e-05, "loss": 1.4571, "step": 10578 }, { "epoch": 0.7963266151037844, "grad_norm": 6.535726070404053, "learning_rate": 1.0598211441855311e-05, "loss": 1.6028, "step": 10579 }, { "epoch": 0.7964018893844446, "grad_norm": 4.400607109069824, "learning_rate": 1.059070754143126e-05, "loss": 1.6233, "step": 10580 }, { "epoch": 0.7964771636651047, "grad_norm": 4.905205726623535, "learning_rate": 1.0583205983776567e-05, "loss": 1.9233, "step": 10581 }, { "epoch": 0.7965524379457649, "grad_norm": 4.403603553771973, "learning_rate": 1.057570676933719e-05, "loss": 1.9596, "step": 10582 }, { "epoch": 0.796627712226425, "grad_norm": 4.7504448890686035, "learning_rate": 1.0568209898558911e-05, "loss": 1.5788, "step": 10583 }, { "epoch": 0.7967029865070852, "grad_norm": 7.158041000366211, "learning_rate": 1.0560715371887425e-05, "loss": 2.176, "step": 10584 }, { "epoch": 0.7967782607877454, "grad_norm": 4.335253715515137, "learning_rate": 1.0553223189768235e-05, "loss": 1.6456, "step": 10585 }, { "epoch": 0.7968535350684055, "grad_norm": 4.165205955505371, "learning_rate": 1.0545733352646753e-05, "loss": 1.7837, "step": 10586 }, { "epoch": 0.7969288093490656, "grad_norm": 5.156903266906738, "learning_rate": 1.0538245860968204e-05, "loss": 1.7138, "step": 10587 }, { "epoch": 0.7970040836297259, "grad_norm": 5.625891208648682, "learning_rate": 1.0530760715177724e-05, "loss": 1.8802, "step": 10588 }, { "epoch": 0.797079357910386, "grad_norm": 4.726641654968262, "learning_rate": 1.0523277915720253e-05, "loss": 1.7543, "step": 10589 }, { "epoch": 0.7971546321910461, "grad_norm": 4.422852516174316, "learning_rate": 1.0515797463040639e-05, "loss": 1.9818, "step": 10590 }, { "epoch": 0.7972299064717063, "grad_norm": 4.1001105308532715, "learning_rate": 1.0508319357583574e-05, "loss": 2.0495, "step": 10591 }, { "epoch": 0.7973051807523664, "grad_norm": 4.357824325561523, "learning_rate": 1.0500843599793609e-05, "loss": 1.4581, "step": 10592 }, { "epoch": 0.7973804550330266, "grad_norm": 6.033605098724365, "learning_rate": 1.0493370190115171e-05, "loss": 1.5925, "step": 10593 }, { "epoch": 0.7974557293136868, "grad_norm": 5.0049285888671875, "learning_rate": 1.0485899128992499e-05, "loss": 1.7696, "step": 10594 }, { "epoch": 0.7975310035943469, "grad_norm": 6.114724159240723, "learning_rate": 1.0478430416869767e-05, "loss": 1.8543, "step": 10595 }, { "epoch": 0.797606277875007, "grad_norm": 4.8095383644104, "learning_rate": 1.0470964054190928e-05, "loss": 1.8121, "step": 10596 }, { "epoch": 0.7976815521556672, "grad_norm": 4.194774150848389, "learning_rate": 1.0463500041399866e-05, "loss": 2.0228, "step": 10597 }, { "epoch": 0.7977568264363274, "grad_norm": 6.96627140045166, "learning_rate": 1.0456038378940276e-05, "loss": 2.0407, "step": 10598 }, { "epoch": 0.7978321007169875, "grad_norm": 4.906794548034668, "learning_rate": 1.0448579067255749e-05, "loss": 1.6905, "step": 10599 }, { "epoch": 0.7979073749976476, "grad_norm": 6.082169055938721, "learning_rate": 1.0441122106789697e-05, "loss": 1.9021, "step": 10600 }, { "epoch": 0.7979826492783079, "grad_norm": 3.8410072326660156, "learning_rate": 1.0433667497985433e-05, "loss": 1.5913, "step": 10601 }, { "epoch": 0.798057923558968, "grad_norm": 5.016761302947998, "learning_rate": 1.0426215241286103e-05, "loss": 1.9341, "step": 10602 }, { "epoch": 0.7981331978396281, "grad_norm": 7.987148761749268, "learning_rate": 1.041876533713474e-05, "loss": 1.6058, "step": 10603 }, { "epoch": 0.7982084721202883, "grad_norm": 5.406179428100586, "learning_rate": 1.0411317785974196e-05, "loss": 1.8621, "step": 10604 }, { "epoch": 0.7982837464009485, "grad_norm": 9.935291290283203, "learning_rate": 1.0403872588247232e-05, "loss": 1.6798, "step": 10605 }, { "epoch": 0.7983590206816086, "grad_norm": 4.945474624633789, "learning_rate": 1.0396429744396414e-05, "loss": 1.4806, "step": 10606 }, { "epoch": 0.7984342949622688, "grad_norm": 6.583197593688965, "learning_rate": 1.0388989254864206e-05, "loss": 2.2207, "step": 10607 }, { "epoch": 0.7985095692429289, "grad_norm": 4.793309211730957, "learning_rate": 1.0381551120092953e-05, "loss": 2.1315, "step": 10608 }, { "epoch": 0.798584843523589, "grad_norm": 5.367116928100586, "learning_rate": 1.0374115340524787e-05, "loss": 1.8995, "step": 10609 }, { "epoch": 0.7986601178042493, "grad_norm": 5.316595554351807, "learning_rate": 1.0366681916601784e-05, "loss": 1.6572, "step": 10610 }, { "epoch": 0.7987353920849094, "grad_norm": 6.079329490661621, "learning_rate": 1.03592508487658e-05, "loss": 1.783, "step": 10611 }, { "epoch": 0.7988106663655695, "grad_norm": 6.35397481918335, "learning_rate": 1.0351822137458617e-05, "loss": 1.7655, "step": 10612 }, { "epoch": 0.7988859406462298, "grad_norm": 4.46872091293335, "learning_rate": 1.0344395783121842e-05, "loss": 1.6872, "step": 10613 }, { "epoch": 0.7989612149268899, "grad_norm": 4.64361572265625, "learning_rate": 1.0336971786196964e-05, "loss": 1.5952, "step": 10614 }, { "epoch": 0.79903648920755, "grad_norm": 4.38369083404541, "learning_rate": 1.03295501471253e-05, "loss": 1.9921, "step": 10615 }, { "epoch": 0.7991117634882101, "grad_norm": 4.6624908447265625, "learning_rate": 1.0322130866348063e-05, "loss": 1.7404, "step": 10616 }, { "epoch": 0.7991870377688703, "grad_norm": 6.090721130371094, "learning_rate": 1.031471394430628e-05, "loss": 2.0913, "step": 10617 }, { "epoch": 0.7992623120495305, "grad_norm": 7.265249252319336, "learning_rate": 1.0307299381440904e-05, "loss": 2.1835, "step": 10618 }, { "epoch": 0.7993375863301906, "grad_norm": 4.3720383644104, "learning_rate": 1.0299887178192668e-05, "loss": 2.2123, "step": 10619 }, { "epoch": 0.7994128606108508, "grad_norm": 5.423922538757324, "learning_rate": 1.0292477335002243e-05, "loss": 1.7212, "step": 10620 }, { "epoch": 0.7994881348915109, "grad_norm": 4.756758213043213, "learning_rate": 1.0285069852310097e-05, "loss": 1.6806, "step": 10621 }, { "epoch": 0.7995634091721711, "grad_norm": 4.531630992889404, "learning_rate": 1.0277664730556591e-05, "loss": 2.0157, "step": 10622 }, { "epoch": 0.7996386834528313, "grad_norm": 5.678662300109863, "learning_rate": 1.0270261970181938e-05, "loss": 1.8567, "step": 10623 }, { "epoch": 0.7997139577334914, "grad_norm": 5.319336414337158, "learning_rate": 1.0262861571626232e-05, "loss": 2.177, "step": 10624 }, { "epoch": 0.7997892320141515, "grad_norm": 5.703810691833496, "learning_rate": 1.0255463535329369e-05, "loss": 2.0952, "step": 10625 }, { "epoch": 0.7998645062948118, "grad_norm": 5.950246334075928, "learning_rate": 1.0248067861731158e-05, "loss": 1.6458, "step": 10626 }, { "epoch": 0.7999397805754719, "grad_norm": 4.7629523277282715, "learning_rate": 1.0240674551271267e-05, "loss": 1.9041, "step": 10627 }, { "epoch": 0.800015054856132, "grad_norm": 5.508734226226807, "learning_rate": 1.0233283604389183e-05, "loss": 1.8321, "step": 10628 }, { "epoch": 0.8000903291367922, "grad_norm": 5.226073265075684, "learning_rate": 1.0225895021524289e-05, "loss": 1.4688, "step": 10629 }, { "epoch": 0.8001656034174524, "grad_norm": 5.285341262817383, "learning_rate": 1.0218508803115806e-05, "loss": 2.0605, "step": 10630 }, { "epoch": 0.8002408776981125, "grad_norm": 5.178981781005859, "learning_rate": 1.021112494960284e-05, "loss": 1.6965, "step": 10631 }, { "epoch": 0.8003161519787727, "grad_norm": 5.68692684173584, "learning_rate": 1.0203743461424315e-05, "loss": 1.7158, "step": 10632 }, { "epoch": 0.8003914262594328, "grad_norm": 4.835432052612305, "learning_rate": 1.0196364339019065e-05, "loss": 1.9185, "step": 10633 }, { "epoch": 0.800466700540093, "grad_norm": 5.050309658050537, "learning_rate": 1.0188987582825732e-05, "loss": 1.7044, "step": 10634 }, { "epoch": 0.8005419748207531, "grad_norm": 5.335615634918213, "learning_rate": 1.0181613193282857e-05, "loss": 1.892, "step": 10635 }, { "epoch": 0.8006172491014133, "grad_norm": 5.324244976043701, "learning_rate": 1.0174241170828824e-05, "loss": 1.4616, "step": 10636 }, { "epoch": 0.8006925233820734, "grad_norm": 4.925962448120117, "learning_rate": 1.01668715159019e-05, "loss": 1.5167, "step": 10637 }, { "epoch": 0.8007677976627335, "grad_norm": 5.607387065887451, "learning_rate": 1.0159504228940154e-05, "loss": 1.6402, "step": 10638 }, { "epoch": 0.8008430719433938, "grad_norm": 4.65199089050293, "learning_rate": 1.0152139310381565e-05, "loss": 2.0347, "step": 10639 }, { "epoch": 0.8009183462240539, "grad_norm": 8.321224212646484, "learning_rate": 1.0144776760663972e-05, "loss": 1.8323, "step": 10640 }, { "epoch": 0.800993620504714, "grad_norm": 4.292596340179443, "learning_rate": 1.0137416580225029e-05, "loss": 1.7162, "step": 10641 }, { "epoch": 0.8010688947853742, "grad_norm": 3.7240381240844727, "learning_rate": 1.0130058769502304e-05, "loss": 1.519, "step": 10642 }, { "epoch": 0.8011441690660344, "grad_norm": 5.344223499298096, "learning_rate": 1.0122703328933175e-05, "loss": 2.2044, "step": 10643 }, { "epoch": 0.8012194433466945, "grad_norm": 6.579819202423096, "learning_rate": 1.0115350258954926e-05, "loss": 1.9823, "step": 10644 }, { "epoch": 0.8012947176273547, "grad_norm": 4.223052501678467, "learning_rate": 1.0107999560004643e-05, "loss": 1.7825, "step": 10645 }, { "epoch": 0.8013699919080148, "grad_norm": 6.044129848480225, "learning_rate": 1.0100651232519325e-05, "loss": 1.8163, "step": 10646 }, { "epoch": 0.801445266188675, "grad_norm": 4.213158130645752, "learning_rate": 1.009330527693581e-05, "loss": 1.7388, "step": 10647 }, { "epoch": 0.8015205404693352, "grad_norm": 5.015080451965332, "learning_rate": 1.008596169369081e-05, "loss": 1.7258, "step": 10648 }, { "epoch": 0.8015958147499953, "grad_norm": 5.202444076538086, "learning_rate": 1.0078620483220841e-05, "loss": 1.6329, "step": 10649 }, { "epoch": 0.8016710890306554, "grad_norm": 4.469363212585449, "learning_rate": 1.007128164596235e-05, "loss": 1.7105, "step": 10650 }, { "epoch": 0.8017463633113157, "grad_norm": 5.312971115112305, "learning_rate": 1.006394518235158e-05, "loss": 1.6272, "step": 10651 }, { "epoch": 0.8018216375919758, "grad_norm": 4.956375598907471, "learning_rate": 1.0056611092824702e-05, "loss": 1.7256, "step": 10652 }, { "epoch": 0.8018969118726359, "grad_norm": 4.4215898513793945, "learning_rate": 1.0049279377817667e-05, "loss": 1.7335, "step": 10653 }, { "epoch": 0.801972186153296, "grad_norm": 4.372335433959961, "learning_rate": 1.0041950037766351e-05, "loss": 1.8173, "step": 10654 }, { "epoch": 0.8020474604339562, "grad_norm": 3.806447744369507, "learning_rate": 1.0034623073106441e-05, "loss": 1.8572, "step": 10655 }, { "epoch": 0.8021227347146164, "grad_norm": 8.996606826782227, "learning_rate": 1.0027298484273518e-05, "loss": 2.282, "step": 10656 }, { "epoch": 0.8021980089952765, "grad_norm": 5.268815994262695, "learning_rate": 1.0019976271703007e-05, "loss": 1.7133, "step": 10657 }, { "epoch": 0.8022732832759367, "grad_norm": 4.3197550773620605, "learning_rate": 1.0012656435830186e-05, "loss": 1.9446, "step": 10658 }, { "epoch": 0.8023485575565968, "grad_norm": 6.176807880401611, "learning_rate": 1.0005338977090223e-05, "loss": 1.9034, "step": 10659 }, { "epoch": 0.802423831837257, "grad_norm": 5.208347320556641, "learning_rate": 9.99802389591808e-06, "loss": 1.8544, "step": 10660 }, { "epoch": 0.8024991061179172, "grad_norm": 3.647977590560913, "learning_rate": 9.990711192748658e-06, "loss": 1.7485, "step": 10661 }, { "epoch": 0.8025743803985773, "grad_norm": 5.0032429695129395, "learning_rate": 9.98340086801664e-06, "loss": 1.9411, "step": 10662 }, { "epoch": 0.8026496546792374, "grad_norm": 4.905118942260742, "learning_rate": 9.976092922156632e-06, "loss": 1.772, "step": 10663 }, { "epoch": 0.8027249289598977, "grad_norm": 5.962714672088623, "learning_rate": 9.968787355603044e-06, "loss": 1.7948, "step": 10664 }, { "epoch": 0.8028002032405578, "grad_norm": 6.093608379364014, "learning_rate": 9.961484168790197e-06, "loss": 1.6562, "step": 10665 }, { "epoch": 0.8028754775212179, "grad_norm": 4.597022533416748, "learning_rate": 9.954183362152225e-06, "loss": 1.6775, "step": 10666 }, { "epoch": 0.8029507518018781, "grad_norm": 6.098922252655029, "learning_rate": 9.946884936123147e-06, "loss": 1.8179, "step": 10667 }, { "epoch": 0.8030260260825383, "grad_norm": 6.503742694854736, "learning_rate": 9.93958889113683e-06, "loss": 1.9091, "step": 10668 }, { "epoch": 0.8031013003631984, "grad_norm": 3.827249050140381, "learning_rate": 9.932295227627026e-06, "loss": 1.7099, "step": 10669 }, { "epoch": 0.8031765746438586, "grad_norm": 4.976996898651123, "learning_rate": 9.925003946027284e-06, "loss": 1.6231, "step": 10670 }, { "epoch": 0.8032518489245187, "grad_norm": 5.31277322769165, "learning_rate": 9.917715046771075e-06, "loss": 1.8341, "step": 10671 }, { "epoch": 0.8033271232051789, "grad_norm": 5.836062908172607, "learning_rate": 9.91042853029171e-06, "loss": 1.9529, "step": 10672 }, { "epoch": 0.803402397485839, "grad_norm": 4.353870868682861, "learning_rate": 9.903144397022324e-06, "loss": 1.6743, "step": 10673 }, { "epoch": 0.8034776717664992, "grad_norm": 5.54949426651001, "learning_rate": 9.895862647395964e-06, "loss": 1.9927, "step": 10674 }, { "epoch": 0.8035529460471593, "grad_norm": 4.469899654388428, "learning_rate": 9.888583281845487e-06, "loss": 2.1199, "step": 10675 }, { "epoch": 0.8036282203278194, "grad_norm": 6.5621466636657715, "learning_rate": 9.881306300803656e-06, "loss": 2.1115, "step": 10676 }, { "epoch": 0.8037034946084797, "grad_norm": 7.258991241455078, "learning_rate": 9.874031704703035e-06, "loss": 2.4716, "step": 10677 }, { "epoch": 0.8037787688891398, "grad_norm": 3.9647786617279053, "learning_rate": 9.866759493976113e-06, "loss": 1.5881, "step": 10678 }, { "epoch": 0.8038540431697999, "grad_norm": 5.832951068878174, "learning_rate": 9.859489669055166e-06, "loss": 1.6782, "step": 10679 }, { "epoch": 0.8039293174504601, "grad_norm": 5.832951068878174, "learning_rate": 9.859489669055166e-06, "loss": 1.7834, "step": 10680 }, { "epoch": 0.8040045917311203, "grad_norm": 6.232776641845703, "learning_rate": 9.852222230372388e-06, "loss": 1.996, "step": 10681 }, { "epoch": 0.8040798660117804, "grad_norm": 4.047651290893555, "learning_rate": 9.844957178359798e-06, "loss": 1.6895, "step": 10682 }, { "epoch": 0.8041551402924406, "grad_norm": 4.456789016723633, "learning_rate": 9.837694513449297e-06, "loss": 1.4301, "step": 10683 }, { "epoch": 0.8042304145731007, "grad_norm": 6.298038959503174, "learning_rate": 9.830434236072605e-06, "loss": 2.3635, "step": 10684 }, { "epoch": 0.8043056888537609, "grad_norm": 6.315861701965332, "learning_rate": 9.823176346661356e-06, "loss": 1.709, "step": 10685 }, { "epoch": 0.8043809631344211, "grad_norm": 5.794083595275879, "learning_rate": 9.815920845646976e-06, "loss": 1.6315, "step": 10686 }, { "epoch": 0.8044562374150812, "grad_norm": 4.282148361206055, "learning_rate": 9.80866773346082e-06, "loss": 1.4934, "step": 10687 }, { "epoch": 0.8045315116957413, "grad_norm": 6.939359188079834, "learning_rate": 9.801417010534026e-06, "loss": 1.9112, "step": 10688 }, { "epoch": 0.8046067859764016, "grad_norm": 5.723571300506592, "learning_rate": 9.794168677297649e-06, "loss": 1.9267, "step": 10689 }, { "epoch": 0.8046820602570617, "grad_norm": 6.424620628356934, "learning_rate": 9.786922734182597e-06, "loss": 2.1242, "step": 10690 }, { "epoch": 0.8047573345377218, "grad_norm": 3.8335604667663574, "learning_rate": 9.779679181619583e-06, "loss": 1.7609, "step": 10691 }, { "epoch": 0.804832608818382, "grad_norm": 6.078704357147217, "learning_rate": 9.772438020039243e-06, "loss": 1.5793, "step": 10692 }, { "epoch": 0.8049078830990422, "grad_norm": 4.111569404602051, "learning_rate": 9.765199249872038e-06, "loss": 1.4538, "step": 10693 }, { "epoch": 0.8049831573797023, "grad_norm": 3.8913235664367676, "learning_rate": 9.757962871548305e-06, "loss": 1.6399, "step": 10694 }, { "epoch": 0.8050584316603624, "grad_norm": 4.875214576721191, "learning_rate": 9.750728885498195e-06, "loss": 1.4047, "step": 10695 }, { "epoch": 0.8051337059410226, "grad_norm": 4.7166643142700195, "learning_rate": 9.743497292151776e-06, "loss": 1.6971, "step": 10696 }, { "epoch": 0.8052089802216827, "grad_norm": 6.289992809295654, "learning_rate": 9.736268091938927e-06, "loss": 1.6479, "step": 10697 }, { "epoch": 0.8052842545023429, "grad_norm": 4.575614929199219, "learning_rate": 9.72904128528942e-06, "loss": 1.7579, "step": 10698 }, { "epoch": 0.8053595287830031, "grad_norm": 4.374817371368408, "learning_rate": 9.721816872632844e-06, "loss": 1.6652, "step": 10699 }, { "epoch": 0.8054348030636632, "grad_norm": 5.086137771606445, "learning_rate": 9.714594854398695e-06, "loss": 1.8821, "step": 10700 }, { "epoch": 0.8055100773443233, "grad_norm": 4.670071125030518, "learning_rate": 9.707375231016286e-06, "loss": 1.8576, "step": 10701 }, { "epoch": 0.8055853516249836, "grad_norm": 5.064945697784424, "learning_rate": 9.7001580029148e-06, "loss": 1.5716, "step": 10702 }, { "epoch": 0.8056606259056437, "grad_norm": 6.030887126922607, "learning_rate": 9.692943170523288e-06, "loss": 2.3776, "step": 10703 }, { "epoch": 0.8057359001863038, "grad_norm": 5.851210594177246, "learning_rate": 9.685730734270654e-06, "loss": 1.666, "step": 10704 }, { "epoch": 0.805811174466964, "grad_norm": 4.9819536209106445, "learning_rate": 9.678520694585664e-06, "loss": 1.7411, "step": 10705 }, { "epoch": 0.8058864487476242, "grad_norm": 5.127562999725342, "learning_rate": 9.671313051896908e-06, "loss": 1.7239, "step": 10706 }, { "epoch": 0.8059617230282843, "grad_norm": 5.233977794647217, "learning_rate": 9.664107806632888e-06, "loss": 2.1205, "step": 10707 }, { "epoch": 0.8060369973089445, "grad_norm": 3.824859857559204, "learning_rate": 9.656904959221912e-06, "loss": 1.8875, "step": 10708 }, { "epoch": 0.8061122715896046, "grad_norm": 5.744001388549805, "learning_rate": 9.649704510092195e-06, "loss": 1.7138, "step": 10709 }, { "epoch": 0.8061875458702648, "grad_norm": 7.650867462158203, "learning_rate": 9.642506459671746e-06, "loss": 2.4303, "step": 10710 }, { "epoch": 0.806262820150925, "grad_norm": 5.673069477081299, "learning_rate": 9.635310808388509e-06, "loss": 1.4306, "step": 10711 }, { "epoch": 0.8063380944315851, "grad_norm": 4.3479905128479, "learning_rate": 9.628117556670207e-06, "loss": 1.4309, "step": 10712 }, { "epoch": 0.8064133687122452, "grad_norm": 4.550969123840332, "learning_rate": 9.620926704944482e-06, "loss": 2.0859, "step": 10713 }, { "epoch": 0.8064886429929053, "grad_norm": 4.660947322845459, "learning_rate": 9.613738253638798e-06, "loss": 1.8436, "step": 10714 }, { "epoch": 0.8065639172735656, "grad_norm": 4.935039043426514, "learning_rate": 9.606552203180513e-06, "loss": 1.8763, "step": 10715 }, { "epoch": 0.8066391915542257, "grad_norm": 4.6597819328308105, "learning_rate": 9.599368553996779e-06, "loss": 1.6187, "step": 10716 }, { "epoch": 0.8067144658348858, "grad_norm": 4.312115669250488, "learning_rate": 9.592187306514676e-06, "loss": 1.7801, "step": 10717 }, { "epoch": 0.806789740115546, "grad_norm": 5.013323783874512, "learning_rate": 9.585008461161082e-06, "loss": 1.8543, "step": 10718 }, { "epoch": 0.8068650143962062, "grad_norm": 4.603612422943115, "learning_rate": 9.577832018362788e-06, "loss": 1.5446, "step": 10719 }, { "epoch": 0.8069402886768663, "grad_norm": 5.909267425537109, "learning_rate": 9.570657978546383e-06, "loss": 1.5844, "step": 10720 }, { "epoch": 0.8070155629575265, "grad_norm": 6.481149673461914, "learning_rate": 9.56348634213835e-06, "loss": 1.4137, "step": 10721 }, { "epoch": 0.8070908372381866, "grad_norm": 4.195736885070801, "learning_rate": 9.55631710956505e-06, "loss": 1.6917, "step": 10722 }, { "epoch": 0.8071661115188468, "grad_norm": 5.30164098739624, "learning_rate": 9.549150281252633e-06, "loss": 1.6404, "step": 10723 }, { "epoch": 0.807241385799507, "grad_norm": 5.554624557495117, "learning_rate": 9.541985857627166e-06, "loss": 1.9411, "step": 10724 }, { "epoch": 0.8073166600801671, "grad_norm": 4.075313568115234, "learning_rate": 9.534823839114553e-06, "loss": 1.9542, "step": 10725 }, { "epoch": 0.8073919343608272, "grad_norm": 4.310482025146484, "learning_rate": 9.52766422614057e-06, "loss": 1.665, "step": 10726 }, { "epoch": 0.8074672086414875, "grad_norm": 4.176849365234375, "learning_rate": 9.520507019130804e-06, "loss": 1.6664, "step": 10727 }, { "epoch": 0.8075424829221476, "grad_norm": 6.520838260650635, "learning_rate": 9.513352218510763e-06, "loss": 1.8622, "step": 10728 }, { "epoch": 0.8076177572028077, "grad_norm": 4.1897687911987305, "learning_rate": 9.506199824705753e-06, "loss": 1.8259, "step": 10729 }, { "epoch": 0.8076930314834679, "grad_norm": 4.515954494476318, "learning_rate": 9.499049838140982e-06, "loss": 1.94, "step": 10730 }, { "epoch": 0.8077683057641281, "grad_norm": 4.747716903686523, "learning_rate": 9.491902259241476e-06, "loss": 1.348, "step": 10731 }, { "epoch": 0.8078435800447882, "grad_norm": 6.552875995635986, "learning_rate": 9.484757088432161e-06, "loss": 1.7899, "step": 10732 }, { "epoch": 0.8079188543254483, "grad_norm": 4.528985977172852, "learning_rate": 9.477614326137773e-06, "loss": 1.8451, "step": 10733 }, { "epoch": 0.8079941286061085, "grad_norm": 5.588685035705566, "learning_rate": 9.470473972782962e-06, "loss": 1.5428, "step": 10734 }, { "epoch": 0.8080694028867687, "grad_norm": 5.34883975982666, "learning_rate": 9.463336028792157e-06, "loss": 2.0991, "step": 10735 }, { "epoch": 0.8081446771674288, "grad_norm": 4.229326248168945, "learning_rate": 9.456200494589723e-06, "loss": 1.9816, "step": 10736 }, { "epoch": 0.808219951448089, "grad_norm": 3.9271128177642822, "learning_rate": 9.449067370599829e-06, "loss": 1.6893, "step": 10737 }, { "epoch": 0.8082952257287491, "grad_norm": 5.129074573516846, "learning_rate": 9.44193665724653e-06, "loss": 1.8029, "step": 10738 }, { "epoch": 0.8083705000094092, "grad_norm": 5.309793472290039, "learning_rate": 9.434808354953734e-06, "loss": 1.6701, "step": 10739 }, { "epoch": 0.8084457742900695, "grad_norm": 4.950886249542236, "learning_rate": 9.427682464145172e-06, "loss": 2.2871, "step": 10740 }, { "epoch": 0.8085210485707296, "grad_norm": 3.967445135116577, "learning_rate": 9.420558985244488e-06, "loss": 1.8083, "step": 10741 }, { "epoch": 0.8085963228513897, "grad_norm": 6.281248569488525, "learning_rate": 9.413437918675123e-06, "loss": 1.7827, "step": 10742 }, { "epoch": 0.80867159713205, "grad_norm": 4.986514568328857, "learning_rate": 9.40631926486043e-06, "loss": 2.0824, "step": 10743 }, { "epoch": 0.8087468714127101, "grad_norm": 4.760549068450928, "learning_rate": 9.399203024223568e-06, "loss": 1.7938, "step": 10744 }, { "epoch": 0.8088221456933702, "grad_norm": 4.574357986450195, "learning_rate": 9.392089197187604e-06, "loss": 1.4696, "step": 10745 }, { "epoch": 0.8088974199740304, "grad_norm": 3.9092679023742676, "learning_rate": 9.384977784175403e-06, "loss": 1.6746, "step": 10746 }, { "epoch": 0.8089726942546905, "grad_norm": 5.171268463134766, "learning_rate": 9.37786878560974e-06, "loss": 1.8227, "step": 10747 }, { "epoch": 0.8090479685353507, "grad_norm": 5.368162155151367, "learning_rate": 9.37076220191322e-06, "loss": 1.383, "step": 10748 }, { "epoch": 0.8091232428160109, "grad_norm": 4.458542346954346, "learning_rate": 9.363658033508315e-06, "loss": 1.6161, "step": 10749 }, { "epoch": 0.809198517096671, "grad_norm": 4.535194396972656, "learning_rate": 9.356556280817335e-06, "loss": 1.7852, "step": 10750 }, { "epoch": 0.8092737913773311, "grad_norm": 7.015781402587891, "learning_rate": 9.349456944262474e-06, "loss": 1.4985, "step": 10751 }, { "epoch": 0.8093490656579913, "grad_norm": 3.718797206878662, "learning_rate": 9.34236002426575e-06, "loss": 1.5249, "step": 10752 }, { "epoch": 0.8094243399386515, "grad_norm": 6.286694049835205, "learning_rate": 9.335265521249059e-06, "loss": 1.7973, "step": 10753 }, { "epoch": 0.8094996142193116, "grad_norm": 6.147034168243408, "learning_rate": 9.328173435634164e-06, "loss": 2.2572, "step": 10754 }, { "epoch": 0.8095748884999717, "grad_norm": 7.531181812286377, "learning_rate": 9.321083767842648e-06, "loss": 1.9205, "step": 10755 }, { "epoch": 0.809650162780632, "grad_norm": 5.010212421417236, "learning_rate": 9.313996518295993e-06, "loss": 1.6824, "step": 10756 }, { "epoch": 0.8097254370612921, "grad_norm": 3.934354066848755, "learning_rate": 9.306911687415488e-06, "loss": 1.7618, "step": 10757 }, { "epoch": 0.8098007113419522, "grad_norm": 4.612907409667969, "learning_rate": 9.299829275622323e-06, "loss": 1.6604, "step": 10758 }, { "epoch": 0.8098759856226124, "grad_norm": 4.187878131866455, "learning_rate": 9.292749283337533e-06, "loss": 1.5036, "step": 10759 }, { "epoch": 0.8099512599032725, "grad_norm": 5.0052103996276855, "learning_rate": 9.285671710981997e-06, "loss": 1.971, "step": 10760 }, { "epoch": 0.8100265341839327, "grad_norm": 7.626448631286621, "learning_rate": 9.27859655897645e-06, "loss": 1.8097, "step": 10761 }, { "epoch": 0.8101018084645929, "grad_norm": 5.556909084320068, "learning_rate": 9.271523827741508e-06, "loss": 1.7399, "step": 10762 }, { "epoch": 0.810177082745253, "grad_norm": 4.775475978851318, "learning_rate": 9.264453517697597e-06, "loss": 1.6214, "step": 10763 }, { "epoch": 0.8102523570259131, "grad_norm": 5.027905464172363, "learning_rate": 9.257385629265053e-06, "loss": 1.8219, "step": 10764 }, { "epoch": 0.8103276313065734, "grad_norm": 5.68484354019165, "learning_rate": 9.250320162864012e-06, "loss": 1.777, "step": 10765 }, { "epoch": 0.8104029055872335, "grad_norm": 5.306346416473389, "learning_rate": 9.243257118914533e-06, "loss": 1.7954, "step": 10766 }, { "epoch": 0.8104781798678936, "grad_norm": 4.2520647048950195, "learning_rate": 9.236196497836458e-06, "loss": 1.5632, "step": 10767 }, { "epoch": 0.8105534541485538, "grad_norm": 5.100350856781006, "learning_rate": 9.22913830004954e-06, "loss": 1.6379, "step": 10768 }, { "epoch": 0.810628728429214, "grad_norm": 5.0618672370910645, "learning_rate": 9.222082525973364e-06, "loss": 1.491, "step": 10769 }, { "epoch": 0.8107040027098741, "grad_norm": 5.813129425048828, "learning_rate": 9.215029176027373e-06, "loss": 1.7683, "step": 10770 }, { "epoch": 0.8107792769905343, "grad_norm": 4.636019229888916, "learning_rate": 9.207978250630888e-06, "loss": 1.6041, "step": 10771 }, { "epoch": 0.8108545512711944, "grad_norm": 4.913620948791504, "learning_rate": 9.20092975020304e-06, "loss": 1.8379, "step": 10772 }, { "epoch": 0.8109298255518546, "grad_norm": 5.632382869720459, "learning_rate": 9.193883675162863e-06, "loss": 1.6162, "step": 10773 }, { "epoch": 0.8110050998325147, "grad_norm": 5.632382869720459, "learning_rate": 9.193883675162863e-06, "loss": 1.7046, "step": 10774 }, { "epoch": 0.8110803741131749, "grad_norm": 5.2676682472229, "learning_rate": 9.186840025929206e-06, "loss": 1.8647, "step": 10775 }, { "epoch": 0.811155648393835, "grad_norm": 5.111133098602295, "learning_rate": 9.179798802920814e-06, "loss": 1.4395, "step": 10776 }, { "epoch": 0.8112309226744951, "grad_norm": 4.482577323913574, "learning_rate": 9.172760006556242e-06, "loss": 1.8926, "step": 10777 }, { "epoch": 0.8113061969551554, "grad_norm": 5.009120941162109, "learning_rate": 9.165723637253953e-06, "loss": 2.032, "step": 10778 }, { "epoch": 0.8113814712358155, "grad_norm": 5.149282455444336, "learning_rate": 9.158689695432221e-06, "loss": 1.7103, "step": 10779 }, { "epoch": 0.8114567455164756, "grad_norm": 8.16108512878418, "learning_rate": 9.151658181509194e-06, "loss": 2.3891, "step": 10780 }, { "epoch": 0.8115320197971359, "grad_norm": 6.173976898193359, "learning_rate": 9.144629095902896e-06, "loss": 1.8225, "step": 10781 }, { "epoch": 0.811607294077796, "grad_norm": 4.532297611236572, "learning_rate": 9.137602439031157e-06, "loss": 1.7306, "step": 10782 }, { "epoch": 0.8116825683584561, "grad_norm": 6.302655220031738, "learning_rate": 9.130578211311708e-06, "loss": 1.751, "step": 10783 }, { "epoch": 0.8117578426391163, "grad_norm": 4.427190780639648, "learning_rate": 9.123556413162126e-06, "loss": 1.8865, "step": 10784 }, { "epoch": 0.8118331169197764, "grad_norm": 4.001774311065674, "learning_rate": 9.116537044999817e-06, "loss": 2.0738, "step": 10785 }, { "epoch": 0.8119083912004366, "grad_norm": 5.022767066955566, "learning_rate": 9.10952010724207e-06, "loss": 1.5779, "step": 10786 }, { "epoch": 0.8119836654810968, "grad_norm": 5.475062847137451, "learning_rate": 9.102505600306045e-06, "loss": 2.3591, "step": 10787 }, { "epoch": 0.8120589397617569, "grad_norm": 4.143284320831299, "learning_rate": 9.095493524608694e-06, "loss": 1.6952, "step": 10788 }, { "epoch": 0.812134214042417, "grad_norm": 5.463651180267334, "learning_rate": 9.0884838805669e-06, "loss": 1.8657, "step": 10789 }, { "epoch": 0.8122094883230773, "grad_norm": 4.535449504852295, "learning_rate": 9.081476668597338e-06, "loss": 1.8759, "step": 10790 }, { "epoch": 0.8122847626037374, "grad_norm": 5.332362651824951, "learning_rate": 9.074471889116593e-06, "loss": 1.491, "step": 10791 }, { "epoch": 0.8123600368843975, "grad_norm": 6.7469282150268555, "learning_rate": 9.067469542541051e-06, "loss": 2.2716, "step": 10792 }, { "epoch": 0.8124353111650576, "grad_norm": 4.326296806335449, "learning_rate": 9.060469629287e-06, "loss": 1.6534, "step": 10793 }, { "epoch": 0.8125105854457179, "grad_norm": 6.551939010620117, "learning_rate": 9.053472149770559e-06, "loss": 1.8713, "step": 10794 }, { "epoch": 0.812585859726378, "grad_norm": 4.524019241333008, "learning_rate": 9.04647710440772e-06, "loss": 1.7803, "step": 10795 }, { "epoch": 0.8126611340070381, "grad_norm": 4.482858180999756, "learning_rate": 9.039484493614298e-06, "loss": 1.8058, "step": 10796 }, { "epoch": 0.8127364082876983, "grad_norm": 10.275532722473145, "learning_rate": 9.032494317806007e-06, "loss": 2.0376, "step": 10797 }, { "epoch": 0.8128116825683585, "grad_norm": 4.812849998474121, "learning_rate": 9.025506577398362e-06, "loss": 1.6337, "step": 10798 }, { "epoch": 0.8128869568490186, "grad_norm": 4.211179733276367, "learning_rate": 9.0185212728068e-06, "loss": 2.0419, "step": 10799 }, { "epoch": 0.8129622311296788, "grad_norm": 8.443922996520996, "learning_rate": 9.011538404446546e-06, "loss": 1.8586, "step": 10800 }, { "epoch": 0.8130375054103389, "grad_norm": 3.891097068786621, "learning_rate": 9.00455797273274e-06, "loss": 1.891, "step": 10801 }, { "epoch": 0.813112779690999, "grad_norm": 6.209191799163818, "learning_rate": 8.997579978080317e-06, "loss": 1.7102, "step": 10802 }, { "epoch": 0.8131880539716593, "grad_norm": 5.509005069732666, "learning_rate": 8.990604420904115e-06, "loss": 2.0006, "step": 10803 }, { "epoch": 0.8132633282523194, "grad_norm": 4.314136981964111, "learning_rate": 8.983631301618812e-06, "loss": 1.7638, "step": 10804 }, { "epoch": 0.8133386025329795, "grad_norm": 4.226730823516846, "learning_rate": 8.976660620638939e-06, "loss": 2.1153, "step": 10805 }, { "epoch": 0.8134138768136397, "grad_norm": 6.691463947296143, "learning_rate": 8.969692378378897e-06, "loss": 1.8806, "step": 10806 }, { "epoch": 0.8134891510942999, "grad_norm": 6.5203537940979, "learning_rate": 8.9627265752529e-06, "loss": 1.7904, "step": 10807 }, { "epoch": 0.81356442537496, "grad_norm": 4.176247596740723, "learning_rate": 8.955763211675072e-06, "loss": 1.8313, "step": 10808 }, { "epoch": 0.8136396996556202, "grad_norm": 4.838740348815918, "learning_rate": 8.948802288059338e-06, "loss": 1.9172, "step": 10809 }, { "epoch": 0.8137149739362803, "grad_norm": 4.36600399017334, "learning_rate": 8.941843804819533e-06, "loss": 2.5324, "step": 10810 }, { "epoch": 0.8137902482169405, "grad_norm": 6.494029998779297, "learning_rate": 8.934887762369293e-06, "loss": 1.4995, "step": 10811 }, { "epoch": 0.8138655224976006, "grad_norm": 4.998374938964844, "learning_rate": 8.927934161122154e-06, "loss": 1.9609, "step": 10812 }, { "epoch": 0.8139407967782608, "grad_norm": 6.45481538772583, "learning_rate": 8.920983001491473e-06, "loss": 1.7951, "step": 10813 }, { "epoch": 0.8140160710589209, "grad_norm": 7.360757350921631, "learning_rate": 8.914034283890483e-06, "loss": 1.776, "step": 10814 }, { "epoch": 0.814091345339581, "grad_norm": 5.082930564880371, "learning_rate": 8.907088008732261e-06, "loss": 1.6566, "step": 10815 }, { "epoch": 0.8141666196202413, "grad_norm": 4.800956726074219, "learning_rate": 8.900144176429765e-06, "loss": 1.7358, "step": 10816 }, { "epoch": 0.8142418939009014, "grad_norm": 5.695190906524658, "learning_rate": 8.893202787395755e-06, "loss": 1.981, "step": 10817 }, { "epoch": 0.8143171681815615, "grad_norm": 5.245087623596191, "learning_rate": 8.886263842042892e-06, "loss": 2.0929, "step": 10818 }, { "epoch": 0.8143924424622218, "grad_norm": 6.453237056732178, "learning_rate": 8.879327340783688e-06, "loss": 2.072, "step": 10819 }, { "epoch": 0.8144677167428819, "grad_norm": 4.42209005355835, "learning_rate": 8.872393284030472e-06, "loss": 1.8223, "step": 10820 }, { "epoch": 0.814542991023542, "grad_norm": 4.721053600311279, "learning_rate": 8.865461672195479e-06, "loss": 1.8154, "step": 10821 }, { "epoch": 0.8146182653042022, "grad_norm": 5.299997806549072, "learning_rate": 8.858532505690747e-06, "loss": 1.9668, "step": 10822 }, { "epoch": 0.8146935395848623, "grad_norm": 6.3773627281188965, "learning_rate": 8.851605784928224e-06, "loss": 1.8669, "step": 10823 }, { "epoch": 0.8147688138655225, "grad_norm": 4.670791149139404, "learning_rate": 8.844681510319658e-06, "loss": 2.0647, "step": 10824 }, { "epoch": 0.8148440881461827, "grad_norm": 4.377533435821533, "learning_rate": 8.837759682276685e-06, "loss": 1.9389, "step": 10825 }, { "epoch": 0.8149193624268428, "grad_norm": 4.788690090179443, "learning_rate": 8.830840301210796e-06, "loss": 2.0852, "step": 10826 }, { "epoch": 0.8149946367075029, "grad_norm": 5.35303258895874, "learning_rate": 8.823923367533332e-06, "loss": 2.2674, "step": 10827 }, { "epoch": 0.8150699109881632, "grad_norm": 4.1622772216796875, "learning_rate": 8.817008881655464e-06, "loss": 1.6945, "step": 10828 }, { "epoch": 0.8151451852688233, "grad_norm": 5.137579441070557, "learning_rate": 8.810096843988263e-06, "loss": 1.576, "step": 10829 }, { "epoch": 0.8152204595494834, "grad_norm": 5.846545696258545, "learning_rate": 8.803187254942607e-06, "loss": 1.9946, "step": 10830 }, { "epoch": 0.8152957338301435, "grad_norm": 4.926131248474121, "learning_rate": 8.796280114929274e-06, "loss": 1.5647, "step": 10831 }, { "epoch": 0.8153710081108038, "grad_norm": 5.013854503631592, "learning_rate": 8.789375424358847e-06, "loss": 1.696, "step": 10832 }, { "epoch": 0.8154462823914639, "grad_norm": 3.7120392322540283, "learning_rate": 8.78247318364182e-06, "loss": 1.6129, "step": 10833 }, { "epoch": 0.815521556672124, "grad_norm": 4.516750335693359, "learning_rate": 8.775573393188485e-06, "loss": 1.8204, "step": 10834 }, { "epoch": 0.8155968309527842, "grad_norm": 4.984897136688232, "learning_rate": 8.76867605340902e-06, "loss": 1.8868, "step": 10835 }, { "epoch": 0.8156721052334444, "grad_norm": 5.796410083770752, "learning_rate": 8.761781164713472e-06, "loss": 2.0085, "step": 10836 }, { "epoch": 0.8157473795141045, "grad_norm": 4.42592191696167, "learning_rate": 8.754888727511696e-06, "loss": 1.5354, "step": 10837 }, { "epoch": 0.8158226537947647, "grad_norm": 4.458138942718506, "learning_rate": 8.747998742213443e-06, "loss": 1.5543, "step": 10838 }, { "epoch": 0.8158979280754248, "grad_norm": 5.161789894104004, "learning_rate": 8.741111209228292e-06, "loss": 1.7793, "step": 10839 }, { "epoch": 0.815973202356085, "grad_norm": 3.833183765411377, "learning_rate": 8.73422612896571e-06, "loss": 1.7617, "step": 10840 }, { "epoch": 0.8160484766367452, "grad_norm": 7.159696102142334, "learning_rate": 8.727343501834972e-06, "loss": 2.2675, "step": 10841 }, { "epoch": 0.8161237509174053, "grad_norm": 4.582847595214844, "learning_rate": 8.720463328245248e-06, "loss": 1.5086, "step": 10842 }, { "epoch": 0.8161990251980654, "grad_norm": 4.902654647827148, "learning_rate": 8.713585608605524e-06, "loss": 1.7983, "step": 10843 }, { "epoch": 0.8162742994787257, "grad_norm": 4.875204086303711, "learning_rate": 8.706710343324681e-06, "loss": 1.7053, "step": 10844 }, { "epoch": 0.8163495737593858, "grad_norm": 3.8512213230133057, "learning_rate": 8.69983753281141e-06, "loss": 1.7641, "step": 10845 }, { "epoch": 0.8164248480400459, "grad_norm": 5.193028450012207, "learning_rate": 8.69296717747431e-06, "loss": 1.9469, "step": 10846 }, { "epoch": 0.8165001223207061, "grad_norm": 5.994436740875244, "learning_rate": 8.686099277721777e-06, "loss": 1.5639, "step": 10847 }, { "epoch": 0.8165753966013662, "grad_norm": 4.695322513580322, "learning_rate": 8.679233833962097e-06, "loss": 1.7832, "step": 10848 }, { "epoch": 0.8166506708820264, "grad_norm": 5.546252250671387, "learning_rate": 8.672370846603406e-06, "loss": 1.9258, "step": 10849 }, { "epoch": 0.8167259451626865, "grad_norm": 4.912105083465576, "learning_rate": 8.665510316053682e-06, "loss": 1.4253, "step": 10850 }, { "epoch": 0.8168012194433467, "grad_norm": 5.021442413330078, "learning_rate": 8.658652242720777e-06, "loss": 1.9913, "step": 10851 }, { "epoch": 0.8168764937240068, "grad_norm": 3.657658815383911, "learning_rate": 8.651796627012371e-06, "loss": 1.9411, "step": 10852 }, { "epoch": 0.816951768004667, "grad_norm": 6.262230396270752, "learning_rate": 8.64494346933602e-06, "loss": 2.0501, "step": 10853 }, { "epoch": 0.8170270422853272, "grad_norm": 6.539548397064209, "learning_rate": 8.63809277009911e-06, "loss": 1.681, "step": 10854 }, { "epoch": 0.8171023165659873, "grad_norm": 4.937403678894043, "learning_rate": 8.631244529708915e-06, "loss": 1.9723, "step": 10855 }, { "epoch": 0.8171775908466474, "grad_norm": 3.7029614448547363, "learning_rate": 8.62439874857252e-06, "loss": 1.9043, "step": 10856 }, { "epoch": 0.8172528651273077, "grad_norm": 6.8704304695129395, "learning_rate": 8.617555427096913e-06, "loss": 1.932, "step": 10857 }, { "epoch": 0.8173281394079678, "grad_norm": 4.510538578033447, "learning_rate": 8.610714565688882e-06, "loss": 2.1058, "step": 10858 }, { "epoch": 0.8174034136886279, "grad_norm": 4.989439487457275, "learning_rate": 8.60387616475511e-06, "loss": 1.9232, "step": 10859 }, { "epoch": 0.8174786879692881, "grad_norm": 6.048931121826172, "learning_rate": 8.597040224702124e-06, "loss": 2.1272, "step": 10860 }, { "epoch": 0.8175539622499483, "grad_norm": 4.535512447357178, "learning_rate": 8.590206745936308e-06, "loss": 1.551, "step": 10861 }, { "epoch": 0.8176292365306084, "grad_norm": 6.280004978179932, "learning_rate": 8.583375728863874e-06, "loss": 1.6735, "step": 10862 }, { "epoch": 0.8177045108112686, "grad_norm": 4.35219144821167, "learning_rate": 8.576547173890926e-06, "loss": 1.7076, "step": 10863 }, { "epoch": 0.8177797850919287, "grad_norm": 5.373569488525391, "learning_rate": 8.569721081423376e-06, "loss": 1.9191, "step": 10864 }, { "epoch": 0.8178550593725888, "grad_norm": 4.578735828399658, "learning_rate": 8.562897451867047e-06, "loss": 1.798, "step": 10865 }, { "epoch": 0.8179303336532491, "grad_norm": 4.907925128936768, "learning_rate": 8.556076285627557e-06, "loss": 2.1186, "step": 10866 }, { "epoch": 0.8180056079339092, "grad_norm": 5.499814987182617, "learning_rate": 8.549257583110415e-06, "loss": 1.5638, "step": 10867 }, { "epoch": 0.8180808822145693, "grad_norm": 4.844296455383301, "learning_rate": 8.54244134472098e-06, "loss": 1.7051, "step": 10868 }, { "epoch": 0.8181561564952295, "grad_norm": 6.254519462585449, "learning_rate": 8.535627570864447e-06, "loss": 1.6878, "step": 10869 }, { "epoch": 0.8182314307758897, "grad_norm": 4.311774730682373, "learning_rate": 8.528816261945877e-06, "loss": 1.7822, "step": 10870 }, { "epoch": 0.8183067050565498, "grad_norm": 3.9855504035949707, "learning_rate": 8.522007418370188e-06, "loss": 1.665, "step": 10871 }, { "epoch": 0.8183819793372099, "grad_norm": 6.331838607788086, "learning_rate": 8.515201040542158e-06, "loss": 1.7336, "step": 10872 }, { "epoch": 0.8184572536178701, "grad_norm": 5.406843662261963, "learning_rate": 8.50839712886638e-06, "loss": 1.7519, "step": 10873 }, { "epoch": 0.8185325278985303, "grad_norm": 4.678377628326416, "learning_rate": 8.501595683747348e-06, "loss": 1.7087, "step": 10874 }, { "epoch": 0.8186078021791904, "grad_norm": 5.033703804016113, "learning_rate": 8.49479670558937e-06, "loss": 1.3437, "step": 10875 }, { "epoch": 0.8186830764598506, "grad_norm": 5.009019374847412, "learning_rate": 8.488000194796647e-06, "loss": 1.6715, "step": 10876 }, { "epoch": 0.8187583507405107, "grad_norm": 5.265163898468018, "learning_rate": 8.48120615177319e-06, "loss": 1.8161, "step": 10877 }, { "epoch": 0.8188336250211709, "grad_norm": 4.090880393981934, "learning_rate": 8.474414576922912e-06, "loss": 1.8737, "step": 10878 }, { "epoch": 0.8189088993018311, "grad_norm": 3.992671012878418, "learning_rate": 8.467625470649526e-06, "loss": 1.7233, "step": 10879 }, { "epoch": 0.8189841735824912, "grad_norm": 5.976937294006348, "learning_rate": 8.460838833356632e-06, "loss": 2.058, "step": 10880 }, { "epoch": 0.8190594478631513, "grad_norm": 3.9277961254119873, "learning_rate": 8.454054665447681e-06, "loss": 1.6229, "step": 10881 }, { "epoch": 0.8191347221438116, "grad_norm": 7.06601095199585, "learning_rate": 8.447272967325981e-06, "loss": 1.8344, "step": 10882 }, { "epoch": 0.8192099964244717, "grad_norm": 6.076743125915527, "learning_rate": 8.440493739394668e-06, "loss": 2.1974, "step": 10883 }, { "epoch": 0.8192852707051318, "grad_norm": 4.61710262298584, "learning_rate": 8.433716982056749e-06, "loss": 1.6597, "step": 10884 }, { "epoch": 0.819360544985792, "grad_norm": 5.550755500793457, "learning_rate": 8.426942695715101e-06, "loss": 1.6254, "step": 10885 }, { "epoch": 0.8194358192664521, "grad_norm": 6.744055271148682, "learning_rate": 8.420170880772415e-06, "loss": 1.6014, "step": 10886 }, { "epoch": 0.8195110935471123, "grad_norm": 4.458874225616455, "learning_rate": 8.413401537631277e-06, "loss": 1.9159, "step": 10887 }, { "epoch": 0.8195863678277725, "grad_norm": 4.733869552612305, "learning_rate": 8.406634666694074e-06, "loss": 1.7131, "step": 10888 }, { "epoch": 0.8196616421084326, "grad_norm": 4.1965227127075195, "learning_rate": 8.399870268363113e-06, "loss": 1.5551, "step": 10889 }, { "epoch": 0.8197369163890927, "grad_norm": 6.501962661743164, "learning_rate": 8.393108343040484e-06, "loss": 1.7853, "step": 10890 }, { "epoch": 0.8198121906697529, "grad_norm": 5.464043617248535, "learning_rate": 8.386348891128198e-06, "loss": 2.0379, "step": 10891 }, { "epoch": 0.8198874649504131, "grad_norm": 6.3169074058532715, "learning_rate": 8.379591913028051e-06, "loss": 1.7009, "step": 10892 }, { "epoch": 0.8199627392310732, "grad_norm": 5.8659539222717285, "learning_rate": 8.372837409141744e-06, "loss": 1.9881, "step": 10893 }, { "epoch": 0.8200380135117333, "grad_norm": 4.222545623779297, "learning_rate": 8.366085379870814e-06, "loss": 1.3388, "step": 10894 }, { "epoch": 0.8201132877923936, "grad_norm": 4.818695068359375, "learning_rate": 8.359335825616655e-06, "loss": 1.7907, "step": 10895 }, { "epoch": 0.8201885620730537, "grad_norm": 3.8797008991241455, "learning_rate": 8.352588746780488e-06, "loss": 1.8336, "step": 10896 }, { "epoch": 0.8202638363537138, "grad_norm": 3.721681833267212, "learning_rate": 8.345844143763437e-06, "loss": 1.7255, "step": 10897 }, { "epoch": 0.820339110634374, "grad_norm": 5.012353897094727, "learning_rate": 8.339102016966415e-06, "loss": 1.3947, "step": 10898 }, { "epoch": 0.8204143849150342, "grad_norm": 5.829016208648682, "learning_rate": 8.332362366790241e-06, "loss": 1.7644, "step": 10899 }, { "epoch": 0.8204896591956943, "grad_norm": 5.4228386878967285, "learning_rate": 8.325625193635583e-06, "loss": 1.6821, "step": 10900 }, { "epoch": 0.8205649334763545, "grad_norm": 6.020086765289307, "learning_rate": 8.318890497902914e-06, "loss": 1.695, "step": 10901 }, { "epoch": 0.8206402077570146, "grad_norm": 6.0146684646606445, "learning_rate": 8.31215827999262e-06, "loss": 1.7345, "step": 10902 }, { "epoch": 0.8207154820376747, "grad_norm": 4.474878787994385, "learning_rate": 8.305428540304889e-06, "loss": 1.6151, "step": 10903 }, { "epoch": 0.820790756318335, "grad_norm": 3.7589683532714844, "learning_rate": 8.298701279239795e-06, "loss": 1.858, "step": 10904 }, { "epoch": 0.8208660305989951, "grad_norm": 3.725609540939331, "learning_rate": 8.291976497197263e-06, "loss": 1.7318, "step": 10905 }, { "epoch": 0.8209413048796552, "grad_norm": 4.537235260009766, "learning_rate": 8.285254194577057e-06, "loss": 1.9291, "step": 10906 }, { "epoch": 0.8210165791603155, "grad_norm": 6.215237617492676, "learning_rate": 8.27853437177879e-06, "loss": 1.6112, "step": 10907 }, { "epoch": 0.8210918534409756, "grad_norm": 4.277639865875244, "learning_rate": 8.27181702920195e-06, "loss": 2.1441, "step": 10908 }, { "epoch": 0.8211671277216357, "grad_norm": 4.801058769226074, "learning_rate": 8.265102167245848e-06, "loss": 1.8406, "step": 10909 }, { "epoch": 0.8212424020022958, "grad_norm": 5.4379730224609375, "learning_rate": 8.258389786309677e-06, "loss": 1.6644, "step": 10910 }, { "epoch": 0.821317676282956, "grad_norm": 6.305517196655273, "learning_rate": 8.251679886792457e-06, "loss": 1.7889, "step": 10911 }, { "epoch": 0.8213929505636162, "grad_norm": 5.066317081451416, "learning_rate": 8.244972469093092e-06, "loss": 1.8237, "step": 10912 }, { "epoch": 0.8214682248442763, "grad_norm": 5.019262790679932, "learning_rate": 8.23826753361029e-06, "loss": 1.7514, "step": 10913 }, { "epoch": 0.8215434991249365, "grad_norm": 4.752792835235596, "learning_rate": 8.231565080742654e-06, "loss": 1.728, "step": 10914 }, { "epoch": 0.8216187734055966, "grad_norm": 3.877133846282959, "learning_rate": 8.22486511088863e-06, "loss": 1.7324, "step": 10915 }, { "epoch": 0.8216940476862568, "grad_norm": 4.577157974243164, "learning_rate": 8.218167624446504e-06, "loss": 2.0657, "step": 10916 }, { "epoch": 0.821769321966917, "grad_norm": 4.082424163818359, "learning_rate": 8.211472621814441e-06, "loss": 1.5422, "step": 10917 }, { "epoch": 0.8218445962475771, "grad_norm": 5.90043306350708, "learning_rate": 8.204780103390414e-06, "loss": 2.0231, "step": 10918 }, { "epoch": 0.8219198705282372, "grad_norm": 5.470618724822998, "learning_rate": 8.198090069572301e-06, "loss": 2.0521, "step": 10919 }, { "epoch": 0.8219951448088975, "grad_norm": 4.029650688171387, "learning_rate": 8.191402520757773e-06, "loss": 1.7807, "step": 10920 }, { "epoch": 0.8220704190895576, "grad_norm": 6.506187915802002, "learning_rate": 8.184717457344421e-06, "loss": 1.631, "step": 10921 }, { "epoch": 0.8221456933702177, "grad_norm": 4.254798412322998, "learning_rate": 8.178034879729618e-06, "loss": 1.3766, "step": 10922 }, { "epoch": 0.8222209676508779, "grad_norm": 5.289538383483887, "learning_rate": 8.171354788310659e-06, "loss": 2.1839, "step": 10923 }, { "epoch": 0.822296241931538, "grad_norm": 4.185739040374756, "learning_rate": 8.16467718348462e-06, "loss": 2.0434, "step": 10924 }, { "epoch": 0.8223715162121982, "grad_norm": 4.849399089813232, "learning_rate": 8.158002065648485e-06, "loss": 1.956, "step": 10925 }, { "epoch": 0.8224467904928584, "grad_norm": 7.248867034912109, "learning_rate": 8.151329435199068e-06, "loss": 2.0818, "step": 10926 }, { "epoch": 0.8225220647735185, "grad_norm": 4.065545082092285, "learning_rate": 8.144659292533052e-06, "loss": 1.5531, "step": 10927 }, { "epoch": 0.8225973390541786, "grad_norm": 4.054635047912598, "learning_rate": 8.137991638046932e-06, "loss": 1.9219, "step": 10928 }, { "epoch": 0.8226726133348388, "grad_norm": 5.3163676261901855, "learning_rate": 8.131326472137107e-06, "loss": 1.6865, "step": 10929 }, { "epoch": 0.822747887615499, "grad_norm": 4.921208381652832, "learning_rate": 8.124663795199771e-06, "loss": 2.0788, "step": 10930 }, { "epoch": 0.8228231618961591, "grad_norm": 5.136446475982666, "learning_rate": 8.118003607631025e-06, "loss": 1.5267, "step": 10931 }, { "epoch": 0.8228984361768192, "grad_norm": 5.207831382751465, "learning_rate": 8.111345909826801e-06, "loss": 1.9983, "step": 10932 }, { "epoch": 0.8229737104574795, "grad_norm": 4.303314685821533, "learning_rate": 8.10469070218286e-06, "loss": 1.9486, "step": 10933 }, { "epoch": 0.8230489847381396, "grad_norm": 4.740650653839111, "learning_rate": 8.098037985094858e-06, "loss": 2.025, "step": 10934 }, { "epoch": 0.8231242590187997, "grad_norm": 4.641570091247559, "learning_rate": 8.091387758958253e-06, "loss": 1.952, "step": 10935 }, { "epoch": 0.8231995332994599, "grad_norm": 4.878793239593506, "learning_rate": 8.084740024168408e-06, "loss": 1.987, "step": 10936 }, { "epoch": 0.8232748075801201, "grad_norm": 5.910962104797363, "learning_rate": 8.078094781120494e-06, "loss": 1.8295, "step": 10937 }, { "epoch": 0.8233500818607802, "grad_norm": 4.991444110870361, "learning_rate": 8.071452030209553e-06, "loss": 1.8612, "step": 10938 }, { "epoch": 0.8234253561414404, "grad_norm": 4.691476821899414, "learning_rate": 8.064811771830483e-06, "loss": 2.094, "step": 10939 }, { "epoch": 0.8235006304221005, "grad_norm": 7.601766586303711, "learning_rate": 8.058174006378039e-06, "loss": 1.766, "step": 10940 }, { "epoch": 0.8235759047027607, "grad_norm": 5.229360580444336, "learning_rate": 8.051538734246793e-06, "loss": 2.0167, "step": 10941 }, { "epoch": 0.8236511789834209, "grad_norm": 6.295788764953613, "learning_rate": 8.044905955831223e-06, "loss": 1.6874, "step": 10942 }, { "epoch": 0.823726453264081, "grad_norm": 4.330489158630371, "learning_rate": 8.038275671525592e-06, "loss": 1.8229, "step": 10943 }, { "epoch": 0.8238017275447411, "grad_norm": 5.644754409790039, "learning_rate": 8.031647881724086e-06, "loss": 1.9557, "step": 10944 }, { "epoch": 0.8238770018254014, "grad_norm": 5.599817276000977, "learning_rate": 8.025022586820679e-06, "loss": 1.8217, "step": 10945 }, { "epoch": 0.8239522761060615, "grad_norm": 4.577640533447266, "learning_rate": 8.018399787209258e-06, "loss": 1.6692, "step": 10946 }, { "epoch": 0.8240275503867216, "grad_norm": 7.905070781707764, "learning_rate": 8.011779483283494e-06, "loss": 2.8015, "step": 10947 }, { "epoch": 0.8241028246673817, "grad_norm": 6.149569034576416, "learning_rate": 8.005161675436961e-06, "loss": 1.7827, "step": 10948 }, { "epoch": 0.824178098948042, "grad_norm": 5.115812301635742, "learning_rate": 7.998546364063069e-06, "loss": 1.6647, "step": 10949 }, { "epoch": 0.8242533732287021, "grad_norm": 4.170004844665527, "learning_rate": 7.99193354955508e-06, "loss": 1.7517, "step": 10950 }, { "epoch": 0.8243286475093622, "grad_norm": 4.287388324737549, "learning_rate": 7.985323232306124e-06, "loss": 2.1085, "step": 10951 }, { "epoch": 0.8244039217900224, "grad_norm": 5.317685127258301, "learning_rate": 7.978715412709131e-06, "loss": 1.9699, "step": 10952 }, { "epoch": 0.8244791960706825, "grad_norm": 5.4601731300354, "learning_rate": 7.972110091156947e-06, "loss": 2.0166, "step": 10953 }, { "epoch": 0.8245544703513427, "grad_norm": 6.129608154296875, "learning_rate": 7.965507268042217e-06, "loss": 1.7415, "step": 10954 }, { "epoch": 0.8246297446320029, "grad_norm": 4.224732875823975, "learning_rate": 7.958906943757483e-06, "loss": 1.8813, "step": 10955 }, { "epoch": 0.824705018912663, "grad_norm": 3.9492104053497314, "learning_rate": 7.952309118695084e-06, "loss": 1.677, "step": 10956 }, { "epoch": 0.8247802931933231, "grad_norm": 4.593045234680176, "learning_rate": 7.945713793247273e-06, "loss": 1.5629, "step": 10957 }, { "epoch": 0.8248555674739834, "grad_norm": 5.2660064697265625, "learning_rate": 7.939120967806101e-06, "loss": 1.6015, "step": 10958 }, { "epoch": 0.8249308417546435, "grad_norm": 5.169642925262451, "learning_rate": 7.932530642763498e-06, "loss": 2.0367, "step": 10959 }, { "epoch": 0.8250061160353036, "grad_norm": 8.278311729431152, "learning_rate": 7.925942818511245e-06, "loss": 2.0329, "step": 10960 }, { "epoch": 0.8250813903159638, "grad_norm": 4.970489501953125, "learning_rate": 7.919357495440977e-06, "loss": 1.6282, "step": 10961 }, { "epoch": 0.825156664596624, "grad_norm": 3.97762131690979, "learning_rate": 7.912774673944157e-06, "loss": 1.7111, "step": 10962 }, { "epoch": 0.8252319388772841, "grad_norm": 4.463409423828125, "learning_rate": 7.906194354412117e-06, "loss": 2.1348, "step": 10963 }, { "epoch": 0.8253072131579443, "grad_norm": 7.818986415863037, "learning_rate": 7.89961653723605e-06, "loss": 1.9446, "step": 10964 }, { "epoch": 0.8253824874386044, "grad_norm": 5.592797756195068, "learning_rate": 7.893041222806975e-06, "loss": 1.8107, "step": 10965 }, { "epoch": 0.8254577617192645, "grad_norm": 6.234675884246826, "learning_rate": 7.886468411515784e-06, "loss": 1.8526, "step": 10966 }, { "epoch": 0.8255330359999248, "grad_norm": 5.361891269683838, "learning_rate": 7.879898103753208e-06, "loss": 1.5978, "step": 10967 }, { "epoch": 0.8256083102805849, "grad_norm": 3.8418986797332764, "learning_rate": 7.873330299909837e-06, "loss": 1.909, "step": 10968 }, { "epoch": 0.825683584561245, "grad_norm": 5.313836097717285, "learning_rate": 7.866765000376098e-06, "loss": 1.7203, "step": 10969 }, { "epoch": 0.8257588588419051, "grad_norm": 5.107827663421631, "learning_rate": 7.860202205542282e-06, "loss": 1.6603, "step": 10970 }, { "epoch": 0.8258341331225654, "grad_norm": 5.811594009399414, "learning_rate": 7.853641915798532e-06, "loss": 2.0315, "step": 10971 }, { "epoch": 0.8259094074032255, "grad_norm": 3.613593101501465, "learning_rate": 7.847084131534855e-06, "loss": 1.917, "step": 10972 }, { "epoch": 0.8259846816838856, "grad_norm": 5.179539203643799, "learning_rate": 7.84052885314106e-06, "loss": 1.7584, "step": 10973 }, { "epoch": 0.8260599559645458, "grad_norm": 4.686038970947266, "learning_rate": 7.833976081006873e-06, "loss": 2.0446, "step": 10974 }, { "epoch": 0.826135230245206, "grad_norm": 4.419909477233887, "learning_rate": 7.827425815521804e-06, "loss": 1.9806, "step": 10975 }, { "epoch": 0.8262105045258661, "grad_norm": 5.647611141204834, "learning_rate": 7.820878057075275e-06, "loss": 1.8023, "step": 10976 }, { "epoch": 0.8262857788065263, "grad_norm": 6.002238750457764, "learning_rate": 7.814332806056507e-06, "loss": 1.6235, "step": 10977 }, { "epoch": 0.8263610530871864, "grad_norm": 4.448552131652832, "learning_rate": 7.807790062854625e-06, "loss": 1.6161, "step": 10978 }, { "epoch": 0.8264363273678466, "grad_norm": 6.940880298614502, "learning_rate": 7.801249827858547e-06, "loss": 2.0792, "step": 10979 }, { "epoch": 0.8265116016485068, "grad_norm": 5.002687454223633, "learning_rate": 7.794712101457086e-06, "loss": 1.522, "step": 10980 }, { "epoch": 0.8265868759291669, "grad_norm": 4.470086097717285, "learning_rate": 7.788176884038889e-06, "loss": 1.8557, "step": 10981 }, { "epoch": 0.826662150209827, "grad_norm": 4.128772735595703, "learning_rate": 7.78164417599246e-06, "loss": 1.8809, "step": 10982 }, { "epoch": 0.8267374244904873, "grad_norm": 5.70338249206543, "learning_rate": 7.77511397770615e-06, "loss": 2.0448, "step": 10983 }, { "epoch": 0.8268126987711474, "grad_norm": 4.384184837341309, "learning_rate": 7.768586289568153e-06, "loss": 2.0023, "step": 10984 }, { "epoch": 0.8268879730518075, "grad_norm": 6.2365875244140625, "learning_rate": 7.762061111966534e-06, "loss": 1.9373, "step": 10985 }, { "epoch": 0.8269632473324677, "grad_norm": 4.188620567321777, "learning_rate": 7.755538445289179e-06, "loss": 1.9007, "step": 10986 }, { "epoch": 0.8270385216131279, "grad_norm": 4.218332767486572, "learning_rate": 7.74901828992386e-06, "loss": 1.8203, "step": 10987 }, { "epoch": 0.827113795893788, "grad_norm": 5.843628883361816, "learning_rate": 7.74250064625816e-06, "loss": 1.6626, "step": 10988 }, { "epoch": 0.8271890701744481, "grad_norm": 4.487967014312744, "learning_rate": 7.735985514679561e-06, "loss": 2.19, "step": 10989 }, { "epoch": 0.8272643444551083, "grad_norm": 4.762287139892578, "learning_rate": 7.729472895575341e-06, "loss": 1.7855, "step": 10990 }, { "epoch": 0.8273396187357684, "grad_norm": 5.629855155944824, "learning_rate": 7.722962789332676e-06, "loss": 1.6636, "step": 10991 }, { "epoch": 0.8274148930164286, "grad_norm": 4.606715679168701, "learning_rate": 7.716455196338563e-06, "loss": 2.0147, "step": 10992 }, { "epoch": 0.8274901672970888, "grad_norm": 5.693932056427002, "learning_rate": 7.709950116979858e-06, "loss": 1.7612, "step": 10993 }, { "epoch": 0.8275654415777489, "grad_norm": 5.193629264831543, "learning_rate": 7.703447551643278e-06, "loss": 1.734, "step": 10994 }, { "epoch": 0.827640715858409, "grad_norm": 5.53890323638916, "learning_rate": 7.69694750071538e-06, "loss": 1.971, "step": 10995 }, { "epoch": 0.8277159901390693, "grad_norm": 5.0209503173828125, "learning_rate": 7.69044996458258e-06, "loss": 1.7613, "step": 10996 }, { "epoch": 0.8277912644197294, "grad_norm": 5.361001491546631, "learning_rate": 7.683954943631116e-06, "loss": 1.7346, "step": 10997 }, { "epoch": 0.8278665387003895, "grad_norm": 4.7079081535339355, "learning_rate": 7.677462438247129e-06, "loss": 1.5784, "step": 10998 }, { "epoch": 0.8279418129810497, "grad_norm": 4.381944179534912, "learning_rate": 7.67097244881655e-06, "loss": 1.6478, "step": 10999 }, { "epoch": 0.8280170872617099, "grad_norm": 4.929294109344482, "learning_rate": 7.664484975725211e-06, "loss": 1.6248, "step": 11000 }, { "epoch": 0.82809236154237, "grad_norm": 6.1471028327941895, "learning_rate": 7.658000019358764e-06, "loss": 2.0462, "step": 11001 }, { "epoch": 0.8281676358230302, "grad_norm": 4.5387983322143555, "learning_rate": 7.651517580102724e-06, "loss": 1.6195, "step": 11002 }, { "epoch": 0.8282429101036903, "grad_norm": 4.5794572830200195, "learning_rate": 7.645037658342447e-06, "loss": 1.6948, "step": 11003 }, { "epoch": 0.8283181843843505, "grad_norm": 4.946218490600586, "learning_rate": 7.63856025446315e-06, "loss": 1.6512, "step": 11004 }, { "epoch": 0.8283934586650107, "grad_norm": 6.110923767089844, "learning_rate": 7.632085368849896e-06, "loss": 1.8117, "step": 11005 }, { "epoch": 0.8284687329456708, "grad_norm": 4.481554985046387, "learning_rate": 7.62561300188761e-06, "loss": 1.6948, "step": 11006 }, { "epoch": 0.8285440072263309, "grad_norm": 4.663935661315918, "learning_rate": 7.619143153961039e-06, "loss": 1.9378, "step": 11007 }, { "epoch": 0.828619281506991, "grad_norm": 6.7290730476379395, "learning_rate": 7.6126758254548125e-06, "loss": 1.6098, "step": 11008 }, { "epoch": 0.8286945557876513, "grad_norm": 4.337218284606934, "learning_rate": 7.6062110167533726e-06, "loss": 1.7352, "step": 11009 }, { "epoch": 0.8287698300683114, "grad_norm": 6.8893866539001465, "learning_rate": 7.599748728241057e-06, "loss": 1.385, "step": 11010 }, { "epoch": 0.8288451043489715, "grad_norm": 4.733375549316406, "learning_rate": 7.593288960302008e-06, "loss": 1.3784, "step": 11011 }, { "epoch": 0.8289203786296317, "grad_norm": 5.756259441375732, "learning_rate": 7.586831713320253e-06, "loss": 1.6168, "step": 11012 }, { "epoch": 0.8289956529102919, "grad_norm": 7.07257604598999, "learning_rate": 7.580376987679666e-06, "loss": 1.8568, "step": 11013 }, { "epoch": 0.829070927190952, "grad_norm": 5.0352253913879395, "learning_rate": 7.573924783763942e-06, "loss": 2.01, "step": 11014 }, { "epoch": 0.8291462014716122, "grad_norm": 4.676278591156006, "learning_rate": 7.567475101956656e-06, "loss": 1.771, "step": 11015 }, { "epoch": 0.8292214757522723, "grad_norm": 4.422346591949463, "learning_rate": 7.561027942641219e-06, "loss": 1.8488, "step": 11016 }, { "epoch": 0.8292967500329325, "grad_norm": 4.946375846862793, "learning_rate": 7.554583306200913e-06, "loss": 1.6746, "step": 11017 }, { "epoch": 0.8293720243135927, "grad_norm": 4.293478012084961, "learning_rate": 7.548141193018832e-06, "loss": 1.7332, "step": 11018 }, { "epoch": 0.8294472985942528, "grad_norm": 5.111875534057617, "learning_rate": 7.541701603477957e-06, "loss": 1.8061, "step": 11019 }, { "epoch": 0.8295225728749129, "grad_norm": 6.037639617919922, "learning_rate": 7.535264537961084e-06, "loss": 1.7114, "step": 11020 }, { "epoch": 0.8295978471555732, "grad_norm": 5.5868964195251465, "learning_rate": 7.5288299968509e-06, "loss": 1.9138, "step": 11021 }, { "epoch": 0.8296731214362333, "grad_norm": 4.353113651275635, "learning_rate": 7.5223979805299e-06, "loss": 1.993, "step": 11022 }, { "epoch": 0.8297483957168934, "grad_norm": 5.670248985290527, "learning_rate": 7.515968489380465e-06, "loss": 2.2826, "step": 11023 }, { "epoch": 0.8298236699975536, "grad_norm": 5.503589153289795, "learning_rate": 7.509541523784797e-06, "loss": 1.7227, "step": 11024 }, { "epoch": 0.8298989442782138, "grad_norm": 3.9664077758789062, "learning_rate": 7.503117084124961e-06, "loss": 1.675, "step": 11025 }, { "epoch": 0.8299742185588739, "grad_norm": 5.680034637451172, "learning_rate": 7.49669517078288e-06, "loss": 2.2477, "step": 11026 }, { "epoch": 0.830049492839534, "grad_norm": 4.639273166656494, "learning_rate": 7.4902757841403325e-06, "loss": 1.8315, "step": 11027 }, { "epoch": 0.8301247671201942, "grad_norm": 6.767391204833984, "learning_rate": 7.483858924578896e-06, "loss": 2.0925, "step": 11028 }, { "epoch": 0.8302000414008543, "grad_norm": 4.622908115386963, "learning_rate": 7.47744459248006e-06, "loss": 1.661, "step": 11029 }, { "epoch": 0.8302753156815145, "grad_norm": 5.5543131828308105, "learning_rate": 7.4710327882251355e-06, "loss": 1.7081, "step": 11030 }, { "epoch": 0.8303505899621747, "grad_norm": 5.449149131774902, "learning_rate": 7.464623512195279e-06, "loss": 2.1125, "step": 11031 }, { "epoch": 0.8304258642428348, "grad_norm": 5.358203411102295, "learning_rate": 7.458216764771514e-06, "loss": 1.7113, "step": 11032 }, { "epoch": 0.8305011385234949, "grad_norm": 5.866293907165527, "learning_rate": 7.4518125463346855e-06, "loss": 1.6806, "step": 11033 }, { "epoch": 0.8305764128041552, "grad_norm": 4.008992671966553, "learning_rate": 7.445410857265528e-06, "loss": 1.8011, "step": 11034 }, { "epoch": 0.8306516870848153, "grad_norm": 4.065059661865234, "learning_rate": 7.439011697944581e-06, "loss": 1.6445, "step": 11035 }, { "epoch": 0.8307269613654754, "grad_norm": 5.211691856384277, "learning_rate": 7.43261506875228e-06, "loss": 1.6852, "step": 11036 }, { "epoch": 0.8308022356461356, "grad_norm": 5.035432815551758, "learning_rate": 7.426220970068864e-06, "loss": 1.615, "step": 11037 }, { "epoch": 0.8308775099267958, "grad_norm": 4.664009094238281, "learning_rate": 7.419829402274453e-06, "loss": 1.9492, "step": 11038 }, { "epoch": 0.8309527842074559, "grad_norm": 3.64105224609375, "learning_rate": 7.413440365749002e-06, "loss": 1.8129, "step": 11039 }, { "epoch": 0.8310280584881161, "grad_norm": 6.603893280029297, "learning_rate": 7.407053860872343e-06, "loss": 1.7318, "step": 11040 }, { "epoch": 0.8311033327687762, "grad_norm": 6.286447525024414, "learning_rate": 7.4006698880241084e-06, "loss": 2.0356, "step": 11041 }, { "epoch": 0.8311786070494364, "grad_norm": 5.453951835632324, "learning_rate": 7.394288447583825e-06, "loss": 1.9739, "step": 11042 }, { "epoch": 0.8312538813300966, "grad_norm": 5.093417644500732, "learning_rate": 7.3879095399308375e-06, "loss": 1.6465, "step": 11043 }, { "epoch": 0.8313291556107567, "grad_norm": 5.27060604095459, "learning_rate": 7.381533165444355e-06, "loss": 2.0461, "step": 11044 }, { "epoch": 0.8314044298914168, "grad_norm": 6.195003986358643, "learning_rate": 7.375159324503456e-06, "loss": 1.876, "step": 11045 }, { "epoch": 0.8314797041720771, "grad_norm": 4.605326175689697, "learning_rate": 7.368788017487016e-06, "loss": 2.0004, "step": 11046 }, { "epoch": 0.8315549784527372, "grad_norm": 4.836153984069824, "learning_rate": 7.362419244773816e-06, "loss": 1.77, "step": 11047 }, { "epoch": 0.8316302527333973, "grad_norm": 11.919204711914062, "learning_rate": 7.356053006742442e-06, "loss": 1.4523, "step": 11048 }, { "epoch": 0.8317055270140574, "grad_norm": 4.372076034545898, "learning_rate": 7.3496893037713564e-06, "loss": 1.6074, "step": 11049 }, { "epoch": 0.8317808012947177, "grad_norm": 5.7756757736206055, "learning_rate": 7.343328136238869e-06, "loss": 1.1829, "step": 11050 }, { "epoch": 0.8318560755753778, "grad_norm": 6.489713668823242, "learning_rate": 7.336969504523133e-06, "loss": 2.4567, "step": 11051 }, { "epoch": 0.8319313498560379, "grad_norm": 3.8584771156311035, "learning_rate": 7.330613409002135e-06, "loss": 2.0335, "step": 11052 }, { "epoch": 0.8320066241366981, "grad_norm": 4.212832450866699, "learning_rate": 7.324259850053755e-06, "loss": 1.555, "step": 11053 }, { "epoch": 0.8320818984173582, "grad_norm": 4.950713634490967, "learning_rate": 7.317908828055659e-06, "loss": 1.8308, "step": 11054 }, { "epoch": 0.8321571726980184, "grad_norm": 5.218753814697266, "learning_rate": 7.311560343385432e-06, "loss": 1.705, "step": 11055 }, { "epoch": 0.8322324469786786, "grad_norm": 6.191812038421631, "learning_rate": 7.30521439642044e-06, "loss": 1.8691, "step": 11056 }, { "epoch": 0.8323077212593387, "grad_norm": 6.975505828857422, "learning_rate": 7.298870987537959e-06, "loss": 1.8965, "step": 11057 }, { "epoch": 0.8323829955399988, "grad_norm": 5.136867046356201, "learning_rate": 7.292530117115059e-06, "loss": 1.5467, "step": 11058 }, { "epoch": 0.8324582698206591, "grad_norm": 4.7672529220581055, "learning_rate": 7.286191785528707e-06, "loss": 1.9761, "step": 11059 }, { "epoch": 0.8325335441013192, "grad_norm": 5.822338104248047, "learning_rate": 7.2798559931556934e-06, "loss": 1.9192, "step": 11060 }, { "epoch": 0.8326088183819793, "grad_norm": 4.806952476501465, "learning_rate": 7.273522740372662e-06, "loss": 1.9103, "step": 11061 }, { "epoch": 0.8326840926626395, "grad_norm": 4.035099506378174, "learning_rate": 7.267192027556119e-06, "loss": 1.9447, "step": 11062 }, { "epoch": 0.8327593669432997, "grad_norm": 5.279050350189209, "learning_rate": 7.260863855082389e-06, "loss": 1.8112, "step": 11063 }, { "epoch": 0.8328346412239598, "grad_norm": 5.123414039611816, "learning_rate": 7.254538223327678e-06, "loss": 1.9104, "step": 11064 }, { "epoch": 0.83290991550462, "grad_norm": 3.678065538406372, "learning_rate": 7.248215132668007e-06, "loss": 2.034, "step": 11065 }, { "epoch": 0.8329851897852801, "grad_norm": 5.231009483337402, "learning_rate": 7.241894583479286e-06, "loss": 1.6622, "step": 11066 }, { "epoch": 0.8330604640659403, "grad_norm": 5.201895713806152, "learning_rate": 7.235576576137243e-06, "loss": 1.8051, "step": 11067 }, { "epoch": 0.8331357383466004, "grad_norm": 5.084416389465332, "learning_rate": 7.229261111017471e-06, "loss": 1.8061, "step": 11068 }, { "epoch": 0.8332110126272606, "grad_norm": 4.602647304534912, "learning_rate": 7.222948188495393e-06, "loss": 1.8797, "step": 11069 }, { "epoch": 0.8332862869079207, "grad_norm": 5.758812427520752, "learning_rate": 7.216637808946308e-06, "loss": 1.8662, "step": 11070 }, { "epoch": 0.8333615611885808, "grad_norm": 4.816676139831543, "learning_rate": 7.210329972745344e-06, "loss": 1.7012, "step": 11071 }, { "epoch": 0.8334368354692411, "grad_norm": 4.679792404174805, "learning_rate": 7.204024680267496e-06, "loss": 1.7483, "step": 11072 }, { "epoch": 0.8335121097499012, "grad_norm": 4.840357303619385, "learning_rate": 7.197721931887574e-06, "loss": 1.7046, "step": 11073 }, { "epoch": 0.8335873840305613, "grad_norm": 5.905903339385986, "learning_rate": 7.19142172798028e-06, "loss": 2.0841, "step": 11074 }, { "epoch": 0.8336626583112215, "grad_norm": 7.352158546447754, "learning_rate": 7.185124068920124e-06, "loss": 1.7296, "step": 11075 }, { "epoch": 0.8337379325918817, "grad_norm": 6.462066650390625, "learning_rate": 7.17882895508149e-06, "loss": 1.4067, "step": 11076 }, { "epoch": 0.8338132068725418, "grad_norm": 6.375122547149658, "learning_rate": 7.1725363868386185e-06, "loss": 2.0245, "step": 11077 }, { "epoch": 0.833888481153202, "grad_norm": 6.159372806549072, "learning_rate": 7.166246364565565e-06, "loss": 1.8356, "step": 11078 }, { "epoch": 0.8339637554338621, "grad_norm": 5.991606712341309, "learning_rate": 7.15995888863627e-06, "loss": 1.7666, "step": 11079 }, { "epoch": 0.8340390297145223, "grad_norm": 5.612191200256348, "learning_rate": 7.153673959424484e-06, "loss": 2.0731, "step": 11080 }, { "epoch": 0.8341143039951825, "grad_norm": 5.853646755218506, "learning_rate": 7.147391577303847e-06, "loss": 1.7331, "step": 11081 }, { "epoch": 0.8341895782758426, "grad_norm": 4.453214168548584, "learning_rate": 7.141111742647816e-06, "loss": 1.5959, "step": 11082 }, { "epoch": 0.8342648525565027, "grad_norm": 4.861266136169434, "learning_rate": 7.134834455829731e-06, "loss": 1.8731, "step": 11083 }, { "epoch": 0.834340126837163, "grad_norm": 4.7663893699646, "learning_rate": 7.1285597172227325e-06, "loss": 1.6521, "step": 11084 }, { "epoch": 0.8344154011178231, "grad_norm": 5.383310317993164, "learning_rate": 7.122287527199861e-06, "loss": 1.8253, "step": 11085 }, { "epoch": 0.8344906753984832, "grad_norm": 6.459989070892334, "learning_rate": 7.116017886133946e-06, "loss": 1.7457, "step": 11086 }, { "epoch": 0.8345659496791433, "grad_norm": 5.328629016876221, "learning_rate": 7.1097507943977405e-06, "loss": 1.7066, "step": 11087 }, { "epoch": 0.8346412239598036, "grad_norm": 3.785832405090332, "learning_rate": 7.103486252363767e-06, "loss": 1.8998, "step": 11088 }, { "epoch": 0.8347164982404637, "grad_norm": 6.3931450843811035, "learning_rate": 7.097224260404467e-06, "loss": 1.6242, "step": 11089 }, { "epoch": 0.8347917725211238, "grad_norm": 4.314263820648193, "learning_rate": 7.090964818892071e-06, "loss": 1.5718, "step": 11090 }, { "epoch": 0.834867046801784, "grad_norm": 6.2679762840271, "learning_rate": 7.084707928198703e-06, "loss": 1.7489, "step": 11091 }, { "epoch": 0.8349423210824441, "grad_norm": 5.2999491691589355, "learning_rate": 7.078453588696304e-06, "loss": 1.9958, "step": 11092 }, { "epoch": 0.8350175953631043, "grad_norm": 5.195150852203369, "learning_rate": 7.072201800756684e-06, "loss": 1.8579, "step": 11093 }, { "epoch": 0.8350928696437645, "grad_norm": 5.689398288726807, "learning_rate": 7.065952564751488e-06, "loss": 1.9673, "step": 11094 }, { "epoch": 0.8351681439244246, "grad_norm": 6.044033527374268, "learning_rate": 7.0597058810522255e-06, "loss": 2.3433, "step": 11095 }, { "epoch": 0.8352434182050847, "grad_norm": 4.774587154388428, "learning_rate": 7.053461750030249e-06, "loss": 2.0049, "step": 11096 }, { "epoch": 0.835318692485745, "grad_norm": 6.9197211265563965, "learning_rate": 7.047220172056734e-06, "loss": 2.149, "step": 11097 }, { "epoch": 0.8353939667664051, "grad_norm": 4.784257888793945, "learning_rate": 7.040981147502746e-06, "loss": 1.7605, "step": 11098 }, { "epoch": 0.8354692410470652, "grad_norm": 5.215122222900391, "learning_rate": 7.034744676739152e-06, "loss": 1.7546, "step": 11099 }, { "epoch": 0.8355445153277254, "grad_norm": 6.411661148071289, "learning_rate": 7.028510760136719e-06, "loss": 1.5137, "step": 11100 }, { "epoch": 0.8356197896083856, "grad_norm": 5.081946849822998, "learning_rate": 7.022279398066006e-06, "loss": 1.7903, "step": 11101 }, { "epoch": 0.8356950638890457, "grad_norm": 5.067341327667236, "learning_rate": 7.016050590897482e-06, "loss": 2.0615, "step": 11102 }, { "epoch": 0.8357703381697059, "grad_norm": 5.401487350463867, "learning_rate": 7.009824339001403e-06, "loss": 1.7912, "step": 11103 }, { "epoch": 0.835845612450366, "grad_norm": 4.524129867553711, "learning_rate": 7.0036006427479154e-06, "loss": 1.8348, "step": 11104 }, { "epoch": 0.8359208867310262, "grad_norm": 7.708333969116211, "learning_rate": 6.997379502507001e-06, "loss": 2.102, "step": 11105 }, { "epoch": 0.8359961610116863, "grad_norm": 4.23085355758667, "learning_rate": 6.991160918648493e-06, "loss": 1.7265, "step": 11106 }, { "epoch": 0.8360714352923465, "grad_norm": 4.6369218826293945, "learning_rate": 6.9849448915420555e-06, "loss": 1.9469, "step": 11107 }, { "epoch": 0.8361467095730066, "grad_norm": 4.357454776763916, "learning_rate": 6.9787314215572165e-06, "loss": 1.6868, "step": 11108 }, { "epoch": 0.8362219838536668, "grad_norm": 4.764465808868408, "learning_rate": 6.972520509063363e-06, "loss": 1.4145, "step": 11109 }, { "epoch": 0.836297258134327, "grad_norm": 5.528480529785156, "learning_rate": 6.966312154429699e-06, "loss": 1.5341, "step": 11110 }, { "epoch": 0.8363725324149871, "grad_norm": 4.671770095825195, "learning_rate": 6.9601063580253086e-06, "loss": 1.8078, "step": 11111 }, { "epoch": 0.8364478066956472, "grad_norm": 3.9453747272491455, "learning_rate": 6.953903120219091e-06, "loss": 1.7232, "step": 11112 }, { "epoch": 0.8365230809763075, "grad_norm": 6.011135101318359, "learning_rate": 6.947702441379828e-06, "loss": 2.4406, "step": 11113 }, { "epoch": 0.8365983552569676, "grad_norm": 5.105716705322266, "learning_rate": 6.9415043218761136e-06, "loss": 1.5811, "step": 11114 }, { "epoch": 0.8366736295376277, "grad_norm": 5.13422966003418, "learning_rate": 6.935308762076415e-06, "loss": 1.6372, "step": 11115 }, { "epoch": 0.8367489038182879, "grad_norm": 4.162962436676025, "learning_rate": 6.9291157623490475e-06, "loss": 1.6416, "step": 11116 }, { "epoch": 0.836824178098948, "grad_norm": 4.233205795288086, "learning_rate": 6.92292532306218e-06, "loss": 1.915, "step": 11117 }, { "epoch": 0.8368994523796082, "grad_norm": 4.262418746948242, "learning_rate": 6.916737444583782e-06, "loss": 1.5722, "step": 11118 }, { "epoch": 0.8369747266602684, "grad_norm": 3.917956590652466, "learning_rate": 6.9105521272817384e-06, "loss": 2.0511, "step": 11119 }, { "epoch": 0.8370500009409285, "grad_norm": 5.590387344360352, "learning_rate": 6.9043693715237165e-06, "loss": 1.7002, "step": 11120 }, { "epoch": 0.8371252752215886, "grad_norm": 4.6854705810546875, "learning_rate": 6.898189177677295e-06, "loss": 1.7841, "step": 11121 }, { "epoch": 0.8372005495022489, "grad_norm": 5.919164657592773, "learning_rate": 6.8920115461098445e-06, "loss": 1.6234, "step": 11122 }, { "epoch": 0.837275823782909, "grad_norm": 4.469350814819336, "learning_rate": 6.885836477188629e-06, "loss": 1.7272, "step": 11123 }, { "epoch": 0.8373510980635691, "grad_norm": 5.236168384552002, "learning_rate": 6.879663971280709e-06, "loss": 1.6134, "step": 11124 }, { "epoch": 0.8374263723442292, "grad_norm": 4.39715576171875, "learning_rate": 6.873494028753041e-06, "loss": 1.7281, "step": 11125 }, { "epoch": 0.8375016466248895, "grad_norm": 5.225582599639893, "learning_rate": 6.8673266499724166e-06, "loss": 1.7771, "step": 11126 }, { "epoch": 0.8375769209055496, "grad_norm": 4.675426006317139, "learning_rate": 6.861161835305452e-06, "loss": 1.6815, "step": 11127 }, { "epoch": 0.8376521951862097, "grad_norm": 4.589982032775879, "learning_rate": 6.854999585118654e-06, "loss": 1.4972, "step": 11128 }, { "epoch": 0.8377274694668699, "grad_norm": 4.061214447021484, "learning_rate": 6.8488398997783195e-06, "loss": 1.3671, "step": 11129 }, { "epoch": 0.83780274374753, "grad_norm": 4.747130870819092, "learning_rate": 6.842682779650655e-06, "loss": 1.83, "step": 11130 }, { "epoch": 0.8378780180281902, "grad_norm": 4.630528450012207, "learning_rate": 6.8365282251016515e-06, "loss": 1.6915, "step": 11131 }, { "epoch": 0.8379532923088504, "grad_norm": 3.776576280593872, "learning_rate": 6.830376236497205e-06, "loss": 1.7709, "step": 11132 }, { "epoch": 0.8380285665895105, "grad_norm": 4.731005668640137, "learning_rate": 6.824226814203017e-06, "loss": 1.6608, "step": 11133 }, { "epoch": 0.8381038408701706, "grad_norm": 4.95127010345459, "learning_rate": 6.8180799585846665e-06, "loss": 2.0121, "step": 11134 }, { "epoch": 0.8381791151508309, "grad_norm": 4.485501766204834, "learning_rate": 6.811935670007552e-06, "loss": 1.7871, "step": 11135 }, { "epoch": 0.838254389431491, "grad_norm": 4.852588653564453, "learning_rate": 6.805793948836941e-06, "loss": 1.819, "step": 11136 }, { "epoch": 0.8383296637121511, "grad_norm": 5.005697250366211, "learning_rate": 6.799654795437949e-06, "loss": 1.5672, "step": 11137 }, { "epoch": 0.8384049379928113, "grad_norm": 8.769268035888672, "learning_rate": 6.793518210175515e-06, "loss": 2.1549, "step": 11138 }, { "epoch": 0.8384802122734715, "grad_norm": 4.214446544647217, "learning_rate": 6.787384193414453e-06, "loss": 1.782, "step": 11139 }, { "epoch": 0.8385554865541316, "grad_norm": 4.881605625152588, "learning_rate": 6.781252745519417e-06, "loss": 1.8778, "step": 11140 }, { "epoch": 0.8386307608347918, "grad_norm": 5.593880653381348, "learning_rate": 6.775123866854888e-06, "loss": 2.1665, "step": 11141 }, { "epoch": 0.8387060351154519, "grad_norm": 5.857632637023926, "learning_rate": 6.768997557785217e-06, "loss": 1.9774, "step": 11142 }, { "epoch": 0.8387813093961121, "grad_norm": 6.197014808654785, "learning_rate": 6.762873818674609e-06, "loss": 2.3342, "step": 11143 }, { "epoch": 0.8388565836767723, "grad_norm": 4.749516487121582, "learning_rate": 6.756752649887082e-06, "loss": 1.6864, "step": 11144 }, { "epoch": 0.8389318579574324, "grad_norm": 4.441079139709473, "learning_rate": 6.750634051786542e-06, "loss": 1.7264, "step": 11145 }, { "epoch": 0.8390071322380925, "grad_norm": 5.646838665008545, "learning_rate": 6.744518024736696e-06, "loss": 2.0181, "step": 11146 }, { "epoch": 0.8390824065187527, "grad_norm": 4.400927543640137, "learning_rate": 6.738404569101153e-06, "loss": 1.5453, "step": 11147 }, { "epoch": 0.8391576807994129, "grad_norm": 4.460588455200195, "learning_rate": 6.7322936852433184e-06, "loss": 1.4863, "step": 11148 }, { "epoch": 0.839232955080073, "grad_norm": 5.296993732452393, "learning_rate": 6.726185373526473e-06, "loss": 2.2783, "step": 11149 }, { "epoch": 0.8393082293607331, "grad_norm": 5.389023303985596, "learning_rate": 6.720079634313742e-06, "loss": 1.4713, "step": 11150 }, { "epoch": 0.8393835036413934, "grad_norm": 4.712506294250488, "learning_rate": 6.713976467968103e-06, "loss": 1.7241, "step": 11151 }, { "epoch": 0.8394587779220535, "grad_norm": 5.460819244384766, "learning_rate": 6.707875874852348e-06, "loss": 1.6132, "step": 11152 }, { "epoch": 0.8395340522027136, "grad_norm": 4.264761447906494, "learning_rate": 6.701777855329167e-06, "loss": 1.9214, "step": 11153 }, { "epoch": 0.8396093264833738, "grad_norm": 4.11516809463501, "learning_rate": 6.695682409761045e-06, "loss": 2.101, "step": 11154 }, { "epoch": 0.839684600764034, "grad_norm": 5.3792009353637695, "learning_rate": 6.6895895385103605e-06, "loss": 1.5466, "step": 11155 }, { "epoch": 0.8397598750446941, "grad_norm": 7.123032569885254, "learning_rate": 6.683499241939295e-06, "loss": 1.7773, "step": 11156 }, { "epoch": 0.8398351493253543, "grad_norm": 5.164830207824707, "learning_rate": 6.67741152040991e-06, "loss": 1.8228, "step": 11157 }, { "epoch": 0.8399104236060144, "grad_norm": 4.144986629486084, "learning_rate": 6.6713263742841145e-06, "loss": 1.6297, "step": 11158 }, { "epoch": 0.8399856978866745, "grad_norm": 4.575332164764404, "learning_rate": 6.665243803923632e-06, "loss": 1.7586, "step": 11159 }, { "epoch": 0.8400609721673348, "grad_norm": 5.089232444763184, "learning_rate": 6.659163809690067e-06, "loss": 1.6265, "step": 11160 }, { "epoch": 0.8401362464479949, "grad_norm": 4.514784812927246, "learning_rate": 6.653086391944852e-06, "loss": 1.7338, "step": 11161 }, { "epoch": 0.840211520728655, "grad_norm": 4.259333610534668, "learning_rate": 6.647011551049287e-06, "loss": 1.4617, "step": 11162 }, { "epoch": 0.8402867950093152, "grad_norm": 5.080699443817139, "learning_rate": 6.640939287364478e-06, "loss": 1.6577, "step": 11163 }, { "epoch": 0.8403620692899754, "grad_norm": 5.670527458190918, "learning_rate": 6.634869601251426e-06, "loss": 2.3144, "step": 11164 }, { "epoch": 0.8404373435706355, "grad_norm": 3.8688299655914307, "learning_rate": 6.62880249307094e-06, "loss": 1.7761, "step": 11165 }, { "epoch": 0.8405126178512956, "grad_norm": 10.417957305908203, "learning_rate": 6.622737963183712e-06, "loss": 2.1792, "step": 11166 }, { "epoch": 0.8405878921319558, "grad_norm": 4.5511393547058105, "learning_rate": 6.6166760119502405e-06, "loss": 1.6349, "step": 11167 }, { "epoch": 0.840663166412616, "grad_norm": 4.164097309112549, "learning_rate": 6.6106166397309045e-06, "loss": 1.9282, "step": 11168 }, { "epoch": 0.8407384406932761, "grad_norm": 3.953265428543091, "learning_rate": 6.6045598468858996e-06, "loss": 1.6833, "step": 11169 }, { "epoch": 0.8408137149739363, "grad_norm": 3.1790060997009277, "learning_rate": 6.598505633775304e-06, "loss": 1.8434, "step": 11170 }, { "epoch": 0.8408889892545964, "grad_norm": 4.558568000793457, "learning_rate": 6.5924540007590076e-06, "loss": 1.6772, "step": 11171 }, { "epoch": 0.8409642635352566, "grad_norm": 5.258319854736328, "learning_rate": 6.5864049481967834e-06, "loss": 1.9443, "step": 11172 }, { "epoch": 0.8410395378159168, "grad_norm": 4.417283058166504, "learning_rate": 6.580358476448212e-06, "loss": 1.6938, "step": 11173 }, { "epoch": 0.8411148120965769, "grad_norm": 4.552297115325928, "learning_rate": 6.574314585872738e-06, "loss": 1.8327, "step": 11174 }, { "epoch": 0.841190086377237, "grad_norm": 4.643150329589844, "learning_rate": 6.5682732768296725e-06, "loss": 1.7858, "step": 11175 }, { "epoch": 0.8412653606578973, "grad_norm": 6.868575096130371, "learning_rate": 6.5622345496781315e-06, "loss": 1.7422, "step": 11176 }, { "epoch": 0.8413406349385574, "grad_norm": 4.626029968261719, "learning_rate": 6.556198404777119e-06, "loss": 2.2154, "step": 11177 }, { "epoch": 0.8414159092192175, "grad_norm": 6.260887622833252, "learning_rate": 6.550164842485445e-06, "loss": 1.6859, "step": 11178 }, { "epoch": 0.8414911834998777, "grad_norm": 4.105422496795654, "learning_rate": 6.544133863161811e-06, "loss": 1.4085, "step": 11179 }, { "epoch": 0.8415664577805378, "grad_norm": 4.183300495147705, "learning_rate": 6.538105467164718e-06, "loss": 1.9785, "step": 11180 }, { "epoch": 0.841641732061198, "grad_norm": 4.764847755432129, "learning_rate": 6.532079654852552e-06, "loss": 1.8358, "step": 11181 }, { "epoch": 0.8417170063418582, "grad_norm": 6.050087928771973, "learning_rate": 6.526056426583526e-06, "loss": 1.9587, "step": 11182 }, { "epoch": 0.8417922806225183, "grad_norm": 4.971504211425781, "learning_rate": 6.520035782715716e-06, "loss": 1.8499, "step": 11183 }, { "epoch": 0.8418675549031784, "grad_norm": 5.099761486053467, "learning_rate": 6.514017723607007e-06, "loss": 1.7452, "step": 11184 }, { "epoch": 0.8419428291838386, "grad_norm": 4.524901390075684, "learning_rate": 6.508002249615186e-06, "loss": 1.9179, "step": 11185 }, { "epoch": 0.8420181034644988, "grad_norm": 5.432777404785156, "learning_rate": 6.501989361097821e-06, "loss": 1.9629, "step": 11186 }, { "epoch": 0.8420933777451589, "grad_norm": 4.498457431793213, "learning_rate": 6.4959790584123934e-06, "loss": 1.8279, "step": 11187 }, { "epoch": 0.842168652025819, "grad_norm": 4.5996294021606445, "learning_rate": 6.4899713419161735e-06, "loss": 1.8645, "step": 11188 }, { "epoch": 0.8422439263064793, "grad_norm": 5.522895812988281, "learning_rate": 6.483966211966308e-06, "loss": 2.0191, "step": 11189 }, { "epoch": 0.8423192005871394, "grad_norm": 3.5606210231781006, "learning_rate": 6.477963668919806e-06, "loss": 2.0935, "step": 11190 }, { "epoch": 0.8423944748677995, "grad_norm": 5.094019889831543, "learning_rate": 6.471963713133472e-06, "loss": 1.7191, "step": 11191 }, { "epoch": 0.8424697491484597, "grad_norm": 4.548551559448242, "learning_rate": 6.465966344964014e-06, "loss": 1.4239, "step": 11192 }, { "epoch": 0.8425450234291199, "grad_norm": 4.219467639923096, "learning_rate": 6.459971564767925e-06, "loss": 1.7072, "step": 11193 }, { "epoch": 0.84262029770978, "grad_norm": 4.513721942901611, "learning_rate": 6.453979372901603e-06, "loss": 1.9566, "step": 11194 }, { "epoch": 0.8426955719904402, "grad_norm": 4.069916248321533, "learning_rate": 6.4479897697212574e-06, "loss": 1.8304, "step": 11195 }, { "epoch": 0.8427708462711003, "grad_norm": 4.504793167114258, "learning_rate": 6.442002755582966e-06, "loss": 1.9497, "step": 11196 }, { "epoch": 0.8428461205517604, "grad_norm": 3.9722812175750732, "learning_rate": 6.436018330842619e-06, "loss": 1.8806, "step": 11197 }, { "epoch": 0.8429213948324207, "grad_norm": 7.850261211395264, "learning_rate": 6.43003649585599e-06, "loss": 1.773, "step": 11198 }, { "epoch": 0.8429966691130808, "grad_norm": 5.102206707000732, "learning_rate": 6.424057250978671e-06, "loss": 1.7999, "step": 11199 }, { "epoch": 0.8430719433937409, "grad_norm": 6.055898189544678, "learning_rate": 6.418080596566123e-06, "loss": 1.8146, "step": 11200 }, { "epoch": 0.8431472176744012, "grad_norm": 4.280010223388672, "learning_rate": 6.4121065329736165e-06, "loss": 1.8588, "step": 11201 }, { "epoch": 0.8432224919550613, "grad_norm": 4.572033882141113, "learning_rate": 6.406135060556329e-06, "loss": 1.7214, "step": 11202 }, { "epoch": 0.8432977662357214, "grad_norm": 4.822880268096924, "learning_rate": 6.400166179669209e-06, "loss": 1.9036, "step": 11203 }, { "epoch": 0.8433730405163815, "grad_norm": 4.275100231170654, "learning_rate": 6.394199890667113e-06, "loss": 1.7458, "step": 11204 }, { "epoch": 0.8434483147970417, "grad_norm": 5.10707950592041, "learning_rate": 6.388236193904712e-06, "loss": 2.3163, "step": 11205 }, { "epoch": 0.8435235890777019, "grad_norm": 4.97098970413208, "learning_rate": 6.382275089736534e-06, "loss": 1.8306, "step": 11206 }, { "epoch": 0.843598863358362, "grad_norm": 4.013286590576172, "learning_rate": 6.376316578516955e-06, "loss": 1.76, "step": 11207 }, { "epoch": 0.8436741376390222, "grad_norm": 6.344820022583008, "learning_rate": 6.370360660600178e-06, "loss": 1.7801, "step": 11208 }, { "epoch": 0.8437494119196823, "grad_norm": 4.477084636688232, "learning_rate": 6.3644073363402836e-06, "loss": 1.8177, "step": 11209 }, { "epoch": 0.8438246862003425, "grad_norm": 5.514729976654053, "learning_rate": 6.3584566060911565e-06, "loss": 1.9845, "step": 11210 }, { "epoch": 0.8438999604810027, "grad_norm": 4.488874435424805, "learning_rate": 6.3525084702065754e-06, "loss": 1.9079, "step": 11211 }, { "epoch": 0.8439752347616628, "grad_norm": 5.180163383483887, "learning_rate": 6.346562929040112e-06, "loss": 2.2314, "step": 11212 }, { "epoch": 0.8440505090423229, "grad_norm": 4.341010570526123, "learning_rate": 6.340619982945239e-06, "loss": 1.7962, "step": 11213 }, { "epoch": 0.8441257833229832, "grad_norm": 6.545727252960205, "learning_rate": 6.334679632275226e-06, "loss": 1.8127, "step": 11214 }, { "epoch": 0.8442010576036433, "grad_norm": 4.947411060333252, "learning_rate": 6.328741877383221e-06, "loss": 1.8007, "step": 11215 }, { "epoch": 0.8442763318843034, "grad_norm": 5.583044052124023, "learning_rate": 6.322806718622204e-06, "loss": 1.8532, "step": 11216 }, { "epoch": 0.8443516061649636, "grad_norm": 5.236429691314697, "learning_rate": 6.3168741563450095e-06, "loss": 1.5682, "step": 11217 }, { "epoch": 0.8444268804456238, "grad_norm": 5.19541597366333, "learning_rate": 6.3109441909043e-06, "loss": 1.7171, "step": 11218 }, { "epoch": 0.8445021547262839, "grad_norm": 6.942011833190918, "learning_rate": 6.305016822652609e-06, "loss": 1.7679, "step": 11219 }, { "epoch": 0.8445774290069441, "grad_norm": 6.323000907897949, "learning_rate": 6.299092051942279e-06, "loss": 2.1287, "step": 11220 }, { "epoch": 0.8446527032876042, "grad_norm": 6.352884292602539, "learning_rate": 6.293169879125538e-06, "loss": 2.1497, "step": 11221 }, { "epoch": 0.8447279775682643, "grad_norm": 4.266534805297852, "learning_rate": 6.287250304554448e-06, "loss": 1.6091, "step": 11222 }, { "epoch": 0.8448032518489245, "grad_norm": 3.6926400661468506, "learning_rate": 6.281333328580896e-06, "loss": 1.6438, "step": 11223 }, { "epoch": 0.8448785261295847, "grad_norm": 6.845376491546631, "learning_rate": 6.275418951556639e-06, "loss": 1.5882, "step": 11224 }, { "epoch": 0.8449538004102448, "grad_norm": 5.468923568725586, "learning_rate": 6.2695071738332555e-06, "loss": 1.8688, "step": 11225 }, { "epoch": 0.8450290746909049, "grad_norm": 5.483686923980713, "learning_rate": 6.263597995762199e-06, "loss": 1.845, "step": 11226 }, { "epoch": 0.8451043489715652, "grad_norm": 5.1428608894348145, "learning_rate": 6.257691417694739e-06, "loss": 1.86, "step": 11227 }, { "epoch": 0.8451796232522253, "grad_norm": 4.612880229949951, "learning_rate": 6.251787439982032e-06, "loss": 1.8806, "step": 11228 }, { "epoch": 0.8452548975328854, "grad_norm": 4.612621307373047, "learning_rate": 6.245886062975021e-06, "loss": 1.5457, "step": 11229 }, { "epoch": 0.8453301718135456, "grad_norm": 3.6237878799438477, "learning_rate": 6.239987287024546e-06, "loss": 1.7081, "step": 11230 }, { "epoch": 0.8454054460942058, "grad_norm": 5.440907955169678, "learning_rate": 6.2340911124812605e-06, "loss": 1.894, "step": 11231 }, { "epoch": 0.8454807203748659, "grad_norm": 5.359150409698486, "learning_rate": 6.228197539695685e-06, "loss": 1.8042, "step": 11232 }, { "epoch": 0.8455559946555261, "grad_norm": 3.9303054809570312, "learning_rate": 6.222306569018166e-06, "loss": 1.6954, "step": 11233 }, { "epoch": 0.8456312689361862, "grad_norm": 4.7124223709106445, "learning_rate": 6.2164182007989124e-06, "loss": 1.9149, "step": 11234 }, { "epoch": 0.8457065432168464, "grad_norm": 4.2395243644714355, "learning_rate": 6.2105324353879615e-06, "loss": 1.5313, "step": 11235 }, { "epoch": 0.8457818174975066, "grad_norm": 4.236502647399902, "learning_rate": 6.20464927313521e-06, "loss": 1.8376, "step": 11236 }, { "epoch": 0.8458570917781667, "grad_norm": 4.608453273773193, "learning_rate": 6.198768714390396e-06, "loss": 2.225, "step": 11237 }, { "epoch": 0.8459323660588268, "grad_norm": 4.667847156524658, "learning_rate": 6.192890759503117e-06, "loss": 1.719, "step": 11238 }, { "epoch": 0.846007640339487, "grad_norm": 4.957977771759033, "learning_rate": 6.187015408822772e-06, "loss": 1.6268, "step": 11239 }, { "epoch": 0.8460829146201472, "grad_norm": 4.79469633102417, "learning_rate": 6.181142662698647e-06, "loss": 2.0776, "step": 11240 }, { "epoch": 0.8461581889008073, "grad_norm": 5.145556449890137, "learning_rate": 6.175272521479869e-06, "loss": 1.946, "step": 11241 }, { "epoch": 0.8462334631814675, "grad_norm": 5.155332088470459, "learning_rate": 6.169404985515381e-06, "loss": 1.7049, "step": 11242 }, { "epoch": 0.8463087374621276, "grad_norm": 5.052038192749023, "learning_rate": 6.163540055154016e-06, "loss": 1.7565, "step": 11243 }, { "epoch": 0.8463840117427878, "grad_norm": 4.200429916381836, "learning_rate": 6.1576777307444046e-06, "loss": 1.804, "step": 11244 }, { "epoch": 0.8464592860234479, "grad_norm": 4.319472789764404, "learning_rate": 6.151818012635064e-06, "loss": 1.4788, "step": 11245 }, { "epoch": 0.8465345603041081, "grad_norm": 6.487365245819092, "learning_rate": 6.145960901174314e-06, "loss": 1.7951, "step": 11246 }, { "epoch": 0.8466098345847682, "grad_norm": 4.510237216949463, "learning_rate": 6.140106396710371e-06, "loss": 2.2786, "step": 11247 }, { "epoch": 0.8466851088654284, "grad_norm": 6.785840034484863, "learning_rate": 6.134254499591247e-06, "loss": 2.1406, "step": 11248 }, { "epoch": 0.8467603831460886, "grad_norm": 5.295591354370117, "learning_rate": 6.128405210164823e-06, "loss": 2.1391, "step": 11249 }, { "epoch": 0.8468356574267487, "grad_norm": 4.7377238273620605, "learning_rate": 6.122558528778826e-06, "loss": 1.8891, "step": 11250 }, { "epoch": 0.8469109317074088, "grad_norm": 5.227242469787598, "learning_rate": 6.116714455780842e-06, "loss": 1.6686, "step": 11251 }, { "epoch": 0.8469862059880691, "grad_norm": 5.858652114868164, "learning_rate": 6.110872991518251e-06, "loss": 1.8096, "step": 11252 }, { "epoch": 0.8470614802687292, "grad_norm": 4.820150852203369, "learning_rate": 6.105034136338333e-06, "loss": 1.7638, "step": 11253 }, { "epoch": 0.8471367545493893, "grad_norm": 5.854217529296875, "learning_rate": 6.099197890588199e-06, "loss": 1.991, "step": 11254 }, { "epoch": 0.8472120288300495, "grad_norm": 6.970722198486328, "learning_rate": 6.093364254614775e-06, "loss": 1.8455, "step": 11255 }, { "epoch": 0.8472873031107097, "grad_norm": 5.369820594787598, "learning_rate": 6.087533228764869e-06, "loss": 1.7705, "step": 11256 }, { "epoch": 0.8473625773913698, "grad_norm": 5.172910213470459, "learning_rate": 6.081704813385109e-06, "loss": 1.6951, "step": 11257 }, { "epoch": 0.84743785167203, "grad_norm": 5.604563236236572, "learning_rate": 6.0758790088219885e-06, "loss": 2.1248, "step": 11258 }, { "epoch": 0.8475131259526901, "grad_norm": 4.990391731262207, "learning_rate": 6.070055815421816e-06, "loss": 1.9498, "step": 11259 }, { "epoch": 0.8475884002333502, "grad_norm": 5.144252777099609, "learning_rate": 6.064235233530779e-06, "loss": 1.5854, "step": 11260 }, { "epoch": 0.8476636745140105, "grad_norm": 3.9531991481781006, "learning_rate": 6.0584172634948925e-06, "loss": 1.6825, "step": 11261 }, { "epoch": 0.8477389487946706, "grad_norm": 7.152756690979004, "learning_rate": 6.052601905660027e-06, "loss": 1.9642, "step": 11262 }, { "epoch": 0.8478142230753307, "grad_norm": 6.138336181640625, "learning_rate": 6.0467891603718686e-06, "loss": 1.5943, "step": 11263 }, { "epoch": 0.8478894973559908, "grad_norm": 4.9320878982543945, "learning_rate": 6.04097902797599e-06, "loss": 1.5358, "step": 11264 }, { "epoch": 0.8479647716366511, "grad_norm": 4.430438995361328, "learning_rate": 6.035171508817766e-06, "loss": 2.0484, "step": 11265 }, { "epoch": 0.8480400459173112, "grad_norm": 4.702756881713867, "learning_rate": 6.029366603242453e-06, "loss": 1.8195, "step": 11266 }, { "epoch": 0.8481153201979713, "grad_norm": 4.260861873626709, "learning_rate": 6.02356431159512e-06, "loss": 1.5999, "step": 11267 }, { "epoch": 0.8481905944786315, "grad_norm": 4.201164722442627, "learning_rate": 6.017764634220719e-06, "loss": 1.9959, "step": 11268 }, { "epoch": 0.8482658687592917, "grad_norm": 6.723023414611816, "learning_rate": 6.011967571464e-06, "loss": 2.0883, "step": 11269 }, { "epoch": 0.8483411430399518, "grad_norm": 6.232792854309082, "learning_rate": 6.00617312366959e-06, "loss": 1.9358, "step": 11270 }, { "epoch": 0.848416417320612, "grad_norm": 3.7897911071777344, "learning_rate": 6.0003812911819615e-06, "loss": 1.8006, "step": 11271 }, { "epoch": 0.8484916916012721, "grad_norm": 6.220593452453613, "learning_rate": 5.994592074345412e-06, "loss": 2.0034, "step": 11272 }, { "epoch": 0.8485669658819323, "grad_norm": 5.0158538818359375, "learning_rate": 5.988805473504106e-06, "loss": 2.0759, "step": 11273 }, { "epoch": 0.8486422401625925, "grad_norm": 3.8611676692962646, "learning_rate": 5.9830214890020274e-06, "loss": 1.7533, "step": 11274 }, { "epoch": 0.8487175144432526, "grad_norm": 4.1529860496521, "learning_rate": 5.9772401211830306e-06, "loss": 1.8748, "step": 11275 }, { "epoch": 0.8487927887239127, "grad_norm": 5.566751956939697, "learning_rate": 5.971461370390779e-06, "loss": 1.7035, "step": 11276 }, { "epoch": 0.848868063004573, "grad_norm": 5.445955753326416, "learning_rate": 5.965685236968832e-06, "loss": 2.0433, "step": 11277 }, { "epoch": 0.8489433372852331, "grad_norm": 4.100853443145752, "learning_rate": 5.959911721260541e-06, "loss": 1.6811, "step": 11278 }, { "epoch": 0.8490186115658932, "grad_norm": 4.144077301025391, "learning_rate": 5.954140823609139e-06, "loss": 1.5215, "step": 11279 }, { "epoch": 0.8490938858465534, "grad_norm": 6.304539203643799, "learning_rate": 5.948372544357672e-06, "loss": 1.8958, "step": 11280 }, { "epoch": 0.8491691601272136, "grad_norm": 4.566933631896973, "learning_rate": 5.942606883849061e-06, "loss": 1.8817, "step": 11281 }, { "epoch": 0.8492444344078737, "grad_norm": 5.39998722076416, "learning_rate": 5.936843842426054e-06, "loss": 1.9203, "step": 11282 }, { "epoch": 0.8493197086885338, "grad_norm": 5.680767059326172, "learning_rate": 5.93108342043126e-06, "loss": 1.9378, "step": 11283 }, { "epoch": 0.849394982969194, "grad_norm": 4.6040730476379395, "learning_rate": 5.925325618207101e-06, "loss": 1.681, "step": 11284 }, { "epoch": 0.8494702572498541, "grad_norm": 6.809340000152588, "learning_rate": 5.919570436095878e-06, "loss": 2.0843, "step": 11285 }, { "epoch": 0.8495455315305143, "grad_norm": 8.677302360534668, "learning_rate": 5.9138178744397036e-06, "loss": 1.7938, "step": 11286 }, { "epoch": 0.8496208058111745, "grad_norm": 4.486725807189941, "learning_rate": 5.908067933580558e-06, "loss": 1.645, "step": 11287 }, { "epoch": 0.8496960800918346, "grad_norm": 5.2654266357421875, "learning_rate": 5.902320613860268e-06, "loss": 1.6082, "step": 11288 }, { "epoch": 0.8497713543724947, "grad_norm": 5.1449198722839355, "learning_rate": 5.896575915620478e-06, "loss": 1.4999, "step": 11289 }, { "epoch": 0.849846628653155, "grad_norm": 7.149578094482422, "learning_rate": 5.890833839202714e-06, "loss": 1.6823, "step": 11290 }, { "epoch": 0.8499219029338151, "grad_norm": 4.404765605926514, "learning_rate": 5.885094384948303e-06, "loss": 2.0962, "step": 11291 }, { "epoch": 0.8499971772144752, "grad_norm": 5.636532306671143, "learning_rate": 5.879357553198461e-06, "loss": 1.9787, "step": 11292 }, { "epoch": 0.8500724514951354, "grad_norm": 4.277993679046631, "learning_rate": 5.87362334429421e-06, "loss": 1.9387, "step": 11293 }, { "epoch": 0.8501477257757956, "grad_norm": 5.32755184173584, "learning_rate": 5.867891758576432e-06, "loss": 1.5929, "step": 11294 }, { "epoch": 0.8502230000564557, "grad_norm": 8.300172805786133, "learning_rate": 5.862162796385867e-06, "loss": 1.6336, "step": 11295 }, { "epoch": 0.8502982743371159, "grad_norm": 4.200565814971924, "learning_rate": 5.856436458063086e-06, "loss": 1.9825, "step": 11296 }, { "epoch": 0.850373548617776, "grad_norm": 6.907848358154297, "learning_rate": 5.850712743948483e-06, "loss": 1.7795, "step": 11297 }, { "epoch": 0.8504488228984362, "grad_norm": 5.160358905792236, "learning_rate": 5.844991654382337e-06, "loss": 1.7414, "step": 11298 }, { "epoch": 0.8505240971790964, "grad_norm": 4.649304389953613, "learning_rate": 5.839273189704736e-06, "loss": 2.4212, "step": 11299 }, { "epoch": 0.8505993714597565, "grad_norm": 6.801667213439941, "learning_rate": 5.833557350255642e-06, "loss": 1.4508, "step": 11300 }, { "epoch": 0.8506746457404166, "grad_norm": 4.885019779205322, "learning_rate": 5.827844136374827e-06, "loss": 1.8312, "step": 11301 }, { "epoch": 0.8507499200210767, "grad_norm": 4.672741413116455, "learning_rate": 5.822133548401931e-06, "loss": 2.0063, "step": 11302 }, { "epoch": 0.850825194301737, "grad_norm": 6.103072643280029, "learning_rate": 5.816425586676444e-06, "loss": 1.8144, "step": 11303 }, { "epoch": 0.8509004685823971, "grad_norm": 3.7380785942077637, "learning_rate": 5.810720251537671e-06, "loss": 1.5957, "step": 11304 }, { "epoch": 0.8509757428630572, "grad_norm": 5.434122562408447, "learning_rate": 5.8050175433247864e-06, "loss": 1.7337, "step": 11305 }, { "epoch": 0.8510510171437174, "grad_norm": 5.055976867675781, "learning_rate": 5.799317462376802e-06, "loss": 1.5506, "step": 11306 }, { "epoch": 0.8511262914243776, "grad_norm": 5.650340557098389, "learning_rate": 5.793620009032574e-06, "loss": 2.1537, "step": 11307 }, { "epoch": 0.8512015657050377, "grad_norm": 4.495242118835449, "learning_rate": 5.787925183630788e-06, "loss": 1.9838, "step": 11308 }, { "epoch": 0.8512768399856979, "grad_norm": 5.4922990798950195, "learning_rate": 5.782232986509995e-06, "loss": 1.8259, "step": 11309 }, { "epoch": 0.851352114266358, "grad_norm": 4.520008087158203, "learning_rate": 5.776543418008573e-06, "loss": 1.9088, "step": 11310 }, { "epoch": 0.8514273885470182, "grad_norm": 5.4600629806518555, "learning_rate": 5.770856478464759e-06, "loss": 1.7309, "step": 11311 }, { "epoch": 0.8515026628276784, "grad_norm": 6.564732551574707, "learning_rate": 5.765172168216609e-06, "loss": 1.6091, "step": 11312 }, { "epoch": 0.8515779371083385, "grad_norm": 3.8732523918151855, "learning_rate": 5.759490487602065e-06, "loss": 2.0157, "step": 11313 }, { "epoch": 0.8516532113889986, "grad_norm": 4.641762733459473, "learning_rate": 5.753811436958856e-06, "loss": 2.1006, "step": 11314 }, { "epoch": 0.8517284856696589, "grad_norm": 4.557394504547119, "learning_rate": 5.7481350166246004e-06, "loss": 1.7528, "step": 11315 }, { "epoch": 0.851803759950319, "grad_norm": 5.444219589233398, "learning_rate": 5.742461226936746e-06, "loss": 1.8976, "step": 11316 }, { "epoch": 0.8518790342309791, "grad_norm": 6.359411716461182, "learning_rate": 5.736790068232595e-06, "loss": 1.7496, "step": 11317 }, { "epoch": 0.8519543085116393, "grad_norm": 4.254822731018066, "learning_rate": 5.7311215408492605e-06, "loss": 2.0206, "step": 11318 }, { "epoch": 0.8520295827922995, "grad_norm": 4.232534885406494, "learning_rate": 5.725455645123723e-06, "loss": 1.8079, "step": 11319 }, { "epoch": 0.8521048570729596, "grad_norm": 5.990346908569336, "learning_rate": 5.719792381392824e-06, "loss": 1.6694, "step": 11320 }, { "epoch": 0.8521801313536198, "grad_norm": 4.427980422973633, "learning_rate": 5.714131749993201e-06, "loss": 2.0511, "step": 11321 }, { "epoch": 0.8522554056342799, "grad_norm": 3.8947792053222656, "learning_rate": 5.708473751261384e-06, "loss": 1.7736, "step": 11322 }, { "epoch": 0.85233067991494, "grad_norm": 4.89677619934082, "learning_rate": 5.7028183855337095e-06, "loss": 1.6667, "step": 11323 }, { "epoch": 0.8524059541956002, "grad_norm": 4.721692085266113, "learning_rate": 5.697165653146386e-06, "loss": 1.4774, "step": 11324 }, { "epoch": 0.8524812284762604, "grad_norm": 4.659456253051758, "learning_rate": 5.691515554435439e-06, "loss": 2.1724, "step": 11325 }, { "epoch": 0.8525565027569205, "grad_norm": 4.3916425704956055, "learning_rate": 5.685868089736757e-06, "loss": 1.8265, "step": 11326 }, { "epoch": 0.8526317770375806, "grad_norm": 4.282839298248291, "learning_rate": 5.680223259386064e-06, "loss": 1.5446, "step": 11327 }, { "epoch": 0.8527070513182409, "grad_norm": 5.252005577087402, "learning_rate": 5.6745810637189364e-06, "loss": 1.9379, "step": 11328 }, { "epoch": 0.852782325598901, "grad_norm": 6.562023162841797, "learning_rate": 5.668941503070773e-06, "loss": 1.5385, "step": 11329 }, { "epoch": 0.8528575998795611, "grad_norm": 5.468602657318115, "learning_rate": 5.663304577776851e-06, "loss": 1.9455, "step": 11330 }, { "epoch": 0.8529328741602213, "grad_norm": 4.0843825340271, "learning_rate": 5.657670288172246e-06, "loss": 1.9929, "step": 11331 }, { "epoch": 0.8530081484408815, "grad_norm": 5.78109884262085, "learning_rate": 5.652038634591916e-06, "loss": 2.1174, "step": 11332 }, { "epoch": 0.8530834227215416, "grad_norm": 4.657322406768799, "learning_rate": 5.64640961737063e-06, "loss": 1.6437, "step": 11333 }, { "epoch": 0.8531586970022018, "grad_norm": 4.304448127746582, "learning_rate": 5.640783236843028e-06, "loss": 1.8518, "step": 11334 }, { "epoch": 0.8532339712828619, "grad_norm": 4.677581787109375, "learning_rate": 5.635159493343595e-06, "loss": 1.711, "step": 11335 }, { "epoch": 0.8533092455635221, "grad_norm": 4.7286882400512695, "learning_rate": 5.629538387206617e-06, "loss": 2.1629, "step": 11336 }, { "epoch": 0.8533845198441823, "grad_norm": 5.954164505004883, "learning_rate": 5.6239199187662735e-06, "loss": 1.8109, "step": 11337 }, { "epoch": 0.8534597941248424, "grad_norm": 6.394717216491699, "learning_rate": 5.618304088356563e-06, "loss": 1.7403, "step": 11338 }, { "epoch": 0.8535350684055025, "grad_norm": 5.252198219299316, "learning_rate": 5.612690896311334e-06, "loss": 1.7397, "step": 11339 }, { "epoch": 0.8536103426861628, "grad_norm": 4.424313068389893, "learning_rate": 5.607080342964266e-06, "loss": 1.6129, "step": 11340 }, { "epoch": 0.8536856169668229, "grad_norm": 6.685618877410889, "learning_rate": 5.601472428648902e-06, "loss": 1.8551, "step": 11341 }, { "epoch": 0.853760891247483, "grad_norm": 4.491438865661621, "learning_rate": 5.595867153698597e-06, "loss": 1.9619, "step": 11342 }, { "epoch": 0.8538361655281431, "grad_norm": 4.7786407470703125, "learning_rate": 5.590264518446586e-06, "loss": 2.4309, "step": 11343 }, { "epoch": 0.8539114398088034, "grad_norm": 5.806628704071045, "learning_rate": 5.584664523225918e-06, "loss": 2.0155, "step": 11344 }, { "epoch": 0.8539867140894635, "grad_norm": 4.507737159729004, "learning_rate": 5.579067168369511e-06, "loss": 1.7316, "step": 11345 }, { "epoch": 0.8540619883701236, "grad_norm": 4.34588623046875, "learning_rate": 5.573472454210099e-06, "loss": 1.8259, "step": 11346 }, { "epoch": 0.8541372626507838, "grad_norm": 4.247511863708496, "learning_rate": 5.567880381080276e-06, "loss": 1.7818, "step": 11347 }, { "epoch": 0.8542125369314439, "grad_norm": 4.825319766998291, "learning_rate": 5.562290949312471e-06, "loss": 1.6259, "step": 11348 }, { "epoch": 0.8542878112121041, "grad_norm": 5.244551181793213, "learning_rate": 5.5567041592389624e-06, "loss": 1.9471, "step": 11349 }, { "epoch": 0.8543630854927643, "grad_norm": 5.681702613830566, "learning_rate": 5.551120011191868e-06, "loss": 2.111, "step": 11350 }, { "epoch": 0.8544383597734244, "grad_norm": 5.542675495147705, "learning_rate": 5.54553850550315e-06, "loss": 1.5149, "step": 11351 }, { "epoch": 0.8545136340540845, "grad_norm": 8.099947929382324, "learning_rate": 5.539959642504622e-06, "loss": 2.1872, "step": 11352 }, { "epoch": 0.8545889083347448, "grad_norm": 4.147285461425781, "learning_rate": 5.534383422527917e-06, "loss": 1.7073, "step": 11353 }, { "epoch": 0.8546641826154049, "grad_norm": 4.144649982452393, "learning_rate": 5.528809845904537e-06, "loss": 1.6317, "step": 11354 }, { "epoch": 0.854739456896065, "grad_norm": 4.827475070953369, "learning_rate": 5.523238912965806e-06, "loss": 1.8281, "step": 11355 }, { "epoch": 0.8548147311767252, "grad_norm": 4.3208723068237305, "learning_rate": 5.517670624042909e-06, "loss": 1.7259, "step": 11356 }, { "epoch": 0.8548900054573854, "grad_norm": 4.612565994262695, "learning_rate": 5.512104979466848e-06, "loss": 1.516, "step": 11357 }, { "epoch": 0.8549652797380455, "grad_norm": 4.594831466674805, "learning_rate": 5.506541979568508e-06, "loss": 1.946, "step": 11358 }, { "epoch": 0.8550405540187057, "grad_norm": 4.143504619598389, "learning_rate": 5.50098162467857e-06, "loss": 1.9511, "step": 11359 }, { "epoch": 0.8551158282993658, "grad_norm": 4.555916786193848, "learning_rate": 5.495423915127596e-06, "loss": 2.1596, "step": 11360 }, { "epoch": 0.855191102580026, "grad_norm": 4.5442023277282715, "learning_rate": 5.489868851245966e-06, "loss": 1.8068, "step": 11361 }, { "epoch": 0.8552663768606861, "grad_norm": 5.009327411651611, "learning_rate": 5.4843164333639344e-06, "loss": 1.8077, "step": 11362 }, { "epoch": 0.8553416511413463, "grad_norm": 4.815648555755615, "learning_rate": 5.478766661811552e-06, "loss": 1.7917, "step": 11363 }, { "epoch": 0.8554169254220064, "grad_norm": 5.050643444061279, "learning_rate": 5.47321953691875e-06, "loss": 1.7534, "step": 11364 }, { "epoch": 0.8554921997026665, "grad_norm": 6.382200241088867, "learning_rate": 5.467675059015276e-06, "loss": 1.7799, "step": 11365 }, { "epoch": 0.8555674739833268, "grad_norm": 3.927781343460083, "learning_rate": 5.462133228430749e-06, "loss": 1.6054, "step": 11366 }, { "epoch": 0.8556427482639869, "grad_norm": 3.700148344039917, "learning_rate": 5.456594045494612e-06, "loss": 1.6804, "step": 11367 }, { "epoch": 0.855718022544647, "grad_norm": 4.513267517089844, "learning_rate": 5.451057510536139e-06, "loss": 1.8871, "step": 11368 }, { "epoch": 0.8557932968253072, "grad_norm": 4.847709655761719, "learning_rate": 5.445523623884485e-06, "loss": 2.0879, "step": 11369 }, { "epoch": 0.8558685711059674, "grad_norm": 5.425947189331055, "learning_rate": 5.439992385868598e-06, "loss": 1.7721, "step": 11370 }, { "epoch": 0.8559438453866275, "grad_norm": 4.6961565017700195, "learning_rate": 5.434463796817307e-06, "loss": 1.4921, "step": 11371 }, { "epoch": 0.8560191196672877, "grad_norm": 6.164254188537598, "learning_rate": 5.428937857059274e-06, "loss": 1.6734, "step": 11372 }, { "epoch": 0.8560943939479478, "grad_norm": 4.206858158111572, "learning_rate": 5.423414566923002e-06, "loss": 1.8606, "step": 11373 }, { "epoch": 0.856169668228608, "grad_norm": 4.629970550537109, "learning_rate": 5.4178939267368226e-06, "loss": 1.6114, "step": 11374 }, { "epoch": 0.8562449425092682, "grad_norm": 4.035036087036133, "learning_rate": 5.4123759368289336e-06, "loss": 2.0318, "step": 11375 }, { "epoch": 0.8563202167899283, "grad_norm": 5.874871730804443, "learning_rate": 5.4068605975273525e-06, "loss": 1.8604, "step": 11376 }, { "epoch": 0.8563954910705884, "grad_norm": 5.744148254394531, "learning_rate": 5.401347909159971e-06, "loss": 1.9988, "step": 11377 }, { "epoch": 0.8564707653512487, "grad_norm": 4.31630802154541, "learning_rate": 5.395837872054471e-06, "loss": 1.7285, "step": 11378 }, { "epoch": 0.8565460396319088, "grad_norm": 5.325786590576172, "learning_rate": 5.390330486538442e-06, "loss": 1.5601, "step": 11379 }, { "epoch": 0.8566213139125689, "grad_norm": 6.213504791259766, "learning_rate": 5.384825752939254e-06, "loss": 2.3901, "step": 11380 }, { "epoch": 0.856696588193229, "grad_norm": 4.757960796356201, "learning_rate": 5.3793236715841574e-06, "loss": 1.8566, "step": 11381 }, { "epoch": 0.8567718624738893, "grad_norm": 3.7822248935699463, "learning_rate": 5.373824242800241e-06, "loss": 1.8802, "step": 11382 }, { "epoch": 0.8568471367545494, "grad_norm": 4.352264404296875, "learning_rate": 5.368327466914425e-06, "loss": 1.7107, "step": 11383 }, { "epoch": 0.8569224110352095, "grad_norm": 4.954226016998291, "learning_rate": 5.362833344253487e-06, "loss": 1.6309, "step": 11384 }, { "epoch": 0.8569976853158697, "grad_norm": 5.501974105834961, "learning_rate": 5.3573418751440175e-06, "loss": 1.8756, "step": 11385 }, { "epoch": 0.8570729595965298, "grad_norm": 5.184263229370117, "learning_rate": 5.351853059912493e-06, "loss": 2.1411, "step": 11386 }, { "epoch": 0.85714823387719, "grad_norm": 5.575963973999023, "learning_rate": 5.34636689888518e-06, "loss": 1.8215, "step": 11387 }, { "epoch": 0.8572235081578502, "grad_norm": 5.021780014038086, "learning_rate": 5.340883392388246e-06, "loss": 1.6331, "step": 11388 }, { "epoch": 0.8572987824385103, "grad_norm": 4.778122425079346, "learning_rate": 5.335402540747636e-06, "loss": 1.928, "step": 11389 }, { "epoch": 0.8573740567191704, "grad_norm": 5.680552959442139, "learning_rate": 5.3299243442892e-06, "loss": 1.6547, "step": 11390 }, { "epoch": 0.8574493309998307, "grad_norm": 5.697072982788086, "learning_rate": 5.324448803338577e-06, "loss": 2.1053, "step": 11391 }, { "epoch": 0.8575246052804908, "grad_norm": 4.456849575042725, "learning_rate": 5.318975918221292e-06, "loss": 1.4087, "step": 11392 }, { "epoch": 0.8575998795611509, "grad_norm": 4.067120552062988, "learning_rate": 5.313505689262688e-06, "loss": 1.7939, "step": 11393 }, { "epoch": 0.8576751538418111, "grad_norm": 5.4717912673950195, "learning_rate": 5.308038116787939e-06, "loss": 1.3898, "step": 11394 }, { "epoch": 0.8577504281224713, "grad_norm": 5.063018321990967, "learning_rate": 5.302573201122091e-06, "loss": 1.7387, "step": 11395 }, { "epoch": 0.8578257024031314, "grad_norm": 5.424300193786621, "learning_rate": 5.297110942590028e-06, "loss": 1.8743, "step": 11396 }, { "epoch": 0.8579009766837916, "grad_norm": 4.340017795562744, "learning_rate": 5.29165134151644e-06, "loss": 2.3701, "step": 11397 }, { "epoch": 0.8579762509644517, "grad_norm": 4.655369758605957, "learning_rate": 5.2861943982259025e-06, "loss": 1.802, "step": 11398 }, { "epoch": 0.8580515252451119, "grad_norm": 7.578344821929932, "learning_rate": 5.280740113042803e-06, "loss": 1.8941, "step": 11399 }, { "epoch": 0.858126799525772, "grad_norm": 7.623256683349609, "learning_rate": 5.275288486291391e-06, "loss": 2.1034, "step": 11400 }, { "epoch": 0.8582020738064322, "grad_norm": 5.581150054931641, "learning_rate": 5.2698395182957605e-06, "loss": 2.0165, "step": 11401 }, { "epoch": 0.8582773480870923, "grad_norm": 5.090108394622803, "learning_rate": 5.264393209379814e-06, "loss": 1.62, "step": 11402 }, { "epoch": 0.8583526223677524, "grad_norm": 5.204704761505127, "learning_rate": 5.258949559867338e-06, "loss": 1.8108, "step": 11403 }, { "epoch": 0.8584278966484127, "grad_norm": 4.71949577331543, "learning_rate": 5.253508570081928e-06, "loss": 1.6139, "step": 11404 }, { "epoch": 0.8585031709290728, "grad_norm": 4.059711456298828, "learning_rate": 5.2480702403470415e-06, "loss": 1.5545, "step": 11405 }, { "epoch": 0.8585784452097329, "grad_norm": 6.447992324829102, "learning_rate": 5.242634570985966e-06, "loss": 1.8447, "step": 11406 }, { "epoch": 0.8586537194903932, "grad_norm": 3.8485782146453857, "learning_rate": 5.237201562321858e-06, "loss": 1.9373, "step": 11407 }, { "epoch": 0.8587289937710533, "grad_norm": 4.185934066772461, "learning_rate": 5.231771214677667e-06, "loss": 1.8701, "step": 11408 }, { "epoch": 0.8588042680517134, "grad_norm": 4.902507305145264, "learning_rate": 5.226343528376226e-06, "loss": 1.6618, "step": 11409 }, { "epoch": 0.8588795423323736, "grad_norm": 5.240971565246582, "learning_rate": 5.220918503740191e-06, "loss": 1.8494, "step": 11410 }, { "epoch": 0.8589548166130337, "grad_norm": 5.346987724304199, "learning_rate": 5.215496141092075e-06, "loss": 1.7486, "step": 11411 }, { "epoch": 0.8590300908936939, "grad_norm": 3.6412837505340576, "learning_rate": 5.210076440754197e-06, "loss": 1.8034, "step": 11412 }, { "epoch": 0.8591053651743541, "grad_norm": 4.350774765014648, "learning_rate": 5.204659403048767e-06, "loss": 2.0478, "step": 11413 }, { "epoch": 0.8591806394550142, "grad_norm": 4.902470111846924, "learning_rate": 5.1992450282978e-06, "loss": 1.5889, "step": 11414 }, { "epoch": 0.8592559137356743, "grad_norm": 6.072124481201172, "learning_rate": 5.1938333168231656e-06, "loss": 2.1175, "step": 11415 }, { "epoch": 0.8593311880163346, "grad_norm": 6.366473197937012, "learning_rate": 5.188424268946573e-06, "loss": 1.83, "step": 11416 }, { "epoch": 0.8594064622969947, "grad_norm": 5.194305896759033, "learning_rate": 5.183017884989583e-06, "loss": 1.8335, "step": 11417 }, { "epoch": 0.8594817365776548, "grad_norm": 5.40255069732666, "learning_rate": 5.177614165273597e-06, "loss": 1.6381, "step": 11418 }, { "epoch": 0.859557010858315, "grad_norm": 4.915066719055176, "learning_rate": 5.172213110119823e-06, "loss": 1.8786, "step": 11419 }, { "epoch": 0.8596322851389752, "grad_norm": 5.29539680480957, "learning_rate": 5.166814719849372e-06, "loss": 1.9521, "step": 11420 }, { "epoch": 0.8597075594196353, "grad_norm": 5.285453796386719, "learning_rate": 5.161418994783129e-06, "loss": 1.9565, "step": 11421 }, { "epoch": 0.8597828337002954, "grad_norm": 3.980003595352173, "learning_rate": 5.156025935241881e-06, "loss": 1.9911, "step": 11422 }, { "epoch": 0.8598581079809556, "grad_norm": 4.498735427856445, "learning_rate": 5.150635541546211e-06, "loss": 1.8753, "step": 11423 }, { "epoch": 0.8599333822616158, "grad_norm": 7.84881067276001, "learning_rate": 5.145247814016579e-06, "loss": 1.8622, "step": 11424 }, { "epoch": 0.8600086565422759, "grad_norm": 3.8739640712738037, "learning_rate": 5.139862752973257e-06, "loss": 1.755, "step": 11425 }, { "epoch": 0.8600839308229361, "grad_norm": 5.518463134765625, "learning_rate": 5.134480358736371e-06, "loss": 1.9369, "step": 11426 }, { "epoch": 0.8601592051035962, "grad_norm": 5.194349765777588, "learning_rate": 5.129100631625894e-06, "loss": 1.683, "step": 11427 }, { "epoch": 0.8602344793842563, "grad_norm": 4.4260454177856445, "learning_rate": 5.123723571961647e-06, "loss": 2.4653, "step": 11428 }, { "epoch": 0.8603097536649166, "grad_norm": 4.79770040512085, "learning_rate": 5.118349180063259e-06, "loss": 2.081, "step": 11429 }, { "epoch": 0.8603850279455767, "grad_norm": 7.826600551605225, "learning_rate": 5.112977456250245e-06, "loss": 1.9509, "step": 11430 }, { "epoch": 0.8604603022262368, "grad_norm": 7.9065704345703125, "learning_rate": 5.107608400841913e-06, "loss": 1.742, "step": 11431 }, { "epoch": 0.860535576506897, "grad_norm": 5.645403861999512, "learning_rate": 5.102242014157449e-06, "loss": 1.8479, "step": 11432 }, { "epoch": 0.8606108507875572, "grad_norm": 5.59318208694458, "learning_rate": 5.096878296515883e-06, "loss": 1.784, "step": 11433 }, { "epoch": 0.8606861250682173, "grad_norm": 7.79551887512207, "learning_rate": 5.0915172482360556e-06, "loss": 1.5384, "step": 11434 }, { "epoch": 0.8607613993488775, "grad_norm": 4.59033727645874, "learning_rate": 5.086158869636676e-06, "loss": 1.9118, "step": 11435 }, { "epoch": 0.8608366736295376, "grad_norm": 5.209449291229248, "learning_rate": 5.080803161036268e-06, "loss": 1.8927, "step": 11436 }, { "epoch": 0.8609119479101978, "grad_norm": 5.686946868896484, "learning_rate": 5.075450122753228e-06, "loss": 1.931, "step": 11437 }, { "epoch": 0.860987222190858, "grad_norm": 4.581290245056152, "learning_rate": 5.070099755105772e-06, "loss": 1.5945, "step": 11438 }, { "epoch": 0.8610624964715181, "grad_norm": 7.103540420532227, "learning_rate": 5.064752058411975e-06, "loss": 1.6487, "step": 11439 }, { "epoch": 0.8611377707521782, "grad_norm": 5.881924152374268, "learning_rate": 5.059407032989733e-06, "loss": 1.9384, "step": 11440 }, { "epoch": 0.8612130450328384, "grad_norm": 3.439589500427246, "learning_rate": 5.054064679156795e-06, "loss": 1.9732, "step": 11441 }, { "epoch": 0.8612883193134986, "grad_norm": 5.381748199462891, "learning_rate": 5.0487249972307415e-06, "loss": 1.6726, "step": 11442 }, { "epoch": 0.8613635935941587, "grad_norm": 4.571863651275635, "learning_rate": 5.043387987529019e-06, "loss": 2.0467, "step": 11443 }, { "epoch": 0.8614388678748188, "grad_norm": 4.915619373321533, "learning_rate": 5.038053650368874e-06, "loss": 1.9863, "step": 11444 }, { "epoch": 0.8615141421554791, "grad_norm": 6.579681396484375, "learning_rate": 5.03272198606744e-06, "loss": 2.2532, "step": 11445 }, { "epoch": 0.8615894164361392, "grad_norm": 4.242094039916992, "learning_rate": 5.027392994941643e-06, "loss": 1.7283, "step": 11446 }, { "epoch": 0.8616646907167993, "grad_norm": 6.660259246826172, "learning_rate": 5.022066677308296e-06, "loss": 2.2605, "step": 11447 }, { "epoch": 0.8617399649974595, "grad_norm": 5.98316764831543, "learning_rate": 5.016743033484039e-06, "loss": 1.6845, "step": 11448 }, { "epoch": 0.8618152392781196, "grad_norm": 4.509366512298584, "learning_rate": 5.011422063785332e-06, "loss": 1.8656, "step": 11449 }, { "epoch": 0.8618905135587798, "grad_norm": 4.465482234954834, "learning_rate": 5.006103768528486e-06, "loss": 1.4003, "step": 11450 }, { "epoch": 0.86196578783944, "grad_norm": 5.4790778160095215, "learning_rate": 5.000788148029678e-06, "loss": 2.0433, "step": 11451 }, { "epoch": 0.8620410621201001, "grad_norm": 5.217287063598633, "learning_rate": 4.995475202604905e-06, "loss": 1.7413, "step": 11452 }, { "epoch": 0.8621163364007602, "grad_norm": 5.025666236877441, "learning_rate": 4.990164932569985e-06, "loss": 1.8915, "step": 11453 }, { "epoch": 0.8621916106814205, "grad_norm": 4.624127388000488, "learning_rate": 4.984857338240623e-06, "loss": 1.9549, "step": 11454 }, { "epoch": 0.8622668849620806, "grad_norm": 4.594157695770264, "learning_rate": 4.979552419932327e-06, "loss": 2.1406, "step": 11455 }, { "epoch": 0.8623421592427407, "grad_norm": 4.060484409332275, "learning_rate": 4.97425017796046e-06, "loss": 1.7687, "step": 11456 }, { "epoch": 0.862417433523401, "grad_norm": 6.634838104248047, "learning_rate": 4.9689506126402216e-06, "loss": 1.8449, "step": 11457 }, { "epoch": 0.8624927078040611, "grad_norm": 3.821725368499756, "learning_rate": 4.963653724286671e-06, "loss": 1.5767, "step": 11458 }, { "epoch": 0.8625679820847212, "grad_norm": 5.238875865936279, "learning_rate": 4.958359513214678e-06, "loss": 2.2654, "step": 11459 }, { "epoch": 0.8626432563653813, "grad_norm": 5.4520792961120605, "learning_rate": 4.953067979738968e-06, "loss": 1.9012, "step": 11460 }, { "epoch": 0.8627185306460415, "grad_norm": 4.650437831878662, "learning_rate": 4.947779124174112e-06, "loss": 1.7756, "step": 11461 }, { "epoch": 0.8627938049267017, "grad_norm": 7.86212158203125, "learning_rate": 4.942492946834526e-06, "loss": 2.2318, "step": 11462 }, { "epoch": 0.8628690792073618, "grad_norm": 5.373051166534424, "learning_rate": 4.937209448034441e-06, "loss": 1.4676, "step": 11463 }, { "epoch": 0.862944353488022, "grad_norm": 4.336888790130615, "learning_rate": 4.931928628087956e-06, "loss": 1.8026, "step": 11464 }, { "epoch": 0.8630196277686821, "grad_norm": 5.209778308868408, "learning_rate": 4.926650487309009e-06, "loss": 1.9573, "step": 11465 }, { "epoch": 0.8630949020493422, "grad_norm": 4.62621545791626, "learning_rate": 4.921375026011349e-06, "loss": 1.7024, "step": 11466 }, { "epoch": 0.8631701763300025, "grad_norm": 4.174376010894775, "learning_rate": 4.916102244508614e-06, "loss": 1.6201, "step": 11467 }, { "epoch": 0.8632454506106626, "grad_norm": 4.613985061645508, "learning_rate": 4.910832143114225e-06, "loss": 1.6349, "step": 11468 }, { "epoch": 0.8633207248913227, "grad_norm": 3.961477518081665, "learning_rate": 4.905564722141498e-06, "loss": 1.5436, "step": 11469 }, { "epoch": 0.863395999171983, "grad_norm": 4.334711074829102, "learning_rate": 4.900299981903556e-06, "loss": 1.7122, "step": 11470 }, { "epoch": 0.8634712734526431, "grad_norm": 4.807438373565674, "learning_rate": 4.895037922713369e-06, "loss": 1.7362, "step": 11471 }, { "epoch": 0.8635465477333032, "grad_norm": 4.9058637619018555, "learning_rate": 4.8897785448837555e-06, "loss": 1.7389, "step": 11472 }, { "epoch": 0.8636218220139634, "grad_norm": 4.043872356414795, "learning_rate": 4.884521848727386e-06, "loss": 1.8973, "step": 11473 }, { "epoch": 0.8636970962946235, "grad_norm": 4.52606725692749, "learning_rate": 4.879267834556728e-06, "loss": 1.734, "step": 11474 }, { "epoch": 0.8637723705752837, "grad_norm": 3.9975106716156006, "learning_rate": 4.874016502684142e-06, "loss": 1.7962, "step": 11475 }, { "epoch": 0.8638476448559439, "grad_norm": 5.700189590454102, "learning_rate": 4.868767853421785e-06, "loss": 1.9558, "step": 11476 }, { "epoch": 0.863922919136604, "grad_norm": 4.748110294342041, "learning_rate": 4.86352188708169e-06, "loss": 1.8304, "step": 11477 }, { "epoch": 0.8639981934172641, "grad_norm": 4.018179416656494, "learning_rate": 4.8582786039757025e-06, "loss": 1.8149, "step": 11478 }, { "epoch": 0.8640734676979243, "grad_norm": 4.373999118804932, "learning_rate": 4.8530380044155274e-06, "loss": 1.7972, "step": 11479 }, { "epoch": 0.8641487419785845, "grad_norm": 4.066504001617432, "learning_rate": 4.847800088712706e-06, "loss": 1.8754, "step": 11480 }, { "epoch": 0.8642240162592446, "grad_norm": 5.190664291381836, "learning_rate": 4.842564857178605e-06, "loss": 1.9336, "step": 11481 }, { "epoch": 0.8642992905399047, "grad_norm": 5.248288631439209, "learning_rate": 4.837332310124454e-06, "loss": 1.9199, "step": 11482 }, { "epoch": 0.864374564820565, "grad_norm": 4.19376277923584, "learning_rate": 4.832102447861309e-06, "loss": 2.1524, "step": 11483 }, { "epoch": 0.8644498391012251, "grad_norm": 4.536162376403809, "learning_rate": 4.826875270700082e-06, "loss": 1.7688, "step": 11484 }, { "epoch": 0.8645251133818852, "grad_norm": 3.9947783946990967, "learning_rate": 4.821650778951492e-06, "loss": 1.6149, "step": 11485 }, { "epoch": 0.8646003876625454, "grad_norm": 4.225605010986328, "learning_rate": 4.816428972926146e-06, "loss": 1.6091, "step": 11486 }, { "epoch": 0.8646756619432056, "grad_norm": 4.963068008422852, "learning_rate": 4.8112098529344395e-06, "loss": 1.8834, "step": 11487 }, { "epoch": 0.8647509362238657, "grad_norm": 4.862916946411133, "learning_rate": 4.805993419286658e-06, "loss": 1.7848, "step": 11488 }, { "epoch": 0.8648262105045259, "grad_norm": 4.686556816101074, "learning_rate": 4.800779672292882e-06, "loss": 1.7064, "step": 11489 }, { "epoch": 0.864901484785186, "grad_norm": 5.078429222106934, "learning_rate": 4.7955686122630685e-06, "loss": 1.9309, "step": 11490 }, { "epoch": 0.8649767590658461, "grad_norm": 5.973289966583252, "learning_rate": 4.790360239506986e-06, "loss": 1.6972, "step": 11491 }, { "epoch": 0.8650520333465064, "grad_norm": 4.8482441902160645, "learning_rate": 4.78515455433427e-06, "loss": 1.8212, "step": 11492 }, { "epoch": 0.8651273076271665, "grad_norm": 4.316984176635742, "learning_rate": 4.779951557054391e-06, "loss": 2.2551, "step": 11493 }, { "epoch": 0.8652025819078266, "grad_norm": 4.604041576385498, "learning_rate": 4.774751247976628e-06, "loss": 1.6312, "step": 11494 }, { "epoch": 0.8652778561884868, "grad_norm": 5.055167198181152, "learning_rate": 4.769553627410134e-06, "loss": 1.6159, "step": 11495 }, { "epoch": 0.865353130469147, "grad_norm": 4.917102813720703, "learning_rate": 4.7643586956639005e-06, "loss": 2.2331, "step": 11496 }, { "epoch": 0.8654284047498071, "grad_norm": 5.21144437789917, "learning_rate": 4.759166453046754e-06, "loss": 1.794, "step": 11497 }, { "epoch": 0.8655036790304672, "grad_norm": 4.193215847015381, "learning_rate": 4.753976899867346e-06, "loss": 1.9602, "step": 11498 }, { "epoch": 0.8655789533111274, "grad_norm": 5.544768333435059, "learning_rate": 4.748790036434192e-06, "loss": 1.9221, "step": 11499 }, { "epoch": 0.8656542275917876, "grad_norm": 4.593832969665527, "learning_rate": 4.7436058630556225e-06, "loss": 1.8846, "step": 11500 }, { "epoch": 0.8657295018724477, "grad_norm": 5.250650405883789, "learning_rate": 4.738424380039835e-06, "loss": 1.6778, "step": 11501 }, { "epoch": 0.8658047761531079, "grad_norm": 3.97278094291687, "learning_rate": 4.733245587694846e-06, "loss": 2.081, "step": 11502 }, { "epoch": 0.865880050433768, "grad_norm": 6.192994594573975, "learning_rate": 4.728069486328524e-06, "loss": 2.0425, "step": 11503 }, { "epoch": 0.8659553247144282, "grad_norm": 6.482472896575928, "learning_rate": 4.722896076248568e-06, "loss": 1.8384, "step": 11504 }, { "epoch": 0.8660305989950884, "grad_norm": 5.451891899108887, "learning_rate": 4.71772535776252e-06, "loss": 1.6352, "step": 11505 }, { "epoch": 0.8661058732757485, "grad_norm": 6.063283920288086, "learning_rate": 4.712557331177775e-06, "loss": 1.8351, "step": 11506 }, { "epoch": 0.8661811475564086, "grad_norm": 4.665142059326172, "learning_rate": 4.707391996801558e-06, "loss": 1.5667, "step": 11507 }, { "epoch": 0.8662564218370689, "grad_norm": 5.57370662689209, "learning_rate": 4.702229354940918e-06, "loss": 1.8064, "step": 11508 }, { "epoch": 0.866331696117729, "grad_norm": 4.212967872619629, "learning_rate": 4.697069405902782e-06, "loss": 1.4364, "step": 11509 }, { "epoch": 0.8664069703983891, "grad_norm": 4.478463172912598, "learning_rate": 4.691912149993866e-06, "loss": 1.4597, "step": 11510 }, { "epoch": 0.8664822446790493, "grad_norm": 4.530065059661865, "learning_rate": 4.686757587520774e-06, "loss": 2.0677, "step": 11511 }, { "epoch": 0.8665575189597094, "grad_norm": 4.536741733551025, "learning_rate": 4.681605718789933e-06, "loss": 1.4195, "step": 11512 }, { "epoch": 0.8666327932403696, "grad_norm": 3.8651647567749023, "learning_rate": 4.6764565441075935e-06, "loss": 1.8939, "step": 11513 }, { "epoch": 0.8667080675210298, "grad_norm": 6.274515628814697, "learning_rate": 4.671310063779871e-06, "loss": 1.7656, "step": 11514 }, { "epoch": 0.8667833418016899, "grad_norm": 4.5438232421875, "learning_rate": 4.666166278112688e-06, "loss": 1.6627, "step": 11515 }, { "epoch": 0.86685861608235, "grad_norm": 4.222365379333496, "learning_rate": 4.661025187411849e-06, "loss": 1.6147, "step": 11516 }, { "epoch": 0.8669338903630103, "grad_norm": 5.207612037658691, "learning_rate": 4.655886791982972e-06, "loss": 1.8663, "step": 11517 }, { "epoch": 0.8670091646436704, "grad_norm": 4.801716327667236, "learning_rate": 4.650751092131523e-06, "loss": 2.2603, "step": 11518 }, { "epoch": 0.8670844389243305, "grad_norm": 6.084791660308838, "learning_rate": 4.64561808816279e-06, "loss": 1.9834, "step": 11519 }, { "epoch": 0.8671597132049906, "grad_norm": 6.908560752868652, "learning_rate": 4.640487780381936e-06, "loss": 1.5051, "step": 11520 }, { "epoch": 0.8672349874856509, "grad_norm": 4.244696140289307, "learning_rate": 4.635360169093928e-06, "loss": 1.7514, "step": 11521 }, { "epoch": 0.867310261766311, "grad_norm": 3.900775194168091, "learning_rate": 4.6302352546035935e-06, "loss": 1.7779, "step": 11522 }, { "epoch": 0.8673855360469711, "grad_norm": 4.440598487854004, "learning_rate": 4.625113037215589e-06, "loss": 1.4278, "step": 11523 }, { "epoch": 0.8674608103276313, "grad_norm": 4.032225608825684, "learning_rate": 4.619993517234428e-06, "loss": 1.8447, "step": 11524 }, { "epoch": 0.8675360846082915, "grad_norm": 4.032618045806885, "learning_rate": 4.614876694964432e-06, "loss": 1.4955, "step": 11525 }, { "epoch": 0.8676113588889516, "grad_norm": 4.271254539489746, "learning_rate": 4.6097625707097914e-06, "loss": 1.787, "step": 11526 }, { "epoch": 0.8676866331696118, "grad_norm": 4.169753551483154, "learning_rate": 4.604651144774524e-06, "loss": 1.6766, "step": 11527 }, { "epoch": 0.8677619074502719, "grad_norm": 5.307674407958984, "learning_rate": 4.599542417462499e-06, "loss": 1.5426, "step": 11528 }, { "epoch": 0.867837181730932, "grad_norm": 4.8163981437683105, "learning_rate": 4.594436389077411e-06, "loss": 2.1584, "step": 11529 }, { "epoch": 0.8679124560115923, "grad_norm": 4.162590026855469, "learning_rate": 4.58933305992279e-06, "loss": 1.7731, "step": 11530 }, { "epoch": 0.8679877302922524, "grad_norm": 4.718050003051758, "learning_rate": 4.584232430302033e-06, "loss": 1.6502, "step": 11531 }, { "epoch": 0.8680630045729125, "grad_norm": 5.588866233825684, "learning_rate": 4.579134500518334e-06, "loss": 2.1532, "step": 11532 }, { "epoch": 0.8681382788535728, "grad_norm": 4.87261438369751, "learning_rate": 4.574039270874775e-06, "loss": 1.6802, "step": 11533 }, { "epoch": 0.8682135531342329, "grad_norm": 5.608423709869385, "learning_rate": 4.5689467416742234e-06, "loss": 1.72, "step": 11534 }, { "epoch": 0.868288827414893, "grad_norm": 6.913978576660156, "learning_rate": 4.5638569132194446e-06, "loss": 1.6945, "step": 11535 }, { "epoch": 0.8683641016955532, "grad_norm": 4.584637641906738, "learning_rate": 4.558769785812994e-06, "loss": 1.554, "step": 11536 }, { "epoch": 0.8684393759762133, "grad_norm": 4.676369667053223, "learning_rate": 4.553685359757287e-06, "loss": 1.8062, "step": 11537 }, { "epoch": 0.8685146502568735, "grad_norm": 4.393977165222168, "learning_rate": 4.548603635354592e-06, "loss": 1.9029, "step": 11538 }, { "epoch": 0.8685899245375336, "grad_norm": 5.103185653686523, "learning_rate": 4.543524612907002e-06, "loss": 1.9495, "step": 11539 }, { "epoch": 0.8686651988181938, "grad_norm": 4.820971488952637, "learning_rate": 4.538448292716441e-06, "loss": 1.6858, "step": 11540 }, { "epoch": 0.8687404730988539, "grad_norm": 5.37093448638916, "learning_rate": 4.533374675084689e-06, "loss": 1.9335, "step": 11541 }, { "epoch": 0.8688157473795141, "grad_norm": 5.1994123458862305, "learning_rate": 4.528303760313346e-06, "loss": 1.8861, "step": 11542 }, { "epoch": 0.8688910216601743, "grad_norm": 5.579268932342529, "learning_rate": 4.52323554870388e-06, "loss": 1.8159, "step": 11543 }, { "epoch": 0.8689662959408344, "grad_norm": 3.9115986824035645, "learning_rate": 4.518170040557568e-06, "loss": 1.7044, "step": 11544 }, { "epoch": 0.8690415702214945, "grad_norm": 5.134445667266846, "learning_rate": 4.51310723617554e-06, "loss": 1.8796, "step": 11545 }, { "epoch": 0.8691168445021548, "grad_norm": 4.854567527770996, "learning_rate": 4.508047135858778e-06, "loss": 2.0243, "step": 11546 }, { "epoch": 0.8691921187828149, "grad_norm": 5.1354217529296875, "learning_rate": 4.502989739908081e-06, "loss": 1.894, "step": 11547 }, { "epoch": 0.869267393063475, "grad_norm": 5.95161247253418, "learning_rate": 4.497935048624102e-06, "loss": 1.7065, "step": 11548 }, { "epoch": 0.8693426673441352, "grad_norm": 4.187385559082031, "learning_rate": 4.492883062307318e-06, "loss": 1.7254, "step": 11549 }, { "epoch": 0.8694179416247954, "grad_norm": 3.7051124572753906, "learning_rate": 4.4878337812580605e-06, "loss": 1.6645, "step": 11550 }, { "epoch": 0.8694932159054555, "grad_norm": 5.29319953918457, "learning_rate": 4.482787205776495e-06, "loss": 1.9425, "step": 11551 }, { "epoch": 0.8695684901861157, "grad_norm": 4.27440881729126, "learning_rate": 4.477743336162638e-06, "loss": 1.914, "step": 11552 }, { "epoch": 0.8696437644667758, "grad_norm": 4.368507385253906, "learning_rate": 4.472702172716309e-06, "loss": 1.684, "step": 11553 }, { "epoch": 0.869719038747436, "grad_norm": 6.391887187957764, "learning_rate": 4.467663715737214e-06, "loss": 1.9123, "step": 11554 }, { "epoch": 0.8697943130280962, "grad_norm": 4.533965110778809, "learning_rate": 4.462627965524851e-06, "loss": 2.1336, "step": 11555 }, { "epoch": 0.8698695873087563, "grad_norm": 3.8347065448760986, "learning_rate": 4.4575949223786105e-06, "loss": 2.0396, "step": 11556 }, { "epoch": 0.8699448615894164, "grad_norm": 4.205840587615967, "learning_rate": 4.452564586597663e-06, "loss": 1.5999, "step": 11557 }, { "epoch": 0.8700201358700765, "grad_norm": 5.164328098297119, "learning_rate": 4.447536958481069e-06, "loss": 1.7612, "step": 11558 }, { "epoch": 0.8700954101507368, "grad_norm": 3.815864324569702, "learning_rate": 4.44251203832769e-06, "loss": 2.0034, "step": 11559 }, { "epoch": 0.8701706844313969, "grad_norm": 6.352051734924316, "learning_rate": 4.437489826436253e-06, "loss": 2.1426, "step": 11560 }, { "epoch": 0.870245958712057, "grad_norm": 6.125393390655518, "learning_rate": 4.432470323105309e-06, "loss": 1.9815, "step": 11561 }, { "epoch": 0.8703212329927172, "grad_norm": 5.443470478057861, "learning_rate": 4.427453528633263e-06, "loss": 2.008, "step": 11562 }, { "epoch": 0.8703965072733774, "grad_norm": 3.6203298568725586, "learning_rate": 4.422439443318343e-06, "loss": 1.4455, "step": 11563 }, { "epoch": 0.8704717815540375, "grad_norm": 5.032252311706543, "learning_rate": 4.417428067458618e-06, "loss": 2.0595, "step": 11564 }, { "epoch": 0.8705470558346977, "grad_norm": 4.501786708831787, "learning_rate": 4.412419401352008e-06, "loss": 1.8541, "step": 11565 }, { "epoch": 0.8706223301153578, "grad_norm": 5.044890880584717, "learning_rate": 4.407413445296255e-06, "loss": 2.0446, "step": 11566 }, { "epoch": 0.870697604396018, "grad_norm": 4.555783271789551, "learning_rate": 4.402410199588958e-06, "loss": 1.5987, "step": 11567 }, { "epoch": 0.8707728786766782, "grad_norm": 7.961402893066406, "learning_rate": 4.3974096645275354e-06, "loss": 1.6441, "step": 11568 }, { "epoch": 0.8708481529573383, "grad_norm": 5.344577312469482, "learning_rate": 4.392411840409266e-06, "loss": 1.7703, "step": 11569 }, { "epoch": 0.8709234272379984, "grad_norm": 4.754013538360596, "learning_rate": 4.38741672753124e-06, "loss": 1.8351, "step": 11570 }, { "epoch": 0.8709987015186587, "grad_norm": 4.343361854553223, "learning_rate": 4.382424326190415e-06, "loss": 1.6238, "step": 11571 }, { "epoch": 0.8710739757993188, "grad_norm": 5.468206405639648, "learning_rate": 4.37743463668357e-06, "loss": 1.3445, "step": 11572 }, { "epoch": 0.8711492500799789, "grad_norm": 4.7761969566345215, "learning_rate": 4.372447659307338e-06, "loss": 2.1126, "step": 11573 }, { "epoch": 0.8712245243606391, "grad_norm": 4.152520179748535, "learning_rate": 4.367463394358168e-06, "loss": 1.3699, "step": 11574 }, { "epoch": 0.8712997986412993, "grad_norm": 4.990994453430176, "learning_rate": 4.362481842132371e-06, "loss": 1.9946, "step": 11575 }, { "epoch": 0.8713750729219594, "grad_norm": 4.660702705383301, "learning_rate": 4.357503002926072e-06, "loss": 1.7523, "step": 11576 }, { "epoch": 0.8714503472026195, "grad_norm": 5.045409202575684, "learning_rate": 4.352526877035257e-06, "loss": 1.6273, "step": 11577 }, { "epoch": 0.8715256214832797, "grad_norm": 5.219690322875977, "learning_rate": 4.347553464755749e-06, "loss": 1.8787, "step": 11578 }, { "epoch": 0.8716008957639398, "grad_norm": 5.692169666290283, "learning_rate": 4.342582766383185e-06, "loss": 1.8663, "step": 11579 }, { "epoch": 0.8716761700446, "grad_norm": 4.974844932556152, "learning_rate": 4.337614782213079e-06, "loss": 1.8248, "step": 11580 }, { "epoch": 0.8717514443252602, "grad_norm": 4.979865074157715, "learning_rate": 4.332649512540748e-06, "loss": 1.5178, "step": 11581 }, { "epoch": 0.8718267186059203, "grad_norm": 5.0891618728637695, "learning_rate": 4.32768695766137e-06, "loss": 2.0021, "step": 11582 }, { "epoch": 0.8719019928865804, "grad_norm": 3.6198441982269287, "learning_rate": 4.322727117869951e-06, "loss": 1.9911, "step": 11583 }, { "epoch": 0.8719772671672407, "grad_norm": 4.341318130493164, "learning_rate": 4.317769993461351e-06, "loss": 1.866, "step": 11584 }, { "epoch": 0.8720525414479008, "grad_norm": 4.057556629180908, "learning_rate": 4.31281558473024e-06, "loss": 1.9603, "step": 11585 }, { "epoch": 0.8721278157285609, "grad_norm": 6.607398986816406, "learning_rate": 4.307863891971164e-06, "loss": 2.0442, "step": 11586 }, { "epoch": 0.8722030900092211, "grad_norm": 4.788666725158691, "learning_rate": 4.302914915478462e-06, "loss": 1.7816, "step": 11587 }, { "epoch": 0.8722783642898813, "grad_norm": 6.239824295043945, "learning_rate": 4.297968655546353e-06, "loss": 1.7223, "step": 11588 }, { "epoch": 0.8723536385705414, "grad_norm": 3.6766207218170166, "learning_rate": 4.293025112468868e-06, "loss": 1.7178, "step": 11589 }, { "epoch": 0.8724289128512016, "grad_norm": 4.75899600982666, "learning_rate": 4.288084286539906e-06, "loss": 1.6665, "step": 11590 }, { "epoch": 0.8725041871318617, "grad_norm": 6.134271144866943, "learning_rate": 4.283146178053155e-06, "loss": 1.7782, "step": 11591 }, { "epoch": 0.8725794614125219, "grad_norm": 6.320107936859131, "learning_rate": 4.278210787302189e-06, "loss": 1.3949, "step": 11592 }, { "epoch": 0.8726547356931821, "grad_norm": 4.9732666015625, "learning_rate": 4.273278114580403e-06, "loss": 1.6789, "step": 11593 }, { "epoch": 0.8727300099738422, "grad_norm": 6.660370349884033, "learning_rate": 4.268348160181035e-06, "loss": 1.9689, "step": 11594 }, { "epoch": 0.8728052842545023, "grad_norm": 5.151939392089844, "learning_rate": 4.263420924397143e-06, "loss": 1.7288, "step": 11595 }, { "epoch": 0.8728805585351624, "grad_norm": 5.486466884613037, "learning_rate": 4.2584964075216446e-06, "loss": 1.9685, "step": 11596 }, { "epoch": 0.8729558328158227, "grad_norm": 4.641103744506836, "learning_rate": 4.2535746098473e-06, "loss": 1.6891, "step": 11597 }, { "epoch": 0.8730311070964828, "grad_norm": 4.811590194702148, "learning_rate": 4.248655531666668e-06, "loss": 1.8336, "step": 11598 }, { "epoch": 0.8731063813771429, "grad_norm": 6.013032913208008, "learning_rate": 4.2437391732721985e-06, "loss": 1.8566, "step": 11599 }, { "epoch": 0.8731816556578031, "grad_norm": 4.535085201263428, "learning_rate": 4.238825534956142e-06, "loss": 1.8832, "step": 11600 }, { "epoch": 0.8732569299384633, "grad_norm": 4.858318328857422, "learning_rate": 4.233914617010609e-06, "loss": 1.9169, "step": 11601 }, { "epoch": 0.8733322042191234, "grad_norm": 5.1384196281433105, "learning_rate": 4.229006419727527e-06, "loss": 2.1884, "step": 11602 }, { "epoch": 0.8734074784997836, "grad_norm": 4.979616641998291, "learning_rate": 4.22410094339869e-06, "loss": 1.7776, "step": 11603 }, { "epoch": 0.8734827527804437, "grad_norm": 5.56610107421875, "learning_rate": 4.2191981883156985e-06, "loss": 1.7658, "step": 11604 }, { "epoch": 0.8735580270611039, "grad_norm": 5.433693885803223, "learning_rate": 4.214298154770013e-06, "loss": 1.7263, "step": 11605 }, { "epoch": 0.8736333013417641, "grad_norm": 5.809007167816162, "learning_rate": 4.209400843052924e-06, "loss": 1.6306, "step": 11606 }, { "epoch": 0.8737085756224242, "grad_norm": 5.423648834228516, "learning_rate": 4.204506253455582e-06, "loss": 1.7637, "step": 11607 }, { "epoch": 0.8737838499030843, "grad_norm": 5.1037116050720215, "learning_rate": 4.1996143862689294e-06, "loss": 1.7978, "step": 11608 }, { "epoch": 0.8738591241837446, "grad_norm": 4.331618785858154, "learning_rate": 4.194725241783792e-06, "loss": 1.4007, "step": 11609 }, { "epoch": 0.8739343984644047, "grad_norm": 3.9037506580352783, "learning_rate": 4.189838820290809e-06, "loss": 1.6225, "step": 11610 }, { "epoch": 0.8740096727450648, "grad_norm": 5.359700679779053, "learning_rate": 4.184955122080458e-06, "loss": 1.9444, "step": 11611 }, { "epoch": 0.874084947025725, "grad_norm": 5.916167259216309, "learning_rate": 4.180074147443081e-06, "loss": 1.8808, "step": 11612 }, { "epoch": 0.8741602213063852, "grad_norm": 6.919978141784668, "learning_rate": 4.17519589666881e-06, "loss": 1.8984, "step": 11613 }, { "epoch": 0.8742354955870453, "grad_norm": 3.9990856647491455, "learning_rate": 4.170320370047665e-06, "loss": 1.5025, "step": 11614 }, { "epoch": 0.8743107698677055, "grad_norm": 6.4516825675964355, "learning_rate": 4.1654475678694686e-06, "loss": 2.0728, "step": 11615 }, { "epoch": 0.8743860441483656, "grad_norm": 5.90434455871582, "learning_rate": 4.160577490423895e-06, "loss": 1.8552, "step": 11616 }, { "epoch": 0.8744613184290257, "grad_norm": 4.695637226104736, "learning_rate": 4.155710138000468e-06, "loss": 1.7793, "step": 11617 }, { "epoch": 0.8745365927096859, "grad_norm": 3.9613237380981445, "learning_rate": 4.1508455108885344e-06, "loss": 1.7337, "step": 11618 }, { "epoch": 0.8746118669903461, "grad_norm": 5.0093302726745605, "learning_rate": 4.145983609377274e-06, "loss": 1.5641, "step": 11619 }, { "epoch": 0.8746871412710062, "grad_norm": 4.1677703857421875, "learning_rate": 4.141124433755722e-06, "loss": 1.8011, "step": 11620 }, { "epoch": 0.8747624155516663, "grad_norm": 5.50277853012085, "learning_rate": 4.136267984312736e-06, "loss": 1.8674, "step": 11621 }, { "epoch": 0.8748376898323266, "grad_norm": 7.7771806716918945, "learning_rate": 4.131414261337024e-06, "loss": 1.9557, "step": 11622 }, { "epoch": 0.8749129641129867, "grad_norm": 4.732153415679932, "learning_rate": 4.126563265117117e-06, "loss": 1.8455, "step": 11623 }, { "epoch": 0.8749882383936468, "grad_norm": 6.435847759246826, "learning_rate": 4.1217149959414e-06, "loss": 1.7414, "step": 11624 }, { "epoch": 0.875063512674307, "grad_norm": 6.931395530700684, "learning_rate": 4.116869454098093e-06, "loss": 2.16, "step": 11625 }, { "epoch": 0.8751387869549672, "grad_norm": 5.370662689208984, "learning_rate": 4.112026639875233e-06, "loss": 1.6863, "step": 11626 }, { "epoch": 0.8752140612356273, "grad_norm": 4.384397983551025, "learning_rate": 4.1071865535607266e-06, "loss": 1.4211, "step": 11627 }, { "epoch": 0.8752893355162875, "grad_norm": 6.004904747009277, "learning_rate": 4.1023491954422955e-06, "loss": 1.6357, "step": 11628 }, { "epoch": 0.8753646097969476, "grad_norm": 4.047789096832275, "learning_rate": 4.097514565807514e-06, "loss": 1.6432, "step": 11629 }, { "epoch": 0.8754398840776078, "grad_norm": 4.4809160232543945, "learning_rate": 4.092682664943781e-06, "loss": 1.8955, "step": 11630 }, { "epoch": 0.875515158358268, "grad_norm": 4.3151068687438965, "learning_rate": 4.087853493138344e-06, "loss": 1.6515, "step": 11631 }, { "epoch": 0.8755904326389281, "grad_norm": 7.531031608581543, "learning_rate": 4.083027050678279e-06, "loss": 1.5924, "step": 11632 }, { "epoch": 0.8756657069195882, "grad_norm": 6.302493572235107, "learning_rate": 4.078203337850506e-06, "loss": 1.8163, "step": 11633 }, { "epoch": 0.8757409812002485, "grad_norm": 4.595805644989014, "learning_rate": 4.0733823549417736e-06, "loss": 1.6336, "step": 11634 }, { "epoch": 0.8758162554809086, "grad_norm": 5.001889705657959, "learning_rate": 4.068564102238692e-06, "loss": 2.0329, "step": 11635 }, { "epoch": 0.8758915297615687, "grad_norm": 5.550800323486328, "learning_rate": 4.063748580027676e-06, "loss": 2.2104, "step": 11636 }, { "epoch": 0.8759668040422288, "grad_norm": 5.672146320343018, "learning_rate": 4.0589357885949975e-06, "loss": 1.8105, "step": 11637 }, { "epoch": 0.876042078322889, "grad_norm": 3.606351852416992, "learning_rate": 4.054125728226765e-06, "loss": 1.7368, "step": 11638 }, { "epoch": 0.8761173526035492, "grad_norm": 7.833452224731445, "learning_rate": 4.049318399208935e-06, "loss": 1.764, "step": 11639 }, { "epoch": 0.8761926268842093, "grad_norm": 5.139779567718506, "learning_rate": 4.044513801827271e-06, "loss": 2.6393, "step": 11640 }, { "epoch": 0.8762679011648695, "grad_norm": 4.088352680206299, "learning_rate": 4.039711936367402e-06, "loss": 1.6665, "step": 11641 }, { "epoch": 0.8763431754455296, "grad_norm": 5.271029472351074, "learning_rate": 4.034912803114793e-06, "loss": 1.7512, "step": 11642 }, { "epoch": 0.8764184497261898, "grad_norm": 4.135016441345215, "learning_rate": 4.030116402354717e-06, "loss": 2.3249, "step": 11643 }, { "epoch": 0.87649372400685, "grad_norm": 5.018148422241211, "learning_rate": 4.025322734372328e-06, "loss": 1.6255, "step": 11644 }, { "epoch": 0.8765689982875101, "grad_norm": 5.522651195526123, "learning_rate": 4.020531799452576e-06, "loss": 1.7589, "step": 11645 }, { "epoch": 0.8766442725681702, "grad_norm": 5.385191440582275, "learning_rate": 4.015743597880289e-06, "loss": 1.9545, "step": 11646 }, { "epoch": 0.8767195468488305, "grad_norm": 4.508188724517822, "learning_rate": 4.010958129940096e-06, "loss": 1.5308, "step": 11647 }, { "epoch": 0.8767948211294906, "grad_norm": 5.951359272003174, "learning_rate": 4.006175395916489e-06, "loss": 1.5051, "step": 11648 }, { "epoch": 0.8768700954101507, "grad_norm": 4.846992015838623, "learning_rate": 4.001395396093777e-06, "loss": 1.6764, "step": 11649 }, { "epoch": 0.8769453696908109, "grad_norm": 5.062412738800049, "learning_rate": 3.996618130756119e-06, "loss": 1.6578, "step": 11650 }, { "epoch": 0.8770206439714711, "grad_norm": 5.050487041473389, "learning_rate": 3.991843600187522e-06, "loss": 1.8284, "step": 11651 }, { "epoch": 0.8770959182521312, "grad_norm": 4.7077436447143555, "learning_rate": 3.987071804671816e-06, "loss": 1.6228, "step": 11652 }, { "epoch": 0.8771711925327914, "grad_norm": 5.870832920074463, "learning_rate": 3.982302744492655e-06, "loss": 1.8833, "step": 11653 }, { "epoch": 0.8772464668134515, "grad_norm": 4.387657165527344, "learning_rate": 3.97753641993357e-06, "loss": 1.7787, "step": 11654 }, { "epoch": 0.8773217410941117, "grad_norm": 4.99583625793457, "learning_rate": 3.9727728312778765e-06, "loss": 1.8209, "step": 11655 }, { "epoch": 0.8773970153747718, "grad_norm": 4.308382034301758, "learning_rate": 3.9680119788087835e-06, "loss": 1.4889, "step": 11656 }, { "epoch": 0.877472289655432, "grad_norm": 5.0629496574401855, "learning_rate": 3.96325386280929e-06, "loss": 1.7658, "step": 11657 }, { "epoch": 0.8775475639360921, "grad_norm": 4.795827388763428, "learning_rate": 3.958498483562262e-06, "loss": 1.7653, "step": 11658 }, { "epoch": 0.8776228382167522, "grad_norm": 4.843021392822266, "learning_rate": 3.953745841350393e-06, "loss": 1.812, "step": 11659 }, { "epoch": 0.8776981124974125, "grad_norm": 5.62350606918335, "learning_rate": 3.948995936456207e-06, "loss": 1.726, "step": 11660 }, { "epoch": 0.8777733867780726, "grad_norm": 5.8269171714782715, "learning_rate": 3.9442487691620785e-06, "loss": 1.9534, "step": 11661 }, { "epoch": 0.8778486610587327, "grad_norm": 4.477621555328369, "learning_rate": 3.939504339750211e-06, "loss": 1.6203, "step": 11662 }, { "epoch": 0.877923935339393, "grad_norm": 6.591576099395752, "learning_rate": 3.93476264850266e-06, "loss": 1.8539, "step": 11663 }, { "epoch": 0.8779992096200531, "grad_norm": 4.527975082397461, "learning_rate": 3.9300236957012795e-06, "loss": 1.697, "step": 11664 }, { "epoch": 0.8780744839007132, "grad_norm": 7.748128890991211, "learning_rate": 3.92528748162781e-06, "loss": 1.6799, "step": 11665 }, { "epoch": 0.8781497581813734, "grad_norm": 4.162700176239014, "learning_rate": 3.920554006563792e-06, "loss": 1.7182, "step": 11666 }, { "epoch": 0.8782250324620335, "grad_norm": 4.6374831199646, "learning_rate": 3.915823270790625e-06, "loss": 2.174, "step": 11667 }, { "epoch": 0.8783003067426937, "grad_norm": 4.238149166107178, "learning_rate": 3.9110952745895325e-06, "loss": 1.6535, "step": 11668 }, { "epoch": 0.8783755810233539, "grad_norm": 4.5881147384643555, "learning_rate": 3.906370018241584e-06, "loss": 1.6171, "step": 11669 }, { "epoch": 0.878450855304014, "grad_norm": 5.682853698730469, "learning_rate": 3.9016475020276774e-06, "loss": 1.8192, "step": 11670 }, { "epoch": 0.8785261295846741, "grad_norm": 6.177950859069824, "learning_rate": 3.89692772622855e-06, "loss": 1.5586, "step": 11671 }, { "epoch": 0.8786014038653344, "grad_norm": 5.545617580413818, "learning_rate": 3.892210691124787e-06, "loss": 1.7595, "step": 11672 }, { "epoch": 0.8786766781459945, "grad_norm": 4.5383992195129395, "learning_rate": 3.887496396996803e-06, "loss": 1.6476, "step": 11673 }, { "epoch": 0.8787519524266546, "grad_norm": 4.272154808044434, "learning_rate": 3.8827848441248535e-06, "loss": 1.5321, "step": 11674 }, { "epoch": 0.8788272267073147, "grad_norm": 4.761125564575195, "learning_rate": 3.8780760327890074e-06, "loss": 2.1016, "step": 11675 }, { "epoch": 0.878902500987975, "grad_norm": 5.4325337409973145, "learning_rate": 3.873369963269219e-06, "loss": 1.7483, "step": 11676 }, { "epoch": 0.8789777752686351, "grad_norm": 5.6443939208984375, "learning_rate": 3.8686666358452196e-06, "loss": 1.5513, "step": 11677 }, { "epoch": 0.8790530495492952, "grad_norm": 4.852678298950195, "learning_rate": 3.863966050796635e-06, "loss": 1.9776, "step": 11678 }, { "epoch": 0.8791283238299554, "grad_norm": 4.189968109130859, "learning_rate": 3.859268208402877e-06, "loss": 1.8548, "step": 11679 }, { "epoch": 0.8792035981106155, "grad_norm": 4.60287618637085, "learning_rate": 3.854573108943244e-06, "loss": 1.9396, "step": 11680 }, { "epoch": 0.8792788723912757, "grad_norm": 5.255151748657227, "learning_rate": 3.849880752696822e-06, "loss": 2.1499, "step": 11681 }, { "epoch": 0.8793541466719359, "grad_norm": 5.558388710021973, "learning_rate": 3.8451911399425725e-06, "loss": 1.7875, "step": 11682 }, { "epoch": 0.879429420952596, "grad_norm": 4.829524517059326, "learning_rate": 3.840504270959272e-06, "loss": 1.8612, "step": 11683 }, { "epoch": 0.8795046952332561, "grad_norm": 4.546968936920166, "learning_rate": 3.835820146025559e-06, "loss": 1.5953, "step": 11684 }, { "epoch": 0.8795799695139164, "grad_norm": 6.303708553314209, "learning_rate": 3.831138765419867e-06, "loss": 2.1797, "step": 11685 }, { "epoch": 0.8796552437945765, "grad_norm": 4.823291301727295, "learning_rate": 3.8264601294205114e-06, "loss": 2.0228, "step": 11686 }, { "epoch": 0.8797305180752366, "grad_norm": 4.09678316116333, "learning_rate": 3.821784238305609e-06, "loss": 1.8344, "step": 11687 }, { "epoch": 0.8798057923558968, "grad_norm": 5.928163051605225, "learning_rate": 3.817111092353138e-06, "loss": 1.7816, "step": 11688 }, { "epoch": 0.879881066636557, "grad_norm": 4.956056118011475, "learning_rate": 3.8124406918408962e-06, "loss": 1.7789, "step": 11689 }, { "epoch": 0.8799563409172171, "grad_norm": 4.112998962402344, "learning_rate": 3.8077730370465247e-06, "loss": 1.7091, "step": 11690 }, { "epoch": 0.8800316151978773, "grad_norm": 4.732588291168213, "learning_rate": 3.8031081282475122e-06, "loss": 1.6471, "step": 11691 }, { "epoch": 0.8801068894785374, "grad_norm": 4.895622730255127, "learning_rate": 3.798445965721159e-06, "loss": 1.6966, "step": 11692 }, { "epoch": 0.8801821637591976, "grad_norm": 4.909831523895264, "learning_rate": 3.793786549744627e-06, "loss": 1.7482, "step": 11693 }, { "epoch": 0.8802574380398578, "grad_norm": 5.542243480682373, "learning_rate": 3.789129880594905e-06, "loss": 1.4186, "step": 11694 }, { "epoch": 0.8803327123205179, "grad_norm": 5.330420017242432, "learning_rate": 3.7844759585488276e-06, "loss": 1.8367, "step": 11695 }, { "epoch": 0.880407986601178, "grad_norm": 7.158842086791992, "learning_rate": 3.7798247838830403e-06, "loss": 1.9479, "step": 11696 }, { "epoch": 0.8804832608818381, "grad_norm": 5.65027379989624, "learning_rate": 3.775176356874055e-06, "loss": 2.0471, "step": 11697 }, { "epoch": 0.8805585351624984, "grad_norm": 5.438329696655273, "learning_rate": 3.7705306777981896e-06, "loss": 1.7276, "step": 11698 }, { "epoch": 0.8806338094431585, "grad_norm": 4.852111339569092, "learning_rate": 3.76588774693164e-06, "loss": 1.6597, "step": 11699 }, { "epoch": 0.8807090837238186, "grad_norm": 3.996556520462036, "learning_rate": 3.7612475645503963e-06, "loss": 1.657, "step": 11700 }, { "epoch": 0.8807843580044789, "grad_norm": 4.998070240020752, "learning_rate": 3.756610130930316e-06, "loss": 1.405, "step": 11701 }, { "epoch": 0.880859632285139, "grad_norm": 4.979634761810303, "learning_rate": 3.7519754463470725e-06, "loss": 2.0878, "step": 11702 }, { "epoch": 0.8809349065657991, "grad_norm": 4.137125015258789, "learning_rate": 3.7473435110761903e-06, "loss": 1.8052, "step": 11703 }, { "epoch": 0.8810101808464593, "grad_norm": 4.739142417907715, "learning_rate": 3.7427143253930207e-06, "loss": 1.9087, "step": 11704 }, { "epoch": 0.8810854551271194, "grad_norm": 6.339193344116211, "learning_rate": 3.73808788957275e-06, "loss": 1.9109, "step": 11705 }, { "epoch": 0.8811607294077796, "grad_norm": 5.92103385925293, "learning_rate": 3.733464203890419e-06, "loss": 1.5833, "step": 11706 }, { "epoch": 0.8812360036884398, "grad_norm": 4.376158237457275, "learning_rate": 3.7288432686208853e-06, "loss": 1.8536, "step": 11707 }, { "epoch": 0.8813112779690999, "grad_norm": 5.535974025726318, "learning_rate": 3.7242250840388625e-06, "loss": 1.7207, "step": 11708 }, { "epoch": 0.88138655224976, "grad_norm": 4.286526203155518, "learning_rate": 3.7196096504188705e-06, "loss": 1.824, "step": 11709 }, { "epoch": 0.8814618265304203, "grad_norm": 5.494035243988037, "learning_rate": 3.7149969680353e-06, "loss": 1.713, "step": 11710 }, { "epoch": 0.8815371008110804, "grad_norm": 3.8491909503936768, "learning_rate": 3.710387037162344e-06, "loss": 1.5778, "step": 11711 }, { "epoch": 0.8816123750917405, "grad_norm": 4.229644775390625, "learning_rate": 3.7057798580740656e-06, "loss": 2.1494, "step": 11712 }, { "epoch": 0.8816876493724007, "grad_norm": 5.165879249572754, "learning_rate": 3.7011754310443347e-06, "loss": 1.9515, "step": 11713 }, { "epoch": 0.8817629236530609, "grad_norm": 5.129971027374268, "learning_rate": 3.6965737563468884e-06, "loss": 1.7509, "step": 11714 }, { "epoch": 0.881838197933721, "grad_norm": 4.066309928894043, "learning_rate": 3.691974834255263e-06, "loss": 1.7692, "step": 11715 }, { "epoch": 0.8819134722143811, "grad_norm": 4.80290412902832, "learning_rate": 3.687378665042862e-06, "loss": 1.8186, "step": 11716 }, { "epoch": 0.8819887464950413, "grad_norm": 4.2350568771362305, "learning_rate": 3.682785248982912e-06, "loss": 1.8628, "step": 11717 }, { "epoch": 0.8820640207757015, "grad_norm": 5.003052711486816, "learning_rate": 3.678194586348488e-06, "loss": 1.6765, "step": 11718 }, { "epoch": 0.8821392950563616, "grad_norm": 4.9283766746521, "learning_rate": 3.6736066774124778e-06, "loss": 1.8051, "step": 11719 }, { "epoch": 0.8822145693370218, "grad_norm": 3.984339475631714, "learning_rate": 3.6690215224476297e-06, "loss": 1.7729, "step": 11720 }, { "epoch": 0.8822898436176819, "grad_norm": 5.933766841888428, "learning_rate": 3.6644391217265094e-06, "loss": 2.1696, "step": 11721 }, { "epoch": 0.882365117898342, "grad_norm": 4.05521297454834, "learning_rate": 3.6598594755215266e-06, "loss": 2.1176, "step": 11722 }, { "epoch": 0.8824403921790023, "grad_norm": 4.379974365234375, "learning_rate": 3.6552825841049464e-06, "loss": 1.6567, "step": 11723 }, { "epoch": 0.8825156664596624, "grad_norm": 4.739940166473389, "learning_rate": 3.6507084477488295e-06, "loss": 1.5901, "step": 11724 }, { "epoch": 0.8825909407403225, "grad_norm": 4.386242389678955, "learning_rate": 3.646137066725108e-06, "loss": 1.7343, "step": 11725 }, { "epoch": 0.8826662150209827, "grad_norm": 4.930104732513428, "learning_rate": 3.6415684413055317e-06, "loss": 2.2461, "step": 11726 }, { "epoch": 0.8827414893016429, "grad_norm": 5.442676067352295, "learning_rate": 3.6370025717616886e-06, "loss": 1.6076, "step": 11727 }, { "epoch": 0.882816763582303, "grad_norm": 4.562854290008545, "learning_rate": 3.632439458365017e-06, "loss": 2.0476, "step": 11728 }, { "epoch": 0.8828920378629632, "grad_norm": 4.101523399353027, "learning_rate": 3.627879101386783e-06, "loss": 1.7887, "step": 11729 }, { "epoch": 0.8829673121436233, "grad_norm": 5.210818290710449, "learning_rate": 3.6233215010980704e-06, "loss": 2.0049, "step": 11730 }, { "epoch": 0.8830425864242835, "grad_norm": 3.6276750564575195, "learning_rate": 3.6187666577698344e-06, "loss": 1.7847, "step": 11731 }, { "epoch": 0.8831178607049437, "grad_norm": 6.829989910125732, "learning_rate": 3.6142145716728305e-06, "loss": 1.7474, "step": 11732 }, { "epoch": 0.8831931349856038, "grad_norm": 6.399127960205078, "learning_rate": 3.609665243077681e-06, "loss": 1.9325, "step": 11733 }, { "epoch": 0.8832684092662639, "grad_norm": 4.941920757293701, "learning_rate": 3.60511867225482e-06, "loss": 1.8576, "step": 11734 }, { "epoch": 0.883343683546924, "grad_norm": 4.378361701965332, "learning_rate": 3.600574859474537e-06, "loss": 2.0939, "step": 11735 }, { "epoch": 0.8834189578275843, "grad_norm": 4.887096881866455, "learning_rate": 3.5960338050069318e-06, "loss": 1.5146, "step": 11736 }, { "epoch": 0.8834942321082444, "grad_norm": 3.7326459884643555, "learning_rate": 3.5914955091219727e-06, "loss": 1.5082, "step": 11737 }, { "epoch": 0.8835695063889045, "grad_norm": 5.693533420562744, "learning_rate": 3.5869599720894486e-06, "loss": 2.0089, "step": 11738 }, { "epoch": 0.8836447806695648, "grad_norm": 5.627947807312012, "learning_rate": 3.582427194178972e-06, "loss": 1.8153, "step": 11739 }, { "epoch": 0.8837200549502249, "grad_norm": 6.34576416015625, "learning_rate": 3.5778971756600277e-06, "loss": 2.0942, "step": 11740 }, { "epoch": 0.883795329230885, "grad_norm": 4.331240177154541, "learning_rate": 3.5733699168018832e-06, "loss": 1.8588, "step": 11741 }, { "epoch": 0.8838706035115452, "grad_norm": 4.81483268737793, "learning_rate": 3.5688454178736954e-06, "loss": 1.7601, "step": 11742 }, { "epoch": 0.8839458777922053, "grad_norm": 4.712187767028809, "learning_rate": 3.56432367914441e-06, "loss": 1.5383, "step": 11743 }, { "epoch": 0.8840211520728655, "grad_norm": 4.57413911819458, "learning_rate": 3.5598047008828517e-06, "loss": 1.5656, "step": 11744 }, { "epoch": 0.8840964263535257, "grad_norm": 5.560389518737793, "learning_rate": 3.555288483357644e-06, "loss": 1.9668, "step": 11745 }, { "epoch": 0.8841717006341858, "grad_norm": 4.135054111480713, "learning_rate": 3.550775026837283e-06, "loss": 1.9964, "step": 11746 }, { "epoch": 0.8842469749148459, "grad_norm": 8.078460693359375, "learning_rate": 3.5462643315900602e-06, "loss": 1.9363, "step": 11747 }, { "epoch": 0.8843222491955062, "grad_norm": 5.840259075164795, "learning_rate": 3.5417563978841273e-06, "loss": 1.7716, "step": 11748 }, { "epoch": 0.8843975234761663, "grad_norm": 4.86867618560791, "learning_rate": 3.5372512259874867e-06, "loss": 1.8895, "step": 11749 }, { "epoch": 0.8844727977568264, "grad_norm": 4.765399932861328, "learning_rate": 3.53274881616793e-06, "loss": 1.7925, "step": 11750 }, { "epoch": 0.8845480720374866, "grad_norm": 4.583217144012451, "learning_rate": 3.5282491686931317e-06, "loss": 1.9386, "step": 11751 }, { "epoch": 0.8846233463181468, "grad_norm": 5.901762008666992, "learning_rate": 3.523752283830584e-06, "loss": 1.7171, "step": 11752 }, { "epoch": 0.8846986205988069, "grad_norm": 5.61619234085083, "learning_rate": 3.5192581618475997e-06, "loss": 1.7941, "step": 11753 }, { "epoch": 0.884773894879467, "grad_norm": 6.162242889404297, "learning_rate": 3.514766803011349e-06, "loss": 2.0147, "step": 11754 }, { "epoch": 0.8848491691601272, "grad_norm": 5.188413143157959, "learning_rate": 3.5102782075888407e-06, "loss": 2.1021, "step": 11755 }, { "epoch": 0.8849244434407874, "grad_norm": 5.4029011726379395, "learning_rate": 3.5057923758468893e-06, "loss": 1.6433, "step": 11756 }, { "epoch": 0.8849997177214475, "grad_norm": 4.076094627380371, "learning_rate": 3.5013093080521808e-06, "loss": 1.8113, "step": 11757 }, { "epoch": 0.8850749920021077, "grad_norm": 7.070972919464111, "learning_rate": 3.496829004471208e-06, "loss": 2.0368, "step": 11758 }, { "epoch": 0.8851502662827678, "grad_norm": 4.31694221496582, "learning_rate": 3.4923514653703247e-06, "loss": 1.7314, "step": 11759 }, { "epoch": 0.885225540563428, "grad_norm": 5.058100700378418, "learning_rate": 3.487876691015696e-06, "loss": 1.7466, "step": 11760 }, { "epoch": 0.8853008148440882, "grad_norm": 4.5564117431640625, "learning_rate": 3.483404681673341e-06, "loss": 1.6735, "step": 11761 }, { "epoch": 0.8853760891247483, "grad_norm": 4.933740615844727, "learning_rate": 3.4789354376091043e-06, "loss": 1.674, "step": 11762 }, { "epoch": 0.8854513634054084, "grad_norm": 5.162326812744141, "learning_rate": 3.474468959088678e-06, "loss": 1.7968, "step": 11763 }, { "epoch": 0.8855266376860687, "grad_norm": 5.507837772369385, "learning_rate": 3.470005246377572e-06, "loss": 1.7393, "step": 11764 }, { "epoch": 0.8856019119667288, "grad_norm": 4.234638214111328, "learning_rate": 3.465544299741147e-06, "loss": 1.5094, "step": 11765 }, { "epoch": 0.8856771862473889, "grad_norm": 3.986448049545288, "learning_rate": 3.4610861194445897e-06, "loss": 1.7611, "step": 11766 }, { "epoch": 0.8857524605280491, "grad_norm": 6.9107184410095215, "learning_rate": 3.456630705752928e-06, "loss": 1.9551, "step": 11767 }, { "epoch": 0.8858277348087092, "grad_norm": 5.278472900390625, "learning_rate": 3.4521780589310213e-06, "loss": 1.5861, "step": 11768 }, { "epoch": 0.8859030090893694, "grad_norm": 3.7348806858062744, "learning_rate": 3.4477281792435646e-06, "loss": 1.8812, "step": 11769 }, { "epoch": 0.8859782833700296, "grad_norm": 5.235109806060791, "learning_rate": 3.443281066955106e-06, "loss": 1.667, "step": 11770 }, { "epoch": 0.8860535576506897, "grad_norm": 5.706743240356445, "learning_rate": 3.438836722329991e-06, "loss": 1.7574, "step": 11771 }, { "epoch": 0.8861288319313498, "grad_norm": 4.530086994171143, "learning_rate": 3.434395145632435e-06, "loss": 1.5736, "step": 11772 }, { "epoch": 0.88620410621201, "grad_norm": 4.790240287780762, "learning_rate": 3.429956337126472e-06, "loss": 1.8924, "step": 11773 }, { "epoch": 0.8862793804926702, "grad_norm": 8.091974258422852, "learning_rate": 3.42552029707599e-06, "loss": 1.6761, "step": 11774 }, { "epoch": 0.8863546547733303, "grad_norm": 4.775570869445801, "learning_rate": 3.4210870257446847e-06, "loss": 1.8182, "step": 11775 }, { "epoch": 0.8864299290539904, "grad_norm": 4.188730239868164, "learning_rate": 3.4166565233961113e-06, "loss": 1.5956, "step": 11776 }, { "epoch": 0.8865052033346507, "grad_norm": 4.084385871887207, "learning_rate": 3.412228790293637e-06, "loss": 1.9459, "step": 11777 }, { "epoch": 0.8865804776153108, "grad_norm": 4.2184553146362305, "learning_rate": 3.4078038267004953e-06, "loss": 1.8293, "step": 11778 }, { "epoch": 0.8866557518959709, "grad_norm": 4.290620803833008, "learning_rate": 3.403381632879721e-06, "loss": 1.6568, "step": 11779 }, { "epoch": 0.8867310261766311, "grad_norm": 4.861558437347412, "learning_rate": 3.3989622090942087e-06, "loss": 2.1661, "step": 11780 }, { "epoch": 0.8868063004572913, "grad_norm": 4.432613849639893, "learning_rate": 3.394545555606682e-06, "loss": 2.1038, "step": 11781 }, { "epoch": 0.8868815747379514, "grad_norm": 4.658480167388916, "learning_rate": 3.390131672679686e-06, "loss": 1.4717, "step": 11782 }, { "epoch": 0.8869568490186116, "grad_norm": 4.808432579040527, "learning_rate": 3.385720560575628e-06, "loss": 1.8406, "step": 11783 }, { "epoch": 0.8870321232992717, "grad_norm": 6.0383734703063965, "learning_rate": 3.3813122195567426e-06, "loss": 1.7959, "step": 11784 }, { "epoch": 0.8871073975799318, "grad_norm": 5.115785598754883, "learning_rate": 3.37690664988507e-06, "loss": 1.6087, "step": 11785 }, { "epoch": 0.8871826718605921, "grad_norm": 5.181887626647949, "learning_rate": 3.3725038518225228e-06, "loss": 1.5711, "step": 11786 }, { "epoch": 0.8872579461412522, "grad_norm": 6.70319128036499, "learning_rate": 3.368103825630836e-06, "loss": 1.5276, "step": 11787 }, { "epoch": 0.8873332204219123, "grad_norm": 5.32488489151001, "learning_rate": 3.3637065715715676e-06, "loss": 1.6134, "step": 11788 }, { "epoch": 0.8874084947025725, "grad_norm": 3.7461555004119873, "learning_rate": 3.3593120899061413e-06, "loss": 1.6014, "step": 11789 }, { "epoch": 0.8874837689832327, "grad_norm": 3.944082260131836, "learning_rate": 3.354920380895771e-06, "loss": 1.7107, "step": 11790 }, { "epoch": 0.8875590432638928, "grad_norm": 4.842274188995361, "learning_rate": 3.350531444801558e-06, "loss": 2.0498, "step": 11791 }, { "epoch": 0.887634317544553, "grad_norm": 4.317521095275879, "learning_rate": 3.3461452818843887e-06, "loss": 1.7203, "step": 11792 }, { "epoch": 0.8877095918252131, "grad_norm": 6.183497428894043, "learning_rate": 3.3417618924050153e-06, "loss": 1.9386, "step": 11793 }, { "epoch": 0.8877848661058733, "grad_norm": 3.985163450241089, "learning_rate": 3.3373812766240188e-06, "loss": 1.8251, "step": 11794 }, { "epoch": 0.8878601403865334, "grad_norm": 4.228626728057861, "learning_rate": 3.3330034348018235e-06, "loss": 2.1026, "step": 11795 }, { "epoch": 0.8879354146671936, "grad_norm": 7.4498090744018555, "learning_rate": 3.3286283671986664e-06, "loss": 1.6393, "step": 11796 }, { "epoch": 0.8880106889478537, "grad_norm": 4.097780227661133, "learning_rate": 3.324256074074644e-06, "loss": 1.5087, "step": 11797 }, { "epoch": 0.8880859632285139, "grad_norm": 5.756229400634766, "learning_rate": 3.319886555689661e-06, "loss": 2.0575, "step": 11798 }, { "epoch": 0.8881612375091741, "grad_norm": 3.740021228790283, "learning_rate": 3.315519812303491e-06, "loss": 1.8686, "step": 11799 }, { "epoch": 0.8882365117898342, "grad_norm": 5.576794147491455, "learning_rate": 3.311155844175712e-06, "loss": 1.6125, "step": 11800 }, { "epoch": 0.8883117860704943, "grad_norm": 5.809225559234619, "learning_rate": 3.3067946515657532e-06, "loss": 1.7876, "step": 11801 }, { "epoch": 0.8883870603511546, "grad_norm": 3.8192005157470703, "learning_rate": 3.3024362347328697e-06, "loss": 1.5767, "step": 11802 }, { "epoch": 0.8884623346318147, "grad_norm": 5.374025344848633, "learning_rate": 3.2980805939361646e-06, "loss": 1.7612, "step": 11803 }, { "epoch": 0.8885376089124748, "grad_norm": 5.49501895904541, "learning_rate": 3.2937277294345703e-06, "loss": 1.6202, "step": 11804 }, { "epoch": 0.888612883193135, "grad_norm": 5.229307174682617, "learning_rate": 3.2893776414868406e-06, "loss": 2.0495, "step": 11805 }, { "epoch": 0.8886881574737951, "grad_norm": 5.162867069244385, "learning_rate": 3.2850303303515797e-06, "loss": 1.7861, "step": 11806 }, { "epoch": 0.8887634317544553, "grad_norm": 5.295732498168945, "learning_rate": 3.280685796287225e-06, "loss": 1.8394, "step": 11807 }, { "epoch": 0.8888387060351155, "grad_norm": 6.164709091186523, "learning_rate": 3.2763440395520593e-06, "loss": 1.7819, "step": 11808 }, { "epoch": 0.8889139803157756, "grad_norm": 4.862895965576172, "learning_rate": 3.272005060404165e-06, "loss": 1.8755, "step": 11809 }, { "epoch": 0.8889892545964357, "grad_norm": 5.777836799621582, "learning_rate": 3.267668859101497e-06, "loss": 2.0372, "step": 11810 }, { "epoch": 0.889064528877096, "grad_norm": 4.18715763092041, "learning_rate": 3.263335435901821e-06, "loss": 1.803, "step": 11811 }, { "epoch": 0.8891398031577561, "grad_norm": 4.8020734786987305, "learning_rate": 3.2590047910627585e-06, "loss": 1.5089, "step": 11812 }, { "epoch": 0.8892150774384162, "grad_norm": 6.14511251449585, "learning_rate": 3.2546769248417374e-06, "loss": 2.6288, "step": 11813 }, { "epoch": 0.8892903517190763, "grad_norm": 4.045788288116455, "learning_rate": 3.250351837496057e-06, "loss": 1.8121, "step": 11814 }, { "epoch": 0.8893656259997366, "grad_norm": 5.799196243286133, "learning_rate": 3.2460295292828067e-06, "loss": 1.7446, "step": 11815 }, { "epoch": 0.8894409002803967, "grad_norm": 4.424937725067139, "learning_rate": 3.241710000458953e-06, "loss": 1.8113, "step": 11816 }, { "epoch": 0.8895161745610568, "grad_norm": 7.677599906921387, "learning_rate": 3.2373932512812733e-06, "loss": 1.9577, "step": 11817 }, { "epoch": 0.889591448841717, "grad_norm": 4.3186750411987305, "learning_rate": 3.2330792820063905e-06, "loss": 1.5709, "step": 11818 }, { "epoch": 0.8896667231223772, "grad_norm": 4.3186750411987305, "learning_rate": 3.2330792820063905e-06, "loss": 2.0441, "step": 11819 }, { "epoch": 0.8897419974030373, "grad_norm": 4.3186750411987305, "learning_rate": 3.2330792820063905e-06, "loss": 1.826, "step": 11820 }, { "epoch": 0.8898172716836975, "grad_norm": 5.141098976135254, "learning_rate": 3.228768092890755e-06, "loss": 1.7428, "step": 11821 }, { "epoch": 0.8898925459643576, "grad_norm": 7.1366753578186035, "learning_rate": 3.224459684190656e-06, "loss": 2.0392, "step": 11822 }, { "epoch": 0.8899678202450177, "grad_norm": 3.4166417121887207, "learning_rate": 3.220154056162217e-06, "loss": 1.7084, "step": 11823 }, { "epoch": 0.890043094525678, "grad_norm": 4.907994747161865, "learning_rate": 3.215851209061388e-06, "loss": 1.8153, "step": 11824 }, { "epoch": 0.8901183688063381, "grad_norm": 3.8626811504364014, "learning_rate": 3.2115511431439704e-06, "loss": 1.3738, "step": 11825 }, { "epoch": 0.8901936430869982, "grad_norm": 6.657654285430908, "learning_rate": 3.207253858665582e-06, "loss": 1.7999, "step": 11826 }, { "epoch": 0.8902689173676585, "grad_norm": 5.0645270347595215, "learning_rate": 3.202959355881696e-06, "loss": 1.9608, "step": 11827 }, { "epoch": 0.8903441916483186, "grad_norm": 4.2301154136657715, "learning_rate": 3.198667635047592e-06, "loss": 1.7449, "step": 11828 }, { "epoch": 0.8904194659289787, "grad_norm": 4.87670373916626, "learning_rate": 3.1943786964184043e-06, "loss": 1.9404, "step": 11829 }, { "epoch": 0.8904947402096389, "grad_norm": 3.7896482944488525, "learning_rate": 3.1900925402491067e-06, "loss": 2.0846, "step": 11830 }, { "epoch": 0.890570014490299, "grad_norm": 5.157933712005615, "learning_rate": 3.185809166794507e-06, "loss": 1.7528, "step": 11831 }, { "epoch": 0.8906452887709592, "grad_norm": 3.8069164752960205, "learning_rate": 3.1815285763092118e-06, "loss": 1.9564, "step": 11832 }, { "epoch": 0.8907205630516193, "grad_norm": 3.3343169689178467, "learning_rate": 3.177250769047718e-06, "loss": 1.7407, "step": 11833 }, { "epoch": 0.8907958373322795, "grad_norm": 3.9978039264678955, "learning_rate": 3.172975745264306e-06, "loss": 1.7015, "step": 11834 }, { "epoch": 0.8908711116129396, "grad_norm": 4.346434116363525, "learning_rate": 3.168703505213133e-06, "loss": 1.9475, "step": 11835 }, { "epoch": 0.8909463858935998, "grad_norm": 4.142141342163086, "learning_rate": 3.1644340491481574e-06, "loss": 1.7142, "step": 11836 }, { "epoch": 0.89102166017426, "grad_norm": 4.459104537963867, "learning_rate": 3.1601673773231867e-06, "loss": 1.8307, "step": 11837 }, { "epoch": 0.8910969344549201, "grad_norm": 3.816215991973877, "learning_rate": 3.1559034899918794e-06, "loss": 1.7033, "step": 11838 }, { "epoch": 0.8911722087355802, "grad_norm": 5.893944263458252, "learning_rate": 3.1516423874076886e-06, "loss": 1.8463, "step": 11839 }, { "epoch": 0.8912474830162405, "grad_norm": 5.152439117431641, "learning_rate": 3.1473840698239334e-06, "loss": 1.7638, "step": 11840 }, { "epoch": 0.8913227572969006, "grad_norm": 4.627366065979004, "learning_rate": 3.1431285374937614e-06, "loss": 1.5939, "step": 11841 }, { "epoch": 0.8913980315775607, "grad_norm": 4.759149551391602, "learning_rate": 3.1388757906701595e-06, "loss": 2.0037, "step": 11842 }, { "epoch": 0.8914733058582209, "grad_norm": 4.542873382568359, "learning_rate": 3.134625829605925e-06, "loss": 1.803, "step": 11843 }, { "epoch": 0.891548580138881, "grad_norm": 4.590996742248535, "learning_rate": 3.1303786545537173e-06, "loss": 1.6505, "step": 11844 }, { "epoch": 0.8916238544195412, "grad_norm": 4.327089786529541, "learning_rate": 3.1261342657660063e-06, "loss": 1.7829, "step": 11845 }, { "epoch": 0.8916991287002014, "grad_norm": 5.363874912261963, "learning_rate": 3.1218926634951296e-06, "loss": 1.6933, "step": 11846 }, { "epoch": 0.8917744029808615, "grad_norm": 6.655374050140381, "learning_rate": 3.117653847993213e-06, "loss": 2.0365, "step": 11847 }, { "epoch": 0.8918496772615216, "grad_norm": 4.274673938751221, "learning_rate": 3.1134178195122653e-06, "loss": 1.4952, "step": 11848 }, { "epoch": 0.8919249515421819, "grad_norm": 4.389845371246338, "learning_rate": 3.109184578304086e-06, "loss": 1.841, "step": 11849 }, { "epoch": 0.892000225822842, "grad_norm": 4.642415523529053, "learning_rate": 3.1049541246203406e-06, "loss": 1.7275, "step": 11850 }, { "epoch": 0.8920755001035021, "grad_norm": 5.653261661529541, "learning_rate": 3.1007264587125163e-06, "loss": 1.4653, "step": 11851 }, { "epoch": 0.8921507743841622, "grad_norm": 5.971973896026611, "learning_rate": 3.09650158083194e-06, "loss": 1.4789, "step": 11852 }, { "epoch": 0.8922260486648225, "grad_norm": 4.13915491104126, "learning_rate": 3.0922794912297605e-06, "loss": 1.6234, "step": 11853 }, { "epoch": 0.8923013229454826, "grad_norm": 5.602231979370117, "learning_rate": 3.088060190156966e-06, "loss": 1.6539, "step": 11854 }, { "epoch": 0.8923765972261427, "grad_norm": 5.522675514221191, "learning_rate": 3.0838436778643954e-06, "loss": 1.5568, "step": 11855 }, { "epoch": 0.8924518715068029, "grad_norm": 4.610293865203857, "learning_rate": 3.0796299546026976e-06, "loss": 1.7234, "step": 11856 }, { "epoch": 0.8925271457874631, "grad_norm": 6.0704216957092285, "learning_rate": 3.0754190206223776e-06, "loss": 1.9192, "step": 11857 }, { "epoch": 0.8926024200681232, "grad_norm": 6.7875494956970215, "learning_rate": 3.0712108761737413e-06, "loss": 1.7332, "step": 11858 }, { "epoch": 0.8926776943487834, "grad_norm": 6.020510673522949, "learning_rate": 3.0670055215069772e-06, "loss": 2.4567, "step": 11859 }, { "epoch": 0.8927529686294435, "grad_norm": 4.295178413391113, "learning_rate": 3.062802956872057e-06, "loss": 2.0064, "step": 11860 }, { "epoch": 0.8928282429101037, "grad_norm": 5.418540954589844, "learning_rate": 3.058603182518832e-06, "loss": 1.89, "step": 11861 }, { "epoch": 0.8929035171907639, "grad_norm": 4.2862677574157715, "learning_rate": 3.054406198696952e-06, "loss": 2.0019, "step": 11862 }, { "epoch": 0.892978791471424, "grad_norm": 4.884140968322754, "learning_rate": 3.0502120056559223e-06, "loss": 1.8629, "step": 11863 }, { "epoch": 0.8930540657520841, "grad_norm": 5.246596813201904, "learning_rate": 3.0460206036450724e-06, "loss": 1.8386, "step": 11864 }, { "epoch": 0.8931293400327444, "grad_norm": 5.6330695152282715, "learning_rate": 3.0418319929135854e-06, "loss": 1.8794, "step": 11865 }, { "epoch": 0.8932046143134045, "grad_norm": 5.535592555999756, "learning_rate": 3.0376461737104345e-06, "loss": 1.7446, "step": 11866 }, { "epoch": 0.8932798885940646, "grad_norm": 7.210932731628418, "learning_rate": 3.0334631462844766e-06, "loss": 1.8556, "step": 11867 }, { "epoch": 0.8933551628747248, "grad_norm": 5.280955791473389, "learning_rate": 3.0292829108843625e-06, "loss": 1.8282, "step": 11868 }, { "epoch": 0.893430437155385, "grad_norm": 4.655411243438721, "learning_rate": 3.02510546775861e-06, "loss": 1.9196, "step": 11869 }, { "epoch": 0.8935057114360451, "grad_norm": 4.468110084533691, "learning_rate": 3.0209308171555593e-06, "loss": 1.9959, "step": 11870 }, { "epoch": 0.8935809857167052, "grad_norm": 5.1010332107543945, "learning_rate": 3.0167589593233624e-06, "loss": 1.9308, "step": 11871 }, { "epoch": 0.8936562599973654, "grad_norm": 3.653269052505493, "learning_rate": 3.0125898945100427e-06, "loss": 1.913, "step": 11872 }, { "epoch": 0.8937315342780255, "grad_norm": 5.934589385986328, "learning_rate": 3.0084236229634244e-06, "loss": 1.7819, "step": 11873 }, { "epoch": 0.8938068085586857, "grad_norm": 4.32749605178833, "learning_rate": 3.0042601449311925e-06, "loss": 2.0242, "step": 11874 }, { "epoch": 0.8938820828393459, "grad_norm": 4.6355977058410645, "learning_rate": 3.0000994606608433e-06, "loss": 1.6809, "step": 11875 }, { "epoch": 0.893957357120006, "grad_norm": 4.898728370666504, "learning_rate": 2.995941570399735e-06, "loss": 1.9374, "step": 11876 }, { "epoch": 0.8940326314006661, "grad_norm": 4.161886692047119, "learning_rate": 2.9917864743950195e-06, "loss": 1.6023, "step": 11877 }, { "epoch": 0.8941079056813264, "grad_norm": 5.221473693847656, "learning_rate": 2.987634172893727e-06, "loss": 1.9183, "step": 11878 }, { "epoch": 0.8941831799619865, "grad_norm": 4.498020172119141, "learning_rate": 2.983484666142683e-06, "loss": 1.8458, "step": 11879 }, { "epoch": 0.8942584542426466, "grad_norm": 5.728368282318115, "learning_rate": 2.9793379543885724e-06, "loss": 2.0314, "step": 11880 }, { "epoch": 0.8943337285233068, "grad_norm": 5.019113063812256, "learning_rate": 2.9751940378778986e-06, "loss": 1.5129, "step": 11881 }, { "epoch": 0.894409002803967, "grad_norm": 4.794315338134766, "learning_rate": 2.97105291685702e-06, "loss": 1.6153, "step": 11882 }, { "epoch": 0.8944842770846271, "grad_norm": 4.009993076324463, "learning_rate": 2.96691459157209e-06, "loss": 1.6454, "step": 11883 }, { "epoch": 0.8945595513652873, "grad_norm": 5.24515438079834, "learning_rate": 2.9627790622691387e-06, "loss": 1.7421, "step": 11884 }, { "epoch": 0.8946348256459474, "grad_norm": 6.472077369689941, "learning_rate": 2.9586463291940093e-06, "loss": 1.705, "step": 11885 }, { "epoch": 0.8947100999266075, "grad_norm": 4.370456218719482, "learning_rate": 2.9545163925923767e-06, "loss": 2.1343, "step": 11886 }, { "epoch": 0.8947853742072678, "grad_norm": 7.8226141929626465, "learning_rate": 2.950389252709762e-06, "loss": 2.1813, "step": 11887 }, { "epoch": 0.8948606484879279, "grad_norm": 4.593178749084473, "learning_rate": 2.946264909791502e-06, "loss": 1.6944, "step": 11888 }, { "epoch": 0.894935922768588, "grad_norm": 4.995981693267822, "learning_rate": 2.9421433640827834e-06, "loss": 1.4935, "step": 11889 }, { "epoch": 0.8950111970492483, "grad_norm": 6.428556442260742, "learning_rate": 2.9380246158286105e-06, "loss": 1.6151, "step": 11890 }, { "epoch": 0.8950864713299084, "grad_norm": 4.174126148223877, "learning_rate": 2.933908665273849e-06, "loss": 1.7006, "step": 11891 }, { "epoch": 0.8951617456105685, "grad_norm": 4.930836200714111, "learning_rate": 2.929795512663164e-06, "loss": 2.0235, "step": 11892 }, { "epoch": 0.8952370198912286, "grad_norm": 4.087159633636475, "learning_rate": 2.925685158241076e-06, "loss": 2.0748, "step": 11893 }, { "epoch": 0.8953122941718888, "grad_norm": 4.530377388000488, "learning_rate": 2.9215776022519347e-06, "loss": 1.7235, "step": 11894 }, { "epoch": 0.895387568452549, "grad_norm": 6.479909896850586, "learning_rate": 2.917472844939917e-06, "loss": 1.657, "step": 11895 }, { "epoch": 0.8954628427332091, "grad_norm": 4.67534065246582, "learning_rate": 2.9133708865490438e-06, "loss": 1.759, "step": 11896 }, { "epoch": 0.8955381170138693, "grad_norm": 3.5963919162750244, "learning_rate": 2.9092717273231764e-06, "loss": 1.7908, "step": 11897 }, { "epoch": 0.8956133912945294, "grad_norm": 5.5777106285095215, "learning_rate": 2.9051753675059745e-06, "loss": 1.886, "step": 11898 }, { "epoch": 0.8956886655751896, "grad_norm": 3.829454183578491, "learning_rate": 2.901081807340977e-06, "loss": 1.6295, "step": 11899 }, { "epoch": 0.8957639398558498, "grad_norm": 6.346753120422363, "learning_rate": 2.8969910470715122e-06, "loss": 1.6911, "step": 11900 }, { "epoch": 0.8958392141365099, "grad_norm": 4.225761890411377, "learning_rate": 2.892903086940779e-06, "loss": 1.686, "step": 11901 }, { "epoch": 0.89591448841717, "grad_norm": 5.266272068023682, "learning_rate": 2.8888179271918e-06, "loss": 1.3056, "step": 11902 }, { "epoch": 0.8959897626978303, "grad_norm": 6.298349857330322, "learning_rate": 2.8847355680674094e-06, "loss": 1.7288, "step": 11903 }, { "epoch": 0.8960650369784904, "grad_norm": 4.389970779418945, "learning_rate": 2.8806560098103117e-06, "loss": 1.755, "step": 11904 }, { "epoch": 0.8961403112591505, "grad_norm": 7.884381294250488, "learning_rate": 2.876579252663003e-06, "loss": 1.5247, "step": 11905 }, { "epoch": 0.8962155855398107, "grad_norm": 6.21115779876709, "learning_rate": 2.8725052968678502e-06, "loss": 1.9159, "step": 11906 }, { "epoch": 0.8962908598204709, "grad_norm": 7.484222888946533, "learning_rate": 2.8684341426670316e-06, "loss": 1.481, "step": 11907 }, { "epoch": 0.896366134101131, "grad_norm": 4.33501672744751, "learning_rate": 2.8643657903025647e-06, "loss": 1.8524, "step": 11908 }, { "epoch": 0.8964414083817912, "grad_norm": 3.8369853496551514, "learning_rate": 2.8603002400163003e-06, "loss": 1.6306, "step": 11909 }, { "epoch": 0.8965166826624513, "grad_norm": 4.877063751220703, "learning_rate": 2.85623749204994e-06, "loss": 1.9345, "step": 11910 }, { "epoch": 0.8965919569431114, "grad_norm": 5.807718753814697, "learning_rate": 2.852177546644985e-06, "loss": 1.8559, "step": 11911 }, { "epoch": 0.8966672312237716, "grad_norm": 4.768845558166504, "learning_rate": 2.8481204040427923e-06, "loss": 1.6579, "step": 11912 }, { "epoch": 0.8967425055044318, "grad_norm": 5.8899827003479, "learning_rate": 2.844066064484546e-06, "loss": 2.2638, "step": 11913 }, { "epoch": 0.8968177797850919, "grad_norm": 4.5548810958862305, "learning_rate": 2.8400145282112713e-06, "loss": 1.548, "step": 11914 }, { "epoch": 0.896893054065752, "grad_norm": 4.679693222045898, "learning_rate": 2.835965795463813e-06, "loss": 1.6485, "step": 11915 }, { "epoch": 0.8969683283464123, "grad_norm": 5.313938140869141, "learning_rate": 2.8319198664828628e-06, "loss": 2.1556, "step": 11916 }, { "epoch": 0.8970436026270724, "grad_norm": 5.710687160491943, "learning_rate": 2.827876741508928e-06, "loss": 1.8302, "step": 11917 }, { "epoch": 0.8971188769077325, "grad_norm": 7.161043167114258, "learning_rate": 2.823836420782372e-06, "loss": 2.0583, "step": 11918 }, { "epoch": 0.8971941511883927, "grad_norm": 4.390212535858154, "learning_rate": 2.819798904543375e-06, "loss": 2.0067, "step": 11919 }, { "epoch": 0.8972694254690529, "grad_norm": 6.0757269859313965, "learning_rate": 2.8157641930319613e-06, "loss": 1.7676, "step": 11920 }, { "epoch": 0.897344699749713, "grad_norm": 6.575694561004639, "learning_rate": 2.8117322864879834e-06, "loss": 1.6393, "step": 11921 }, { "epoch": 0.8974199740303732, "grad_norm": 3.7963900566101074, "learning_rate": 2.8077031851511173e-06, "loss": 1.8968, "step": 11922 }, { "epoch": 0.8974952483110333, "grad_norm": 5.335954189300537, "learning_rate": 2.8036768892608924e-06, "loss": 1.8378, "step": 11923 }, { "epoch": 0.8975705225916935, "grad_norm": 6.314944744110107, "learning_rate": 2.7996533990566454e-06, "loss": 2.0751, "step": 11924 }, { "epoch": 0.8976457968723537, "grad_norm": 4.7898850440979, "learning_rate": 2.795632714777585e-06, "loss": 1.5771, "step": 11925 }, { "epoch": 0.8977210711530138, "grad_norm": 9.502776145935059, "learning_rate": 2.791614836662704e-06, "loss": 2.3836, "step": 11926 }, { "epoch": 0.8977963454336739, "grad_norm": 5.735191822052002, "learning_rate": 2.7875997649508713e-06, "loss": 1.8338, "step": 11927 }, { "epoch": 0.8978716197143342, "grad_norm": 3.998290777206421, "learning_rate": 2.7835874998807576e-06, "loss": 2.0137, "step": 11928 }, { "epoch": 0.8979468939949943, "grad_norm": 4.785402297973633, "learning_rate": 2.779578041690889e-06, "loss": 1.5864, "step": 11929 }, { "epoch": 0.8980221682756544, "grad_norm": 4.234623432159424, "learning_rate": 2.7755713906196134e-06, "loss": 1.6581, "step": 11930 }, { "epoch": 0.8980974425563145, "grad_norm": 4.966156005859375, "learning_rate": 2.7715675469051293e-06, "loss": 1.6874, "step": 11931 }, { "epoch": 0.8981727168369747, "grad_norm": 5.304784774780273, "learning_rate": 2.7675665107854298e-06, "loss": 2.6467, "step": 11932 }, { "epoch": 0.8982479911176349, "grad_norm": 4.455078601837158, "learning_rate": 2.7635682824983743e-06, "loss": 1.4891, "step": 11933 }, { "epoch": 0.898323265398295, "grad_norm": 6.032569885253906, "learning_rate": 2.759572862281662e-06, "loss": 1.3525, "step": 11934 }, { "epoch": 0.8983985396789552, "grad_norm": 7.020341873168945, "learning_rate": 2.75558025037278e-06, "loss": 1.7543, "step": 11935 }, { "epoch": 0.8984738139596153, "grad_norm": 3.9947221279144287, "learning_rate": 2.751590447009106e-06, "loss": 1.8887, "step": 11936 }, { "epoch": 0.8985490882402755, "grad_norm": 4.335216045379639, "learning_rate": 2.747603452427794e-06, "loss": 1.5954, "step": 11937 }, { "epoch": 0.8986243625209357, "grad_norm": 6.364669322967529, "learning_rate": 2.7436192668658876e-06, "loss": 1.7033, "step": 11938 }, { "epoch": 0.8986996368015958, "grad_norm": 4.1729888916015625, "learning_rate": 2.739637890560215e-06, "loss": 2.0867, "step": 11939 }, { "epoch": 0.8987749110822559, "grad_norm": 3.890747547149658, "learning_rate": 2.7356593237474636e-06, "loss": 1.8854, "step": 11940 }, { "epoch": 0.8988501853629162, "grad_norm": 6.1546406745910645, "learning_rate": 2.7316835666641504e-06, "loss": 1.7402, "step": 11941 }, { "epoch": 0.8989254596435763, "grad_norm": 4.240593433380127, "learning_rate": 2.727710619546625e-06, "loss": 1.5398, "step": 11942 }, { "epoch": 0.8990007339242364, "grad_norm": 5.984617233276367, "learning_rate": 2.72374048263106e-06, "loss": 1.9002, "step": 11943 }, { "epoch": 0.8990760082048966, "grad_norm": 4.601238250732422, "learning_rate": 2.7197731561534767e-06, "loss": 1.7549, "step": 11944 }, { "epoch": 0.8991512824855568, "grad_norm": 4.546549320220947, "learning_rate": 2.7158086403497097e-06, "loss": 2.1923, "step": 11945 }, { "epoch": 0.8992265567662169, "grad_norm": 7.224729537963867, "learning_rate": 2.711846935455453e-06, "loss": 1.619, "step": 11946 }, { "epoch": 0.8993018310468771, "grad_norm": 4.989003658294678, "learning_rate": 2.707888041706208e-06, "loss": 1.9677, "step": 11947 }, { "epoch": 0.8993771053275372, "grad_norm": 4.853872776031494, "learning_rate": 2.7039319593373246e-06, "loss": 1.7004, "step": 11948 }, { "epoch": 0.8994523796081973, "grad_norm": 7.603703022003174, "learning_rate": 2.699978688583976e-06, "loss": 2.2507, "step": 11949 }, { "epoch": 0.8995276538888575, "grad_norm": 7.670825004577637, "learning_rate": 2.696028229681169e-06, "loss": 1.9575, "step": 11950 }, { "epoch": 0.8996029281695177, "grad_norm": 5.305761337280273, "learning_rate": 2.69208058286376e-06, "loss": 2.1033, "step": 11951 }, { "epoch": 0.8996782024501778, "grad_norm": 5.928761005401611, "learning_rate": 2.6881357483664216e-06, "loss": 1.7812, "step": 11952 }, { "epoch": 0.8997534767308379, "grad_norm": 4.458364009857178, "learning_rate": 2.684193726423656e-06, "loss": 1.6437, "step": 11953 }, { "epoch": 0.8998287510114982, "grad_norm": 7.592646598815918, "learning_rate": 2.680254517269809e-06, "loss": 1.8663, "step": 11954 }, { "epoch": 0.8999040252921583, "grad_norm": 3.8419203758239746, "learning_rate": 2.6763181211390596e-06, "loss": 1.8588, "step": 11955 }, { "epoch": 0.8999792995728184, "grad_norm": 5.933088302612305, "learning_rate": 2.6723845382654043e-06, "loss": 2.005, "step": 11956 }, { "epoch": 0.9000545738534786, "grad_norm": 5.551792621612549, "learning_rate": 2.6684537688827006e-06, "loss": 1.7234, "step": 11957 }, { "epoch": 0.9001298481341388, "grad_norm": 4.715183258056641, "learning_rate": 2.6645258132245997e-06, "loss": 1.5114, "step": 11958 }, { "epoch": 0.9002051224147989, "grad_norm": 5.215699195861816, "learning_rate": 2.6606006715246268e-06, "loss": 1.8891, "step": 11959 }, { "epoch": 0.9002803966954591, "grad_norm": 4.994283676147461, "learning_rate": 2.656678344016106e-06, "loss": 1.608, "step": 11960 }, { "epoch": 0.9003556709761192, "grad_norm": 5.674286842346191, "learning_rate": 2.6527588309322226e-06, "loss": 1.976, "step": 11961 }, { "epoch": 0.9004309452567794, "grad_norm": 5.190013408660889, "learning_rate": 2.648842132505963e-06, "loss": 1.5415, "step": 11962 }, { "epoch": 0.9005062195374396, "grad_norm": 4.652156829833984, "learning_rate": 2.6449282489701744e-06, "loss": 1.9476, "step": 11963 }, { "epoch": 0.9005814938180997, "grad_norm": 5.031260013580322, "learning_rate": 2.6410171805575203e-06, "loss": 1.5048, "step": 11964 }, { "epoch": 0.9006567680987598, "grad_norm": 4.505012035369873, "learning_rate": 2.637108927500509e-06, "loss": 1.73, "step": 11965 }, { "epoch": 0.9007320423794201, "grad_norm": 5.676845550537109, "learning_rate": 2.6332034900314827e-06, "loss": 1.7342, "step": 11966 }, { "epoch": 0.9008073166600802, "grad_norm": 4.564072608947754, "learning_rate": 2.6293008683825893e-06, "loss": 2.0508, "step": 11967 }, { "epoch": 0.9008825909407403, "grad_norm": 4.7408366203308105, "learning_rate": 2.6254010627858426e-06, "loss": 1.7701, "step": 11968 }, { "epoch": 0.9009578652214005, "grad_norm": 4.5672221183776855, "learning_rate": 2.6215040734730633e-06, "loss": 2.12, "step": 11969 }, { "epoch": 0.9010331395020607, "grad_norm": 4.8732781410217285, "learning_rate": 2.6176099006759324e-06, "loss": 1.7931, "step": 11970 }, { "epoch": 0.9011084137827208, "grad_norm": 5.545689105987549, "learning_rate": 2.6137185446259315e-06, "loss": 1.6724, "step": 11971 }, { "epoch": 0.9011836880633809, "grad_norm": 5.86617374420166, "learning_rate": 2.6098300055543978e-06, "loss": 1.9748, "step": 11972 }, { "epoch": 0.9012589623440411, "grad_norm": 5.060419082641602, "learning_rate": 2.6059442836924906e-06, "loss": 1.7987, "step": 11973 }, { "epoch": 0.9013342366247012, "grad_norm": 5.573587417602539, "learning_rate": 2.6020613792712032e-06, "loss": 1.3434, "step": 11974 }, { "epoch": 0.9014095109053614, "grad_norm": 3.9352023601531982, "learning_rate": 2.598181292521373e-06, "loss": 1.577, "step": 11975 }, { "epoch": 0.9014847851860216, "grad_norm": 4.468507766723633, "learning_rate": 2.594304023673655e-06, "loss": 1.8203, "step": 11976 }, { "epoch": 0.9015600594666817, "grad_norm": 4.273713111877441, "learning_rate": 2.590429572958536e-06, "loss": 1.6355, "step": 11977 }, { "epoch": 0.9016353337473418, "grad_norm": 4.349310874938965, "learning_rate": 2.5865579406063488e-06, "loss": 1.5269, "step": 11978 }, { "epoch": 0.9017106080280021, "grad_norm": 4.628576278686523, "learning_rate": 2.5826891268472485e-06, "loss": 1.8985, "step": 11979 }, { "epoch": 0.9017858823086622, "grad_norm": 5.092843532562256, "learning_rate": 2.578823131911223e-06, "loss": 1.934, "step": 11980 }, { "epoch": 0.9018611565893223, "grad_norm": 5.034144878387451, "learning_rate": 2.5749599560280947e-06, "loss": 1.7996, "step": 11981 }, { "epoch": 0.9019364308699825, "grad_norm": 4.841254234313965, "learning_rate": 2.5710995994275176e-06, "loss": 1.7984, "step": 11982 }, { "epoch": 0.9020117051506427, "grad_norm": 4.680467128753662, "learning_rate": 2.567242062338987e-06, "loss": 1.9285, "step": 11983 }, { "epoch": 0.9020869794313028, "grad_norm": 4.606339931488037, "learning_rate": 2.563387344991808e-06, "loss": 1.668, "step": 11984 }, { "epoch": 0.902162253711963, "grad_norm": 4.296433448791504, "learning_rate": 2.5595354476151413e-06, "loss": 1.4844, "step": 11985 }, { "epoch": 0.9022375279926231, "grad_norm": 6.967336177825928, "learning_rate": 2.555686370437971e-06, "loss": 1.5619, "step": 11986 }, { "epoch": 0.9023128022732833, "grad_norm": 4.634474277496338, "learning_rate": 2.5518401136891134e-06, "loss": 1.5101, "step": 11987 }, { "epoch": 0.9023880765539435, "grad_norm": 5.470510959625244, "learning_rate": 2.5479966775972198e-06, "loss": 2.1155, "step": 11988 }, { "epoch": 0.9024633508346036, "grad_norm": 4.887975692749023, "learning_rate": 2.544156062390768e-06, "loss": 1.6347, "step": 11989 }, { "epoch": 0.9025386251152637, "grad_norm": 7.093113899230957, "learning_rate": 2.54031826829807e-06, "loss": 1.7189, "step": 11990 }, { "epoch": 0.9026138993959238, "grad_norm": 5.453039646148682, "learning_rate": 2.536483295547282e-06, "loss": 2.0303, "step": 11991 }, { "epoch": 0.9026891736765841, "grad_norm": 3.6220996379852295, "learning_rate": 2.53265114436636e-06, "loss": 1.7392, "step": 11992 }, { "epoch": 0.9027644479572442, "grad_norm": 5.754844665527344, "learning_rate": 2.52882181498314e-06, "loss": 1.8435, "step": 11993 }, { "epoch": 0.9028397222379043, "grad_norm": 6.359145641326904, "learning_rate": 2.5249953076252496e-06, "loss": 1.6707, "step": 11994 }, { "epoch": 0.9029149965185645, "grad_norm": 5.185604095458984, "learning_rate": 2.5211716225201632e-06, "loss": 1.8284, "step": 11995 }, { "epoch": 0.9029902707992247, "grad_norm": 4.033519744873047, "learning_rate": 2.5173507598951927e-06, "loss": 1.6419, "step": 11996 }, { "epoch": 0.9030655450798848, "grad_norm": 4.735548496246338, "learning_rate": 2.513532719977474e-06, "loss": 1.7643, "step": 11997 }, { "epoch": 0.903140819360545, "grad_norm": 3.8570384979248047, "learning_rate": 2.509717502993991e-06, "loss": 2.0708, "step": 11998 }, { "epoch": 0.9032160936412051, "grad_norm": 4.196596145629883, "learning_rate": 2.5059051091715303e-06, "loss": 1.9194, "step": 11999 }, { "epoch": 0.9032913679218653, "grad_norm": 6.576998233795166, "learning_rate": 2.5020955387367372e-06, "loss": 1.9091, "step": 12000 }, { "epoch": 0.9033666422025255, "grad_norm": 5.059309959411621, "learning_rate": 2.498288791916076e-06, "loss": 1.7275, "step": 12001 }, { "epoch": 0.9034419164831856, "grad_norm": 4.960028648376465, "learning_rate": 2.4944848689358534e-06, "loss": 1.6355, "step": 12002 }, { "epoch": 0.9035171907638457, "grad_norm": 5.26534366607666, "learning_rate": 2.4906837700221896e-06, "loss": 1.6269, "step": 12003 }, { "epoch": 0.903592465044506, "grad_norm": 4.87063455581665, "learning_rate": 2.4868854954010645e-06, "loss": 1.6982, "step": 12004 }, { "epoch": 0.9036677393251661, "grad_norm": 6.078065395355225, "learning_rate": 2.4830900452982587e-06, "loss": 1.9179, "step": 12005 }, { "epoch": 0.9037430136058262, "grad_norm": 4.551740646362305, "learning_rate": 2.479297419939419e-06, "loss": 1.9464, "step": 12006 }, { "epoch": 0.9038182878864864, "grad_norm": 5.093257904052734, "learning_rate": 2.475507619549983e-06, "loss": 1.7702, "step": 12007 }, { "epoch": 0.9038935621671466, "grad_norm": 6.582335472106934, "learning_rate": 2.471720644355258e-06, "loss": 1.4875, "step": 12008 }, { "epoch": 0.9039688364478067, "grad_norm": 5.120762348175049, "learning_rate": 2.467936494580364e-06, "loss": 1.7898, "step": 12009 }, { "epoch": 0.9040441107284668, "grad_norm": 4.245059013366699, "learning_rate": 2.464155170450272e-06, "loss": 1.7692, "step": 12010 }, { "epoch": 0.904119385009127, "grad_norm": 5.217390537261963, "learning_rate": 2.4603766721897513e-06, "loss": 1.7489, "step": 12011 }, { "epoch": 0.9041946592897872, "grad_norm": 4.219017028808594, "learning_rate": 2.4566010000234397e-06, "loss": 2.0146, "step": 12012 }, { "epoch": 0.9042699335704473, "grad_norm": 5.43870210647583, "learning_rate": 2.452828154175768e-06, "loss": 1.8117, "step": 12013 }, { "epoch": 0.9043452078511075, "grad_norm": 4.560093879699707, "learning_rate": 2.4490581348710408e-06, "loss": 2.2599, "step": 12014 }, { "epoch": 0.9044204821317676, "grad_norm": 4.2792181968688965, "learning_rate": 2.4452909423333723e-06, "loss": 1.6955, "step": 12015 }, { "epoch": 0.9044957564124277, "grad_norm": 5.564821243286133, "learning_rate": 2.4415265767867014e-06, "loss": 1.7263, "step": 12016 }, { "epoch": 0.904571030693088, "grad_norm": 5.642936706542969, "learning_rate": 2.43776503845482e-06, "loss": 1.6286, "step": 12017 }, { "epoch": 0.9046463049737481, "grad_norm": 5.0309882164001465, "learning_rate": 2.4340063275613335e-06, "loss": 1.7298, "step": 12018 }, { "epoch": 0.9047215792544082, "grad_norm": 4.734522819519043, "learning_rate": 2.4302504443296848e-06, "loss": 2.1225, "step": 12019 }, { "epoch": 0.9047968535350684, "grad_norm": 7.406623363494873, "learning_rate": 2.4264973889831565e-06, "loss": 1.9675, "step": 12020 }, { "epoch": 0.9048721278157286, "grad_norm": 5.505327224731445, "learning_rate": 2.4227471617448596e-06, "loss": 1.994, "step": 12021 }, { "epoch": 0.9049474020963887, "grad_norm": 5.354662895202637, "learning_rate": 2.4189997628377205e-06, "loss": 1.6612, "step": 12022 }, { "epoch": 0.9050226763770489, "grad_norm": 3.991269826889038, "learning_rate": 2.4152551924845334e-06, "loss": 1.8677, "step": 12023 }, { "epoch": 0.905097950657709, "grad_norm": 4.828816890716553, "learning_rate": 2.411513450907876e-06, "loss": 1.5795, "step": 12024 }, { "epoch": 0.9051732249383692, "grad_norm": 5.001769065856934, "learning_rate": 2.4077745383302097e-06, "loss": 2.1248, "step": 12025 }, { "epoch": 0.9052484992190294, "grad_norm": 4.290104389190674, "learning_rate": 2.404038454973778e-06, "loss": 2.1903, "step": 12026 }, { "epoch": 0.9053237734996895, "grad_norm": 5.534820079803467, "learning_rate": 2.400305201060704e-06, "loss": 1.5882, "step": 12027 }, { "epoch": 0.9053990477803496, "grad_norm": 4.812999248504639, "learning_rate": 2.3965747768128933e-06, "loss": 1.7623, "step": 12028 }, { "epoch": 0.9054743220610098, "grad_norm": 4.874666213989258, "learning_rate": 2.3928471824521293e-06, "loss": 1.9715, "step": 12029 }, { "epoch": 0.90554959634167, "grad_norm": 4.669535160064697, "learning_rate": 2.389122418199996e-06, "loss": 1.5057, "step": 12030 }, { "epoch": 0.9056248706223301, "grad_norm": 5.131518840789795, "learning_rate": 2.3854004842779277e-06, "loss": 1.5649, "step": 12031 }, { "epoch": 0.9057001449029902, "grad_norm": 4.108508586883545, "learning_rate": 2.38168138090718e-06, "loss": 1.6892, "step": 12032 }, { "epoch": 0.9057754191836505, "grad_norm": 5.186368465423584, "learning_rate": 2.3779651083088384e-06, "loss": 1.7513, "step": 12033 }, { "epoch": 0.9058506934643106, "grad_norm": 4.305336952209473, "learning_rate": 2.374251666703836e-06, "loss": 1.9857, "step": 12034 }, { "epoch": 0.9059259677449707, "grad_norm": 4.379505634307861, "learning_rate": 2.3705410563129137e-06, "loss": 1.8207, "step": 12035 }, { "epoch": 0.9060012420256309, "grad_norm": 3.9230740070343018, "learning_rate": 2.3668332773566617e-06, "loss": 1.8812, "step": 12036 }, { "epoch": 0.906076516306291, "grad_norm": 4.204113006591797, "learning_rate": 2.363128330055492e-06, "loss": 2.0519, "step": 12037 }, { "epoch": 0.9061517905869512, "grad_norm": 6.357110023498535, "learning_rate": 2.3594262146296676e-06, "loss": 1.7123, "step": 12038 }, { "epoch": 0.9062270648676114, "grad_norm": 6.072261810302734, "learning_rate": 2.3557269312992514e-06, "loss": 2.0566, "step": 12039 }, { "epoch": 0.9063023391482715, "grad_norm": 3.6129419803619385, "learning_rate": 2.352030480284162e-06, "loss": 1.8136, "step": 12040 }, { "epoch": 0.9063776134289316, "grad_norm": 4.159516334533691, "learning_rate": 2.3483368618041403e-06, "loss": 1.9119, "step": 12041 }, { "epoch": 0.9064528877095919, "grad_norm": 7.715047836303711, "learning_rate": 2.3446460760787713e-06, "loss": 2.0005, "step": 12042 }, { "epoch": 0.906528161990252, "grad_norm": 5.2658162117004395, "learning_rate": 2.3409581233274468e-06, "loss": 1.6645, "step": 12043 }, { "epoch": 0.9066034362709121, "grad_norm": 4.586082458496094, "learning_rate": 2.3372730037694245e-06, "loss": 1.8197, "step": 12044 }, { "epoch": 0.9066787105515723, "grad_norm": 3.306954860687256, "learning_rate": 2.3335907176237513e-06, "loss": 1.5572, "step": 12045 }, { "epoch": 0.9067539848322325, "grad_norm": 5.0185627937316895, "learning_rate": 2.3299112651093414e-06, "loss": 1.7469, "step": 12046 }, { "epoch": 0.9068292591128926, "grad_norm": 5.887462139129639, "learning_rate": 2.3262346464449358e-06, "loss": 2.3686, "step": 12047 }, { "epoch": 0.9069045333935527, "grad_norm": 5.155585289001465, "learning_rate": 2.3225608618490766e-06, "loss": 1.917, "step": 12048 }, { "epoch": 0.9069798076742129, "grad_norm": 4.50060510635376, "learning_rate": 2.318889911540184e-06, "loss": 2.0471, "step": 12049 }, { "epoch": 0.907055081954873, "grad_norm": 5.397789001464844, "learning_rate": 2.3152217957364607e-06, "loss": 1.9728, "step": 12050 }, { "epoch": 0.9071303562355332, "grad_norm": 4.0448079109191895, "learning_rate": 2.3115565146559826e-06, "loss": 1.861, "step": 12051 }, { "epoch": 0.9072056305161934, "grad_norm": 4.343610763549805, "learning_rate": 2.3078940685166364e-06, "loss": 1.7512, "step": 12052 }, { "epoch": 0.9072809047968535, "grad_norm": 4.3381428718566895, "learning_rate": 2.304234457536153e-06, "loss": 1.9521, "step": 12053 }, { "epoch": 0.9073561790775136, "grad_norm": 4.190424919128418, "learning_rate": 2.3005776819320648e-06, "loss": 1.7461, "step": 12054 }, { "epoch": 0.9074314533581739, "grad_norm": 4.159946918487549, "learning_rate": 2.2969237419217802e-06, "loss": 1.6084, "step": 12055 }, { "epoch": 0.907506727638834, "grad_norm": 6.220204830169678, "learning_rate": 2.293272637722493e-06, "loss": 2.0885, "step": 12056 }, { "epoch": 0.9075820019194941, "grad_norm": 4.030333042144775, "learning_rate": 2.289624369551269e-06, "loss": 1.7672, "step": 12057 }, { "epoch": 0.9076572762001544, "grad_norm": 5.191592216491699, "learning_rate": 2.2859789376249718e-06, "loss": 1.8069, "step": 12058 }, { "epoch": 0.9077325504808145, "grad_norm": 6.664621829986572, "learning_rate": 2.2823363421603294e-06, "loss": 2.1352, "step": 12059 }, { "epoch": 0.9078078247614746, "grad_norm": 6.185181140899658, "learning_rate": 2.2786965833738684e-06, "loss": 1.6447, "step": 12060 }, { "epoch": 0.9078830990421348, "grad_norm": 4.342257022857666, "learning_rate": 2.2750596614819708e-06, "loss": 1.7512, "step": 12061 }, { "epoch": 0.9079583733227949, "grad_norm": 5.421034336090088, "learning_rate": 2.271425576700831e-06, "loss": 1.653, "step": 12062 }, { "epoch": 0.9080336476034551, "grad_norm": 5.593506813049316, "learning_rate": 2.267794329246498e-06, "loss": 1.6534, "step": 12063 }, { "epoch": 0.9081089218841153, "grad_norm": 4.65397310256958, "learning_rate": 2.264165919334826e-06, "loss": 1.7472, "step": 12064 }, { "epoch": 0.9081841961647754, "grad_norm": 4.712983131408691, "learning_rate": 2.2605403471815278e-06, "loss": 1.9471, "step": 12065 }, { "epoch": 0.9082594704454355, "grad_norm": 4.293585300445557, "learning_rate": 2.256917613002124e-06, "loss": 1.7198, "step": 12066 }, { "epoch": 0.9083347447260958, "grad_norm": 4.533767223358154, "learning_rate": 2.2532977170119764e-06, "loss": 1.4754, "step": 12067 }, { "epoch": 0.9084100190067559, "grad_norm": 6.342230319976807, "learning_rate": 2.2496806594262842e-06, "loss": 1.8504, "step": 12068 }, { "epoch": 0.908485293287416, "grad_norm": 5.326993465423584, "learning_rate": 2.246066440460065e-06, "loss": 1.7505, "step": 12069 }, { "epoch": 0.9085605675680761, "grad_norm": 6.085519313812256, "learning_rate": 2.2424550603281746e-06, "loss": 1.6443, "step": 12070 }, { "epoch": 0.9086358418487364, "grad_norm": 5.617304801940918, "learning_rate": 2.2388465192452914e-06, "loss": 1.8948, "step": 12071 }, { "epoch": 0.9087111161293965, "grad_norm": 4.76896858215332, "learning_rate": 2.2352408174259487e-06, "loss": 1.6311, "step": 12072 }, { "epoch": 0.9087863904100566, "grad_norm": 5.786907196044922, "learning_rate": 2.2316379550844815e-06, "loss": 1.8867, "step": 12073 }, { "epoch": 0.9088616646907168, "grad_norm": 4.617349147796631, "learning_rate": 2.2280379324350785e-06, "loss": 1.6429, "step": 12074 }, { "epoch": 0.908936938971377, "grad_norm": 4.733928203582764, "learning_rate": 2.224440749691742e-06, "loss": 1.5572, "step": 12075 }, { "epoch": 0.9090122132520371, "grad_norm": 4.012433052062988, "learning_rate": 2.220846407068328e-06, "loss": 1.7184, "step": 12076 }, { "epoch": 0.9090874875326973, "grad_norm": 5.539886474609375, "learning_rate": 2.217254904778493e-06, "loss": 2.4661, "step": 12077 }, { "epoch": 0.9091627618133574, "grad_norm": 5.59161376953125, "learning_rate": 2.21366624303575e-06, "loss": 1.7802, "step": 12078 }, { "epoch": 0.9092380360940175, "grad_norm": 5.362966060638428, "learning_rate": 2.2100804220534454e-06, "loss": 1.7354, "step": 12079 }, { "epoch": 0.9093133103746778, "grad_norm": 4.5530781745910645, "learning_rate": 2.206497442044725e-06, "loss": 1.848, "step": 12080 }, { "epoch": 0.9093885846553379, "grad_norm": 5.087313175201416, "learning_rate": 2.202917303222607e-06, "loss": 1.7929, "step": 12081 }, { "epoch": 0.909463858935998, "grad_norm": 4.621016502380371, "learning_rate": 2.1993400057998994e-06, "loss": 1.8301, "step": 12082 }, { "epoch": 0.9095391332166582, "grad_norm": 9.632458686828613, "learning_rate": 2.195765549989276e-06, "loss": 1.837, "step": 12083 }, { "epoch": 0.9096144074973184, "grad_norm": 6.701595306396484, "learning_rate": 2.192193936003223e-06, "loss": 2.0151, "step": 12084 }, { "epoch": 0.9096896817779785, "grad_norm": 5.789704322814941, "learning_rate": 2.1886251640540643e-06, "loss": 2.1068, "step": 12085 }, { "epoch": 0.9097649560586387, "grad_norm": 6.658529281616211, "learning_rate": 2.1850592343539532e-06, "loss": 1.6723, "step": 12086 }, { "epoch": 0.9098402303392988, "grad_norm": 4.172266483306885, "learning_rate": 2.181496147114881e-06, "loss": 1.5237, "step": 12087 }, { "epoch": 0.909915504619959, "grad_norm": 3.8307430744171143, "learning_rate": 2.1779359025486503e-06, "loss": 1.3783, "step": 12088 }, { "epoch": 0.9099907789006191, "grad_norm": 4.280561447143555, "learning_rate": 2.1743785008669194e-06, "loss": 1.4928, "step": 12089 }, { "epoch": 0.9100660531812793, "grad_norm": 5.1558451652526855, "learning_rate": 2.1708239422811525e-06, "loss": 1.6024, "step": 12090 }, { "epoch": 0.9101413274619394, "grad_norm": 4.757821083068848, "learning_rate": 2.167272227002676e-06, "loss": 1.3839, "step": 12091 }, { "epoch": 0.9102166017425996, "grad_norm": 4.467638969421387, "learning_rate": 2.1637233552426028e-06, "loss": 1.9276, "step": 12092 }, { "epoch": 0.9102918760232598, "grad_norm": 5.651608467102051, "learning_rate": 2.160177327211932e-06, "loss": 1.7023, "step": 12093 }, { "epoch": 0.9103671503039199, "grad_norm": 4.382534503936768, "learning_rate": 2.1566341431214443e-06, "loss": 2.0227, "step": 12094 }, { "epoch": 0.91044242458458, "grad_norm": 5.981085300445557, "learning_rate": 2.1530938031817714e-06, "loss": 1.8377, "step": 12095 }, { "epoch": 0.9105176988652403, "grad_norm": 3.827946901321411, "learning_rate": 2.1495563076033896e-06, "loss": 1.7057, "step": 12096 }, { "epoch": 0.9105929731459004, "grad_norm": 5.965029716491699, "learning_rate": 2.146021656596586e-06, "loss": 1.7208, "step": 12097 }, { "epoch": 0.9106682474265605, "grad_norm": 5.827772617340088, "learning_rate": 2.1424898503714985e-06, "loss": 1.8398, "step": 12098 }, { "epoch": 0.9107435217072207, "grad_norm": 4.450931072235107, "learning_rate": 2.1389608891380584e-06, "loss": 1.6344, "step": 12099 }, { "epoch": 0.9108187959878808, "grad_norm": 4.463832378387451, "learning_rate": 2.1354347731060707e-06, "loss": 1.4284, "step": 12100 }, { "epoch": 0.910894070268541, "grad_norm": 4.7808966636657715, "learning_rate": 2.13191150248514e-06, "loss": 2.0314, "step": 12101 }, { "epoch": 0.9109693445492012, "grad_norm": 4.422584056854248, "learning_rate": 2.1283910774847315e-06, "loss": 1.6688, "step": 12102 }, { "epoch": 0.9110446188298613, "grad_norm": 5.107571601867676, "learning_rate": 2.124873498314106e-06, "loss": 1.8038, "step": 12103 }, { "epoch": 0.9111198931105214, "grad_norm": 4.270766258239746, "learning_rate": 2.121358765182385e-06, "loss": 1.79, "step": 12104 }, { "epoch": 0.9111951673911817, "grad_norm": 5.984953880310059, "learning_rate": 2.1178468782985074e-06, "loss": 1.8127, "step": 12105 }, { "epoch": 0.9112704416718418, "grad_norm": 5.883664131164551, "learning_rate": 2.1143378378712388e-06, "loss": 2.1094, "step": 12106 }, { "epoch": 0.9113457159525019, "grad_norm": 3.6939966678619385, "learning_rate": 2.1108316441091904e-06, "loss": 1.4716, "step": 12107 }, { "epoch": 0.911420990233162, "grad_norm": 4.318075656890869, "learning_rate": 2.10732829722079e-06, "loss": 1.742, "step": 12108 }, { "epoch": 0.9114962645138223, "grad_norm": 3.9309732913970947, "learning_rate": 2.1038277974142982e-06, "loss": 1.4777, "step": 12109 }, { "epoch": 0.9115715387944824, "grad_norm": 4.194386005401611, "learning_rate": 2.100330144897822e-06, "loss": 1.6525, "step": 12110 }, { "epoch": 0.9116468130751425, "grad_norm": 5.224255084991455, "learning_rate": 2.0968353398792773e-06, "loss": 1.5894, "step": 12111 }, { "epoch": 0.9117220873558027, "grad_norm": 4.281154632568359, "learning_rate": 2.0933433825664206e-06, "loss": 1.7495, "step": 12112 }, { "epoch": 0.9117973616364629, "grad_norm": 4.29382848739624, "learning_rate": 2.0898542731668415e-06, "loss": 1.9486, "step": 12113 }, { "epoch": 0.911872635917123, "grad_norm": 4.170835971832275, "learning_rate": 2.086368011887957e-06, "loss": 2.0966, "step": 12114 }, { "epoch": 0.9119479101977832, "grad_norm": 5.726946830749512, "learning_rate": 2.0828845989370127e-06, "loss": 1.8433, "step": 12115 }, { "epoch": 0.9120231844784433, "grad_norm": 4.424744129180908, "learning_rate": 2.0794040345210874e-06, "loss": 1.8361, "step": 12116 }, { "epoch": 0.9120984587591034, "grad_norm": 4.886198043823242, "learning_rate": 2.075926318847099e-06, "loss": 1.5234, "step": 12117 }, { "epoch": 0.9121737330397637, "grad_norm": 4.902031421661377, "learning_rate": 2.0724514521217764e-06, "loss": 1.7774, "step": 12118 }, { "epoch": 0.9122490073204238, "grad_norm": 5.974635601043701, "learning_rate": 2.0689794345516935e-06, "loss": 2.0676, "step": 12119 }, { "epoch": 0.9123242816010839, "grad_norm": 4.213400363922119, "learning_rate": 2.065510266343257e-06, "loss": 2.2021, "step": 12120 }, { "epoch": 0.9123995558817442, "grad_norm": 4.379502773284912, "learning_rate": 2.0620439477026966e-06, "loss": 1.7332, "step": 12121 }, { "epoch": 0.9124748301624043, "grad_norm": 5.108627796173096, "learning_rate": 2.0585804788360696e-06, "loss": 1.5354, "step": 12122 }, { "epoch": 0.9125501044430644, "grad_norm": 4.5947651863098145, "learning_rate": 2.055119859949284e-06, "loss": 1.6025, "step": 12123 }, { "epoch": 0.9126253787237246, "grad_norm": 3.9317147731781006, "learning_rate": 2.051662091248041e-06, "loss": 1.9726, "step": 12124 }, { "epoch": 0.9127006530043847, "grad_norm": 4.857736110687256, "learning_rate": 2.048207172937916e-06, "loss": 1.688, "step": 12125 }, { "epoch": 0.9127759272850449, "grad_norm": 4.268885135650635, "learning_rate": 2.044755105224283e-06, "loss": 1.5768, "step": 12126 }, { "epoch": 0.912851201565705, "grad_norm": 7.116901874542236, "learning_rate": 2.041305888312356e-06, "loss": 1.6902, "step": 12127 }, { "epoch": 0.9129264758463652, "grad_norm": 5.009929180145264, "learning_rate": 2.037859522407193e-06, "loss": 1.6505, "step": 12128 }, { "epoch": 0.9130017501270253, "grad_norm": 6.537803649902344, "learning_rate": 2.034416007713652e-06, "loss": 1.505, "step": 12129 }, { "epoch": 0.9130770244076855, "grad_norm": 5.335648536682129, "learning_rate": 2.0309753444364533e-06, "loss": 1.9193, "step": 12130 }, { "epoch": 0.9131522986883457, "grad_norm": 5.426054954528809, "learning_rate": 2.027537532780133e-06, "loss": 2.2388, "step": 12131 }, { "epoch": 0.9132275729690058, "grad_norm": 4.194322109222412, "learning_rate": 2.024102572949066e-06, "loss": 1.5754, "step": 12132 }, { "epoch": 0.9133028472496659, "grad_norm": 6.474128723144531, "learning_rate": 2.020670465147434e-06, "loss": 2.2222, "step": 12133 }, { "epoch": 0.9133781215303262, "grad_norm": 4.180809020996094, "learning_rate": 2.0172412095792793e-06, "loss": 1.5092, "step": 12134 }, { "epoch": 0.9134533958109863, "grad_norm": 5.883711814880371, "learning_rate": 2.013814806448455e-06, "loss": 1.5964, "step": 12135 }, { "epoch": 0.9135286700916464, "grad_norm": 7.152510643005371, "learning_rate": 2.0103912559586545e-06, "loss": 2.1642, "step": 12136 }, { "epoch": 0.9136039443723066, "grad_norm": 4.77456521987915, "learning_rate": 2.0069705583133926e-06, "loss": 1.7848, "step": 12137 }, { "epoch": 0.9136792186529668, "grad_norm": 4.454054832458496, "learning_rate": 2.0035527137160284e-06, "loss": 1.8845, "step": 12138 }, { "epoch": 0.9137544929336269, "grad_norm": 5.078956604003906, "learning_rate": 2.000137722369733e-06, "loss": 1.7626, "step": 12139 }, { "epoch": 0.9138297672142871, "grad_norm": 4.826076030731201, "learning_rate": 1.996725584477521e-06, "loss": 2.1638, "step": 12140 }, { "epoch": 0.9139050414949472, "grad_norm": 4.578750133514404, "learning_rate": 1.9933163002422373e-06, "loss": 1.6142, "step": 12141 }, { "epoch": 0.9139803157756073, "grad_norm": 5.217748641967773, "learning_rate": 1.9899098698665574e-06, "loss": 1.9073, "step": 12142 }, { "epoch": 0.9140555900562676, "grad_norm": 5.196359634399414, "learning_rate": 1.986506293552981e-06, "loss": 2.0435, "step": 12143 }, { "epoch": 0.9141308643369277, "grad_norm": 5.367886066436768, "learning_rate": 1.9831055715038293e-06, "loss": 1.959, "step": 12144 }, { "epoch": 0.9142061386175878, "grad_norm": 4.844086647033691, "learning_rate": 1.979707703921285e-06, "loss": 1.7157, "step": 12145 }, { "epoch": 0.9142814128982479, "grad_norm": 4.099212646484375, "learning_rate": 1.9763126910073315e-06, "loss": 1.7612, "step": 12146 }, { "epoch": 0.9143566871789082, "grad_norm": 4.701624870300293, "learning_rate": 1.9729205329637902e-06, "loss": 1.5493, "step": 12147 }, { "epoch": 0.9144319614595683, "grad_norm": 4.403903961181641, "learning_rate": 1.9695312299923165e-06, "loss": 1.8948, "step": 12148 }, { "epoch": 0.9145072357402284, "grad_norm": 4.937520503997803, "learning_rate": 1.9661447822944047e-06, "loss": 1.6898, "step": 12149 }, { "epoch": 0.9145825100208886, "grad_norm": 3.9335262775421143, "learning_rate": 1.9627611900713495e-06, "loss": 1.7621, "step": 12150 }, { "epoch": 0.9146577843015488, "grad_norm": 4.922421455383301, "learning_rate": 1.959380453524312e-06, "loss": 2.0592, "step": 12151 }, { "epoch": 0.9147330585822089, "grad_norm": 4.673279285430908, "learning_rate": 1.95600257285426e-06, "loss": 1.506, "step": 12152 }, { "epoch": 0.9148083328628691, "grad_norm": 5.225360870361328, "learning_rate": 1.9526275482620095e-06, "loss": 2.0268, "step": 12153 }, { "epoch": 0.9148836071435292, "grad_norm": 5.667407512664795, "learning_rate": 1.949255379948178e-06, "loss": 1.7856, "step": 12154 }, { "epoch": 0.9149588814241894, "grad_norm": 4.161797046661377, "learning_rate": 1.9458860681132506e-06, "loss": 1.8559, "step": 12155 }, { "epoch": 0.9150341557048496, "grad_norm": 4.751323699951172, "learning_rate": 1.942519612957505e-06, "loss": 1.615, "step": 12156 }, { "epoch": 0.9151094299855097, "grad_norm": 4.532934665679932, "learning_rate": 1.9391560146810872e-06, "loss": 1.8495, "step": 12157 }, { "epoch": 0.9151847042661698, "grad_norm": 5.302066802978516, "learning_rate": 1.9357952734839314e-06, "loss": 1.8125, "step": 12158 }, { "epoch": 0.91525997854683, "grad_norm": 5.823980808258057, "learning_rate": 1.932437389565833e-06, "loss": 1.6756, "step": 12159 }, { "epoch": 0.9153352528274902, "grad_norm": 4.314770698547363, "learning_rate": 1.929082363126422e-06, "loss": 1.6791, "step": 12160 }, { "epoch": 0.9154105271081503, "grad_norm": 5.326642990112305, "learning_rate": 1.925730194365127e-06, "loss": 1.9199, "step": 12161 }, { "epoch": 0.9154858013888105, "grad_norm": 5.333718299865723, "learning_rate": 1.922380883481234e-06, "loss": 2.0131, "step": 12162 }, { "epoch": 0.9155610756694706, "grad_norm": 4.871474266052246, "learning_rate": 1.919034430673844e-06, "loss": 1.6673, "step": 12163 }, { "epoch": 0.9156363499501308, "grad_norm": 4.754137992858887, "learning_rate": 1.9156908361418924e-06, "loss": 1.9942, "step": 12164 }, { "epoch": 0.915711624230791, "grad_norm": 5.771067142486572, "learning_rate": 1.9123501000841594e-06, "loss": 1.519, "step": 12165 }, { "epoch": 0.9157868985114511, "grad_norm": 4.481830596923828, "learning_rate": 1.909012222699236e-06, "loss": 1.5606, "step": 12166 }, { "epoch": 0.9158621727921112, "grad_norm": 5.182826042175293, "learning_rate": 1.9056772041855408e-06, "loss": 1.8298, "step": 12167 }, { "epoch": 0.9159374470727714, "grad_norm": 7.040618896484375, "learning_rate": 1.9023450447413439e-06, "loss": 2.2798, "step": 12168 }, { "epoch": 0.9160127213534316, "grad_norm": 4.410822868347168, "learning_rate": 1.8990157445647195e-06, "loss": 1.6769, "step": 12169 }, { "epoch": 0.9160879956340917, "grad_norm": 7.301709175109863, "learning_rate": 1.8956893038535983e-06, "loss": 1.7509, "step": 12170 }, { "epoch": 0.9161632699147518, "grad_norm": 4.752766132354736, "learning_rate": 1.8923657228057168e-06, "loss": 1.5635, "step": 12171 }, { "epoch": 0.9162385441954121, "grad_norm": 5.100418567657471, "learning_rate": 1.8890450016186611e-06, "loss": 2.1537, "step": 12172 }, { "epoch": 0.9163138184760722, "grad_norm": 4.798778057098389, "learning_rate": 1.8857271404898292e-06, "loss": 1.7792, "step": 12173 }, { "epoch": 0.9163890927567323, "grad_norm": 4.980538845062256, "learning_rate": 1.8824121396164628e-06, "loss": 1.6818, "step": 12174 }, { "epoch": 0.9164643670373925, "grad_norm": 4.662041187286377, "learning_rate": 1.8790999991956327e-06, "loss": 1.8831, "step": 12175 }, { "epoch": 0.9165396413180527, "grad_norm": 4.635166645050049, "learning_rate": 1.8757907194242307e-06, "loss": 1.6141, "step": 12176 }, { "epoch": 0.9166149155987128, "grad_norm": 5.585263252258301, "learning_rate": 1.8724843004989944e-06, "loss": 1.8305, "step": 12177 }, { "epoch": 0.916690189879373, "grad_norm": 5.630923271179199, "learning_rate": 1.869180742616461e-06, "loss": 1.8609, "step": 12178 }, { "epoch": 0.9167654641600331, "grad_norm": 7.096811771392822, "learning_rate": 1.8658800459730451e-06, "loss": 1.5213, "step": 12179 }, { "epoch": 0.9168407384406932, "grad_norm": 6.673388957977295, "learning_rate": 1.862582210764935e-06, "loss": 1.6549, "step": 12180 }, { "epoch": 0.9169160127213535, "grad_norm": 5.677751064300537, "learning_rate": 1.8592872371882009e-06, "loss": 1.7664, "step": 12181 }, { "epoch": 0.9169912870020136, "grad_norm": 6.032618045806885, "learning_rate": 1.8559951254386975e-06, "loss": 1.6967, "step": 12182 }, { "epoch": 0.9170665612826737, "grad_norm": 6.274374961853027, "learning_rate": 1.8527058757121574e-06, "loss": 2.2358, "step": 12183 }, { "epoch": 0.917141835563334, "grad_norm": 4.8216400146484375, "learning_rate": 1.8494194882040905e-06, "loss": 1.6153, "step": 12184 }, { "epoch": 0.9172171098439941, "grad_norm": 4.21168851852417, "learning_rate": 1.8461359631098796e-06, "loss": 1.7474, "step": 12185 }, { "epoch": 0.9172923841246542, "grad_norm": 5.958002090454102, "learning_rate": 1.8428553006247129e-06, "loss": 1.9142, "step": 12186 }, { "epoch": 0.9173676584053143, "grad_norm": 5.667016983032227, "learning_rate": 1.8395775009436288e-06, "loss": 2.098, "step": 12187 }, { "epoch": 0.9174429326859745, "grad_norm": 5.322368144989014, "learning_rate": 1.8363025642614662e-06, "loss": 1.63, "step": 12188 }, { "epoch": 0.9175182069666347, "grad_norm": 4.8616414070129395, "learning_rate": 1.8330304907729302e-06, "loss": 2.1501, "step": 12189 }, { "epoch": 0.9175934812472948, "grad_norm": 5.068863391876221, "learning_rate": 1.8297612806725151e-06, "loss": 1.8887, "step": 12190 }, { "epoch": 0.917668755527955, "grad_norm": 4.703795433044434, "learning_rate": 1.8264949341545768e-06, "loss": 1.7179, "step": 12191 }, { "epoch": 0.9177440298086151, "grad_norm": 4.514343738555908, "learning_rate": 1.823231451413293e-06, "loss": 1.9094, "step": 12192 }, { "epoch": 0.9178193040892753, "grad_norm": 5.487853527069092, "learning_rate": 1.8199708326426644e-06, "loss": 1.5176, "step": 12193 }, { "epoch": 0.9178945783699355, "grad_norm": 3.800943374633789, "learning_rate": 1.8167130780365248e-06, "loss": 1.8835, "step": 12194 }, { "epoch": 0.9179698526505956, "grad_norm": 5.158065319061279, "learning_rate": 1.813458187788536e-06, "loss": 2.0343, "step": 12195 }, { "epoch": 0.9180451269312557, "grad_norm": 5.792636394500732, "learning_rate": 1.8102061620921984e-06, "loss": 1.6276, "step": 12196 }, { "epoch": 0.918120401211916, "grad_norm": 5.902648448944092, "learning_rate": 1.8069570011408298e-06, "loss": 2.0031, "step": 12197 }, { "epoch": 0.9181956754925761, "grad_norm": 4.943217754364014, "learning_rate": 1.8037107051275926e-06, "loss": 1.6429, "step": 12198 }, { "epoch": 0.9182709497732362, "grad_norm": 4.20914363861084, "learning_rate": 1.8004672742454598e-06, "loss": 1.5463, "step": 12199 }, { "epoch": 0.9183462240538964, "grad_norm": 6.348606109619141, "learning_rate": 1.7972267086872495e-06, "loss": 1.8233, "step": 12200 }, { "epoch": 0.9184214983345566, "grad_norm": 5.3262529373168945, "learning_rate": 1.793989008645597e-06, "loss": 2.2016, "step": 12201 }, { "epoch": 0.9184967726152167, "grad_norm": 6.567859649658203, "learning_rate": 1.7907541743129864e-06, "loss": 1.5946, "step": 12202 }, { "epoch": 0.9185720468958769, "grad_norm": 4.43633508682251, "learning_rate": 1.787522205881703e-06, "loss": 2.0763, "step": 12203 }, { "epoch": 0.918647321176537, "grad_norm": 4.103714942932129, "learning_rate": 1.7842931035438992e-06, "loss": 1.9192, "step": 12204 }, { "epoch": 0.9187225954571971, "grad_norm": 5.5819993019104, "learning_rate": 1.7810668674915154e-06, "loss": 1.9005, "step": 12205 }, { "epoch": 0.9187978697378573, "grad_norm": 4.494290351867676, "learning_rate": 1.7778434979163483e-06, "loss": 2.0268, "step": 12206 }, { "epoch": 0.9188731440185175, "grad_norm": 4.2096357345581055, "learning_rate": 1.7746229950100223e-06, "loss": 1.8851, "step": 12207 }, { "epoch": 0.9189484182991776, "grad_norm": 6.377787113189697, "learning_rate": 1.77140535896399e-06, "loss": 1.9062, "step": 12208 }, { "epoch": 0.9190236925798377, "grad_norm": 4.032090187072754, "learning_rate": 1.7681905899695206e-06, "loss": 1.854, "step": 12209 }, { "epoch": 0.919098966860498, "grad_norm": 4.690585613250732, "learning_rate": 1.7649786882177277e-06, "loss": 1.8613, "step": 12210 }, { "epoch": 0.9191742411411581, "grad_norm": 4.723318099975586, "learning_rate": 1.7617696538995587e-06, "loss": 1.7904, "step": 12211 }, { "epoch": 0.9192495154218182, "grad_norm": 4.920557975769043, "learning_rate": 1.7585634872057665e-06, "loss": 2.0168, "step": 12212 }, { "epoch": 0.9193247897024784, "grad_norm": 4.147205352783203, "learning_rate": 1.75536018832696e-06, "loss": 1.8378, "step": 12213 }, { "epoch": 0.9194000639831386, "grad_norm": 4.625912189483643, "learning_rate": 1.752159757453553e-06, "loss": 1.9746, "step": 12214 }, { "epoch": 0.9194753382637987, "grad_norm": 4.77776575088501, "learning_rate": 1.748962194775816e-06, "loss": 1.8596, "step": 12215 }, { "epoch": 0.9195506125444589, "grad_norm": 5.684907913208008, "learning_rate": 1.7457675004838247e-06, "loss": 1.8789, "step": 12216 }, { "epoch": 0.919625886825119, "grad_norm": 6.896183490753174, "learning_rate": 1.7425756747674992e-06, "loss": 1.9244, "step": 12217 }, { "epoch": 0.9197011611057792, "grad_norm": 3.7616982460021973, "learning_rate": 1.7393867178165823e-06, "loss": 1.6131, "step": 12218 }, { "epoch": 0.9197764353864394, "grad_norm": 5.706488609313965, "learning_rate": 1.73620062982065e-06, "loss": 1.6967, "step": 12219 }, { "epoch": 0.9198517096670995, "grad_norm": 5.436760425567627, "learning_rate": 1.7330174109691067e-06, "loss": 1.8944, "step": 12220 }, { "epoch": 0.9199269839477596, "grad_norm": 4.784337997436523, "learning_rate": 1.7298370614511895e-06, "loss": 1.8491, "step": 12221 }, { "epoch": 0.9200022582284199, "grad_norm": 5.08333683013916, "learning_rate": 1.7266595814559472e-06, "loss": 1.7878, "step": 12222 }, { "epoch": 0.92007753250908, "grad_norm": 4.940536022186279, "learning_rate": 1.7234849711722845e-06, "loss": 1.9315, "step": 12223 }, { "epoch": 0.9201528067897401, "grad_norm": 6.431300640106201, "learning_rate": 1.720313230788928e-06, "loss": 2.0447, "step": 12224 }, { "epoch": 0.9202280810704002, "grad_norm": 6.289635181427002, "learning_rate": 1.7171443604944104e-06, "loss": 1.8862, "step": 12225 }, { "epoch": 0.9203033553510604, "grad_norm": 6.209627151489258, "learning_rate": 1.7139783604771253e-06, "loss": 1.3237, "step": 12226 }, { "epoch": 0.9203786296317206, "grad_norm": 4.827346324920654, "learning_rate": 1.7108152309252778e-06, "loss": 1.7259, "step": 12227 }, { "epoch": 0.9204539039123807, "grad_norm": 5.893888473510742, "learning_rate": 1.707654972026912e-06, "loss": 2.0293, "step": 12228 }, { "epoch": 0.9205291781930409, "grad_norm": 5.251547813415527, "learning_rate": 1.704497583969883e-06, "loss": 1.5231, "step": 12229 }, { "epoch": 0.920604452473701, "grad_norm": 4.561532974243164, "learning_rate": 1.7013430669419018e-06, "loss": 1.7705, "step": 12230 }, { "epoch": 0.9206797267543612, "grad_norm": 6.910198211669922, "learning_rate": 1.698191421130485e-06, "loss": 1.7174, "step": 12231 }, { "epoch": 0.9207550010350214, "grad_norm": 5.117297649383545, "learning_rate": 1.695042646723005e-06, "loss": 1.8971, "step": 12232 }, { "epoch": 0.9208302753156815, "grad_norm": 5.046881198883057, "learning_rate": 1.6918967439066346e-06, "loss": 1.4779, "step": 12233 }, { "epoch": 0.9209055495963416, "grad_norm": 5.789036750793457, "learning_rate": 1.6887537128683905e-06, "loss": 1.5449, "step": 12234 }, { "epoch": 0.9209808238770019, "grad_norm": 4.644052982330322, "learning_rate": 1.6856135537951123e-06, "loss": 1.3028, "step": 12235 }, { "epoch": 0.921056098157662, "grad_norm": 4.927212238311768, "learning_rate": 1.6824762668734895e-06, "loss": 1.6329, "step": 12236 }, { "epoch": 0.9211313724383221, "grad_norm": 4.547092437744141, "learning_rate": 1.6793418522900062e-06, "loss": 1.5825, "step": 12237 }, { "epoch": 0.9212066467189823, "grad_norm": 4.7649922370910645, "learning_rate": 1.6762103102310078e-06, "loss": 1.6437, "step": 12238 }, { "epoch": 0.9212819209996425, "grad_norm": 4.804971694946289, "learning_rate": 1.67308164088264e-06, "loss": 1.4323, "step": 12239 }, { "epoch": 0.9213571952803026, "grad_norm": 9.616639137268066, "learning_rate": 1.6699558444309094e-06, "loss": 1.9122, "step": 12240 }, { "epoch": 0.9214324695609628, "grad_norm": 5.791726589202881, "learning_rate": 1.6668329210616229e-06, "loss": 1.5747, "step": 12241 }, { "epoch": 0.9215077438416229, "grad_norm": 5.349061012268066, "learning_rate": 1.6637128709604432e-06, "loss": 2.0135, "step": 12242 }, { "epoch": 0.921583018122283, "grad_norm": 5.285614013671875, "learning_rate": 1.6605956943128443e-06, "loss": 1.6782, "step": 12243 }, { "epoch": 0.9216582924029432, "grad_norm": 4.931285381317139, "learning_rate": 1.657481391304122e-06, "loss": 1.907, "step": 12244 }, { "epoch": 0.9217335666836034, "grad_norm": 4.677455902099609, "learning_rate": 1.6543699621194286e-06, "loss": 1.7745, "step": 12245 }, { "epoch": 0.9218088409642635, "grad_norm": 7.338361740112305, "learning_rate": 1.651261406943716e-06, "loss": 1.7279, "step": 12246 }, { "epoch": 0.9218841152449236, "grad_norm": 7.404848575592041, "learning_rate": 1.6481557259617864e-06, "loss": 2.2013, "step": 12247 }, { "epoch": 0.9219593895255839, "grad_norm": 4.182814121246338, "learning_rate": 1.6450529193582587e-06, "loss": 2.0324, "step": 12248 }, { "epoch": 0.922034663806244, "grad_norm": 4.7084059715271, "learning_rate": 1.6419529873175964e-06, "loss": 1.8228, "step": 12249 }, { "epoch": 0.9221099380869041, "grad_norm": 4.797237396240234, "learning_rate": 1.6388559300240692e-06, "loss": 1.7065, "step": 12250 }, { "epoch": 0.9221852123675643, "grad_norm": 5.081038475036621, "learning_rate": 1.6357617476617904e-06, "loss": 2.0336, "step": 12251 }, { "epoch": 0.9222604866482245, "grad_norm": 4.941250801086426, "learning_rate": 1.632670440414702e-06, "loss": 1.8849, "step": 12252 }, { "epoch": 0.9223357609288846, "grad_norm": 5.9697675704956055, "learning_rate": 1.6295820084665847e-06, "loss": 2.1494, "step": 12253 }, { "epoch": 0.9224110352095448, "grad_norm": 6.487722873687744, "learning_rate": 1.6264964520010195e-06, "loss": 2.1117, "step": 12254 }, { "epoch": 0.9224863094902049, "grad_norm": 6.078864574432373, "learning_rate": 1.623413771201443e-06, "loss": 1.5579, "step": 12255 }, { "epoch": 0.9225615837708651, "grad_norm": 4.4235405921936035, "learning_rate": 1.6203339662511086e-06, "loss": 1.9028, "step": 12256 }, { "epoch": 0.9226368580515253, "grad_norm": 4.426524639129639, "learning_rate": 1.617257037333103e-06, "loss": 1.7325, "step": 12257 }, { "epoch": 0.9227121323321854, "grad_norm": 5.306687831878662, "learning_rate": 1.614182984630347e-06, "loss": 1.895, "step": 12258 }, { "epoch": 0.9227874066128455, "grad_norm": 5.716831684112549, "learning_rate": 1.6111118083255717e-06, "loss": 2.1086, "step": 12259 }, { "epoch": 0.9228626808935058, "grad_norm": 5.054379940032959, "learning_rate": 1.608043508601359e-06, "loss": 1.8939, "step": 12260 }, { "epoch": 0.9229379551741659, "grad_norm": 3.8481345176696777, "learning_rate": 1.604978085640102e-06, "loss": 2.072, "step": 12261 }, { "epoch": 0.923013229454826, "grad_norm": 5.156086444854736, "learning_rate": 1.6019155396240437e-06, "loss": 2.0356, "step": 12262 }, { "epoch": 0.9230885037354862, "grad_norm": 4.235472202301025, "learning_rate": 1.598855870735233e-06, "loss": 1.7186, "step": 12263 }, { "epoch": 0.9231637780161464, "grad_norm": 6.332386493682861, "learning_rate": 1.5957990791555633e-06, "loss": 1.9756, "step": 12264 }, { "epoch": 0.9232390522968065, "grad_norm": 5.3916239738464355, "learning_rate": 1.5927451650667446e-06, "loss": 1.86, "step": 12265 }, { "epoch": 0.9233143265774666, "grad_norm": 6.285261154174805, "learning_rate": 1.589694128650343e-06, "loss": 2.4192, "step": 12266 }, { "epoch": 0.9233896008581268, "grad_norm": 5.584387302398682, "learning_rate": 1.5866459700877135e-06, "loss": 1.7704, "step": 12267 }, { "epoch": 0.923464875138787, "grad_norm": 4.821388244628906, "learning_rate": 1.583600689560072e-06, "loss": 1.8477, "step": 12268 }, { "epoch": 0.9235401494194471, "grad_norm": 4.976274490356445, "learning_rate": 1.5805582872484404e-06, "loss": 1.7078, "step": 12269 }, { "epoch": 0.9236154237001073, "grad_norm": 3.925536870956421, "learning_rate": 1.5775187633336907e-06, "loss": 1.8722, "step": 12270 }, { "epoch": 0.9236906979807674, "grad_norm": 6.487788677215576, "learning_rate": 1.574482117996512e-06, "loss": 1.8876, "step": 12271 }, { "epoch": 0.9237659722614275, "grad_norm": 5.969163417816162, "learning_rate": 1.5714483514174204e-06, "loss": 1.6099, "step": 12272 }, { "epoch": 0.9238412465420878, "grad_norm": 4.744826316833496, "learning_rate": 1.5684174637767724e-06, "loss": 1.7138, "step": 12273 }, { "epoch": 0.9239165208227479, "grad_norm": 8.98989200592041, "learning_rate": 1.5653894552547344e-06, "loss": 1.7619, "step": 12274 }, { "epoch": 0.923991795103408, "grad_norm": 3.9739437103271484, "learning_rate": 1.562364326031318e-06, "loss": 2.0059, "step": 12275 }, { "epoch": 0.9240670693840682, "grad_norm": 4.609249591827393, "learning_rate": 1.5593420762863575e-06, "loss": 1.5311, "step": 12276 }, { "epoch": 0.9241423436647284, "grad_norm": 5.350503444671631, "learning_rate": 1.5563227061995256e-06, "loss": 1.5442, "step": 12277 }, { "epoch": 0.9242176179453885, "grad_norm": 4.771426200866699, "learning_rate": 1.5533062159503064e-06, "loss": 1.6395, "step": 12278 }, { "epoch": 0.9242928922260487, "grad_norm": 4.568175315856934, "learning_rate": 1.550292605718029e-06, "loss": 1.6099, "step": 12279 }, { "epoch": 0.9243681665067088, "grad_norm": 4.588810443878174, "learning_rate": 1.5472818756818274e-06, "loss": 1.6252, "step": 12280 }, { "epoch": 0.924443440787369, "grad_norm": 4.478748798370361, "learning_rate": 1.5442740260207034e-06, "loss": 1.9399, "step": 12281 }, { "epoch": 0.9245187150680292, "grad_norm": 4.047736167907715, "learning_rate": 1.5412690569134413e-06, "loss": 2.0048, "step": 12282 }, { "epoch": 0.9245939893486893, "grad_norm": 4.284279823303223, "learning_rate": 1.538266968538704e-06, "loss": 1.7946, "step": 12283 }, { "epoch": 0.9246692636293494, "grad_norm": 4.401327133178711, "learning_rate": 1.5352677610749321e-06, "loss": 2.0512, "step": 12284 }, { "epoch": 0.9247445379100095, "grad_norm": 5.214788913726807, "learning_rate": 1.532271434700433e-06, "loss": 2.213, "step": 12285 }, { "epoch": 0.9248198121906698, "grad_norm": 5.28977632522583, "learning_rate": 1.5292779895933252e-06, "loss": 1.8059, "step": 12286 }, { "epoch": 0.9248950864713299, "grad_norm": 7.314880847930908, "learning_rate": 1.5262874259315663e-06, "loss": 1.7721, "step": 12287 }, { "epoch": 0.92497036075199, "grad_norm": 5.627440929412842, "learning_rate": 1.5232997438929308e-06, "loss": 2.1521, "step": 12288 }, { "epoch": 0.9250456350326502, "grad_norm": 6.540442943572998, "learning_rate": 1.5203149436550324e-06, "loss": 1.8406, "step": 12289 }, { "epoch": 0.9251209093133104, "grad_norm": 5.5621232986450195, "learning_rate": 1.517333025395312e-06, "loss": 1.9183, "step": 12290 }, { "epoch": 0.9251961835939705, "grad_norm": 7.005645275115967, "learning_rate": 1.514353989291023e-06, "loss": 1.8921, "step": 12291 }, { "epoch": 0.9252714578746307, "grad_norm": 4.031583786010742, "learning_rate": 1.5113778355192731e-06, "loss": 1.8887, "step": 12292 }, { "epoch": 0.9253467321552908, "grad_norm": 5.792757987976074, "learning_rate": 1.5084045642569765e-06, "loss": 2.0874, "step": 12293 }, { "epoch": 0.925422006435951, "grad_norm": 4.797659873962402, "learning_rate": 1.5054341756808976e-06, "loss": 1.8219, "step": 12294 }, { "epoch": 0.9254972807166112, "grad_norm": 4.369216442108154, "learning_rate": 1.5024666699676059e-06, "loss": 1.8302, "step": 12295 }, { "epoch": 0.9255725549972713, "grad_norm": 4.578887462615967, "learning_rate": 1.499502047293516e-06, "loss": 1.8271, "step": 12296 }, { "epoch": 0.9256478292779314, "grad_norm": 4.997894763946533, "learning_rate": 1.496540307834865e-06, "loss": 1.8104, "step": 12297 }, { "epoch": 0.9257231035585917, "grad_norm": 4.8700032234191895, "learning_rate": 1.4935814517677226e-06, "loss": 1.7245, "step": 12298 }, { "epoch": 0.9257983778392518, "grad_norm": 4.54118013381958, "learning_rate": 1.490625479267982e-06, "loss": 1.7417, "step": 12299 }, { "epoch": 0.9258736521199119, "grad_norm": 4.5466628074646, "learning_rate": 1.4876723905113688e-06, "loss": 1.8084, "step": 12300 }, { "epoch": 0.9259489264005721, "grad_norm": 4.774020195007324, "learning_rate": 1.4847221856734317e-06, "loss": 1.8558, "step": 12301 }, { "epoch": 0.9260242006812323, "grad_norm": 5.079516887664795, "learning_rate": 1.4817748649295581e-06, "loss": 1.8675, "step": 12302 }, { "epoch": 0.9260994749618924, "grad_norm": 4.72567081451416, "learning_rate": 1.478830428454947e-06, "loss": 1.6151, "step": 12303 }, { "epoch": 0.9261747492425525, "grad_norm": 3.883065938949585, "learning_rate": 1.475888876424647e-06, "loss": 1.5284, "step": 12304 }, { "epoch": 0.9262500235232127, "grad_norm": 6.428706169128418, "learning_rate": 1.4729502090135295e-06, "loss": 1.7748, "step": 12305 }, { "epoch": 0.9263252978038728, "grad_norm": 3.676638126373291, "learning_rate": 1.4700144263962767e-06, "loss": 1.8677, "step": 12306 }, { "epoch": 0.926400572084533, "grad_norm": 5.967966556549072, "learning_rate": 1.467081528747416e-06, "loss": 1.7996, "step": 12307 }, { "epoch": 0.9264758463651932, "grad_norm": 4.967217445373535, "learning_rate": 1.464151516241302e-06, "loss": 1.9483, "step": 12308 }, { "epoch": 0.9265511206458533, "grad_norm": 4.532412528991699, "learning_rate": 1.4612243890521238e-06, "loss": 1.9035, "step": 12309 }, { "epoch": 0.9266263949265134, "grad_norm": 4.576842784881592, "learning_rate": 1.4583001473538693e-06, "loss": 2.3958, "step": 12310 }, { "epoch": 0.9267016692071737, "grad_norm": 6.185211658477783, "learning_rate": 1.4553787913203999e-06, "loss": 1.9932, "step": 12311 }, { "epoch": 0.9267769434878338, "grad_norm": 5.435148239135742, "learning_rate": 1.4524603211253651e-06, "loss": 1.9148, "step": 12312 }, { "epoch": 0.9268522177684939, "grad_norm": 5.024242877960205, "learning_rate": 1.4495447369422766e-06, "loss": 1.8926, "step": 12313 }, { "epoch": 0.9269274920491541, "grad_norm": 4.752802848815918, "learning_rate": 1.4466320389444343e-06, "loss": 1.7478, "step": 12314 }, { "epoch": 0.9270027663298143, "grad_norm": 4.4405694007873535, "learning_rate": 1.4437222273050112e-06, "loss": 2.0847, "step": 12315 }, { "epoch": 0.9270780406104744, "grad_norm": 5.675045490264893, "learning_rate": 1.4408153021969683e-06, "loss": 1.4183, "step": 12316 }, { "epoch": 0.9271533148911346, "grad_norm": 4.446272850036621, "learning_rate": 1.4379112637931292e-06, "loss": 1.6795, "step": 12317 }, { "epoch": 0.9272285891717947, "grad_norm": 5.909229278564453, "learning_rate": 1.435010112266122e-06, "loss": 1.7283, "step": 12318 }, { "epoch": 0.9273038634524549, "grad_norm": 4.777585983276367, "learning_rate": 1.4321118477884199e-06, "loss": 1.8068, "step": 12319 }, { "epoch": 0.9273791377331151, "grad_norm": 5.509268760681152, "learning_rate": 1.4292164705323075e-06, "loss": 1.7152, "step": 12320 }, { "epoch": 0.9274544120137752, "grad_norm": 4.406026840209961, "learning_rate": 1.426323980669908e-06, "loss": 1.5419, "step": 12321 }, { "epoch": 0.9275296862944353, "grad_norm": 4.915528774261475, "learning_rate": 1.423434378373184e-06, "loss": 1.9114, "step": 12322 }, { "epoch": 0.9276049605750954, "grad_norm": 4.654607772827148, "learning_rate": 1.4205476638138926e-06, "loss": 1.5583, "step": 12323 }, { "epoch": 0.9276802348557557, "grad_norm": 5.048783779144287, "learning_rate": 1.417663837163663e-06, "loss": 1.8921, "step": 12324 }, { "epoch": 0.9277555091364158, "grad_norm": 5.955204486846924, "learning_rate": 1.4147828985939139e-06, "loss": 1.485, "step": 12325 }, { "epoch": 0.9278307834170759, "grad_norm": 5.14150857925415, "learning_rate": 1.4119048482759189e-06, "loss": 1.5399, "step": 12326 }, { "epoch": 0.9279060576977362, "grad_norm": 5.628507614135742, "learning_rate": 1.4090296863807638e-06, "loss": 1.5679, "step": 12327 }, { "epoch": 0.9279813319783963, "grad_norm": 6.916476249694824, "learning_rate": 1.406157413079373e-06, "loss": 1.9774, "step": 12328 }, { "epoch": 0.9280566062590564, "grad_norm": 4.837411880493164, "learning_rate": 1.4032880285424877e-06, "loss": 1.9572, "step": 12329 }, { "epoch": 0.9281318805397166, "grad_norm": 5.051713943481445, "learning_rate": 1.4004215329406933e-06, "loss": 1.7368, "step": 12330 }, { "epoch": 0.9282071548203767, "grad_norm": 4.86163330078125, "learning_rate": 1.3975579264443872e-06, "loss": 1.9365, "step": 12331 }, { "epoch": 0.9282824291010369, "grad_norm": 4.9362921714782715, "learning_rate": 1.3946972092238108e-06, "loss": 1.9422, "step": 12332 }, { "epoch": 0.9283577033816971, "grad_norm": 4.248833179473877, "learning_rate": 1.3918393814490172e-06, "loss": 1.639, "step": 12333 }, { "epoch": 0.9284329776623572, "grad_norm": 4.783376693725586, "learning_rate": 1.3889844432899035e-06, "loss": 2.2542, "step": 12334 }, { "epoch": 0.9285082519430173, "grad_norm": 4.6017680168151855, "learning_rate": 1.3861323949161787e-06, "loss": 1.9184, "step": 12335 }, { "epoch": 0.9285835262236776, "grad_norm": 4.360620498657227, "learning_rate": 1.3832832364973959e-06, "loss": 1.8149, "step": 12336 }, { "epoch": 0.9286588005043377, "grad_norm": 4.381224155426025, "learning_rate": 1.3804369682029362e-06, "loss": 2.1166, "step": 12337 }, { "epoch": 0.9287340747849978, "grad_norm": 4.172545909881592, "learning_rate": 1.377593590201981e-06, "loss": 1.7075, "step": 12338 }, { "epoch": 0.928809349065658, "grad_norm": 5.277780532836914, "learning_rate": 1.3747531026635841e-06, "loss": 1.8819, "step": 12339 }, { "epoch": 0.9288846233463182, "grad_norm": 5.23681640625, "learning_rate": 1.371915505756588e-06, "loss": 1.9712, "step": 12340 }, { "epoch": 0.9289598976269783, "grad_norm": 5.093683242797852, "learning_rate": 1.3690807996496858e-06, "loss": 1.9324, "step": 12341 }, { "epoch": 0.9290351719076385, "grad_norm": 4.756047248840332, "learning_rate": 1.3662489845113923e-06, "loss": 1.7555, "step": 12342 }, { "epoch": 0.9291104461882986, "grad_norm": 6.883366107940674, "learning_rate": 1.3634200605100511e-06, "loss": 1.8412, "step": 12343 }, { "epoch": 0.9291857204689588, "grad_norm": 4.924742698669434, "learning_rate": 1.3605940278138328e-06, "loss": 2.1506, "step": 12344 }, { "epoch": 0.9292609947496189, "grad_norm": 4.9706292152404785, "learning_rate": 1.3577708865907424e-06, "loss": 1.8266, "step": 12345 }, { "epoch": 0.9293362690302791, "grad_norm": 5.175417900085449, "learning_rate": 1.3549506370085952e-06, "loss": 2.2428, "step": 12346 }, { "epoch": 0.9294115433109392, "grad_norm": 4.169736862182617, "learning_rate": 1.3521332792350627e-06, "loss": 1.7014, "step": 12347 }, { "epoch": 0.9294868175915993, "grad_norm": 4.357666969299316, "learning_rate": 1.3493188134376112e-06, "loss": 1.7887, "step": 12348 }, { "epoch": 0.9295620918722596, "grad_norm": 5.783558368682861, "learning_rate": 1.3465072397835733e-06, "loss": 1.9423, "step": 12349 }, { "epoch": 0.9296373661529197, "grad_norm": 4.634674072265625, "learning_rate": 1.3436985584400652e-06, "loss": 1.6322, "step": 12350 }, { "epoch": 0.9297126404335798, "grad_norm": 5.010001182556152, "learning_rate": 1.3408927695740703e-06, "loss": 1.8918, "step": 12351 }, { "epoch": 0.92978791471424, "grad_norm": 5.452240467071533, "learning_rate": 1.3380898733523828e-06, "loss": 1.6482, "step": 12352 }, { "epoch": 0.9298631889949002, "grad_norm": 5.481477737426758, "learning_rate": 1.335289869941625e-06, "loss": 1.6214, "step": 12353 }, { "epoch": 0.9299384632755603, "grad_norm": 4.558218955993652, "learning_rate": 1.3324927595082526e-06, "loss": 1.4869, "step": 12354 }, { "epoch": 0.9300137375562205, "grad_norm": 5.196902275085449, "learning_rate": 1.3296985422185383e-06, "loss": 1.5518, "step": 12355 }, { "epoch": 0.9300890118368806, "grad_norm": 5.532809734344482, "learning_rate": 1.3269072182386044e-06, "loss": 1.9082, "step": 12356 }, { "epoch": 0.9301642861175408, "grad_norm": 4.873679161071777, "learning_rate": 1.3241187877343687e-06, "loss": 1.3061, "step": 12357 }, { "epoch": 0.930239560398201, "grad_norm": 4.478184700012207, "learning_rate": 1.3213332508716093e-06, "loss": 1.8496, "step": 12358 }, { "epoch": 0.9303148346788611, "grad_norm": 4.214957237243652, "learning_rate": 1.318550607815905e-06, "loss": 2.0773, "step": 12359 }, { "epoch": 0.9303901089595212, "grad_norm": 4.268503665924072, "learning_rate": 1.3157708587326955e-06, "loss": 1.8211, "step": 12360 }, { "epoch": 0.9304653832401815, "grad_norm": 4.271566867828369, "learning_rate": 1.31299400378721e-06, "loss": 1.7975, "step": 12361 }, { "epoch": 0.9305406575208416, "grad_norm": 4.710085391998291, "learning_rate": 1.3102200431445332e-06, "loss": 1.9764, "step": 12362 }, { "epoch": 0.9306159318015017, "grad_norm": 3.784324884414673, "learning_rate": 1.307448976969572e-06, "loss": 1.7899, "step": 12363 }, { "epoch": 0.9306912060821618, "grad_norm": 6.107129096984863, "learning_rate": 1.3046808054270498e-06, "loss": 2.2463, "step": 12364 }, { "epoch": 0.9307664803628221, "grad_norm": 5.740386009216309, "learning_rate": 1.3019155286815354e-06, "loss": 1.9128, "step": 12365 }, { "epoch": 0.9308417546434822, "grad_norm": 5.818799018859863, "learning_rate": 1.299153146897414e-06, "loss": 1.543, "step": 12366 }, { "epoch": 0.9309170289241423, "grad_norm": 6.177310943603516, "learning_rate": 1.2963936602388926e-06, "loss": 1.5635, "step": 12367 }, { "epoch": 0.9309923032048025, "grad_norm": 4.254126071929932, "learning_rate": 1.293637068870024e-06, "loss": 1.6423, "step": 12368 }, { "epoch": 0.9310675774854626, "grad_norm": 4.227095127105713, "learning_rate": 1.2908833729546822e-06, "loss": 1.7641, "step": 12369 }, { "epoch": 0.9311428517661228, "grad_norm": 4.562884330749512, "learning_rate": 1.288132572656553e-06, "loss": 1.5985, "step": 12370 }, { "epoch": 0.931218126046783, "grad_norm": 5.227180480957031, "learning_rate": 1.2853846681391835e-06, "loss": 2.0847, "step": 12371 }, { "epoch": 0.9312934003274431, "grad_norm": 3.9266233444213867, "learning_rate": 1.2826396595659097e-06, "loss": 1.879, "step": 12372 }, { "epoch": 0.9313686746081032, "grad_norm": 5.308877944946289, "learning_rate": 1.2798975470999286e-06, "loss": 1.6638, "step": 12373 }, { "epoch": 0.9314439488887635, "grad_norm": 6.199954509735107, "learning_rate": 1.2771583309042378e-06, "loss": 1.7566, "step": 12374 }, { "epoch": 0.9315192231694236, "grad_norm": 4.5094523429870605, "learning_rate": 1.2744220111416793e-06, "loss": 1.8317, "step": 12375 }, { "epoch": 0.9315944974500837, "grad_norm": 4.7906293869018555, "learning_rate": 1.2716885879749286e-06, "loss": 1.8685, "step": 12376 }, { "epoch": 0.931669771730744, "grad_norm": 4.097645282745361, "learning_rate": 1.2689580615664775e-06, "loss": 1.6988, "step": 12377 }, { "epoch": 0.9317450460114041, "grad_norm": 3.911189079284668, "learning_rate": 1.2662304320786412e-06, "loss": 1.844, "step": 12378 }, { "epoch": 0.9318203202920642, "grad_norm": 3.939239501953125, "learning_rate": 1.2635056996735783e-06, "loss": 1.7582, "step": 12379 }, { "epoch": 0.9318955945727244, "grad_norm": 5.424371719360352, "learning_rate": 1.2607838645132486e-06, "loss": 1.7457, "step": 12380 }, { "epoch": 0.9319708688533845, "grad_norm": 4.451361656188965, "learning_rate": 1.2580649267594835e-06, "loss": 1.3664, "step": 12381 }, { "epoch": 0.9320461431340447, "grad_norm": 5.039329528808594, "learning_rate": 1.2553488865738927e-06, "loss": 2.0343, "step": 12382 }, { "epoch": 0.9321214174147048, "grad_norm": 6.252554893493652, "learning_rate": 1.2526357441179527e-06, "loss": 1.78, "step": 12383 }, { "epoch": 0.932196691695365, "grad_norm": 5.426802635192871, "learning_rate": 1.2499254995529397e-06, "loss": 1.4672, "step": 12384 }, { "epoch": 0.9322719659760251, "grad_norm": 5.803450107574463, "learning_rate": 1.247218153039975e-06, "loss": 1.8442, "step": 12385 }, { "epoch": 0.9323472402566853, "grad_norm": 4.301825046539307, "learning_rate": 1.2445137047400079e-06, "loss": 2.0996, "step": 12386 }, { "epoch": 0.9324225145373455, "grad_norm": 4.769915580749512, "learning_rate": 1.241812154813804e-06, "loss": 2.3025, "step": 12387 }, { "epoch": 0.9324977888180056, "grad_norm": 5.4182353019714355, "learning_rate": 1.239113503421968e-06, "loss": 1.82, "step": 12388 }, { "epoch": 0.9325730630986657, "grad_norm": 5.053954601287842, "learning_rate": 1.2364177507249219e-06, "loss": 1.7181, "step": 12389 }, { "epoch": 0.932648337379326, "grad_norm": 5.407834529876709, "learning_rate": 1.2337248968829262e-06, "loss": 2.1727, "step": 12390 }, { "epoch": 0.9327236116599861, "grad_norm": 3.902431011199951, "learning_rate": 1.231034942056053e-06, "loss": 1.7494, "step": 12391 }, { "epoch": 0.9327988859406462, "grad_norm": 4.74291467666626, "learning_rate": 1.2283478864042243e-06, "loss": 2.0203, "step": 12392 }, { "epoch": 0.9328741602213064, "grad_norm": 4.747903347015381, "learning_rate": 1.2256637300871621e-06, "loss": 1.5169, "step": 12393 }, { "epoch": 0.9329494345019665, "grad_norm": 4.57589054107666, "learning_rate": 1.22298247326445e-06, "loss": 2.0302, "step": 12394 }, { "epoch": 0.9330247087826267, "grad_norm": 5.166066646575928, "learning_rate": 1.220304116095472e-06, "loss": 1.7973, "step": 12395 }, { "epoch": 0.9330999830632869, "grad_norm": 3.6988258361816406, "learning_rate": 1.2176286587394447e-06, "loss": 1.4378, "step": 12396 }, { "epoch": 0.933175257343947, "grad_norm": 4.444631576538086, "learning_rate": 1.2149561013554245e-06, "loss": 1.6537, "step": 12397 }, { "epoch": 0.9332505316246071, "grad_norm": 4.651283264160156, "learning_rate": 1.2122864441022896e-06, "loss": 1.459, "step": 12398 }, { "epoch": 0.9333258059052674, "grad_norm": 4.353204727172852, "learning_rate": 1.20961968713873e-06, "loss": 1.6164, "step": 12399 }, { "epoch": 0.9334010801859275, "grad_norm": 5.270798206329346, "learning_rate": 1.2069558306232853e-06, "loss": 1.9878, "step": 12400 }, { "epoch": 0.9334763544665876, "grad_norm": 6.042629718780518, "learning_rate": 1.2042948747143235e-06, "loss": 1.659, "step": 12401 }, { "epoch": 0.9335516287472477, "grad_norm": 4.069930553436279, "learning_rate": 1.2016368195700122e-06, "loss": 1.5272, "step": 12402 }, { "epoch": 0.933626903027908, "grad_norm": 4.620057106018066, "learning_rate": 1.1989816653483755e-06, "loss": 1.9253, "step": 12403 }, { "epoch": 0.9337021773085681, "grad_norm": 5.32535982131958, "learning_rate": 1.1963294122072533e-06, "loss": 1.8514, "step": 12404 }, { "epoch": 0.9337774515892282, "grad_norm": 4.083754539489746, "learning_rate": 1.1936800603043141e-06, "loss": 1.8252, "step": 12405 }, { "epoch": 0.9338527258698884, "grad_norm": 5.511929988861084, "learning_rate": 1.1910336097970541e-06, "loss": 1.7242, "step": 12406 }, { "epoch": 0.9339280001505486, "grad_norm": 4.205560684204102, "learning_rate": 1.1883900608427924e-06, "loss": 2.1703, "step": 12407 }, { "epoch": 0.9340032744312087, "grad_norm": 5.5000996589660645, "learning_rate": 1.1857494135986858e-06, "loss": 1.522, "step": 12408 }, { "epoch": 0.9340785487118689, "grad_norm": 5.138461589813232, "learning_rate": 1.1831116682217203e-06, "loss": 2.0386, "step": 12409 }, { "epoch": 0.934153822992529, "grad_norm": 4.082030773162842, "learning_rate": 1.1804768248686926e-06, "loss": 1.6248, "step": 12410 }, { "epoch": 0.9342290972731891, "grad_norm": 6.3812408447265625, "learning_rate": 1.1778448836962385e-06, "loss": 2.1083, "step": 12411 }, { "epoch": 0.9343043715538494, "grad_norm": 4.852584362030029, "learning_rate": 1.1752158448608164e-06, "loss": 1.7771, "step": 12412 }, { "epoch": 0.9343796458345095, "grad_norm": 5.251467704772949, "learning_rate": 1.1725897085187231e-06, "loss": 2.014, "step": 12413 }, { "epoch": 0.9344549201151696, "grad_norm": 5.6337995529174805, "learning_rate": 1.1699664748260676e-06, "loss": 1.8374, "step": 12414 }, { "epoch": 0.9345301943958298, "grad_norm": 5.751669883728027, "learning_rate": 1.1673461439387967e-06, "loss": 2.0189, "step": 12415 }, { "epoch": 0.93460546867649, "grad_norm": 4.690957069396973, "learning_rate": 1.1647287160126807e-06, "loss": 1.8297, "step": 12416 }, { "epoch": 0.9346807429571501, "grad_norm": 4.864229679107666, "learning_rate": 1.1621141912033173e-06, "loss": 1.7293, "step": 12417 }, { "epoch": 0.9347560172378103, "grad_norm": 4.473308086395264, "learning_rate": 1.1595025696661376e-06, "loss": 2.0501, "step": 12418 }, { "epoch": 0.9348312915184704, "grad_norm": 5.315026760101318, "learning_rate": 1.1568938515563843e-06, "loss": 2.2269, "step": 12419 }, { "epoch": 0.9349065657991306, "grad_norm": 4.450246334075928, "learning_rate": 1.1542880370291443e-06, "loss": 1.5225, "step": 12420 }, { "epoch": 0.9349818400797907, "grad_norm": 4.711861610412598, "learning_rate": 1.1516851262393325e-06, "loss": 1.8737, "step": 12421 }, { "epoch": 0.9350571143604509, "grad_norm": 4.109829902648926, "learning_rate": 1.1490851193416751e-06, "loss": 1.8027, "step": 12422 }, { "epoch": 0.935132388641111, "grad_norm": 5.545371055603027, "learning_rate": 1.1464880164907377e-06, "loss": 1.7376, "step": 12423 }, { "epoch": 0.9352076629217712, "grad_norm": 4.459930419921875, "learning_rate": 1.1438938178409187e-06, "loss": 1.7518, "step": 12424 }, { "epoch": 0.9352829372024314, "grad_norm": 5.083422660827637, "learning_rate": 1.1413025235464226e-06, "loss": 1.8885, "step": 12425 }, { "epoch": 0.9353582114830915, "grad_norm": 5.105082988739014, "learning_rate": 1.1387141337612983e-06, "loss": 1.8601, "step": 12426 }, { "epoch": 0.9354334857637516, "grad_norm": 5.203488349914551, "learning_rate": 1.136128648639423e-06, "loss": 1.7542, "step": 12427 }, { "epoch": 0.9355087600444119, "grad_norm": 4.26222038269043, "learning_rate": 1.1335460683344957e-06, "loss": 1.7276, "step": 12428 }, { "epoch": 0.935584034325072, "grad_norm": 6.196890830993652, "learning_rate": 1.1309663930000324e-06, "loss": 2.0614, "step": 12429 }, { "epoch": 0.9356593086057321, "grad_norm": 4.4999518394470215, "learning_rate": 1.1283896227893997e-06, "loss": 1.6308, "step": 12430 }, { "epoch": 0.9357345828863923, "grad_norm": 4.548105239868164, "learning_rate": 1.1258157578557804e-06, "loss": 2.0237, "step": 12431 }, { "epoch": 0.9358098571670525, "grad_norm": 5.871966361999512, "learning_rate": 1.1232447983521743e-06, "loss": 1.6179, "step": 12432 }, { "epoch": 0.9358851314477126, "grad_norm": 10.403579711914062, "learning_rate": 1.1206767444314204e-06, "loss": 1.8935, "step": 12433 }, { "epoch": 0.9359604057283728, "grad_norm": 5.130834579467773, "learning_rate": 1.1181115962461853e-06, "loss": 1.8067, "step": 12434 }, { "epoch": 0.9360356800090329, "grad_norm": 6.020187854766846, "learning_rate": 1.1155493539489636e-06, "loss": 1.8979, "step": 12435 }, { "epoch": 0.936110954289693, "grad_norm": 5.908157825469971, "learning_rate": 1.1129900176920616e-06, "loss": 1.7154, "step": 12436 }, { "epoch": 0.9361862285703533, "grad_norm": 6.477232933044434, "learning_rate": 1.110433587627635e-06, "loss": 1.7109, "step": 12437 }, { "epoch": 0.9362615028510134, "grad_norm": 5.100631237030029, "learning_rate": 1.1078800639076458e-06, "loss": 1.6685, "step": 12438 }, { "epoch": 0.9363367771316735, "grad_norm": 4.069405555725098, "learning_rate": 1.105329446683906e-06, "loss": 1.4602, "step": 12439 }, { "epoch": 0.9364120514123337, "grad_norm": 4.262848854064941, "learning_rate": 1.1027817361080273e-06, "loss": 2.116, "step": 12440 }, { "epoch": 0.9364873256929939, "grad_norm": 5.0627923011779785, "learning_rate": 1.1002369323314777e-06, "loss": 2.0571, "step": 12441 }, { "epoch": 0.936562599973654, "grad_norm": 5.0627923011779785, "learning_rate": 1.1002369323314777e-06, "loss": 2.1289, "step": 12442 }, { "epoch": 0.9366378742543141, "grad_norm": 6.708596706390381, "learning_rate": 1.0976950355055304e-06, "loss": 1.6543, "step": 12443 }, { "epoch": 0.9367131485349743, "grad_norm": 5.497631072998047, "learning_rate": 1.0951560457813036e-06, "loss": 1.9162, "step": 12444 }, { "epoch": 0.9367884228156345, "grad_norm": 5.114773750305176, "learning_rate": 1.0926199633097157e-06, "loss": 1.6112, "step": 12445 }, { "epoch": 0.9368636970962946, "grad_norm": 4.5852131843566895, "learning_rate": 1.090086788241551e-06, "loss": 1.7776, "step": 12446 }, { "epoch": 0.9369389713769548, "grad_norm": 5.624839782714844, "learning_rate": 1.087556520727384e-06, "loss": 1.7819, "step": 12447 }, { "epoch": 0.9370142456576149, "grad_norm": 6.2389044761657715, "learning_rate": 1.0850291609176387e-06, "loss": 2.2336, "step": 12448 }, { "epoch": 0.937089519938275, "grad_norm": 4.557095050811768, "learning_rate": 1.0825047089625506e-06, "loss": 2.0348, "step": 12449 }, { "epoch": 0.9371647942189353, "grad_norm": 4.758711814880371, "learning_rate": 1.0799831650121995e-06, "loss": 1.8398, "step": 12450 }, { "epoch": 0.9372400684995954, "grad_norm": 4.8402485847473145, "learning_rate": 1.0774645292164875e-06, "loss": 1.5905, "step": 12451 }, { "epoch": 0.9373153427802555, "grad_norm": 4.371322154998779, "learning_rate": 1.0749488017251286e-06, "loss": 1.6842, "step": 12452 }, { "epoch": 0.9373906170609158, "grad_norm": 4.830621242523193, "learning_rate": 1.0724359826876806e-06, "loss": 1.9591, "step": 12453 }, { "epoch": 0.9374658913415759, "grad_norm": 6.202361583709717, "learning_rate": 1.0699260722535242e-06, "loss": 2.0032, "step": 12454 }, { "epoch": 0.937541165622236, "grad_norm": 5.951354026794434, "learning_rate": 1.0674190705718733e-06, "loss": 1.8076, "step": 12455 }, { "epoch": 0.9376164399028962, "grad_norm": 3.8482415676116943, "learning_rate": 1.0649149777917532e-06, "loss": 1.6773, "step": 12456 }, { "epoch": 0.9376917141835563, "grad_norm": 4.696192264556885, "learning_rate": 1.0624137940620283e-06, "loss": 1.5832, "step": 12457 }, { "epoch": 0.9377669884642165, "grad_norm": 5.443212032318115, "learning_rate": 1.0599155195313847e-06, "loss": 1.7271, "step": 12458 }, { "epoch": 0.9378422627448767, "grad_norm": 4.137064456939697, "learning_rate": 1.0574201543483375e-06, "loss": 1.4819, "step": 12459 }, { "epoch": 0.9379175370255368, "grad_norm": 7.3501691818237305, "learning_rate": 1.0549276986612288e-06, "loss": 2.0562, "step": 12460 }, { "epoch": 0.9379928113061969, "grad_norm": 4.535752296447754, "learning_rate": 1.0524381526182347e-06, "loss": 2.1746, "step": 12461 }, { "epoch": 0.9380680855868571, "grad_norm": 4.882367134094238, "learning_rate": 1.0499515163673423e-06, "loss": 1.98, "step": 12462 }, { "epoch": 0.9381433598675173, "grad_norm": 3.5462915897369385, "learning_rate": 1.0474677900563723e-06, "loss": 1.5841, "step": 12463 }, { "epoch": 0.9382186341481774, "grad_norm": 4.408980369567871, "learning_rate": 1.0449869738329953e-06, "loss": 1.7213, "step": 12464 }, { "epoch": 0.9382939084288375, "grad_norm": 4.364137172698975, "learning_rate": 1.0425090678446658e-06, "loss": 1.7734, "step": 12465 }, { "epoch": 0.9383691827094978, "grad_norm": 5.5675530433654785, "learning_rate": 1.0400340722386936e-06, "loss": 1.3926, "step": 12466 }, { "epoch": 0.9384444569901579, "grad_norm": 5.3386664390563965, "learning_rate": 1.0375619871622222e-06, "loss": 1.753, "step": 12467 }, { "epoch": 0.938519731270818, "grad_norm": 6.269412517547607, "learning_rate": 1.0350928127622005e-06, "loss": 1.9518, "step": 12468 }, { "epoch": 0.9385950055514782, "grad_norm": 5.968063831329346, "learning_rate": 1.0326265491854114e-06, "loss": 1.8388, "step": 12469 }, { "epoch": 0.9386702798321384, "grad_norm": 5.513698101043701, "learning_rate": 1.0301631965784763e-06, "loss": 1.7869, "step": 12470 }, { "epoch": 0.9387455541127985, "grad_norm": 6.845789432525635, "learning_rate": 1.0277027550878226e-06, "loss": 1.8155, "step": 12471 }, { "epoch": 0.9388208283934587, "grad_norm": 4.660444259643555, "learning_rate": 1.0252452248597277e-06, "loss": 2.068, "step": 12472 }, { "epoch": 0.9388961026741188, "grad_norm": 5.427291393280029, "learning_rate": 1.0227906060402747e-06, "loss": 1.8706, "step": 12473 }, { "epoch": 0.938971376954779, "grad_norm": 5.784024238586426, "learning_rate": 1.0203388987753971e-06, "loss": 1.9764, "step": 12474 }, { "epoch": 0.9390466512354392, "grad_norm": 6.6440534591674805, "learning_rate": 1.0178901032108278e-06, "loss": 2.0299, "step": 12475 }, { "epoch": 0.9391219255160993, "grad_norm": 5.347610950469971, "learning_rate": 1.0154442194921455e-06, "loss": 1.8663, "step": 12476 }, { "epoch": 0.9391971997967594, "grad_norm": 4.229396820068359, "learning_rate": 1.0130012477647499e-06, "loss": 1.7663, "step": 12477 }, { "epoch": 0.9392724740774197, "grad_norm": 4.909701347351074, "learning_rate": 1.0105611881738808e-06, "loss": 2.1304, "step": 12478 }, { "epoch": 0.9393477483580798, "grad_norm": 5.29572057723999, "learning_rate": 1.008124040864572e-06, "loss": 1.5572, "step": 12479 }, { "epoch": 0.9394230226387399, "grad_norm": 7.212335109710693, "learning_rate": 1.0056898059817245e-06, "loss": 2.0612, "step": 12480 }, { "epoch": 0.9394982969194, "grad_norm": 6.81786584854126, "learning_rate": 1.0032584836700277e-06, "loss": 2.0209, "step": 12481 }, { "epoch": 0.9395735712000602, "grad_norm": 6.089557647705078, "learning_rate": 1.0008300740740328e-06, "loss": 2.3121, "step": 12482 }, { "epoch": 0.9396488454807204, "grad_norm": 6.162079811096191, "learning_rate": 9.984045773380968e-07, "loss": 2.0044, "step": 12483 }, { "epoch": 0.9397241197613805, "grad_norm": 4.843301296234131, "learning_rate": 9.95981993606404e-07, "loss": 1.9771, "step": 12484 }, { "epoch": 0.9397993940420407, "grad_norm": 4.402397632598877, "learning_rate": 9.93562323022973e-07, "loss": 1.8666, "step": 12485 }, { "epoch": 0.9398746683227008, "grad_norm": 6.124284744262695, "learning_rate": 9.911455657316492e-07, "loss": 2.1549, "step": 12486 }, { "epoch": 0.939949942603361, "grad_norm": 4.449352264404297, "learning_rate": 9.88731721876096e-07, "loss": 1.7591, "step": 12487 }, { "epoch": 0.9400252168840212, "grad_norm": 5.108132839202881, "learning_rate": 9.86320791599815e-07, "loss": 1.3912, "step": 12488 }, { "epoch": 0.9401004911646813, "grad_norm": 5.851089000701904, "learning_rate": 9.839127750461308e-07, "loss": 1.5195, "step": 12489 }, { "epoch": 0.9401757654453414, "grad_norm": 3.6791093349456787, "learning_rate": 9.815076723581784e-07, "loss": 1.5425, "step": 12490 }, { "epoch": 0.9402510397260017, "grad_norm": 10.067129135131836, "learning_rate": 9.791054836789549e-07, "loss": 1.6841, "step": 12491 }, { "epoch": 0.9403263140066618, "grad_norm": 5.731894493103027, "learning_rate": 9.767062091512514e-07, "loss": 1.7032, "step": 12492 }, { "epoch": 0.9404015882873219, "grad_norm": 4.008547782897949, "learning_rate": 9.743098489177037e-07, "loss": 1.7873, "step": 12493 }, { "epoch": 0.9404768625679821, "grad_norm": 5.378289699554443, "learning_rate": 9.719164031207594e-07, "loss": 1.8939, "step": 12494 }, { "epoch": 0.9405521368486423, "grad_norm": 4.494997978210449, "learning_rate": 9.6952587190271e-07, "loss": 1.6112, "step": 12495 }, { "epoch": 0.9406274111293024, "grad_norm": 5.443055152893066, "learning_rate": 9.671382554056585e-07, "loss": 1.743, "step": 12496 }, { "epoch": 0.9407026854099626, "grad_norm": 5.768131732940674, "learning_rate": 9.647535537715524e-07, "loss": 1.9428, "step": 12497 }, { "epoch": 0.9407779596906227, "grad_norm": 5.022356033325195, "learning_rate": 9.623717671421451e-07, "loss": 1.7744, "step": 12498 }, { "epoch": 0.9408532339712828, "grad_norm": 5.572665691375732, "learning_rate": 9.599928956590343e-07, "loss": 2.1578, "step": 12499 }, { "epoch": 0.940928508251943, "grad_norm": 5.415976047515869, "learning_rate": 9.576169394636348e-07, "loss": 1.7101, "step": 12500 }, { "epoch": 0.9410037825326032, "grad_norm": 4.1262969970703125, "learning_rate": 9.55243898697189e-07, "loss": 1.9986, "step": 12501 }, { "epoch": 0.9410790568132633, "grad_norm": 4.997300624847412, "learning_rate": 9.528737735007675e-07, "loss": 1.6543, "step": 12502 }, { "epoch": 0.9411543310939234, "grad_norm": 5.764066219329834, "learning_rate": 9.505065640152689e-07, "loss": 1.5804, "step": 12503 }, { "epoch": 0.9412296053745837, "grad_norm": 4.818778038024902, "learning_rate": 9.481422703814191e-07, "loss": 1.9397, "step": 12504 }, { "epoch": 0.9413048796552438, "grad_norm": 5.200023174285889, "learning_rate": 9.457808927397616e-07, "loss": 1.9037, "step": 12505 }, { "epoch": 0.9413801539359039, "grad_norm": 7.462325572967529, "learning_rate": 9.434224312306839e-07, "loss": 1.6622, "step": 12506 }, { "epoch": 0.9414554282165641, "grad_norm": 4.497607231140137, "learning_rate": 9.410668859943738e-07, "loss": 1.6973, "step": 12507 }, { "epoch": 0.9415307024972243, "grad_norm": 4.548285484313965, "learning_rate": 9.387142571708751e-07, "loss": 1.7979, "step": 12508 }, { "epoch": 0.9416059767778844, "grad_norm": 6.1044816970825195, "learning_rate": 9.363645449000425e-07, "loss": 1.7162, "step": 12509 }, { "epoch": 0.9416812510585446, "grad_norm": 6.579189777374268, "learning_rate": 9.340177493215641e-07, "loss": 2.1044, "step": 12510 }, { "epoch": 0.9417565253392047, "grad_norm": 5.274760723114014, "learning_rate": 9.31673870574934e-07, "loss": 1.8558, "step": 12511 }, { "epoch": 0.9418317996198649, "grad_norm": 8.2307710647583, "learning_rate": 9.293329087995073e-07, "loss": 2.1402, "step": 12512 }, { "epoch": 0.9419070739005251, "grad_norm": 5.09917688369751, "learning_rate": 9.26994864134434e-07, "loss": 1.5689, "step": 12513 }, { "epoch": 0.9419823481811852, "grad_norm": 4.9971137046813965, "learning_rate": 9.246597367187138e-07, "loss": 1.9743, "step": 12514 }, { "epoch": 0.9420576224618453, "grad_norm": 5.610584735870361, "learning_rate": 9.22327526691158e-07, "loss": 1.8484, "step": 12515 }, { "epoch": 0.9421328967425056, "grad_norm": 4.722575664520264, "learning_rate": 9.199982341904112e-07, "loss": 1.7099, "step": 12516 }, { "epoch": 0.9422081710231657, "grad_norm": 5.282405853271484, "learning_rate": 9.176718593549461e-07, "loss": 1.8279, "step": 12517 }, { "epoch": 0.9422834453038258, "grad_norm": 4.823583602905273, "learning_rate": 9.153484023230519e-07, "loss": 1.7001, "step": 12518 }, { "epoch": 0.9423587195844859, "grad_norm": 6.802163124084473, "learning_rate": 9.13027863232857e-07, "loss": 1.7165, "step": 12519 }, { "epoch": 0.9424339938651461, "grad_norm": 5.163522243499756, "learning_rate": 9.107102422223069e-07, "loss": 1.6006, "step": 12520 }, { "epoch": 0.9425092681458063, "grad_norm": 5.192678928375244, "learning_rate": 9.083955394291799e-07, "loss": 1.8156, "step": 12521 }, { "epoch": 0.9425845424264664, "grad_norm": 4.587754249572754, "learning_rate": 9.060837549910772e-07, "loss": 1.5643, "step": 12522 }, { "epoch": 0.9426598167071266, "grad_norm": 5.609636306762695, "learning_rate": 9.03774889045439e-07, "loss": 2.2141, "step": 12523 }, { "epoch": 0.9427350909877867, "grad_norm": 4.699139595031738, "learning_rate": 9.014689417294997e-07, "loss": 1.9678, "step": 12524 }, { "epoch": 0.9428103652684469, "grad_norm": 4.728618144989014, "learning_rate": 8.99165913180361e-07, "loss": 1.6611, "step": 12525 }, { "epoch": 0.9428856395491071, "grad_norm": 6.487859725952148, "learning_rate": 8.968658035349131e-07, "loss": 1.9741, "step": 12526 }, { "epoch": 0.9429609138297672, "grad_norm": 6.679712772369385, "learning_rate": 8.94568612929908e-07, "loss": 1.6433, "step": 12527 }, { "epoch": 0.9430361881104273, "grad_norm": 5.824100971221924, "learning_rate": 8.922743415018975e-07, "loss": 2.2038, "step": 12528 }, { "epoch": 0.9431114623910876, "grad_norm": 4.816535949707031, "learning_rate": 8.899829893872724e-07, "loss": 1.5266, "step": 12529 }, { "epoch": 0.9431867366717477, "grad_norm": 4.589961051940918, "learning_rate": 8.876945567222461e-07, "loss": 1.6233, "step": 12530 }, { "epoch": 0.9432620109524078, "grad_norm": 4.767477989196777, "learning_rate": 8.854090436428542e-07, "loss": 1.9435, "step": 12531 }, { "epoch": 0.943337285233068, "grad_norm": 3.601264476776123, "learning_rate": 8.831264502849712e-07, "loss": 1.5987, "step": 12532 }, { "epoch": 0.9434125595137282, "grad_norm": 4.306880950927734, "learning_rate": 8.808467767842887e-07, "loss": 1.5511, "step": 12533 }, { "epoch": 0.9434878337943883, "grad_norm": 4.400055408477783, "learning_rate": 8.785700232763316e-07, "loss": 2.0112, "step": 12534 }, { "epoch": 0.9435631080750485, "grad_norm": 5.374870300292969, "learning_rate": 8.762961898964362e-07, "loss": 1.935, "step": 12535 }, { "epoch": 0.9436383823557086, "grad_norm": 5.5986175537109375, "learning_rate": 8.740252767797885e-07, "loss": 1.8258, "step": 12536 }, { "epoch": 0.9437136566363687, "grad_norm": 5.453594207763672, "learning_rate": 8.717572840613697e-07, "loss": 1.5967, "step": 12537 }, { "epoch": 0.943788930917029, "grad_norm": 4.611512660980225, "learning_rate": 8.694922118760273e-07, "loss": 1.7153, "step": 12538 }, { "epoch": 0.9438642051976891, "grad_norm": 5.214768886566162, "learning_rate": 8.672300603583927e-07, "loss": 1.7282, "step": 12539 }, { "epoch": 0.9439394794783492, "grad_norm": 4.61006498336792, "learning_rate": 8.649708296429526e-07, "loss": 1.6251, "step": 12540 }, { "epoch": 0.9440147537590093, "grad_norm": 3.7257626056671143, "learning_rate": 8.627145198640163e-07, "loss": 1.4965, "step": 12541 }, { "epoch": 0.9440900280396696, "grad_norm": 4.95784330368042, "learning_rate": 8.604611311557098e-07, "loss": 1.5681, "step": 12542 }, { "epoch": 0.9441653023203297, "grad_norm": 5.176828861236572, "learning_rate": 8.582106636519871e-07, "loss": 1.8184, "step": 12543 }, { "epoch": 0.9442405766009898, "grad_norm": 4.349226951599121, "learning_rate": 8.559631174866467e-07, "loss": 1.6727, "step": 12544 }, { "epoch": 0.94431585088165, "grad_norm": 6.516700267791748, "learning_rate": 8.537184927932818e-07, "loss": 1.6811, "step": 12545 }, { "epoch": 0.9443911251623102, "grad_norm": 5.723232269287109, "learning_rate": 8.514767897053411e-07, "loss": 1.9636, "step": 12546 }, { "epoch": 0.9444663994429703, "grad_norm": 4.648702621459961, "learning_rate": 8.492380083560736e-07, "loss": 1.9871, "step": 12547 }, { "epoch": 0.9445416737236305, "grad_norm": 4.369608402252197, "learning_rate": 8.470021488785839e-07, "loss": 1.9944, "step": 12548 }, { "epoch": 0.9446169480042906, "grad_norm": 4.019176483154297, "learning_rate": 8.447692114057826e-07, "loss": 2.082, "step": 12549 }, { "epoch": 0.9446922222849508, "grad_norm": 4.5343337059021, "learning_rate": 8.425391960704021e-07, "loss": 1.9889, "step": 12550 }, { "epoch": 0.944767496565611, "grad_norm": 6.490811347961426, "learning_rate": 8.403121030050254e-07, "loss": 1.9175, "step": 12551 }, { "epoch": 0.9448427708462711, "grad_norm": 6.407595157623291, "learning_rate": 8.380879323420354e-07, "loss": 1.9267, "step": 12552 }, { "epoch": 0.9449180451269312, "grad_norm": 3.8672337532043457, "learning_rate": 8.358666842136542e-07, "loss": 1.5596, "step": 12553 }, { "epoch": 0.9449933194075915, "grad_norm": 5.161031246185303, "learning_rate": 8.336483587519317e-07, "loss": 1.9057, "step": 12554 }, { "epoch": 0.9450685936882516, "grad_norm": 4.153258323669434, "learning_rate": 8.314329560887458e-07, "loss": 1.5539, "step": 12555 }, { "epoch": 0.9451438679689117, "grad_norm": 4.513288974761963, "learning_rate": 8.292204763557854e-07, "loss": 1.6304, "step": 12556 }, { "epoch": 0.9452191422495719, "grad_norm": 5.3652873039245605, "learning_rate": 8.270109196845843e-07, "loss": 1.8327, "step": 12557 }, { "epoch": 0.945294416530232, "grad_norm": 6.923835754394531, "learning_rate": 8.248042862064875e-07, "loss": 1.9938, "step": 12558 }, { "epoch": 0.9453696908108922, "grad_norm": 6.175401210784912, "learning_rate": 8.226005760526789e-07, "loss": 1.8228, "step": 12559 }, { "epoch": 0.9454449650915523, "grad_norm": 4.121747970581055, "learning_rate": 8.203997893541593e-07, "loss": 2.1208, "step": 12560 }, { "epoch": 0.9455202393722125, "grad_norm": 4.972888469696045, "learning_rate": 8.182019262417628e-07, "loss": 1.8324, "step": 12561 }, { "epoch": 0.9455955136528726, "grad_norm": 6.506134033203125, "learning_rate": 8.160069868461406e-07, "loss": 1.5009, "step": 12562 }, { "epoch": 0.9456707879335328, "grad_norm": 6.048686504364014, "learning_rate": 8.138149712977716e-07, "loss": 1.6118, "step": 12563 }, { "epoch": 0.945746062214193, "grad_norm": 4.6864237785339355, "learning_rate": 8.116258797269793e-07, "loss": 1.6212, "step": 12564 }, { "epoch": 0.9458213364948531, "grad_norm": 4.848001003265381, "learning_rate": 8.09439712263893e-07, "loss": 1.7511, "step": 12565 }, { "epoch": 0.9458966107755132, "grad_norm": 6.427381992340088, "learning_rate": 8.072564690384643e-07, "loss": 1.7889, "step": 12566 }, { "epoch": 0.9459718850561735, "grad_norm": 6.031688213348389, "learning_rate": 8.050761501804949e-07, "loss": 2.2765, "step": 12567 }, { "epoch": 0.9460471593368336, "grad_norm": 5.7715840339660645, "learning_rate": 8.028987558195922e-07, "loss": 2.0169, "step": 12568 }, { "epoch": 0.9461224336174937, "grad_norm": 6.054158687591553, "learning_rate": 8.007242860851971e-07, "loss": 1.7385, "step": 12569 }, { "epoch": 0.9461977078981539, "grad_norm": 5.718628883361816, "learning_rate": 7.98552741106573e-07, "loss": 1.6291, "step": 12570 }, { "epoch": 0.9462729821788141, "grad_norm": 4.007753849029541, "learning_rate": 7.963841210128109e-07, "loss": 1.6528, "step": 12571 }, { "epoch": 0.9463482564594742, "grad_norm": 5.296968460083008, "learning_rate": 7.942184259328356e-07, "loss": 2.1921, "step": 12572 }, { "epoch": 0.9464235307401344, "grad_norm": 4.816549301147461, "learning_rate": 7.920556559953884e-07, "loss": 1.7772, "step": 12573 }, { "epoch": 0.9464988050207945, "grad_norm": 4.9603095054626465, "learning_rate": 7.898958113290389e-07, "loss": 1.7354, "step": 12574 }, { "epoch": 0.9465740793014547, "grad_norm": 4.698737621307373, "learning_rate": 7.877388920621786e-07, "loss": 1.918, "step": 12575 }, { "epoch": 0.9466493535821149, "grad_norm": 6.573275566101074, "learning_rate": 7.855848983230385e-07, "loss": 1.7081, "step": 12576 }, { "epoch": 0.946724627862775, "grad_norm": 5.029136657714844, "learning_rate": 7.834338302396604e-07, "loss": 1.5041, "step": 12577 }, { "epoch": 0.9467999021434351, "grad_norm": 5.33416748046875, "learning_rate": 7.812856879399311e-07, "loss": 1.5731, "step": 12578 }, { "epoch": 0.9468751764240952, "grad_norm": 4.616922855377197, "learning_rate": 7.791404715515372e-07, "loss": 2.302, "step": 12579 }, { "epoch": 0.9469504507047555, "grad_norm": 4.990634918212891, "learning_rate": 7.7699818120201e-07, "loss": 1.7495, "step": 12580 }, { "epoch": 0.9470257249854156, "grad_norm": 4.4894866943359375, "learning_rate": 7.748588170187087e-07, "loss": 1.8907, "step": 12581 }, { "epoch": 0.9471009992660757, "grad_norm": 4.430594444274902, "learning_rate": 7.727223791288097e-07, "loss": 2.0104, "step": 12582 }, { "epoch": 0.947176273546736, "grad_norm": 4.7303619384765625, "learning_rate": 7.705888676593165e-07, "loss": 1.7304, "step": 12583 }, { "epoch": 0.9472515478273961, "grad_norm": 4.863260269165039, "learning_rate": 7.684582827370557e-07, "loss": 2.0688, "step": 12584 }, { "epoch": 0.9473268221080562, "grad_norm": 5.808121204376221, "learning_rate": 7.663306244886925e-07, "loss": 1.7594, "step": 12585 }, { "epoch": 0.9474020963887164, "grad_norm": 4.721111297607422, "learning_rate": 7.642058930406981e-07, "loss": 1.5769, "step": 12586 }, { "epoch": 0.9474773706693765, "grad_norm": 3.897254705429077, "learning_rate": 7.620840885193992e-07, "loss": 1.6152, "step": 12587 }, { "epoch": 0.9475526449500367, "grad_norm": 5.926482677459717, "learning_rate": 7.59965211050917e-07, "loss": 1.8351, "step": 12588 }, { "epoch": 0.9476279192306969, "grad_norm": 6.151461124420166, "learning_rate": 7.57849260761223e-07, "loss": 1.8595, "step": 12589 }, { "epoch": 0.947703193511357, "grad_norm": 9.226770401000977, "learning_rate": 7.55736237776089e-07, "loss": 1.828, "step": 12590 }, { "epoch": 0.9477784677920171, "grad_norm": 4.510571479797363, "learning_rate": 7.536261422211477e-07, "loss": 2.0181, "step": 12591 }, { "epoch": 0.9478537420726774, "grad_norm": 5.621356010437012, "learning_rate": 7.515189742218265e-07, "loss": 1.7265, "step": 12592 }, { "epoch": 0.9479290163533375, "grad_norm": 4.869679927825928, "learning_rate": 7.494147339033919e-07, "loss": 1.6226, "step": 12593 }, { "epoch": 0.9480042906339976, "grad_norm": 5.842344284057617, "learning_rate": 7.473134213909327e-07, "loss": 2.1404, "step": 12594 }, { "epoch": 0.9480795649146578, "grad_norm": 3.7453935146331787, "learning_rate": 7.452150368093713e-07, "loss": 1.3438, "step": 12595 }, { "epoch": 0.948154839195318, "grad_norm": 4.883222579956055, "learning_rate": 7.431195802834468e-07, "loss": 2.0852, "step": 12596 }, { "epoch": 0.9482301134759781, "grad_norm": 4.403371810913086, "learning_rate": 7.410270519377316e-07, "loss": 1.6844, "step": 12597 }, { "epoch": 0.9483053877566382, "grad_norm": 5.2157111167907715, "learning_rate": 7.389374518966152e-07, "loss": 1.5479, "step": 12598 }, { "epoch": 0.9483806620372984, "grad_norm": 4.309238433837891, "learning_rate": 7.368507802843205e-07, "loss": 2.2691, "step": 12599 }, { "epoch": 0.9484559363179585, "grad_norm": 5.366034984588623, "learning_rate": 7.347670372249038e-07, "loss": 1.8283, "step": 12600 }, { "epoch": 0.9485312105986187, "grad_norm": 6.914779186248779, "learning_rate": 7.326862228422216e-07, "loss": 1.6681, "step": 12601 }, { "epoch": 0.9486064848792789, "grad_norm": 4.132501125335693, "learning_rate": 7.306083372599804e-07, "loss": 1.9501, "step": 12602 }, { "epoch": 0.948681759159939, "grad_norm": 4.667428016662598, "learning_rate": 7.285333806017036e-07, "loss": 1.6994, "step": 12603 }, { "epoch": 0.9487570334405991, "grad_norm": 5.719582557678223, "learning_rate": 7.264613529907482e-07, "loss": 1.8735, "step": 12604 }, { "epoch": 0.9488323077212594, "grad_norm": 4.037753105163574, "learning_rate": 7.243922545502768e-07, "loss": 1.731, "step": 12605 }, { "epoch": 0.9489075820019195, "grad_norm": 6.50754451751709, "learning_rate": 7.223260854033021e-07, "loss": 1.7722, "step": 12606 }, { "epoch": 0.9489828562825796, "grad_norm": 4.7723917961120605, "learning_rate": 7.202628456726423e-07, "loss": 1.632, "step": 12607 }, { "epoch": 0.9490581305632398, "grad_norm": 4.3687334060668945, "learning_rate": 7.182025354809607e-07, "loss": 1.979, "step": 12608 }, { "epoch": 0.9491334048439, "grad_norm": 5.293466091156006, "learning_rate": 7.161451549507314e-07, "loss": 1.916, "step": 12609 }, { "epoch": 0.9492086791245601, "grad_norm": 5.199947834014893, "learning_rate": 7.140907042042621e-07, "loss": 1.5453, "step": 12610 }, { "epoch": 0.9492839534052203, "grad_norm": 5.563015460968018, "learning_rate": 7.12039183363683e-07, "loss": 1.8874, "step": 12611 }, { "epoch": 0.9493592276858804, "grad_norm": 5.206107139587402, "learning_rate": 7.09990592550952e-07, "loss": 1.9164, "step": 12612 }, { "epoch": 0.9494345019665406, "grad_norm": 5.016112327575684, "learning_rate": 7.07944931887855e-07, "loss": 1.7887, "step": 12613 }, { "epoch": 0.9495097762472008, "grad_norm": 3.691054344177246, "learning_rate": 7.059022014959893e-07, "loss": 1.7715, "step": 12614 }, { "epoch": 0.9495850505278609, "grad_norm": 6.079201698303223, "learning_rate": 7.03862401496802e-07, "loss": 1.7812, "step": 12615 }, { "epoch": 0.949660324808521, "grad_norm": 4.421444892883301, "learning_rate": 7.018255320115519e-07, "loss": 1.7483, "step": 12616 }, { "epoch": 0.9497355990891813, "grad_norm": 5.439821720123291, "learning_rate": 6.997915931613197e-07, "loss": 1.6121, "step": 12617 }, { "epoch": 0.9498108733698414, "grad_norm": 4.601471424102783, "learning_rate": 6.977605850670144e-07, "loss": 1.4847, "step": 12618 }, { "epoch": 0.9498861476505015, "grad_norm": 5.457115650177002, "learning_rate": 6.957325078493837e-07, "loss": 1.8872, "step": 12619 }, { "epoch": 0.9499614219311616, "grad_norm": 6.230499267578125, "learning_rate": 6.937073616289813e-07, "loss": 1.9586, "step": 12620 }, { "epoch": 0.9500366962118219, "grad_norm": 4.610445499420166, "learning_rate": 6.916851465261997e-07, "loss": 1.812, "step": 12621 }, { "epoch": 0.950111970492482, "grad_norm": 4.16820764541626, "learning_rate": 6.89665862661254e-07, "loss": 2.0936, "step": 12622 }, { "epoch": 0.9501872447731421, "grad_norm": 4.270386695861816, "learning_rate": 6.876495101541924e-07, "loss": 1.9136, "step": 12623 }, { "epoch": 0.9502625190538023, "grad_norm": 4.340514659881592, "learning_rate": 6.856360891248692e-07, "loss": 1.9056, "step": 12624 }, { "epoch": 0.9503377933344624, "grad_norm": 5.450936794281006, "learning_rate": 6.836255996929886e-07, "loss": 1.7858, "step": 12625 }, { "epoch": 0.9504130676151226, "grad_norm": 5.421398639678955, "learning_rate": 6.816180419780493e-07, "loss": 1.8694, "step": 12626 }, { "epoch": 0.9504883418957828, "grad_norm": 4.780355930328369, "learning_rate": 6.796134160994172e-07, "loss": 1.6679, "step": 12627 }, { "epoch": 0.9505636161764429, "grad_norm": 4.167706489562988, "learning_rate": 6.776117221762468e-07, "loss": 1.5848, "step": 12628 }, { "epoch": 0.950638890457103, "grad_norm": 4.704986095428467, "learning_rate": 6.756129603275374e-07, "loss": 1.478, "step": 12629 }, { "epoch": 0.9507141647377633, "grad_norm": 6.16221284866333, "learning_rate": 6.736171306721162e-07, "loss": 1.8761, "step": 12630 }, { "epoch": 0.9507894390184234, "grad_norm": 5.12618350982666, "learning_rate": 6.71624233328616e-07, "loss": 2.1902, "step": 12631 }, { "epoch": 0.9508647132990835, "grad_norm": 5.246209144592285, "learning_rate": 6.696342684155199e-07, "loss": 1.9131, "step": 12632 }, { "epoch": 0.9509399875797437, "grad_norm": 3.4327824115753174, "learning_rate": 6.676472360511165e-07, "loss": 1.6988, "step": 12633 }, { "epoch": 0.9510152618604039, "grad_norm": 4.417757511138916, "learning_rate": 6.656631363535448e-07, "loss": 1.4728, "step": 12634 }, { "epoch": 0.951090536141064, "grad_norm": 6.891496658325195, "learning_rate": 6.636819694407382e-07, "loss": 1.6197, "step": 12635 }, { "epoch": 0.9511658104217242, "grad_norm": 5.932455062866211, "learning_rate": 6.617037354304745e-07, "loss": 1.8473, "step": 12636 }, { "epoch": 0.9512410847023843, "grad_norm": 4.860350131988525, "learning_rate": 6.597284344403599e-07, "loss": 1.4481, "step": 12637 }, { "epoch": 0.9513163589830445, "grad_norm": 4.502140522003174, "learning_rate": 6.577560665878169e-07, "loss": 1.8193, "step": 12638 }, { "epoch": 0.9513916332637046, "grad_norm": 5.460235118865967, "learning_rate": 6.557866319900907e-07, "loss": 1.9064, "step": 12639 }, { "epoch": 0.9514669075443648, "grad_norm": 4.415683269500732, "learning_rate": 6.538201307642711e-07, "loss": 1.6909, "step": 12640 }, { "epoch": 0.9515421818250249, "grad_norm": 4.433505535125732, "learning_rate": 6.518565630272533e-07, "loss": 1.9376, "step": 12641 }, { "epoch": 0.951617456105685, "grad_norm": 5.381619453430176, "learning_rate": 6.498959288957662e-07, "loss": 1.3601, "step": 12642 }, { "epoch": 0.9516927303863453, "grad_norm": 6.529528617858887, "learning_rate": 6.479382284863611e-07, "loss": 2.0831, "step": 12643 }, { "epoch": 0.9517680046670054, "grad_norm": 4.280087947845459, "learning_rate": 6.459834619154282e-07, "loss": 1.6036, "step": 12644 }, { "epoch": 0.9518432789476655, "grad_norm": 4.9408278465271, "learning_rate": 6.440316292991633e-07, "loss": 2.0676, "step": 12645 }, { "epoch": 0.9519185532283257, "grad_norm": 4.629851341247559, "learning_rate": 6.420827307535959e-07, "loss": 1.767, "step": 12646 }, { "epoch": 0.9519938275089859, "grad_norm": 4.1766276359558105, "learning_rate": 6.401367663945945e-07, "loss": 1.4504, "step": 12647 }, { "epoch": 0.952069101789646, "grad_norm": 4.166589736938477, "learning_rate": 6.381937363378277e-07, "loss": 1.5146, "step": 12648 }, { "epoch": 0.9521443760703062, "grad_norm": 5.057321071624756, "learning_rate": 6.362536406988084e-07, "loss": 1.7973, "step": 12649 }, { "epoch": 0.9522196503509663, "grad_norm": 4.526610851287842, "learning_rate": 6.343164795928724e-07, "loss": 1.5239, "step": 12650 }, { "epoch": 0.9522949246316265, "grad_norm": 5.0684661865234375, "learning_rate": 6.323822531351775e-07, "loss": 1.8554, "step": 12651 }, { "epoch": 0.9523701989122867, "grad_norm": 4.20596170425415, "learning_rate": 6.304509614406984e-07, "loss": 1.714, "step": 12652 }, { "epoch": 0.9524454731929468, "grad_norm": 4.975878715515137, "learning_rate": 6.285226046242543e-07, "loss": 1.701, "step": 12653 }, { "epoch": 0.9525207474736069, "grad_norm": 5.866959095001221, "learning_rate": 6.265971828004813e-07, "loss": 2.022, "step": 12654 }, { "epoch": 0.9525960217542672, "grad_norm": 5.904901027679443, "learning_rate": 6.246746960838379e-07, "loss": 2.2642, "step": 12655 }, { "epoch": 0.9526712960349273, "grad_norm": 4.925893783569336, "learning_rate": 6.227551445886104e-07, "loss": 1.6275, "step": 12656 }, { "epoch": 0.9527465703155874, "grad_norm": 6.75882625579834, "learning_rate": 6.208385284289131e-07, "loss": 2.1533, "step": 12657 }, { "epoch": 0.9528218445962475, "grad_norm": 4.597896575927734, "learning_rate": 6.189248477186715e-07, "loss": 1.5977, "step": 12658 }, { "epoch": 0.9528971188769078, "grad_norm": 5.635746002197266, "learning_rate": 6.170141025716669e-07, "loss": 2.1705, "step": 12659 }, { "epoch": 0.9529723931575679, "grad_norm": 5.3538641929626465, "learning_rate": 6.151062931014751e-07, "loss": 1.7818, "step": 12660 }, { "epoch": 0.953047667438228, "grad_norm": 4.036955833435059, "learning_rate": 6.132014194215108e-07, "loss": 1.8244, "step": 12661 }, { "epoch": 0.9531229417188882, "grad_norm": 4.892913818359375, "learning_rate": 6.112994816450169e-07, "loss": 2.0131, "step": 12662 }, { "epoch": 0.9531982159995483, "grad_norm": 3.978306770324707, "learning_rate": 6.094004798850584e-07, "loss": 1.7382, "step": 12663 }, { "epoch": 0.9532734902802085, "grad_norm": 4.9204020500183105, "learning_rate": 6.075044142545172e-07, "loss": 1.8224, "step": 12664 }, { "epoch": 0.9533487645608687, "grad_norm": 5.848854064941406, "learning_rate": 6.056112848661199e-07, "loss": 2.0312, "step": 12665 }, { "epoch": 0.9534240388415288, "grad_norm": 5.254955291748047, "learning_rate": 6.037210918324099e-07, "loss": 1.8957, "step": 12666 }, { "epoch": 0.9534993131221889, "grad_norm": 5.435256481170654, "learning_rate": 6.01833835265736e-07, "loss": 1.9473, "step": 12667 }, { "epoch": 0.9535745874028492, "grad_norm": 4.806950092315674, "learning_rate": 5.999495152783086e-07, "loss": 2.1219, "step": 12668 }, { "epoch": 0.9536498616835093, "grad_norm": 4.676013946533203, "learning_rate": 5.980681319821379e-07, "loss": 1.9462, "step": 12669 }, { "epoch": 0.9537251359641694, "grad_norm": 5.973019599914551, "learning_rate": 5.961896854890681e-07, "loss": 1.6982, "step": 12670 }, { "epoch": 0.9538004102448296, "grad_norm": 4.057026386260986, "learning_rate": 5.943141759107595e-07, "loss": 1.6977, "step": 12671 }, { "epoch": 0.9538756845254898, "grad_norm": 5.139476299285889, "learning_rate": 5.924416033587177e-07, "loss": 1.8449, "step": 12672 }, { "epoch": 0.9539509588061499, "grad_norm": 4.590887069702148, "learning_rate": 5.905719679442534e-07, "loss": 1.9326, "step": 12673 }, { "epoch": 0.9540262330868101, "grad_norm": 4.804156303405762, "learning_rate": 5.887052697785167e-07, "loss": 1.5958, "step": 12674 }, { "epoch": 0.9541015073674702, "grad_norm": 4.616923809051514, "learning_rate": 5.868415089724688e-07, "loss": 1.7097, "step": 12675 }, { "epoch": 0.9541767816481304, "grad_norm": 4.854588031768799, "learning_rate": 5.849806856369156e-07, "loss": 1.6195, "step": 12676 }, { "epoch": 0.9542520559287905, "grad_norm": 4.186409950256348, "learning_rate": 5.83122799882474e-07, "loss": 1.8288, "step": 12677 }, { "epoch": 0.9543273302094507, "grad_norm": 4.645761013031006, "learning_rate": 5.812678518195836e-07, "loss": 1.9328, "step": 12678 }, { "epoch": 0.9544026044901108, "grad_norm": 8.028436660766602, "learning_rate": 5.794158415585282e-07, "loss": 1.7745, "step": 12679 }, { "epoch": 0.954477878770771, "grad_norm": 4.998651027679443, "learning_rate": 5.775667692093922e-07, "loss": 1.9679, "step": 12680 }, { "epoch": 0.9545531530514312, "grad_norm": 6.120117664337158, "learning_rate": 5.757206348821098e-07, "loss": 1.738, "step": 12681 }, { "epoch": 0.9546284273320913, "grad_norm": 5.068941116333008, "learning_rate": 5.738774386864154e-07, "loss": 1.7005, "step": 12682 }, { "epoch": 0.9547037016127514, "grad_norm": 5.972866535186768, "learning_rate": 5.720371807318881e-07, "loss": 1.7792, "step": 12683 }, { "epoch": 0.9547789758934117, "grad_norm": 4.95200777053833, "learning_rate": 5.701998611279291e-07, "loss": 1.9954, "step": 12684 }, { "epoch": 0.9548542501740718, "grad_norm": 5.003471851348877, "learning_rate": 5.683654799837567e-07, "loss": 1.7011, "step": 12685 }, { "epoch": 0.9549295244547319, "grad_norm": 5.468829154968262, "learning_rate": 5.665340374084172e-07, "loss": 2.0048, "step": 12686 }, { "epoch": 0.9550047987353921, "grad_norm": 5.987268447875977, "learning_rate": 5.647055335107954e-07, "loss": 1.9431, "step": 12687 }, { "epoch": 0.9550800730160522, "grad_norm": 6.3180975914001465, "learning_rate": 5.628799683995767e-07, "loss": 1.9867, "step": 12688 }, { "epoch": 0.9551553472967124, "grad_norm": 5.810031890869141, "learning_rate": 5.610573421832965e-07, "loss": 1.8616, "step": 12689 }, { "epoch": 0.9552306215773726, "grad_norm": 4.753474235534668, "learning_rate": 5.592376549703015e-07, "loss": 1.6691, "step": 12690 }, { "epoch": 0.9553058958580327, "grad_norm": 3.8705475330352783, "learning_rate": 5.574209068687719e-07, "loss": 1.4919, "step": 12691 }, { "epoch": 0.9553811701386928, "grad_norm": 4.97821044921875, "learning_rate": 5.556070979866934e-07, "loss": 1.7721, "step": 12692 }, { "epoch": 0.9554564444193531, "grad_norm": 5.074934005737305, "learning_rate": 5.537962284319021e-07, "loss": 2.0019, "step": 12693 }, { "epoch": 0.9555317187000132, "grad_norm": 4.354022026062012, "learning_rate": 5.519882983120561e-07, "loss": 1.6431, "step": 12694 }, { "epoch": 0.9556069929806733, "grad_norm": 10.771299362182617, "learning_rate": 5.501833077346142e-07, "loss": 2.4806, "step": 12695 }, { "epoch": 0.9556822672613334, "grad_norm": 4.1471662521362305, "learning_rate": 5.483812568068958e-07, "loss": 2.0176, "step": 12696 }, { "epoch": 0.9557575415419937, "grad_norm": 4.193211078643799, "learning_rate": 5.465821456360098e-07, "loss": 1.9431, "step": 12697 }, { "epoch": 0.9558328158226538, "grad_norm": 3.8361949920654297, "learning_rate": 5.447859743289208e-07, "loss": 1.4868, "step": 12698 }, { "epoch": 0.9559080901033139, "grad_norm": 5.013564586639404, "learning_rate": 5.429927429924042e-07, "loss": 1.7217, "step": 12699 }, { "epoch": 0.9559833643839741, "grad_norm": 5.728600978851318, "learning_rate": 5.412024517330638e-07, "loss": 1.868, "step": 12700 }, { "epoch": 0.9560586386646343, "grad_norm": 4.661545753479004, "learning_rate": 5.3941510065732e-07, "loss": 1.8948, "step": 12701 }, { "epoch": 0.9561339129452944, "grad_norm": 5.729572772979736, "learning_rate": 5.376306898714322e-07, "loss": 1.7481, "step": 12702 }, { "epoch": 0.9562091872259546, "grad_norm": 5.605362415313721, "learning_rate": 5.358492194814768e-07, "loss": 1.5649, "step": 12703 }, { "epoch": 0.9562844615066147, "grad_norm": 3.890798807144165, "learning_rate": 5.340706895933578e-07, "loss": 1.845, "step": 12704 }, { "epoch": 0.9563597357872748, "grad_norm": 4.749081134796143, "learning_rate": 5.322951003127963e-07, "loss": 2.0707, "step": 12705 }, { "epoch": 0.9564350100679351, "grad_norm": 3.9745826721191406, "learning_rate": 5.305224517453577e-07, "loss": 1.701, "step": 12706 }, { "epoch": 0.9565102843485952, "grad_norm": 5.145061016082764, "learning_rate": 5.287527439964135e-07, "loss": 1.6462, "step": 12707 }, { "epoch": 0.9565855586292553, "grad_norm": 6.6787543296813965, "learning_rate": 5.269859771711683e-07, "loss": 1.545, "step": 12708 }, { "epoch": 0.9566608329099155, "grad_norm": 4.938272953033447, "learning_rate": 5.252221513746547e-07, "loss": 2.0984, "step": 12709 }, { "epoch": 0.9567361071905757, "grad_norm": 4.221813201904297, "learning_rate": 5.234612667117278e-07, "loss": 1.7497, "step": 12710 }, { "epoch": 0.9568113814712358, "grad_norm": 5.071768283843994, "learning_rate": 5.217033232870594e-07, "loss": 1.9402, "step": 12711 }, { "epoch": 0.956886655751896, "grad_norm": 4.225630283355713, "learning_rate": 5.199483212051659e-07, "loss": 1.5626, "step": 12712 }, { "epoch": 0.9569619300325561, "grad_norm": 5.390611171722412, "learning_rate": 5.181962605703639e-07, "loss": 1.7544, "step": 12713 }, { "epoch": 0.9570372043132163, "grad_norm": 7.954266548156738, "learning_rate": 5.1644714148682e-07, "loss": 1.7385, "step": 12714 }, { "epoch": 0.9571124785938765, "grad_norm": 4.858428001403809, "learning_rate": 5.147009640585065e-07, "loss": 1.8414, "step": 12715 }, { "epoch": 0.9571877528745366, "grad_norm": 5.260420799255371, "learning_rate": 5.129577283892295e-07, "loss": 1.5452, "step": 12716 }, { "epoch": 0.9572630271551967, "grad_norm": 3.7520792484283447, "learning_rate": 5.112174345826281e-07, "loss": 1.6871, "step": 12717 }, { "epoch": 0.9573383014358569, "grad_norm": 4.311631679534912, "learning_rate": 5.094800827421475e-07, "loss": 1.9012, "step": 12718 }, { "epoch": 0.9574135757165171, "grad_norm": 6.373862266540527, "learning_rate": 5.077456729710661e-07, "loss": 1.9115, "step": 12719 }, { "epoch": 0.9574888499971772, "grad_norm": 4.494345188140869, "learning_rate": 5.060142053725014e-07, "loss": 1.8008, "step": 12720 }, { "epoch": 0.9575641242778373, "grad_norm": 4.176302909851074, "learning_rate": 5.042856800493768e-07, "loss": 1.8724, "step": 12721 }, { "epoch": 0.9576393985584976, "grad_norm": 4.273013591766357, "learning_rate": 5.025600971044486e-07, "loss": 1.5219, "step": 12722 }, { "epoch": 0.9577146728391577, "grad_norm": 4.693612575531006, "learning_rate": 5.008374566403018e-07, "loss": 1.6532, "step": 12723 }, { "epoch": 0.9577899471198178, "grad_norm": 4.368964195251465, "learning_rate": 4.991177587593321e-07, "loss": 1.6518, "step": 12724 }, { "epoch": 0.957865221400478, "grad_norm": 4.226772785186768, "learning_rate": 4.974010035637855e-07, "loss": 2.0488, "step": 12725 }, { "epoch": 0.9579404956811381, "grad_norm": 4.707857608795166, "learning_rate": 4.956871911557081e-07, "loss": 1.5817, "step": 12726 }, { "epoch": 0.9580157699617983, "grad_norm": 5.16405725479126, "learning_rate": 4.939763216369797e-07, "loss": 1.7049, "step": 12727 }, { "epoch": 0.9580910442424585, "grad_norm": 4.712795734405518, "learning_rate": 4.922683951093133e-07, "loss": 1.8878, "step": 12728 }, { "epoch": 0.9581663185231186, "grad_norm": 4.737600326538086, "learning_rate": 4.905634116742386e-07, "loss": 1.823, "step": 12729 }, { "epoch": 0.9582415928037787, "grad_norm": 3.9811925888061523, "learning_rate": 4.888613714331081e-07, "loss": 1.6856, "step": 12730 }, { "epoch": 0.958316867084439, "grad_norm": 3.939588785171509, "learning_rate": 4.871622744871074e-07, "loss": 1.8198, "step": 12731 }, { "epoch": 0.9583921413650991, "grad_norm": 4.764878749847412, "learning_rate": 4.854661209372391e-07, "loss": 1.9043, "step": 12732 }, { "epoch": 0.9584674156457592, "grad_norm": 4.294692516326904, "learning_rate": 4.837729108843336e-07, "loss": 1.9113, "step": 12733 }, { "epoch": 0.9585426899264194, "grad_norm": 4.665185451507568, "learning_rate": 4.820826444290605e-07, "loss": 1.8684, "step": 12734 }, { "epoch": 0.9586179642070796, "grad_norm": 5.439671516418457, "learning_rate": 4.803953216718837e-07, "loss": 1.9328, "step": 12735 }, { "epoch": 0.9586932384877397, "grad_norm": 5.113662242889404, "learning_rate": 4.787109427131175e-07, "loss": 1.8658, "step": 12736 }, { "epoch": 0.9587685127683998, "grad_norm": 4.699664115905762, "learning_rate": 4.770295076528985e-07, "loss": 1.5247, "step": 12737 }, { "epoch": 0.95884378704906, "grad_norm": 4.78753662109375, "learning_rate": 4.7535101659117453e-07, "loss": 1.9261, "step": 12738 }, { "epoch": 0.9589190613297202, "grad_norm": 5.380410194396973, "learning_rate": 4.736754696277268e-07, "loss": 2.0376, "step": 12739 }, { "epoch": 0.9589943356103803, "grad_norm": 5.370891094207764, "learning_rate": 4.720028668621701e-07, "loss": 1.7869, "step": 12740 }, { "epoch": 0.9590696098910405, "grad_norm": 6.113171577453613, "learning_rate": 4.7033320839393045e-07, "loss": 2.1247, "step": 12741 }, { "epoch": 0.9591448841717006, "grad_norm": 7.672886371612549, "learning_rate": 4.6866649432226185e-07, "loss": 1.629, "step": 12742 }, { "epoch": 0.9592201584523607, "grad_norm": 4.6829071044921875, "learning_rate": 4.670027247462516e-07, "loss": 1.8026, "step": 12743 }, { "epoch": 0.959295432733021, "grad_norm": 5.635254859924316, "learning_rate": 4.65341899764804e-07, "loss": 1.6993, "step": 12744 }, { "epoch": 0.9593707070136811, "grad_norm": 4.119978427886963, "learning_rate": 4.6368401947665117e-07, "loss": 1.4614, "step": 12745 }, { "epoch": 0.9594459812943412, "grad_norm": 4.866243839263916, "learning_rate": 4.6202908398034203e-07, "loss": 1.5539, "step": 12746 }, { "epoch": 0.9595212555750015, "grad_norm": 4.3326029777526855, "learning_rate": 4.603770933742702e-07, "loss": 1.8544, "step": 12747 }, { "epoch": 0.9595965298556616, "grad_norm": 4.368873596191406, "learning_rate": 4.5872804775662916e-07, "loss": 2.2047, "step": 12748 }, { "epoch": 0.9596718041363217, "grad_norm": 4.2882466316223145, "learning_rate": 4.5708194722546284e-07, "loss": 1.6478, "step": 12749 }, { "epoch": 0.9597470784169819, "grad_norm": 7.5147624015808105, "learning_rate": 4.5543879187861517e-07, "loss": 2.2439, "step": 12750 }, { "epoch": 0.959822352697642, "grad_norm": 4.197399616241455, "learning_rate": 4.5379858181377466e-07, "loss": 1.9333, "step": 12751 }, { "epoch": 0.9598976269783022, "grad_norm": 5.4983696937561035, "learning_rate": 4.521613171284411e-07, "loss": 1.6143, "step": 12752 }, { "epoch": 0.9599729012589624, "grad_norm": 4.430149078369141, "learning_rate": 4.505269979199478e-07, "loss": 1.8785, "step": 12753 }, { "epoch": 0.9600481755396225, "grad_norm": 4.548789978027344, "learning_rate": 4.4889562428545027e-07, "loss": 1.6135, "step": 12754 }, { "epoch": 0.9601234498202826, "grad_norm": 5.231551647186279, "learning_rate": 4.472671963219377e-07, "loss": 1.5345, "step": 12755 }, { "epoch": 0.9601987241009428, "grad_norm": 5.463254451751709, "learning_rate": 4.4564171412619926e-07, "loss": 1.6017, "step": 12756 }, { "epoch": 0.960273998381603, "grad_norm": 4.883767604827881, "learning_rate": 4.4401917779487436e-07, "loss": 1.9248, "step": 12757 }, { "epoch": 0.9603492726622631, "grad_norm": 5.04429292678833, "learning_rate": 4.423995874244247e-07, "loss": 1.8472, "step": 12758 }, { "epoch": 0.9604245469429232, "grad_norm": 5.0260796546936035, "learning_rate": 4.407829431111177e-07, "loss": 1.728, "step": 12759 }, { "epoch": 0.9604998212235835, "grad_norm": 5.662580966949463, "learning_rate": 4.3916924495106537e-07, "loss": 1.7103, "step": 12760 }, { "epoch": 0.9605750955042436, "grad_norm": 5.893969535827637, "learning_rate": 4.37558493040191e-07, "loss": 1.8945, "step": 12761 }, { "epoch": 0.9606503697849037, "grad_norm": 5.4086713790893555, "learning_rate": 4.359506874742569e-07, "loss": 1.8159, "step": 12762 }, { "epoch": 0.9607256440655639, "grad_norm": 5.280806064605713, "learning_rate": 4.343458283488366e-07, "loss": 1.7341, "step": 12763 }, { "epoch": 0.960800918346224, "grad_norm": 4.8617024421691895, "learning_rate": 4.3274391575933715e-07, "loss": 1.4529, "step": 12764 }, { "epoch": 0.9608761926268842, "grad_norm": 3.8887698650360107, "learning_rate": 4.3114494980098785e-07, "loss": 1.8785, "step": 12765 }, { "epoch": 0.9609514669075444, "grad_norm": 4.244451999664307, "learning_rate": 4.2954893056884606e-07, "loss": 1.6456, "step": 12766 }, { "epoch": 0.9610267411882045, "grad_norm": 4.5337982177734375, "learning_rate": 4.279558581577858e-07, "loss": 1.8425, "step": 12767 }, { "epoch": 0.9611020154688646, "grad_norm": 5.106928825378418, "learning_rate": 4.2636573266251476e-07, "loss": 1.2153, "step": 12768 }, { "epoch": 0.9611772897495249, "grad_norm": 5.047542572021484, "learning_rate": 4.2477855417755173e-07, "loss": 1.7941, "step": 12769 }, { "epoch": 0.961252564030185, "grad_norm": 4.429896831512451, "learning_rate": 4.231943227972601e-07, "loss": 2.0056, "step": 12770 }, { "epoch": 0.9613278383108451, "grad_norm": 4.621360778808594, "learning_rate": 4.2161303861580903e-07, "loss": 1.8047, "step": 12771 }, { "epoch": 0.9614031125915053, "grad_norm": 5.414617538452148, "learning_rate": 4.2003470172721213e-07, "loss": 1.9059, "step": 12772 }, { "epoch": 0.9614783868721655, "grad_norm": 5.47637939453125, "learning_rate": 4.184593122252889e-07, "loss": 1.6954, "step": 12773 }, { "epoch": 0.9615536611528256, "grad_norm": 5.423379421234131, "learning_rate": 4.168868702036921e-07, "loss": 1.4541, "step": 12774 }, { "epoch": 0.9616289354334857, "grad_norm": 5.788435459136963, "learning_rate": 4.1531737575590813e-07, "loss": 1.9944, "step": 12775 }, { "epoch": 0.9617042097141459, "grad_norm": 4.989753723144531, "learning_rate": 4.1375082897522344e-07, "loss": 2.0697, "step": 12776 }, { "epoch": 0.9617794839948061, "grad_norm": 4.762087345123291, "learning_rate": 4.1218722995477463e-07, "loss": 1.6014, "step": 12777 }, { "epoch": 0.9618547582754662, "grad_norm": 4.377968788146973, "learning_rate": 4.106265787875152e-07, "loss": 1.7274, "step": 12778 }, { "epoch": 0.9619300325561264, "grad_norm": 5.829367637634277, "learning_rate": 4.090688755662153e-07, "loss": 1.8093, "step": 12779 }, { "epoch": 0.9620053068367865, "grad_norm": 4.503036022186279, "learning_rate": 4.075141203834787e-07, "loss": 1.8075, "step": 12780 }, { "epoch": 0.9620805811174467, "grad_norm": 5.020206928253174, "learning_rate": 4.059623133317314e-07, "loss": 1.8916, "step": 12781 }, { "epoch": 0.9621558553981069, "grad_norm": 5.009835720062256, "learning_rate": 4.0441345450322745e-07, "loss": 1.5915, "step": 12782 }, { "epoch": 0.962231129678767, "grad_norm": 5.395751476287842, "learning_rate": 4.02867543990032e-07, "loss": 1.6678, "step": 12783 }, { "epoch": 0.9623064039594271, "grad_norm": 5.530890464782715, "learning_rate": 4.0132458188405495e-07, "loss": 1.7713, "step": 12784 }, { "epoch": 0.9623816782400874, "grad_norm": 4.732662200927734, "learning_rate": 3.9978456827701185e-07, "loss": 1.6821, "step": 12785 }, { "epoch": 0.9624569525207475, "grad_norm": 6.709630012512207, "learning_rate": 3.982475032604627e-07, "loss": 1.8672, "step": 12786 }, { "epoch": 0.9625322268014076, "grad_norm": 3.9430272579193115, "learning_rate": 3.967133869257733e-07, "loss": 1.7442, "step": 12787 }, { "epoch": 0.9626075010820678, "grad_norm": 4.8096723556518555, "learning_rate": 3.9518221936414304e-07, "loss": 2.0516, "step": 12788 }, { "epoch": 0.962682775362728, "grad_norm": 4.9371113777160645, "learning_rate": 3.93654000666599e-07, "loss": 1.868, "step": 12789 }, { "epoch": 0.9627580496433881, "grad_norm": 5.705292224884033, "learning_rate": 3.921287309239907e-07, "loss": 1.8838, "step": 12790 }, { "epoch": 0.9628333239240483, "grad_norm": 7.7011213302612305, "learning_rate": 3.9060641022698464e-07, "loss": 1.8731, "step": 12791 }, { "epoch": 0.9629085982047084, "grad_norm": 6.318780899047852, "learning_rate": 3.8908703866608606e-07, "loss": 1.9872, "step": 12792 }, { "epoch": 0.9629838724853685, "grad_norm": 4.973534107208252, "learning_rate": 3.8757061633161174e-07, "loss": 1.6144, "step": 12793 }, { "epoch": 0.9630591467660287, "grad_norm": 4.02522611618042, "learning_rate": 3.860571433137117e-07, "loss": 1.746, "step": 12794 }, { "epoch": 0.9631344210466889, "grad_norm": 5.6097092628479, "learning_rate": 3.8454661970235284e-07, "loss": 1.8325, "step": 12795 }, { "epoch": 0.963209695327349, "grad_norm": 4.617707252502441, "learning_rate": 3.8303904558734117e-07, "loss": 1.6652, "step": 12796 }, { "epoch": 0.9632849696080091, "grad_norm": 4.881204128265381, "learning_rate": 3.8153442105828275e-07, "loss": 1.4728, "step": 12797 }, { "epoch": 0.9633602438886694, "grad_norm": 5.9089202880859375, "learning_rate": 3.800327462046338e-07, "loss": 2.2214, "step": 12798 }, { "epoch": 0.9634355181693295, "grad_norm": 5.717357635498047, "learning_rate": 3.7853402111566184e-07, "loss": 1.9396, "step": 12799 }, { "epoch": 0.9635107924499896, "grad_norm": 4.606716632843018, "learning_rate": 3.7703824588046775e-07, "loss": 1.7728, "step": 12800 }, { "epoch": 0.9635860667306498, "grad_norm": 7.8199639320373535, "learning_rate": 3.7554542058796385e-07, "loss": 1.958, "step": 12801 }, { "epoch": 0.96366134101131, "grad_norm": 5.51934289932251, "learning_rate": 3.740555453268957e-07, "loss": 1.4191, "step": 12802 }, { "epoch": 0.9637366152919701, "grad_norm": 4.515603542327881, "learning_rate": 3.725686201858314e-07, "loss": 2.1266, "step": 12803 }, { "epoch": 0.9638118895726303, "grad_norm": 6.134711265563965, "learning_rate": 3.710846452531669e-07, "loss": 1.5383, "step": 12804 }, { "epoch": 0.9638871638532904, "grad_norm": 7.720675468444824, "learning_rate": 3.6960362061711494e-07, "loss": 1.658, "step": 12805 }, { "epoch": 0.9639624381339505, "grad_norm": 3.837897539138794, "learning_rate": 3.681255463657274e-07, "loss": 1.545, "step": 12806 }, { "epoch": 0.9640377124146108, "grad_norm": 4.478659629821777, "learning_rate": 3.6665042258686166e-07, "loss": 1.7612, "step": 12807 }, { "epoch": 0.9641129866952709, "grad_norm": 5.460193157196045, "learning_rate": 3.651782493682199e-07, "loss": 1.9355, "step": 12808 }, { "epoch": 0.964188260975931, "grad_norm": 4.338287830352783, "learning_rate": 3.637090267973042e-07, "loss": 1.5636, "step": 12809 }, { "epoch": 0.9642635352565913, "grad_norm": 4.387434482574463, "learning_rate": 3.6224275496147263e-07, "loss": 1.3001, "step": 12810 }, { "epoch": 0.9643388095372514, "grad_norm": 4.487557411193848, "learning_rate": 3.607794339478776e-07, "loss": 1.7105, "step": 12811 }, { "epoch": 0.9644140838179115, "grad_norm": 5.497860431671143, "learning_rate": 3.5931906384351623e-07, "loss": 1.429, "step": 12812 }, { "epoch": 0.9644893580985717, "grad_norm": 7.424019813537598, "learning_rate": 3.5786164473520235e-07, "loss": 1.9997, "step": 12813 }, { "epoch": 0.9645646323792318, "grad_norm": 4.381494045257568, "learning_rate": 3.5640717670957223e-07, "loss": 1.6431, "step": 12814 }, { "epoch": 0.964639906659892, "grad_norm": 5.804770469665527, "learning_rate": 3.5495565985309566e-07, "loss": 1.8807, "step": 12815 }, { "epoch": 0.9647151809405521, "grad_norm": 4.882290840148926, "learning_rate": 3.535070942520591e-07, "loss": 1.9285, "step": 12816 }, { "epoch": 0.9647904552212123, "grad_norm": 6.2644243240356445, "learning_rate": 3.5206147999257143e-07, "loss": 1.8068, "step": 12817 }, { "epoch": 0.9648657295018724, "grad_norm": 5.2754669189453125, "learning_rate": 3.5061881716056954e-07, "loss": 1.7369, "step": 12818 }, { "epoch": 0.9649410037825326, "grad_norm": 5.636056423187256, "learning_rate": 3.491791058418181e-07, "loss": 1.5516, "step": 12819 }, { "epoch": 0.9650162780631928, "grad_norm": 5.301386833190918, "learning_rate": 3.477423461219098e-07, "loss": 1.661, "step": 12820 }, { "epoch": 0.9650915523438529, "grad_norm": 4.2584614753723145, "learning_rate": 3.463085380862485e-07, "loss": 2.0417, "step": 12821 }, { "epoch": 0.965166826624513, "grad_norm": 4.7916340827941895, "learning_rate": 3.4487768182007163e-07, "loss": 1.8691, "step": 12822 }, { "epoch": 0.9652421009051733, "grad_norm": 5.548417091369629, "learning_rate": 3.4344977740843887e-07, "loss": 2.1189, "step": 12823 }, { "epoch": 0.9653173751858334, "grad_norm": 5.059255599975586, "learning_rate": 3.4202482493623787e-07, "loss": 2.2455, "step": 12824 }, { "epoch": 0.9653926494664935, "grad_norm": 5.060774326324463, "learning_rate": 3.406028244881787e-07, "loss": 1.8513, "step": 12825 }, { "epoch": 0.9654679237471537, "grad_norm": 5.0774641036987305, "learning_rate": 3.391837761487937e-07, "loss": 1.4563, "step": 12826 }, { "epoch": 0.9655431980278139, "grad_norm": 4.104584217071533, "learning_rate": 3.377676800024321e-07, "loss": 1.8978, "step": 12827 }, { "epoch": 0.965618472308474, "grad_norm": 4.80279016494751, "learning_rate": 3.3635453613329314e-07, "loss": 1.8048, "step": 12828 }, { "epoch": 0.9656937465891342, "grad_norm": 4.9074273109436035, "learning_rate": 3.349443446253708e-07, "loss": 1.9201, "step": 12829 }, { "epoch": 0.9657690208697943, "grad_norm": 4.464571475982666, "learning_rate": 3.3353710556250917e-07, "loss": 1.8604, "step": 12830 }, { "epoch": 0.9658442951504544, "grad_norm": 5.140709400177002, "learning_rate": 3.3213281902835237e-07, "loss": 1.8309, "step": 12831 }, { "epoch": 0.9659195694311147, "grad_norm": 5.117081165313721, "learning_rate": 3.307314851063836e-07, "loss": 2.113, "step": 12832 }, { "epoch": 0.9659948437117748, "grad_norm": 7.604523658752441, "learning_rate": 3.293331038799141e-07, "loss": 1.6905, "step": 12833 }, { "epoch": 0.9660701179924349, "grad_norm": 5.077304840087891, "learning_rate": 3.2793767543207176e-07, "loss": 1.7712, "step": 12834 }, { "epoch": 0.966145392273095, "grad_norm": 5.309237480163574, "learning_rate": 3.265451998458069e-07, "loss": 1.6441, "step": 12835 }, { "epoch": 0.9662206665537553, "grad_norm": 4.8570075035095215, "learning_rate": 3.251556772039033e-07, "loss": 1.7237, "step": 12836 }, { "epoch": 0.9662959408344154, "grad_norm": 4.21960973739624, "learning_rate": 3.237691075889615e-07, "loss": 1.6225, "step": 12837 }, { "epoch": 0.9663712151150755, "grad_norm": 4.946710586547852, "learning_rate": 3.2238549108340455e-07, "loss": 1.8562, "step": 12838 }, { "epoch": 0.9664464893957357, "grad_norm": 6.158081531524658, "learning_rate": 3.2100482776949435e-07, "loss": 1.802, "step": 12839 }, { "epoch": 0.9665217636763959, "grad_norm": 5.038981914520264, "learning_rate": 3.1962711772929863e-07, "loss": 1.7337, "step": 12840 }, { "epoch": 0.966597037957056, "grad_norm": 5.092843055725098, "learning_rate": 3.182523610447241e-07, "loss": 1.9241, "step": 12841 }, { "epoch": 0.9666723122377162, "grad_norm": 5.793262958526611, "learning_rate": 3.1688055779749425e-07, "loss": 1.7307, "step": 12842 }, { "epoch": 0.9667475865183763, "grad_norm": 6.396280765533447, "learning_rate": 3.15511708069155e-07, "loss": 1.9269, "step": 12843 }, { "epoch": 0.9668228607990365, "grad_norm": 5.814577579498291, "learning_rate": 3.141458119410856e-07, "loss": 1.8902, "step": 12844 }, { "epoch": 0.9668981350796967, "grad_norm": 4.52952766418457, "learning_rate": 3.127828694944879e-07, "loss": 1.8889, "step": 12845 }, { "epoch": 0.9669734093603568, "grad_norm": 5.8890180587768555, "learning_rate": 3.114228808103747e-07, "loss": 1.9654, "step": 12846 }, { "epoch": 0.9670486836410169, "grad_norm": 4.779413223266602, "learning_rate": 3.100658459695982e-07, "loss": 1.6647, "step": 12847 }, { "epoch": 0.9671239579216772, "grad_norm": 5.064691066741943, "learning_rate": 3.0871176505283263e-07, "loss": 2.1677, "step": 12848 }, { "epoch": 0.9671992322023373, "grad_norm": 4.5716705322265625, "learning_rate": 3.073606381405747e-07, "loss": 1.5954, "step": 12849 }, { "epoch": 0.9672745064829974, "grad_norm": 4.712538719177246, "learning_rate": 3.06012465313138e-07, "loss": 1.8211, "step": 12850 }, { "epoch": 0.9673497807636576, "grad_norm": 6.174914360046387, "learning_rate": 3.0466724665067504e-07, "loss": 2.2294, "step": 12851 }, { "epoch": 0.9674250550443177, "grad_norm": 4.360692501068115, "learning_rate": 3.0332498223314964e-07, "loss": 1.6953, "step": 12852 }, { "epoch": 0.9675003293249779, "grad_norm": 6.030859470367432, "learning_rate": 3.019856721403591e-07, "loss": 1.568, "step": 12853 }, { "epoch": 0.967575603605638, "grad_norm": 4.935276985168457, "learning_rate": 3.006493164519231e-07, "loss": 1.6172, "step": 12854 }, { "epoch": 0.9676508778862982, "grad_norm": 4.2524590492248535, "learning_rate": 2.99315915247278e-07, "loss": 1.7122, "step": 12855 }, { "epoch": 0.9677261521669583, "grad_norm": 5.106043815612793, "learning_rate": 2.979854686056993e-07, "loss": 1.5194, "step": 12856 }, { "epoch": 0.9678014264476185, "grad_norm": 6.843307018280029, "learning_rate": 2.9665797660626825e-07, "loss": 2.0082, "step": 12857 }, { "epoch": 0.9678767007282787, "grad_norm": 5.756411075592041, "learning_rate": 2.9533343932791056e-07, "loss": 1.8969, "step": 12858 }, { "epoch": 0.9679519750089388, "grad_norm": 4.695230007171631, "learning_rate": 2.940118568493577e-07, "loss": 1.7083, "step": 12859 }, { "epoch": 0.9680272492895989, "grad_norm": 4.5687737464904785, "learning_rate": 2.9269322924918573e-07, "loss": 1.6967, "step": 12860 }, { "epoch": 0.9681025235702592, "grad_norm": 4.319023609161377, "learning_rate": 2.913775566057653e-07, "loss": 1.4636, "step": 12861 }, { "epoch": 0.9681777978509193, "grad_norm": 6.475977897644043, "learning_rate": 2.900648389973226e-07, "loss": 1.5725, "step": 12862 }, { "epoch": 0.9682530721315794, "grad_norm": 5.457620620727539, "learning_rate": 2.887550765018898e-07, "loss": 1.7592, "step": 12863 }, { "epoch": 0.9683283464122396, "grad_norm": 4.101484298706055, "learning_rate": 2.874482691973324e-07, "loss": 1.8931, "step": 12864 }, { "epoch": 0.9684036206928998, "grad_norm": 3.980106830596924, "learning_rate": 2.8614441716133254e-07, "loss": 1.85, "step": 12865 }, { "epoch": 0.9684788949735599, "grad_norm": 4.795080661773682, "learning_rate": 2.8484352047140063e-07, "loss": 1.4535, "step": 12866 }, { "epoch": 0.9685541692542201, "grad_norm": 8.221390724182129, "learning_rate": 2.835455792048747e-07, "loss": 1.8154, "step": 12867 }, { "epoch": 0.9686294435348802, "grad_norm": 4.251120567321777, "learning_rate": 2.822505934389097e-07, "loss": 1.8558, "step": 12868 }, { "epoch": 0.9687047178155404, "grad_norm": 4.5144362449646, "learning_rate": 2.809585632504941e-07, "loss": 2.1037, "step": 12869 }, { "epoch": 0.9687799920962006, "grad_norm": 5.695852279663086, "learning_rate": 2.796694887164275e-07, "loss": 1.7386, "step": 12870 }, { "epoch": 0.9688552663768607, "grad_norm": 4.657914638519287, "learning_rate": 2.783833699133542e-07, "loss": 2.0847, "step": 12871 }, { "epoch": 0.9689305406575208, "grad_norm": 3.799586296081543, "learning_rate": 2.771002069177131e-07, "loss": 1.7108, "step": 12872 }, { "epoch": 0.9690058149381809, "grad_norm": 4.43876838684082, "learning_rate": 2.7581999980579863e-07, "loss": 2.1726, "step": 12873 }, { "epoch": 0.9690810892188412, "grad_norm": 4.370305061340332, "learning_rate": 2.745427486537111e-07, "loss": 1.839, "step": 12874 }, { "epoch": 0.9691563634995013, "grad_norm": 5.2520270347595215, "learning_rate": 2.732684535373786e-07, "loss": 1.6447, "step": 12875 }, { "epoch": 0.9692316377801614, "grad_norm": 4.5966997146606445, "learning_rate": 2.719971145325517e-07, "loss": 1.676, "step": 12876 }, { "epoch": 0.9693069120608216, "grad_norm": 5.579090595245361, "learning_rate": 2.707287317148088e-07, "loss": 1.8823, "step": 12877 }, { "epoch": 0.9693821863414818, "grad_norm": 7.404440402984619, "learning_rate": 2.694633051595563e-07, "loss": 1.9279, "step": 12878 }, { "epoch": 0.9694574606221419, "grad_norm": 3.836246967315674, "learning_rate": 2.682008349420173e-07, "loss": 1.6043, "step": 12879 }, { "epoch": 0.9695327349028021, "grad_norm": 5.04543924331665, "learning_rate": 2.669413211372429e-07, "loss": 2.0549, "step": 12880 }, { "epoch": 0.9696080091834622, "grad_norm": 5.575264930725098, "learning_rate": 2.6568476382010656e-07, "loss": 1.6289, "step": 12881 }, { "epoch": 0.9696832834641224, "grad_norm": 4.055024147033691, "learning_rate": 2.6443116306530955e-07, "loss": 1.8076, "step": 12882 }, { "epoch": 0.9697585577447826, "grad_norm": 5.3261613845825195, "learning_rate": 2.6318051894737017e-07, "loss": 1.593, "step": 12883 }, { "epoch": 0.9698338320254427, "grad_norm": 4.643156051635742, "learning_rate": 2.6193283154063997e-07, "loss": 1.6863, "step": 12884 }, { "epoch": 0.9699091063061028, "grad_norm": 4.882751941680908, "learning_rate": 2.6068810091928187e-07, "loss": 1.5136, "step": 12885 }, { "epoch": 0.9699843805867631, "grad_norm": 5.036129474639893, "learning_rate": 2.594463271573033e-07, "loss": 1.8994, "step": 12886 }, { "epoch": 0.9700596548674232, "grad_norm": 4.722573280334473, "learning_rate": 2.5820751032851755e-07, "loss": 1.6588, "step": 12887 }, { "epoch": 0.9701349291480833, "grad_norm": 6.217869758605957, "learning_rate": 2.5697165050657113e-07, "loss": 2.0225, "step": 12888 }, { "epoch": 0.9702102034287435, "grad_norm": 5.278621673583984, "learning_rate": 2.557387477649331e-07, "loss": 1.7792, "step": 12889 }, { "epoch": 0.9702854777094037, "grad_norm": 5.1986083984375, "learning_rate": 2.5450880217688935e-07, "loss": 1.7527, "step": 12890 }, { "epoch": 0.9703607519900638, "grad_norm": 5.341720104217529, "learning_rate": 2.532818138155646e-07, "loss": 1.5141, "step": 12891 }, { "epoch": 0.970436026270724, "grad_norm": 4.697822570800781, "learning_rate": 2.5205778275390056e-07, "loss": 1.7573, "step": 12892 }, { "epoch": 0.9705113005513841, "grad_norm": 4.658453464508057, "learning_rate": 2.508367090646502e-07, "loss": 1.6563, "step": 12893 }, { "epoch": 0.9705865748320442, "grad_norm": 4.803254127502441, "learning_rate": 2.4961859282041643e-07, "loss": 1.9874, "step": 12894 }, { "epoch": 0.9706618491127044, "grad_norm": 4.559871196746826, "learning_rate": 2.484034340936081e-07, "loss": 1.926, "step": 12895 }, { "epoch": 0.9707371233933646, "grad_norm": 4.435745716094971, "learning_rate": 2.471912329564618e-07, "loss": 1.6567, "step": 12896 }, { "epoch": 0.9708123976740247, "grad_norm": 4.745160102844238, "learning_rate": 2.459819894810367e-07, "loss": 1.8007, "step": 12897 }, { "epoch": 0.9708876719546848, "grad_norm": 5.0204997062683105, "learning_rate": 2.447757037392251e-07, "loss": 2.0072, "step": 12898 }, { "epoch": 0.9709629462353451, "grad_norm": 5.456808090209961, "learning_rate": 2.4357237580273084e-07, "loss": 1.8933, "step": 12899 }, { "epoch": 0.9710382205160052, "grad_norm": 5.426011562347412, "learning_rate": 2.423720057430967e-07, "loss": 1.873, "step": 12900 }, { "epoch": 0.9711134947966653, "grad_norm": 4.370813846588135, "learning_rate": 2.411745936316712e-07, "loss": 1.6608, "step": 12901 }, { "epoch": 0.9711887690773255, "grad_norm": 3.82869815826416, "learning_rate": 2.399801395396417e-07, "loss": 1.6807, "step": 12902 }, { "epoch": 0.9712640433579857, "grad_norm": 5.512392520904541, "learning_rate": 2.387886435380182e-07, "loss": 1.6068, "step": 12903 }, { "epoch": 0.9713393176386458, "grad_norm": 5.068700313568115, "learning_rate": 2.3760010569762737e-07, "loss": 1.7812, "step": 12904 }, { "epoch": 0.971414591919306, "grad_norm": 6.03415584564209, "learning_rate": 2.364145260891293e-07, "loss": 1.9484, "step": 12905 }, { "epoch": 0.9714898661999661, "grad_norm": 6.718302249908447, "learning_rate": 2.352319047829954e-07, "loss": 1.5456, "step": 12906 }, { "epoch": 0.9715651404806263, "grad_norm": 4.510940074920654, "learning_rate": 2.3405224184954166e-07, "loss": 1.8412, "step": 12907 }, { "epoch": 0.9716404147612865, "grad_norm": 4.695608139038086, "learning_rate": 2.328755373588787e-07, "loss": 1.3734, "step": 12908 }, { "epoch": 0.9717156890419466, "grad_norm": 5.0936689376831055, "learning_rate": 2.317017913809727e-07, "loss": 2.0167, "step": 12909 }, { "epoch": 0.9717909633226067, "grad_norm": 5.769432544708252, "learning_rate": 2.3053100398559015e-07, "loss": 2.051, "step": 12910 }, { "epoch": 0.971866237603267, "grad_norm": 3.9330129623413086, "learning_rate": 2.293631752423364e-07, "loss": 1.9386, "step": 12911 }, { "epoch": 0.9719415118839271, "grad_norm": 5.308951377868652, "learning_rate": 2.281983052206338e-07, "loss": 1.8434, "step": 12912 }, { "epoch": 0.9720167861645872, "grad_norm": 4.602841377258301, "learning_rate": 2.2703639398973242e-07, "loss": 2.1663, "step": 12913 }, { "epoch": 0.9720920604452473, "grad_norm": 5.422677040100098, "learning_rate": 2.2587744161869927e-07, "loss": 1.6994, "step": 12914 }, { "epoch": 0.9721673347259076, "grad_norm": 4.921991348266602, "learning_rate": 2.2472144817643482e-07, "loss": 1.4174, "step": 12915 }, { "epoch": 0.9722426090065677, "grad_norm": 7.114505767822266, "learning_rate": 2.2356841373166182e-07, "loss": 1.8411, "step": 12916 }, { "epoch": 0.9723178832872278, "grad_norm": 4.967130184173584, "learning_rate": 2.224183383529199e-07, "loss": 1.7277, "step": 12917 }, { "epoch": 0.972393157567888, "grad_norm": 5.603796482086182, "learning_rate": 2.2127122210857663e-07, "loss": 1.7817, "step": 12918 }, { "epoch": 0.9724684318485481, "grad_norm": 5.356256484985352, "learning_rate": 2.20127065066833e-07, "loss": 1.5158, "step": 12919 }, { "epoch": 0.9725437061292083, "grad_norm": 4.639211177825928, "learning_rate": 2.1898586729570126e-07, "loss": 1.6356, "step": 12920 }, { "epoch": 0.9726189804098685, "grad_norm": 4.742597579956055, "learning_rate": 2.1784762886302157e-07, "loss": 2.0759, "step": 12921 }, { "epoch": 0.9726942546905286, "grad_norm": 4.5918097496032715, "learning_rate": 2.167123498364565e-07, "loss": 1.4234, "step": 12922 }, { "epoch": 0.9727695289711887, "grad_norm": 4.690528392791748, "learning_rate": 2.1558003028349648e-07, "loss": 1.5495, "step": 12923 }, { "epoch": 0.972844803251849, "grad_norm": 4.799502372741699, "learning_rate": 2.1445067027145994e-07, "loss": 1.6469, "step": 12924 }, { "epoch": 0.9729200775325091, "grad_norm": 5.731314182281494, "learning_rate": 2.1332426986747644e-07, "loss": 1.9086, "step": 12925 }, { "epoch": 0.9729953518131692, "grad_norm": 4.05239200592041, "learning_rate": 2.1220082913852024e-07, "loss": 1.9317, "step": 12926 }, { "epoch": 0.9730706260938294, "grad_norm": 5.346749305725098, "learning_rate": 2.1108034815136013e-07, "loss": 1.4937, "step": 12927 }, { "epoch": 0.9731459003744896, "grad_norm": 5.038517951965332, "learning_rate": 2.0996282697261505e-07, "loss": 1.7887, "step": 12928 }, { "epoch": 0.9732211746551497, "grad_norm": 5.209228038787842, "learning_rate": 2.0884826566871517e-07, "loss": 1.9106, "step": 12929 }, { "epoch": 0.9732964489358099, "grad_norm": 4.027022361755371, "learning_rate": 2.0773666430591866e-07, "loss": 1.8402, "step": 12930 }, { "epoch": 0.97337172321647, "grad_norm": 4.628996849060059, "learning_rate": 2.0662802295030592e-07, "loss": 1.9488, "step": 12931 }, { "epoch": 0.9734469974971302, "grad_norm": 5.011528968811035, "learning_rate": 2.055223416677854e-07, "loss": 2.1942, "step": 12932 }, { "epoch": 0.9735222717777903, "grad_norm": 4.232499122619629, "learning_rate": 2.0441962052408782e-07, "loss": 1.7977, "step": 12933 }, { "epoch": 0.9735975460584505, "grad_norm": 6.438435077667236, "learning_rate": 2.0331985958476075e-07, "loss": 2.1995, "step": 12934 }, { "epoch": 0.9736728203391106, "grad_norm": 7.225154399871826, "learning_rate": 2.0222305891518521e-07, "loss": 2.0804, "step": 12935 }, { "epoch": 0.9737480946197707, "grad_norm": 4.778552055358887, "learning_rate": 2.0112921858056466e-07, "loss": 1.8135, "step": 12936 }, { "epoch": 0.973823368900431, "grad_norm": 5.350754261016846, "learning_rate": 2.0003833864592482e-07, "loss": 1.6589, "step": 12937 }, { "epoch": 0.9738986431810911, "grad_norm": 4.418392658233643, "learning_rate": 1.989504191761138e-07, "loss": 1.9128, "step": 12938 }, { "epoch": 0.9739739174617512, "grad_norm": 4.4751482009887695, "learning_rate": 1.978654602358021e-07, "loss": 1.7483, "step": 12939 }, { "epoch": 0.9740491917424114, "grad_norm": 5.576937198638916, "learning_rate": 1.967834618894937e-07, "loss": 1.751, "step": 12940 }, { "epoch": 0.9741244660230716, "grad_norm": 5.267410755157471, "learning_rate": 1.957044242015038e-07, "loss": 2.1266, "step": 12941 }, { "epoch": 0.9741997403037317, "grad_norm": 4.853013038635254, "learning_rate": 1.946283472359811e-07, "loss": 1.9482, "step": 12942 }, { "epoch": 0.9742750145843919, "grad_norm": 5.489080429077148, "learning_rate": 1.9355523105689666e-07, "loss": 2.2334, "step": 12943 }, { "epoch": 0.974350288865052, "grad_norm": 5.7867960929870605, "learning_rate": 1.9248507572804385e-07, "loss": 1.8813, "step": 12944 }, { "epoch": 0.9744255631457122, "grad_norm": 5.590057849884033, "learning_rate": 1.9141788131303851e-07, "loss": 1.4356, "step": 12945 }, { "epoch": 0.9745008374263724, "grad_norm": 3.8147857189178467, "learning_rate": 1.9035364787532427e-07, "loss": 1.8349, "step": 12946 }, { "epoch": 0.9745761117070325, "grad_norm": 5.534485340118408, "learning_rate": 1.8929237547816726e-07, "loss": 2.3447, "step": 12947 }, { "epoch": 0.9746513859876926, "grad_norm": 5.00258207321167, "learning_rate": 1.8823406418465028e-07, "loss": 1.778, "step": 12948 }, { "epoch": 0.9747266602683529, "grad_norm": 5.148022651672363, "learning_rate": 1.8717871405769526e-07, "loss": 1.7496, "step": 12949 }, { "epoch": 0.974801934549013, "grad_norm": 5.67739725112915, "learning_rate": 1.8612632516004092e-07, "loss": 1.613, "step": 12950 }, { "epoch": 0.9748772088296731, "grad_norm": 4.235254764556885, "learning_rate": 1.850768975542372e-07, "loss": 2.0982, "step": 12951 }, { "epoch": 0.9749524831103332, "grad_norm": 4.133965969085693, "learning_rate": 1.840304313026786e-07, "loss": 1.7344, "step": 12952 }, { "epoch": 0.9750277573909935, "grad_norm": 4.7754998207092285, "learning_rate": 1.8298692646757653e-07, "loss": 1.8371, "step": 12953 }, { "epoch": 0.9751030316716536, "grad_norm": 6.86676549911499, "learning_rate": 1.8194638311095914e-07, "loss": 1.7308, "step": 12954 }, { "epoch": 0.9751783059523137, "grad_norm": 4.804413318634033, "learning_rate": 1.8090880129468246e-07, "loss": 1.7924, "step": 12955 }, { "epoch": 0.9752535802329739, "grad_norm": 5.413994789123535, "learning_rate": 1.7987418108043608e-07, "loss": 1.9852, "step": 12956 }, { "epoch": 0.975328854513634, "grad_norm": 4.407379150390625, "learning_rate": 1.7884252252971524e-07, "loss": 1.9656, "step": 12957 }, { "epoch": 0.9754041287942942, "grad_norm": 4.6286797523498535, "learning_rate": 1.7781382570385418e-07, "loss": 1.4735, "step": 12958 }, { "epoch": 0.9754794030749544, "grad_norm": 4.9368743896484375, "learning_rate": 1.7678809066400405e-07, "loss": 1.6398, "step": 12959 }, { "epoch": 0.9755546773556145, "grad_norm": 5.2719197273254395, "learning_rate": 1.7576531747114378e-07, "loss": 1.8267, "step": 12960 }, { "epoch": 0.9756299516362746, "grad_norm": 5.103287220001221, "learning_rate": 1.7474550618607477e-07, "loss": 1.9093, "step": 12961 }, { "epoch": 0.9757052259169349, "grad_norm": 5.483565330505371, "learning_rate": 1.7372865686942075e-07, "loss": 1.6723, "step": 12962 }, { "epoch": 0.975780500197595, "grad_norm": 5.80971622467041, "learning_rate": 1.7271476958163336e-07, "loss": 1.8963, "step": 12963 }, { "epoch": 0.9758557744782551, "grad_norm": 6.123488426208496, "learning_rate": 1.717038443829755e-07, "loss": 2.1776, "step": 12964 }, { "epoch": 0.9759310487589153, "grad_norm": 6.915956497192383, "learning_rate": 1.7069588133355464e-07, "loss": 2.0053, "step": 12965 }, { "epoch": 0.9760063230395755, "grad_norm": 5.975083351135254, "learning_rate": 1.6969088049328398e-07, "loss": 1.675, "step": 12966 }, { "epoch": 0.9760815973202356, "grad_norm": 6.502622604370117, "learning_rate": 1.686888419219157e-07, "loss": 1.8324, "step": 12967 }, { "epoch": 0.9761568716008958, "grad_norm": 5.546345233917236, "learning_rate": 1.6768976567901328e-07, "loss": 1.6187, "step": 12968 }, { "epoch": 0.9762321458815559, "grad_norm": 4.889604091644287, "learning_rate": 1.666936518239681e-07, "loss": 1.9573, "step": 12969 }, { "epoch": 0.976307420162216, "grad_norm": 8.208456993103027, "learning_rate": 1.657005004159995e-07, "loss": 1.9067, "step": 12970 }, { "epoch": 0.9763826944428762, "grad_norm": 5.595680236816406, "learning_rate": 1.647103115141435e-07, "loss": 1.7742, "step": 12971 }, { "epoch": 0.9764579687235364, "grad_norm": 5.283383846282959, "learning_rate": 1.6372308517726976e-07, "loss": 1.9905, "step": 12972 }, { "epoch": 0.9765332430041965, "grad_norm": 4.882968902587891, "learning_rate": 1.6273882146405904e-07, "loss": 1.7688, "step": 12973 }, { "epoch": 0.9766085172848566, "grad_norm": 4.411565780639648, "learning_rate": 1.6175752043302573e-07, "loss": 1.5687, "step": 12974 }, { "epoch": 0.9766837915655169, "grad_norm": 4.612016677856445, "learning_rate": 1.6077918214250642e-07, "loss": 1.9334, "step": 12975 }, { "epoch": 0.976759065846177, "grad_norm": 7.30670690536499, "learning_rate": 1.598038066506602e-07, "loss": 2.1734, "step": 12976 }, { "epoch": 0.9768343401268371, "grad_norm": 8.496973037719727, "learning_rate": 1.5883139401546844e-07, "loss": 1.9697, "step": 12977 }, { "epoch": 0.9769096144074974, "grad_norm": 4.6441497802734375, "learning_rate": 1.5786194429474044e-07, "loss": 1.9611, "step": 12978 }, { "epoch": 0.9769848886881575, "grad_norm": 3.7294232845306396, "learning_rate": 1.5689545754610792e-07, "loss": 2.0013, "step": 12979 }, { "epoch": 0.9770601629688176, "grad_norm": 5.306214332580566, "learning_rate": 1.5593193382702486e-07, "loss": 1.9306, "step": 12980 }, { "epoch": 0.9771354372494778, "grad_norm": 5.148443222045898, "learning_rate": 1.5497137319476773e-07, "loss": 1.894, "step": 12981 }, { "epoch": 0.9772107115301379, "grad_norm": 6.160584926605225, "learning_rate": 1.5401377570644083e-07, "loss": 1.5534, "step": 12982 }, { "epoch": 0.9772859858107981, "grad_norm": 5.997537136077881, "learning_rate": 1.5305914141897082e-07, "loss": 1.6788, "step": 12983 }, { "epoch": 0.9773612600914583, "grad_norm": 7.141962051391602, "learning_rate": 1.5210747038911234e-07, "loss": 1.7355, "step": 12984 }, { "epoch": 0.9774365343721184, "grad_norm": 5.960862636566162, "learning_rate": 1.511587626734312e-07, "loss": 1.6497, "step": 12985 }, { "epoch": 0.9775118086527785, "grad_norm": 4.968209266662598, "learning_rate": 1.5021301832833235e-07, "loss": 1.7074, "step": 12986 }, { "epoch": 0.9775870829334388, "grad_norm": 5.550293922424316, "learning_rate": 1.4927023741003187e-07, "loss": 2.1063, "step": 12987 }, { "epoch": 0.9776623572140989, "grad_norm": 4.9811835289001465, "learning_rate": 1.4833041997457942e-07, "loss": 1.7594, "step": 12988 }, { "epoch": 0.977737631494759, "grad_norm": 6.835657119750977, "learning_rate": 1.473935660778414e-07, "loss": 1.6718, "step": 12989 }, { "epoch": 0.9778129057754192, "grad_norm": 4.086740493774414, "learning_rate": 1.4645967577551212e-07, "loss": 1.3885, "step": 12990 }, { "epoch": 0.9778881800560794, "grad_norm": 5.872897624969482, "learning_rate": 1.4552874912310832e-07, "loss": 1.7537, "step": 12991 }, { "epoch": 0.9779634543367395, "grad_norm": 5.258413791656494, "learning_rate": 1.4460078617597462e-07, "loss": 1.7176, "step": 12992 }, { "epoch": 0.9780387286173996, "grad_norm": 3.5322299003601074, "learning_rate": 1.4367578698927242e-07, "loss": 1.8511, "step": 12993 }, { "epoch": 0.9781140028980598, "grad_norm": 4.950979232788086, "learning_rate": 1.427537516179911e-07, "loss": 1.6264, "step": 12994 }, { "epoch": 0.97818927717872, "grad_norm": 4.316150188446045, "learning_rate": 1.4183468011694235e-07, "loss": 1.6371, "step": 12995 }, { "epoch": 0.9782645514593801, "grad_norm": 6.338021755218506, "learning_rate": 1.409185725407658e-07, "loss": 2.0498, "step": 12996 }, { "epoch": 0.9783398257400403, "grad_norm": 7.142224311828613, "learning_rate": 1.4000542894391232e-07, "loss": 1.8443, "step": 12997 }, { "epoch": 0.9784151000207004, "grad_norm": 4.912839412689209, "learning_rate": 1.390952493806774e-07, "loss": 1.5096, "step": 12998 }, { "epoch": 0.9784903743013605, "grad_norm": 5.672285079956055, "learning_rate": 1.3818803390515667e-07, "loss": 1.9076, "step": 12999 }, { "epoch": 0.9785656485820208, "grad_norm": 4.521214008331299, "learning_rate": 1.3728378257129027e-07, "loss": 1.6593, "step": 13000 }, { "epoch": 0.9786409228626809, "grad_norm": 4.792366027832031, "learning_rate": 1.3638249543283522e-07, "loss": 2.3685, "step": 13001 }, { "epoch": 0.978716197143341, "grad_norm": 8.043989181518555, "learning_rate": 1.3548417254336532e-07, "loss": 2.1338, "step": 13002 }, { "epoch": 0.9787914714240012, "grad_norm": 4.693892478942871, "learning_rate": 1.345888139562823e-07, "loss": 2.0123, "step": 13003 }, { "epoch": 0.9788667457046614, "grad_norm": 4.125730037689209, "learning_rate": 1.336964197248103e-07, "loss": 2.1044, "step": 13004 }, { "epoch": 0.9789420199853215, "grad_norm": 4.785754680633545, "learning_rate": 1.328069899020068e-07, "loss": 1.724, "step": 13005 }, { "epoch": 0.9790172942659817, "grad_norm": 4.683404445648193, "learning_rate": 1.319205245407462e-07, "loss": 1.9796, "step": 13006 }, { "epoch": 0.9790925685466418, "grad_norm": 4.532181739807129, "learning_rate": 1.3103702369371973e-07, "loss": 1.8935, "step": 13007 }, { "epoch": 0.979167842827302, "grad_norm": 5.674066543579102, "learning_rate": 1.3015648741345199e-07, "loss": 1.7685, "step": 13008 }, { "epoch": 0.9792431171079622, "grad_norm": 5.136986255645752, "learning_rate": 1.2927891575228446e-07, "loss": 1.5399, "step": 13009 }, { "epoch": 0.9793183913886223, "grad_norm": 4.792853355407715, "learning_rate": 1.2840430876239206e-07, "loss": 2.0651, "step": 13010 }, { "epoch": 0.9793936656692824, "grad_norm": 4.621176242828369, "learning_rate": 1.275326664957721e-07, "loss": 1.8561, "step": 13011 }, { "epoch": 0.9794689399499426, "grad_norm": 4.739818572998047, "learning_rate": 1.266639890042276e-07, "loss": 1.7558, "step": 13012 }, { "epoch": 0.9795442142306028, "grad_norm": 8.046173095703125, "learning_rate": 1.2579827633941166e-07, "loss": 1.9338, "step": 13013 }, { "epoch": 0.9796194885112629, "grad_norm": 5.873472690582275, "learning_rate": 1.2493552855278313e-07, "loss": 1.7114, "step": 13014 }, { "epoch": 0.979694762791923, "grad_norm": 4.823851585388184, "learning_rate": 1.240757456956232e-07, "loss": 1.4271, "step": 13015 }, { "epoch": 0.9797700370725833, "grad_norm": 6.730445384979248, "learning_rate": 1.2321892781905763e-07, "loss": 1.7472, "step": 13016 }, { "epoch": 0.9798453113532434, "grad_norm": 5.2308454513549805, "learning_rate": 1.223650749740124e-07, "loss": 1.986, "step": 13017 }, { "epoch": 0.9799205856339035, "grad_norm": 6.757125377655029, "learning_rate": 1.2151418721124684e-07, "loss": 2.0117, "step": 13018 }, { "epoch": 0.9799958599145637, "grad_norm": 3.811600685119629, "learning_rate": 1.2066626458134278e-07, "loss": 1.8664, "step": 13019 }, { "epoch": 0.9800711341952238, "grad_norm": 4.637233257293701, "learning_rate": 1.198213071347154e-07, "loss": 1.7718, "step": 13020 }, { "epoch": 0.980146408475884, "grad_norm": 4.9097795486450195, "learning_rate": 1.1897931492158565e-07, "loss": 2.0681, "step": 13021 }, { "epoch": 0.9802216827565442, "grad_norm": 4.5483078956604, "learning_rate": 1.1814028799201348e-07, "loss": 2.0928, "step": 13022 }, { "epoch": 0.9802969570372043, "grad_norm": 4.831428050994873, "learning_rate": 1.1730422639587013e-07, "loss": 1.893, "step": 13023 }, { "epoch": 0.9803722313178644, "grad_norm": 8.606104850769043, "learning_rate": 1.1647113018286027e-07, "loss": 2.0254, "step": 13024 }, { "epoch": 0.9804475055985247, "grad_norm": 5.141611576080322, "learning_rate": 1.156409994025165e-07, "loss": 2.1332, "step": 13025 }, { "epoch": 0.9805227798791848, "grad_norm": 4.279614448547363, "learning_rate": 1.1481383410417712e-07, "loss": 1.8194, "step": 13026 }, { "epoch": 0.9805980541598449, "grad_norm": 6.046935081481934, "learning_rate": 1.1398963433701393e-07, "loss": 1.5399, "step": 13027 }, { "epoch": 0.9806733284405051, "grad_norm": 4.307223796844482, "learning_rate": 1.1316840015003772e-07, "loss": 1.9866, "step": 13028 }, { "epoch": 0.9807486027211653, "grad_norm": 5.5199737548828125, "learning_rate": 1.1235013159205387e-07, "loss": 1.8194, "step": 13029 }, { "epoch": 0.9808238770018254, "grad_norm": 12.91816520690918, "learning_rate": 1.1153482871170684e-07, "loss": 2.1331, "step": 13030 }, { "epoch": 0.9808991512824855, "grad_norm": 5.013204097747803, "learning_rate": 1.107224915574745e-07, "loss": 2.1744, "step": 13031 }, { "epoch": 0.9809744255631457, "grad_norm": 5.964956283569336, "learning_rate": 1.0991312017764044e-07, "loss": 2.1114, "step": 13032 }, { "epoch": 0.9810496998438059, "grad_norm": 5.463572025299072, "learning_rate": 1.0910671462031618e-07, "loss": 1.708, "step": 13033 }, { "epoch": 0.981124974124466, "grad_norm": 4.063724994659424, "learning_rate": 1.0830327493344672e-07, "loss": 1.6479, "step": 13034 }, { "epoch": 0.9812002484051262, "grad_norm": 5.764823913574219, "learning_rate": 1.0750280116479383e-07, "loss": 1.8031, "step": 13035 }, { "epoch": 0.9812755226857863, "grad_norm": 4.788595199584961, "learning_rate": 1.0670529336194168e-07, "loss": 1.7181, "step": 13036 }, { "epoch": 0.9813507969664464, "grad_norm": 4.862103462219238, "learning_rate": 1.0591075157230235e-07, "loss": 1.9211, "step": 13037 }, { "epoch": 0.9814260712471067, "grad_norm": 5.095187664031982, "learning_rate": 1.0511917584310472e-07, "loss": 1.7008, "step": 13038 }, { "epoch": 0.9815013455277668, "grad_norm": 4.9878830909729, "learning_rate": 1.0433056622140558e-07, "loss": 1.9138, "step": 13039 }, { "epoch": 0.9815766198084269, "grad_norm": 4.880194664001465, "learning_rate": 1.0354492275408966e-07, "loss": 1.9044, "step": 13040 }, { "epoch": 0.9816518940890872, "grad_norm": 4.527126789093018, "learning_rate": 1.027622454878585e-07, "loss": 1.5647, "step": 13041 }, { "epoch": 0.9817271683697473, "grad_norm": 5.153310298919678, "learning_rate": 1.01982534469236e-07, "loss": 2.1593, "step": 13042 }, { "epoch": 0.9818024426504074, "grad_norm": 4.031414985656738, "learning_rate": 1.0120578974458506e-07, "loss": 1.7779, "step": 13043 }, { "epoch": 0.9818777169310676, "grad_norm": 5.343712329864502, "learning_rate": 1.0043201136006874e-07, "loss": 1.8917, "step": 13044 }, { "epoch": 0.9819529912117277, "grad_norm": 5.463987350463867, "learning_rate": 9.966119936170026e-08, "loss": 1.7369, "step": 13045 }, { "epoch": 0.9820282654923879, "grad_norm": 5.339509963989258, "learning_rate": 9.889335379528186e-08, "loss": 1.7345, "step": 13046 }, { "epoch": 0.9821035397730481, "grad_norm": 4.834533214569092, "learning_rate": 9.812847470647702e-08, "loss": 1.8672, "step": 13047 }, { "epoch": 0.9821788140537082, "grad_norm": 7.370434761047363, "learning_rate": 9.73665621407549e-08, "loss": 2.0389, "step": 13048 }, { "epoch": 0.9822540883343683, "grad_norm": 5.545389175415039, "learning_rate": 9.660761614339598e-08, "loss": 1.9579, "step": 13049 }, { "epoch": 0.9823293626150285, "grad_norm": 5.122701644897461, "learning_rate": 9.58516367595308e-08, "loss": 1.7374, "step": 13050 }, { "epoch": 0.9824046368956887, "grad_norm": 6.175267696380615, "learning_rate": 9.50986240340901e-08, "loss": 1.5349, "step": 13051 }, { "epoch": 0.9824799111763488, "grad_norm": 5.517742156982422, "learning_rate": 9.434857801184915e-08, "loss": 1.9744, "step": 13052 }, { "epoch": 0.9825551854570089, "grad_norm": 3.5727803707122803, "learning_rate": 9.360149873738345e-08, "loss": 1.6103, "step": 13053 }, { "epoch": 0.9826304597376692, "grad_norm": 4.6505584716796875, "learning_rate": 9.285738625511298e-08, "loss": 1.9874, "step": 13054 }, { "epoch": 0.9827057340183293, "grad_norm": 4.945466995239258, "learning_rate": 9.211624060926904e-08, "loss": 1.9691, "step": 13055 }, { "epoch": 0.9827810082989894, "grad_norm": 4.510948657989502, "learning_rate": 9.137806184391639e-08, "loss": 1.5888, "step": 13056 }, { "epoch": 0.9828562825796496, "grad_norm": 4.9971137046813965, "learning_rate": 9.06428500029366e-08, "loss": 1.7101, "step": 13057 }, { "epoch": 0.9829315568603098, "grad_norm": 4.093054294586182, "learning_rate": 8.991060513002803e-08, "loss": 1.7063, "step": 13058 }, { "epoch": 0.9830068311409699, "grad_norm": 3.8774163722991943, "learning_rate": 8.918132726872808e-08, "loss": 1.7303, "step": 13059 }, { "epoch": 0.9830821054216301, "grad_norm": 8.15949535369873, "learning_rate": 8.845501646239096e-08, "loss": 2.1858, "step": 13060 }, { "epoch": 0.9831573797022902, "grad_norm": 4.288280010223389, "learning_rate": 8.773167275418214e-08, "loss": 1.8548, "step": 13061 }, { "epoch": 0.9832326539829503, "grad_norm": 5.814545154571533, "learning_rate": 8.701129618712279e-08, "loss": 1.6416, "step": 13062 }, { "epoch": 0.9833079282636106, "grad_norm": 5.314117908477783, "learning_rate": 8.62938868040175e-08, "loss": 1.8706, "step": 13063 }, { "epoch": 0.9833832025442707, "grad_norm": 4.418406009674072, "learning_rate": 8.55794446475211e-08, "loss": 1.7888, "step": 13064 }, { "epoch": 0.9834584768249308, "grad_norm": 4.633051872253418, "learning_rate": 8.486796976011069e-08, "loss": 1.537, "step": 13065 }, { "epoch": 0.983533751105591, "grad_norm": 6.053751468658447, "learning_rate": 8.415946218407466e-08, "loss": 1.848, "step": 13066 }, { "epoch": 0.9836090253862512, "grad_norm": 4.960136890411377, "learning_rate": 8.345392196153489e-08, "loss": 1.7763, "step": 13067 }, { "epoch": 0.9836842996669113, "grad_norm": 5.135873317718506, "learning_rate": 8.275134913443005e-08, "loss": 1.9022, "step": 13068 }, { "epoch": 0.9837595739475714, "grad_norm": 4.405609607696533, "learning_rate": 8.205174374453228e-08, "loss": 1.7956, "step": 13069 }, { "epoch": 0.9838348482282316, "grad_norm": 5.821405410766602, "learning_rate": 8.135510583342498e-08, "loss": 1.9378, "step": 13070 }, { "epoch": 0.9839101225088918, "grad_norm": 4.579228401184082, "learning_rate": 8.066143544253058e-08, "loss": 1.9251, "step": 13071 }, { "epoch": 0.9839853967895519, "grad_norm": 5.583433151245117, "learning_rate": 7.997073261307164e-08, "loss": 2.0254, "step": 13072 }, { "epoch": 0.9840606710702121, "grad_norm": 5.75313663482666, "learning_rate": 7.928299738612088e-08, "loss": 1.7007, "step": 13073 }, { "epoch": 0.9841359453508722, "grad_norm": 4.439338684082031, "learning_rate": 7.859822980255115e-08, "loss": 1.5112, "step": 13074 }, { "epoch": 0.9842112196315324, "grad_norm": 6.740578651428223, "learning_rate": 7.79164299030799e-08, "loss": 2.1429, "step": 13075 }, { "epoch": 0.9842864939121926, "grad_norm": 4.406201362609863, "learning_rate": 7.72375977282358e-08, "loss": 1.6211, "step": 13076 }, { "epoch": 0.9843617681928527, "grad_norm": 4.113922595977783, "learning_rate": 7.656173331836991e-08, "loss": 1.7484, "step": 13077 }, { "epoch": 0.9844370424735128, "grad_norm": 4.590651035308838, "learning_rate": 7.588883671366675e-08, "loss": 1.725, "step": 13078 }, { "epoch": 0.9845123167541731, "grad_norm": 5.183358669281006, "learning_rate": 7.521890795411657e-08, "loss": 1.6976, "step": 13079 }, { "epoch": 0.9845875910348332, "grad_norm": 5.077670574188232, "learning_rate": 7.45519470795597e-08, "loss": 1.6645, "step": 13080 }, { "epoch": 0.9846628653154933, "grad_norm": 4.6408796310424805, "learning_rate": 7.388795412963112e-08, "loss": 1.5964, "step": 13081 }, { "epoch": 0.9847381395961535, "grad_norm": 4.676085948944092, "learning_rate": 7.32269291438159e-08, "loss": 1.876, "step": 13082 }, { "epoch": 0.9848134138768136, "grad_norm": 7.34224271774292, "learning_rate": 7.256887216139929e-08, "loss": 2.0622, "step": 13083 }, { "epoch": 0.9848886881574738, "grad_norm": 5.3807373046875, "learning_rate": 7.191378322150555e-08, "loss": 1.6785, "step": 13084 }, { "epoch": 0.984963962438134, "grad_norm": 4.592549800872803, "learning_rate": 7.126166236307575e-08, "loss": 1.8521, "step": 13085 }, { "epoch": 0.9850392367187941, "grad_norm": 4.799075126647949, "learning_rate": 7.061250962488441e-08, "loss": 1.763, "step": 13086 }, { "epoch": 0.9851145109994542, "grad_norm": 4.773825168609619, "learning_rate": 6.996632504551182e-08, "loss": 1.8987, "step": 13087 }, { "epoch": 0.9851897852801145, "grad_norm": 5.173593044281006, "learning_rate": 6.932310866337721e-08, "loss": 1.5234, "step": 13088 }, { "epoch": 0.9852650595607746, "grad_norm": 4.833860874176025, "learning_rate": 6.868286051671669e-08, "loss": 1.5358, "step": 13089 }, { "epoch": 0.9853403338414347, "grad_norm": 4.161336898803711, "learning_rate": 6.804558064358868e-08, "loss": 1.5907, "step": 13090 }, { "epoch": 0.9854156081220948, "grad_norm": 4.897314548492432, "learning_rate": 6.741126908187956e-08, "loss": 1.4992, "step": 13091 }, { "epoch": 0.9854908824027551, "grad_norm": 4.734277725219727, "learning_rate": 6.67799258693036e-08, "loss": 1.6127, "step": 13092 }, { "epoch": 0.9855661566834152, "grad_norm": 4.174915313720703, "learning_rate": 6.615155104338077e-08, "loss": 1.5206, "step": 13093 }, { "epoch": 0.9856414309640753, "grad_norm": 5.145100116729736, "learning_rate": 6.552614464147566e-08, "loss": 2.0955, "step": 13094 }, { "epoch": 0.9857167052447355, "grad_norm": 5.781323432922363, "learning_rate": 6.490370670076407e-08, "loss": 2.1061, "step": 13095 }, { "epoch": 0.9857919795253957, "grad_norm": 5.626537799835205, "learning_rate": 6.428423725824417e-08, "loss": 1.9053, "step": 13096 }, { "epoch": 0.9858672538060558, "grad_norm": 5.205971717834473, "learning_rate": 6.366773635074208e-08, "loss": 1.6103, "step": 13097 }, { "epoch": 0.985942528086716, "grad_norm": 5.263709545135498, "learning_rate": 6.30542040149118e-08, "loss": 2.2085, "step": 13098 }, { "epoch": 0.9860178023673761, "grad_norm": 5.402046203613281, "learning_rate": 6.244364028722971e-08, "loss": 1.9088, "step": 13099 }, { "epoch": 0.9860930766480362, "grad_norm": 4.687032222747803, "learning_rate": 6.18360452039779e-08, "loss": 1.7946, "step": 13100 }, { "epoch": 0.9861683509286965, "grad_norm": 3.61903715133667, "learning_rate": 6.123141880128857e-08, "loss": 1.8414, "step": 13101 }, { "epoch": 0.9862436252093566, "grad_norm": 4.242746353149414, "learning_rate": 6.062976111509966e-08, "loss": 2.0542, "step": 13102 }, { "epoch": 0.9863188994900167, "grad_norm": 7.258364200592041, "learning_rate": 6.003107218118253e-08, "loss": 1.6877, "step": 13103 }, { "epoch": 0.986394173770677, "grad_norm": 5.150749683380127, "learning_rate": 5.94353520351254e-08, "loss": 1.9453, "step": 13104 }, { "epoch": 0.9864694480513371, "grad_norm": 5.658138275146484, "learning_rate": 5.8842600712338825e-08, "loss": 1.845, "step": 13105 }, { "epoch": 0.9865447223319972, "grad_norm": 5.427069187164307, "learning_rate": 5.825281824805573e-08, "loss": 1.7285, "step": 13106 }, { "epoch": 0.9866199966126574, "grad_norm": 4.351526737213135, "learning_rate": 5.7666004677353616e-08, "loss": 1.9474, "step": 13107 }, { "epoch": 0.9866952708933175, "grad_norm": 7.731470584869385, "learning_rate": 5.708216003509903e-08, "loss": 1.341, "step": 13108 }, { "epoch": 0.9867705451739777, "grad_norm": 5.416434288024902, "learning_rate": 5.65012843560142e-08, "loss": 1.9776, "step": 13109 }, { "epoch": 0.9868458194546378, "grad_norm": 4.176894664764404, "learning_rate": 5.592337767462152e-08, "loss": 2.0167, "step": 13110 }, { "epoch": 0.986921093735298, "grad_norm": 6.282223224639893, "learning_rate": 5.5348440025276835e-08, "loss": 1.9449, "step": 13111 }, { "epoch": 0.9869963680159581, "grad_norm": 5.001054286956787, "learning_rate": 5.477647144216391e-08, "loss": 1.5154, "step": 13112 }, { "epoch": 0.9870716422966183, "grad_norm": 6.220345497131348, "learning_rate": 5.420747195927778e-08, "loss": 1.773, "step": 13113 }, { "epoch": 0.9871469165772785, "grad_norm": 5.983572959899902, "learning_rate": 5.364144161044693e-08, "loss": 1.893, "step": 13114 }, { "epoch": 0.9872221908579386, "grad_norm": 4.614436626434326, "learning_rate": 5.307838042931667e-08, "loss": 1.8526, "step": 13115 }, { "epoch": 0.9872974651385987, "grad_norm": 4.497433185577393, "learning_rate": 5.251828844937135e-08, "loss": 1.8405, "step": 13116 }, { "epoch": 0.987372739419259, "grad_norm": 4.064354419708252, "learning_rate": 5.1961165703889866e-08, "loss": 1.7328, "step": 13117 }, { "epoch": 0.9874480136999191, "grad_norm": 4.049808502197266, "learning_rate": 5.1407012226006855e-08, "loss": 1.4904, "step": 13118 }, { "epoch": 0.9875232879805792, "grad_norm": 4.754990100860596, "learning_rate": 5.0855828048657073e-08, "loss": 1.6312, "step": 13119 }, { "epoch": 0.9875985622612394, "grad_norm": 6.001760959625244, "learning_rate": 5.03076132046032e-08, "loss": 1.8852, "step": 13120 }, { "epoch": 0.9876738365418996, "grad_norm": 4.823469638824463, "learning_rate": 4.9762367726446935e-08, "loss": 1.6977, "step": 13121 }, { "epoch": 0.9877491108225597, "grad_norm": 5.914470672607422, "learning_rate": 4.922009164659014e-08, "loss": 2.2929, "step": 13122 }, { "epoch": 0.9878243851032199, "grad_norm": 5.593662738800049, "learning_rate": 4.8680784997273684e-08, "loss": 1.8996, "step": 13123 }, { "epoch": 0.98789965938388, "grad_norm": 6.188007831573486, "learning_rate": 4.8144447810560825e-08, "loss": 1.7584, "step": 13124 }, { "epoch": 0.9879749336645401, "grad_norm": 7.864127159118652, "learning_rate": 4.7611080118331596e-08, "loss": 1.9415, "step": 13125 }, { "epoch": 0.9880502079452004, "grad_norm": 5.918152332305908, "learning_rate": 4.708068195229398e-08, "loss": 1.8846, "step": 13126 }, { "epoch": 0.9881254822258605, "grad_norm": 4.015069007873535, "learning_rate": 4.655325334397831e-08, "loss": 1.6841, "step": 13127 }, { "epoch": 0.9882007565065206, "grad_norm": 6.2730302810668945, "learning_rate": 4.602879432473728e-08, "loss": 1.9123, "step": 13128 }, { "epoch": 0.9882760307871807, "grad_norm": 4.26728630065918, "learning_rate": 4.550730492575705e-08, "loss": 1.6457, "step": 13129 }, { "epoch": 0.988351305067841, "grad_norm": 4.477145195007324, "learning_rate": 4.498878517802396e-08, "loss": 1.5855, "step": 13130 }, { "epoch": 0.9884265793485011, "grad_norm": 4.947357654571533, "learning_rate": 4.447323511237999e-08, "loss": 1.8056, "step": 13131 }, { "epoch": 0.9885018536291612, "grad_norm": 4.1914286613464355, "learning_rate": 4.396065475945621e-08, "loss": 1.7953, "step": 13132 }, { "epoch": 0.9885771279098214, "grad_norm": 5.948415756225586, "learning_rate": 4.3451044149733777e-08, "loss": 1.6858, "step": 13133 }, { "epoch": 0.9886524021904816, "grad_norm": 3.814768075942993, "learning_rate": 4.2944403313505135e-08, "loss": 1.8172, "step": 13134 }, { "epoch": 0.9887276764711417, "grad_norm": 5.156869411468506, "learning_rate": 4.2440732280885075e-08, "loss": 1.5084, "step": 13135 }, { "epoch": 0.9888029507518019, "grad_norm": 4.52300500869751, "learning_rate": 4.194003108182742e-08, "loss": 1.8068, "step": 13136 }, { "epoch": 0.988878225032462, "grad_norm": 4.190985679626465, "learning_rate": 4.1442299746080603e-08, "loss": 1.8246, "step": 13137 }, { "epoch": 0.9889534993131222, "grad_norm": 5.313257694244385, "learning_rate": 4.094753830324871e-08, "loss": 1.792, "step": 13138 }, { "epoch": 0.9890287735937824, "grad_norm": 5.041929244995117, "learning_rate": 4.0455746782736007e-08, "loss": 1.8836, "step": 13139 }, { "epoch": 0.9891040478744425, "grad_norm": 4.644732475280762, "learning_rate": 3.9966925213774654e-08, "loss": 1.5816, "step": 13140 }, { "epoch": 0.9891793221551026, "grad_norm": 6.309013366699219, "learning_rate": 3.9481073625430296e-08, "loss": 1.9388, "step": 13141 }, { "epoch": 0.9892545964357629, "grad_norm": 5.337410926818848, "learning_rate": 3.8998192046585394e-08, "loss": 2.1094, "step": 13142 }, { "epoch": 0.989329870716423, "grad_norm": 8.428162574768066, "learning_rate": 3.851828050594475e-08, "loss": 2.0227, "step": 13143 }, { "epoch": 0.9894051449970831, "grad_norm": 5.643184661865234, "learning_rate": 3.804133903203e-08, "loss": 1.776, "step": 13144 }, { "epoch": 0.9894804192777433, "grad_norm": 5.555056095123291, "learning_rate": 3.756736765320734e-08, "loss": 1.6066, "step": 13145 }, { "epoch": 0.9895556935584034, "grad_norm": 5.052450180053711, "learning_rate": 3.709636639764313e-08, "loss": 1.8886, "step": 13146 }, { "epoch": 0.9896309678390636, "grad_norm": 5.18364143371582, "learning_rate": 3.662833529334275e-08, "loss": 1.3593, "step": 13147 }, { "epoch": 0.9897062421197237, "grad_norm": 4.390613555908203, "learning_rate": 3.6163274368122834e-08, "loss": 2.2483, "step": 13148 }, { "epoch": 0.9897815164003839, "grad_norm": 3.8033335208892822, "learning_rate": 3.570118364963904e-08, "loss": 1.9823, "step": 13149 }, { "epoch": 0.989856790681044, "grad_norm": 7.855715751647949, "learning_rate": 3.5242063165352725e-08, "loss": 1.7753, "step": 13150 }, { "epoch": 0.9899320649617042, "grad_norm": 4.596234321594238, "learning_rate": 3.478591294256428e-08, "loss": 2.1249, "step": 13151 }, { "epoch": 0.9900073392423644, "grad_norm": 4.507048606872559, "learning_rate": 3.43327330083798e-08, "loss": 1.61, "step": 13152 }, { "epoch": 0.9900826135230245, "grad_norm": 4.837597370147705, "learning_rate": 3.3882523389755504e-08, "loss": 1.4973, "step": 13153 }, { "epoch": 0.9901578878036846, "grad_norm": 4.154238224029541, "learning_rate": 3.343528411344221e-08, "loss": 1.7266, "step": 13154 }, { "epoch": 0.9902331620843449, "grad_norm": 4.838868141174316, "learning_rate": 3.299101520602976e-08, "loss": 1.582, "step": 13155 }, { "epoch": 0.990308436365005, "grad_norm": 3.949821710586548, "learning_rate": 3.2549716693930365e-08, "loss": 1.5112, "step": 13156 }, { "epoch": 0.9903837106456651, "grad_norm": 4.174725532531738, "learning_rate": 3.2111388603378586e-08, "loss": 2.0756, "step": 13157 }, { "epoch": 0.9904589849263253, "grad_norm": 4.587085247039795, "learning_rate": 3.167603096043137e-08, "loss": 2.0162, "step": 13158 }, { "epoch": 0.9905342592069855, "grad_norm": 3.627316474914551, "learning_rate": 3.1243643790968e-08, "loss": 1.8721, "step": 13159 }, { "epoch": 0.9906095334876456, "grad_norm": 5.89481782913208, "learning_rate": 3.0814227120695707e-08, "loss": 1.9895, "step": 13160 }, { "epoch": 0.9906848077683058, "grad_norm": 6.094086170196533, "learning_rate": 3.0387780975138505e-08, "loss": 1.8393, "step": 13161 }, { "epoch": 0.9907600820489659, "grad_norm": 5.741156578063965, "learning_rate": 2.996430537964834e-08, "loss": 1.8518, "step": 13162 }, { "epoch": 0.990835356329626, "grad_norm": 5.1079936027526855, "learning_rate": 2.954380035939952e-08, "loss": 2.0655, "step": 13163 }, { "epoch": 0.9909106306102863, "grad_norm": 4.007481575012207, "learning_rate": 2.912626593938872e-08, "loss": 1.5509, "step": 13164 }, { "epoch": 0.9909859048909464, "grad_norm": 5.811824798583984, "learning_rate": 2.8711702144446074e-08, "loss": 1.8812, "step": 13165 }, { "epoch": 0.9910611791716065, "grad_norm": 4.512807846069336, "learning_rate": 2.830010899920188e-08, "loss": 1.7058, "step": 13166 }, { "epoch": 0.9911364534522666, "grad_norm": 4.473937511444092, "learning_rate": 2.7891486528136558e-08, "loss": 1.5665, "step": 13167 }, { "epoch": 0.9912117277329269, "grad_norm": 4.295932292938232, "learning_rate": 2.7485834755530683e-08, "loss": 1.6102, "step": 13168 }, { "epoch": 0.991287002013587, "grad_norm": 5.402720928192139, "learning_rate": 2.70831537055094e-08, "loss": 1.7475, "step": 13169 }, { "epoch": 0.9913622762942471, "grad_norm": 4.579638481140137, "learning_rate": 2.668344340200357e-08, "loss": 1.649, "step": 13170 }, { "epoch": 0.9914375505749073, "grad_norm": 4.465633392333984, "learning_rate": 2.6286703868777518e-08, "loss": 1.6987, "step": 13171 }, { "epoch": 0.9915128248555675, "grad_norm": 4.027198314666748, "learning_rate": 2.5892935129417927e-08, "loss": 1.6258, "step": 13172 }, { "epoch": 0.9915880991362276, "grad_norm": 6.073844909667969, "learning_rate": 2.5502137207333853e-08, "loss": 1.7642, "step": 13173 }, { "epoch": 0.9916633734168878, "grad_norm": 4.4179911613464355, "learning_rate": 2.5114310125751162e-08, "loss": 2.1268, "step": 13174 }, { "epoch": 0.9917386476975479, "grad_norm": 8.082771301269531, "learning_rate": 2.4729453907729182e-08, "loss": 2.4059, "step": 13175 }, { "epoch": 0.9918139219782081, "grad_norm": 4.373549938201904, "learning_rate": 2.4347568576144064e-08, "loss": 1.7302, "step": 13176 }, { "epoch": 0.9918891962588683, "grad_norm": 4.33629035949707, "learning_rate": 2.3968654153699866e-08, "loss": 1.9269, "step": 13177 }, { "epoch": 0.9919644705395284, "grad_norm": 5.392022609710693, "learning_rate": 2.3592710662923012e-08, "loss": 1.9855, "step": 13178 }, { "epoch": 0.9920397448201885, "grad_norm": 4.506936073303223, "learning_rate": 2.3219738126162293e-08, "loss": 1.6856, "step": 13179 }, { "epoch": 0.9921150191008488, "grad_norm": 6.104228973388672, "learning_rate": 2.2849736565588865e-08, "loss": 1.5727, "step": 13180 }, { "epoch": 0.9921902933815089, "grad_norm": 4.354866981506348, "learning_rate": 2.2482706003201794e-08, "loss": 1.884, "step": 13181 }, { "epoch": 0.992265567662169, "grad_norm": 4.863189697265625, "learning_rate": 2.211864646081141e-08, "loss": 1.5875, "step": 13182 }, { "epoch": 0.9923408419428292, "grad_norm": 7.900731563568115, "learning_rate": 2.1757557960061513e-08, "loss": 2.2157, "step": 13183 }, { "epoch": 0.9924161162234894, "grad_norm": 5.169208526611328, "learning_rate": 2.139944052242937e-08, "loss": 1.7159, "step": 13184 }, { "epoch": 0.9924913905041495, "grad_norm": 4.612951755523682, "learning_rate": 2.104429416918685e-08, "loss": 2.0171, "step": 13185 }, { "epoch": 0.9925666647848097, "grad_norm": 5.994222164154053, "learning_rate": 2.06921189214615e-08, "loss": 1.592, "step": 13186 }, { "epoch": 0.9926419390654698, "grad_norm": 6.375983715057373, "learning_rate": 2.0342914800181022e-08, "loss": 1.7444, "step": 13187 }, { "epoch": 0.99271721334613, "grad_norm": 4.1232171058654785, "learning_rate": 1.999668182610659e-08, "loss": 1.4631, "step": 13188 }, { "epoch": 0.9927924876267901, "grad_norm": 4.350636959075928, "learning_rate": 1.9653420019821735e-08, "loss": 1.5617, "step": 13189 }, { "epoch": 0.9928677619074503, "grad_norm": 4.18228816986084, "learning_rate": 1.9313129401732355e-08, "loss": 1.7492, "step": 13190 }, { "epoch": 0.9929430361881104, "grad_norm": 6.83117151260376, "learning_rate": 1.897580999206672e-08, "loss": 1.4845, "step": 13191 }, { "epoch": 0.9930183104687705, "grad_norm": 4.776318550109863, "learning_rate": 1.864146181087545e-08, "loss": 1.7254, "step": 13192 }, { "epoch": 0.9930935847494308, "grad_norm": 5.618453025817871, "learning_rate": 1.8310084878037093e-08, "loss": 1.442, "step": 13193 }, { "epoch": 0.9931688590300909, "grad_norm": 5.079207897186279, "learning_rate": 1.7981679213247005e-08, "loss": 1.5268, "step": 13194 }, { "epoch": 0.993244133310751, "grad_norm": 7.378453254699707, "learning_rate": 1.765624483603956e-08, "loss": 1.7862, "step": 13195 }, { "epoch": 0.9933194075914112, "grad_norm": 4.970518112182617, "learning_rate": 1.733378176574929e-08, "loss": 1.8096, "step": 13196 }, { "epoch": 0.9933946818720714, "grad_norm": 5.081238746643066, "learning_rate": 1.70142900215442e-08, "loss": 1.8428, "step": 13197 }, { "epoch": 0.9934699561527315, "grad_norm": 6.832697868347168, "learning_rate": 1.6697769622431303e-08, "loss": 2.2327, "step": 13198 }, { "epoch": 0.9935452304333917, "grad_norm": 4.939356803894043, "learning_rate": 1.6384220587212228e-08, "loss": 1.5723, "step": 13199 }, { "epoch": 0.9936205047140518, "grad_norm": 5.165452480316162, "learning_rate": 1.6073642934533172e-08, "loss": 1.8158, "step": 13200 }, { "epoch": 0.993695778994712, "grad_norm": 5.340014934539795, "learning_rate": 1.5766036682857142e-08, "loss": 1.7935, "step": 13201 }, { "epoch": 0.9937710532753722, "grad_norm": 3.8375563621520996, "learning_rate": 1.5461401850463964e-08, "loss": 2.3243, "step": 13202 }, { "epoch": 0.9938463275560323, "grad_norm": 7.1038103103637695, "learning_rate": 1.5159738455478022e-08, "loss": 1.7332, "step": 13203 }, { "epoch": 0.9939216018366924, "grad_norm": 5.9426422119140625, "learning_rate": 1.4861046515818322e-08, "loss": 1.667, "step": 13204 }, { "epoch": 0.9939968761173527, "grad_norm": 4.481041431427002, "learning_rate": 1.4565326049242877e-08, "loss": 1.8162, "step": 13205 }, { "epoch": 0.9940721503980128, "grad_norm": 6.005204677581787, "learning_rate": 1.4272577073337624e-08, "loss": 2.037, "step": 13206 }, { "epoch": 0.9941474246786729, "grad_norm": 5.24858283996582, "learning_rate": 1.3982799605505303e-08, "loss": 1.6847, "step": 13207 }, { "epoch": 0.994222698959333, "grad_norm": 5.851583957672119, "learning_rate": 1.3695993662965478e-08, "loss": 1.9851, "step": 13208 }, { "epoch": 0.9942979732399932, "grad_norm": 5.044328212738037, "learning_rate": 1.3412159262771174e-08, "loss": 1.8975, "step": 13209 }, { "epoch": 0.9943732475206534, "grad_norm": 5.559646129608154, "learning_rate": 1.3131296421797778e-08, "loss": 1.7824, "step": 13210 }, { "epoch": 0.9944485218013135, "grad_norm": 6.9781904220581055, "learning_rate": 1.2853405156743048e-08, "loss": 1.8259, "step": 13211 }, { "epoch": 0.9945237960819737, "grad_norm": 5.184886455535889, "learning_rate": 1.257848548412155e-08, "loss": 1.7331, "step": 13212 }, { "epoch": 0.9945990703626338, "grad_norm": 4.714271068572998, "learning_rate": 1.2306537420281316e-08, "loss": 1.7014, "step": 13213 }, { "epoch": 0.994674344643294, "grad_norm": 4.957435607910156, "learning_rate": 1.2037560981381646e-08, "loss": 1.821, "step": 13214 }, { "epoch": 0.9947496189239542, "grad_norm": 6.1166090965271, "learning_rate": 1.177155618342085e-08, "loss": 1.7948, "step": 13215 }, { "epoch": 0.9948248932046143, "grad_norm": 5.278020858764648, "learning_rate": 1.1508523042214058e-08, "loss": 1.8824, "step": 13216 }, { "epoch": 0.9949001674852744, "grad_norm": 5.908717632293701, "learning_rate": 1.1248461573382108e-08, "loss": 1.3429, "step": 13217 }, { "epoch": 0.9949754417659347, "grad_norm": 5.376378536224365, "learning_rate": 1.0991371792401506e-08, "loss": 1.9484, "step": 13218 }, { "epoch": 0.9950507160465948, "grad_norm": 4.909424304962158, "learning_rate": 1.0737253714548923e-08, "loss": 1.9071, "step": 13219 }, { "epoch": 0.9951259903272549, "grad_norm": 5.313304901123047, "learning_rate": 1.0486107354928942e-08, "loss": 1.7426, "step": 13220 }, { "epoch": 0.9952012646079151, "grad_norm": 4.2952775955200195, "learning_rate": 1.0237932728479616e-08, "loss": 1.7724, "step": 13221 }, { "epoch": 0.9952765388885753, "grad_norm": 4.796300888061523, "learning_rate": 9.992729849944704e-09, "loss": 2.1162, "step": 13222 }, { "epoch": 0.9953518131692354, "grad_norm": 4.420007228851318, "learning_rate": 9.750498733901436e-09, "loss": 1.6545, "step": 13223 }, { "epoch": 0.9954270874498956, "grad_norm": 6.675930023193359, "learning_rate": 9.511239394754955e-09, "loss": 2.1507, "step": 13224 }, { "epoch": 0.9955023617305557, "grad_norm": 4.241837978363037, "learning_rate": 9.27495184672722e-09, "loss": 1.5117, "step": 13225 }, { "epoch": 0.9955776360112158, "grad_norm": 5.348954677581787, "learning_rate": 9.04163610386255e-09, "loss": 1.4654, "step": 13226 }, { "epoch": 0.995652910291876, "grad_norm": 4.765196323394775, "learning_rate": 8.811292180038733e-09, "loss": 1.742, "step": 13227 }, { "epoch": 0.9957281845725362, "grad_norm": 5.047995567321777, "learning_rate": 8.583920088933717e-09, "loss": 1.7765, "step": 13228 }, { "epoch": 0.9958034588531963, "grad_norm": 4.878895282745361, "learning_rate": 8.35951984408112e-09, "loss": 2.0499, "step": 13229 }, { "epoch": 0.9958787331338564, "grad_norm": 6.066239833831787, "learning_rate": 8.138091458809172e-09, "loss": 2.1483, "step": 13230 }, { "epoch": 0.9959540074145167, "grad_norm": 4.200047492980957, "learning_rate": 7.919634946285115e-09, "loss": 1.3955, "step": 13231 }, { "epoch": 0.9960292816951768, "grad_norm": 5.785281658172607, "learning_rate": 7.704150319493008e-09, "loss": 1.9654, "step": 13232 }, { "epoch": 0.9961045559758369, "grad_norm": 5.729581832885742, "learning_rate": 7.491637591250378e-09, "loss": 1.9358, "step": 13233 }, { "epoch": 0.9961798302564971, "grad_norm": 3.953002691268921, "learning_rate": 7.282096774180458e-09, "loss": 1.8323, "step": 13234 }, { "epoch": 0.9962551045371573, "grad_norm": 4.2614874839782715, "learning_rate": 7.075527880751054e-09, "loss": 1.5823, "step": 13235 }, { "epoch": 0.9963303788178174, "grad_norm": 6.689180374145508, "learning_rate": 6.87193092323013e-09, "loss": 2.2737, "step": 13236 }, { "epoch": 0.9964056530984776, "grad_norm": 4.424046516418457, "learning_rate": 6.671305913724668e-09, "loss": 1.4145, "step": 13237 }, { "epoch": 0.9964809273791377, "grad_norm": 6.797179698944092, "learning_rate": 6.473652864169566e-09, "loss": 1.6863, "step": 13238 }, { "epoch": 0.9965562016597979, "grad_norm": 5.1269073486328125, "learning_rate": 6.278971786305432e-09, "loss": 1.5198, "step": 13239 }, { "epoch": 0.9966314759404581, "grad_norm": 4.8358235359191895, "learning_rate": 6.087262691706341e-09, "loss": 1.8027, "step": 13240 }, { "epoch": 0.9967067502211182, "grad_norm": 7.849664688110352, "learning_rate": 5.898525591774284e-09, "loss": 2.0837, "step": 13241 }, { "epoch": 0.9967820245017783, "grad_norm": 5.3083271980285645, "learning_rate": 5.712760497722514e-09, "loss": 2.1192, "step": 13242 }, { "epoch": 0.9968572987824386, "grad_norm": 3.818995714187622, "learning_rate": 5.529967420597748e-09, "loss": 2.0152, "step": 13243 }, { "epoch": 0.9969325730630987, "grad_norm": 5.439818382263184, "learning_rate": 5.350146371269071e-09, "loss": 1.7853, "step": 13244 }, { "epoch": 0.9970078473437588, "grad_norm": 5.298344135284424, "learning_rate": 5.17329736042238e-09, "loss": 2.2856, "step": 13245 }, { "epoch": 0.9970831216244189, "grad_norm": 5.614660263061523, "learning_rate": 4.999420398571486e-09, "loss": 1.7458, "step": 13246 }, { "epoch": 0.9971583959050792, "grad_norm": 6.490902423858643, "learning_rate": 4.828515496052566e-09, "loss": 1.7767, "step": 13247 }, { "epoch": 0.9972336701857393, "grad_norm": 5.269622325897217, "learning_rate": 4.66058266302416e-09, "loss": 1.6769, "step": 13248 }, { "epoch": 0.9973089444663994, "grad_norm": 6.375268459320068, "learning_rate": 4.495621909478276e-09, "loss": 2.1209, "step": 13249 }, { "epoch": 0.9973842187470596, "grad_norm": 4.250128746032715, "learning_rate": 4.3336332452070805e-09, "loss": 1.8069, "step": 13250 }, { "epoch": 0.9974594930277197, "grad_norm": 5.310784339904785, "learning_rate": 4.174616679852861e-09, "loss": 1.7188, "step": 13251 }, { "epoch": 0.9975347673083799, "grad_norm": 4.131718158721924, "learning_rate": 4.018572222858064e-09, "loss": 1.585, "step": 13252 }, { "epoch": 0.9976100415890401, "grad_norm": 5.407041549682617, "learning_rate": 3.865499883509704e-09, "loss": 1.4805, "step": 13253 }, { "epoch": 0.9976853158697002, "grad_norm": 4.429222583770752, "learning_rate": 3.71539967090051e-09, "loss": 1.8644, "step": 13254 }, { "epoch": 0.9977605901503603, "grad_norm": 4.958990097045898, "learning_rate": 3.5682715939566737e-09, "loss": 1.7753, "step": 13255 }, { "epoch": 0.9978358644310206, "grad_norm": 4.494583606719971, "learning_rate": 3.424115661421201e-09, "loss": 1.56, "step": 13256 }, { "epoch": 0.9979111387116807, "grad_norm": 5.914072036743164, "learning_rate": 3.2829318818650144e-09, "loss": 1.9059, "step": 13257 }, { "epoch": 0.9979864129923408, "grad_norm": 4.745224475860596, "learning_rate": 3.144720263686951e-09, "loss": 1.6141, "step": 13258 }, { "epoch": 0.998061687273001, "grad_norm": 13.653491973876953, "learning_rate": 3.009480815091559e-09, "loss": 1.898, "step": 13259 }, { "epoch": 0.9981369615536612, "grad_norm": 4.860389709472656, "learning_rate": 2.8772135441279546e-09, "loss": 1.4189, "step": 13260 }, { "epoch": 0.9982122358343213, "grad_norm": 4.856529235839844, "learning_rate": 2.7479184586620688e-09, "loss": 1.6266, "step": 13261 }, { "epoch": 0.9982875101149815, "grad_norm": 4.535051345825195, "learning_rate": 2.621595566365542e-09, "loss": 1.6652, "step": 13262 }, { "epoch": 0.9983627843956416, "grad_norm": 4.056579113006592, "learning_rate": 2.498244874760136e-09, "loss": 1.8611, "step": 13263 }, { "epoch": 0.9984380586763018, "grad_norm": 9.294820785522461, "learning_rate": 2.3778663911733222e-09, "loss": 2.0419, "step": 13264 }, { "epoch": 0.998513332956962, "grad_norm": 5.269387245178223, "learning_rate": 2.2604601227660394e-09, "loss": 1.7935, "step": 13265 }, { "epoch": 0.9985886072376221, "grad_norm": 4.996938705444336, "learning_rate": 2.146026076510488e-09, "loss": 1.5363, "step": 13266 }, { "epoch": 0.9986638815182822, "grad_norm": 7.4594526290893555, "learning_rate": 2.034564259217886e-09, "loss": 1.7581, "step": 13267 }, { "epoch": 0.9987391557989423, "grad_norm": 7.098395347595215, "learning_rate": 1.926074677510714e-09, "loss": 1.9823, "step": 13268 }, { "epoch": 0.9988144300796026, "grad_norm": 4.721424102783203, "learning_rate": 1.820557337833817e-09, "loss": 1.4479, "step": 13269 }, { "epoch": 0.9988897043602627, "grad_norm": 4.402496337890625, "learning_rate": 1.7180122464655057e-09, "loss": 1.9812, "step": 13270 }, { "epoch": 0.9989649786409228, "grad_norm": 5.043260097503662, "learning_rate": 1.6184394095009048e-09, "loss": 2.0613, "step": 13271 }, { "epoch": 0.999040252921583, "grad_norm": 4.3816704750061035, "learning_rate": 1.521838832863054e-09, "loss": 1.8314, "step": 13272 }, { "epoch": 0.9991155272022432, "grad_norm": 4.691875457763672, "learning_rate": 1.4282105222862553e-09, "loss": 1.829, "step": 13273 }, { "epoch": 0.9991908014829033, "grad_norm": 4.684067726135254, "learning_rate": 1.3375544833382769e-09, "loss": 2.0828, "step": 13274 }, { "epoch": 0.9992660757635635, "grad_norm": 5.453580856323242, "learning_rate": 1.249870721409252e-09, "loss": 2.0721, "step": 13275 }, { "epoch": 0.9993413500442236, "grad_norm": 3.6280970573425293, "learning_rate": 1.1651592417172285e-09, "loss": 1.7303, "step": 13276 }, { "epoch": 0.9994166243248838, "grad_norm": 5.092569828033447, "learning_rate": 1.0834200492915169e-09, "loss": 1.7451, "step": 13277 }, { "epoch": 0.999491898605544, "grad_norm": 4.0405778884887695, "learning_rate": 1.004653148994894e-09, "loss": 1.7597, "step": 13278 }, { "epoch": 0.9995671728862041, "grad_norm": 4.947397708892822, "learning_rate": 9.288585455069498e-10, "loss": 1.7801, "step": 13279 }, { "epoch": 0.9996424471668642, "grad_norm": 6.2149882316589355, "learning_rate": 8.560362433351898e-10, "loss": 1.7064, "step": 13280 }, { "epoch": 0.9997177214475245, "grad_norm": 4.5937628746032715, "learning_rate": 7.861862468094838e-10, "loss": 2.1554, "step": 13281 }, { "epoch": 0.9997929957281846, "grad_norm": 4.7368388175964355, "learning_rate": 7.193085600820659e-10, "loss": 1.733, "step": 13282 }, { "epoch": 0.9998682700088447, "grad_norm": 5.895082473754883, "learning_rate": 6.554031871275345e-10, "loss": 1.9067, "step": 13283 }, { "epoch": 0.9999435442895049, "grad_norm": 4.6465888023376465, "learning_rate": 5.944701317428525e-10, "loss": 1.9208, "step": 13284 }, { "epoch": 0.9999435442895049, "step": 13284, "total_flos": 8.115413063038927e+18, "train_loss": 2.0213059956806316, "train_runtime": 88296.7738, "train_samples_per_second": 2.407, "train_steps_per_second": 0.15 } ], "logging_steps": 1.0, "max_steps": 13284, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 8.115413063038927e+18, "train_batch_size": 1, "trial_name": null, "trial_params": null }